Spaces:
Runtime error
Runtime error
Commit
·
292e395
1
Parent(s):
c8aa6a4
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# (credential removed - never commit passwords to source control)
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
import pandas as pd
|
5 |
+
import plotly.express as px
|
6 |
+
import cx_Oracle as ora
|
7 |
+
import pandas as pd
|
8 |
+
from pandas_profiling import ProfileReport
|
9 |
+
|
10 |
+
|
11 |
+
# Toggle for the live Oracle query. When False the whole section below is
# skipped and no database connection is attempted.
QueryDatabase = False
if QueryDatabase:
    # Local import: only this optional section needs the environment lookup.
    import os

    # Credentials are read from the environment so that no secret lives in
    # the source file.
    user = os.environ.get("ORACLE_USER")
    passw = os.environ.get("ORACLE_PASSWORD")

    if not user or not passw:
        st.error(
            "QueryDatabase is enabled but ORACLE_USER / ORACLE_PASSWORD "
            "are not set in the environment."
        )
    else:
        dsn_tns = ora.makedsn('ep15-scan01', '1521', service_name='cdrpr03_4.uhc.com')

        # Aggregated record counts per topic / intervention / demographic
        # combination for rows changed within the last day.
        # NOTE(review): age is computed against the literal year 2023, so it
        # drifts as time passes - confirm whether the current year is wanted.
        # NOTE(review): the labels 'Age44to64' / 'Age64to84' actually cover
        # 45-64 and 65-84; they are left unchanged because consumers may key
        # on the exact strings.
        query = """
        select
            count(*) as RecordCount,
            --Age, SID, MBR_ID, -- Optional toggle - remove these to collapse across members with a record count.
            TOPICID, TOPIC_DESC, TOPIC, PATHWAY, CATEGORY, INTERVENTION_DESC,
            SYMPTOM_DESC, KNOWLEDGE_DESC, BEHAVIOR_DESC, STATUS_DESC, INT_CATEGORY_ID, RSALINEOFBUSINESS, TARGET,
            CAREDESCRIPTOR, URGENCY, TOPICSOURCE, SIGNSSYMPTOMS, POC_SGN_SYMP_ID, SPOKENLANGUAGE, HEALTHTOPIC,
            CATEGORYID, TARGETID, CAREID, CQM, Gender, Race, AgeGroup
        from
        (
            select
                aa.SBSCR_ID_TXT SID,
                a.MBR_ID,
                b.POC_PROB_ID TopicID,
                b.POC_PROB_DESC Topic_Desc,
                REPLACE(REPLACE(b.POC_PROB_NM,'/',''),' ','') Topic,
                a.POC_PROB_SRC_DESC Pathway,
                (select POC_INTRVN_CATGY_NM from POC_INTRVN_CATGY pig where pig.POC_INTRVN_CATGY_ID = d.POC_INTRVN_CATGY_ID) as Category,
                c.ADD_DESC Intervention_Desc,
                e.ADD_DESC Symptom_Desc,
                a.KNW_OTCOME_RT_ADD_DESC KNOWLEDGE_DESC,
                a.BHV_OTCOME_RT_ADD_DESC BEHAVIOR_DESC,
                a.STS_OTCOME_RT_ADD_DESC STATUS_DESC,
                d.POC_INTRVN_CATGY_ID Int_Category_ID,
                RSA_POP_TYP_ID,
                (select ref_desc from ref where ref_nm = 'rsaPopulationType' and ref_cd = RSA_POP_TYP_ID) as RSALineOfBusiness,
                (select POC_INTRVN_TGT_NM from POC_INTRVN_TGT pit where pit.POC_INTRVN_TGT_ID = d.POC_INTRVN_TGT_ID) as Target,
                (select POC_INTRVN_CARE_DESC from POC_INTRVN_CARE pic where pic.POC_INTRVN_CARE_ID = d.POC_INTRVN_CARE_ID) as CareDescriptor,
                Case to_char(a.POC_PROB_URGNCY_MOD_ID) when '1' then 'Actual' when '3' then 'Potential' else 'Other' end as Urgency,
                (select ref_desc from ref where ref_nm = 'pocProbSourceType' and ref_cd = a.POC_PROB_SRC_TYP_ID) as TopicSource,
                (select POC_SGN_SYMP_NM from POC_SGN_SYMP pss where pss.POC_SGN_SYMP_ID = e.POC_SGN_SYMP_ID) as SignsSymptoms,
                e.POC_SGN_SYMP_ID,
                CALAP_SPOKEN_LANG_TYP_ID SpokenLanguage,
                REPLACE(b.POC_PROB_NM,'/','') HealthTopic,
                a.POC_PROB_ID HealthTopicID,
                d.POC_INTRVN_CATGY_ID CategoryID,
                d.POC_INTRVN_TGT_ID TargetID,
                d.POC_INTRVN_CARE_ID CareID,
                c.CQM_IND CQM,

                aa.GDR_CD Gender,
                aa.RACE_CD Race,
                (2023 - EXTRACT(year FROM aa.BTH_DT)) Age,
                Case --Five age groups: 0-18, 19-44, 45-64, 65-84, and 85 and over
                    when ((2023 - EXTRACT(year FROM aa.BTH_DT))>=0 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=18) then 'Age0to18'
                    when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 18 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=44) then 'Age19to44'
                    when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 44 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=64) then 'Age44to64'
                    when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 64 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=84) then 'Age64to84'
                    when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 84) then 'Age85andOver'
                    else 'Other' end as AgeGroup

            from MBR_POC_PROB a -- select * from MBR_POC_PROB where MBR_ID=117179570
            join MBR aa on a.MBR_ID = aa.MBR_ID --and a.POC_PROB_URGNCY_MOD_ID = 1 --actual
            join STG_HSR.POC_PROB b on a.POC_PROB_ID = b.POC_PROB_ID
            join MBR_POC_PROB_INTRVN c on
                (a.MBR_POC_PROB_ID = c.MBR_POC_PROB_ID and c.REMV_FROM_PLN_LIST_IND=0)
            join POC_INTRVN d on c.POC_INTRVN_ID = d.POC_INTRVN_ID
            left outer join MBR_POC_PROB_SGN_SYMP e
                on (e.MBR_POC_PROB_ID = c.MBR_POC_PROB_ID)
            where a.POC_PROB_URGNCY_MOD_ID = 1 and

            -- Optional toggle - 1 day versus 182 days.
            a.CHG_DTTM > sysdate - 1 -- 1 Day
            -- a.CHG_DTTM > sysdate - 182 -- 182 Days = 6 Months

        ) i
        group by
            --Age, SID, MBR_ID, -- Optional toggle - remove these to collapse across members with a record count.
            TOPICID, TOPIC_DESC, TOPIC, PATHWAY, CATEGORY, INTERVENTION_DESC, SYMPTOM_DESC, KNOWLEDGE_DESC, BEHAVIOR_DESC, STATUS_DESC,INT_CATEGORY_ID,RSALINEOFBUSINESS,
            TARGET, CAREDESCRIPTOR, URGENCY, TOPICSOURCE, SIGNSSYMPTOMS, POC_SGN_SYMP_ID, SPOKENLANGUAGE, HEALTHTOPIC,
            CATEGORYID, TARGETID, CAREID, CQM, Gender, Race, AgeGroup
        -- Optional toggle:
        --order by MBR_ID desc
        order by TOPICID desc -- orders by topic id, highest first
        """

        # pandas runs the query through the connection directly, so no
        # separate cursor is needed. The finally clause guarantees the
        # connection is released even when the query raises.
        conn = ora.connect(user=user, password=passw, dsn=dsn_tns)
        try:
            df = pd.read_sql(query, con=conn)
        finally:
            conn.close()

        # Show the dataframe in a streamlit grid. This stays inside the
        # toggle because df only exists when the query actually ran.
        st.dataframe(df)
|
106 |
+
|
107 |
+
# automatic visualizer
|
108 |
+
|
109 |
+
# st.set_page_config(page_title="File Upload and Profiling", layout="wide")
|
110 |
+
|
111 |
+
st.title("File Upload and Profiling")

uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

# Toggle for the profiling section. Nothing below runs unless a file has been
# uploaded AND this flag is True; the CSV is only loaded under the flag, so
# every step that uses df has to live under it as well.
RunProfiler = False
if uploaded_file is not None:
    if RunProfiler:

        # Load the data using pandas
        df = pd.read_csv(uploaded_file)

        # Generate the pandas profiling report
        profile = ProfileReport(df, explorative=True)

        # Display the pandas profiling report using streamlit
        st.header("Data Profiling Report")
        st.write(profile.to_html(), unsafe_allow_html=True)

        # Display word statistics for each categorical string column
        cat_cols = df.select_dtypes(include='object').columns
        st.header("Word Statistics for Categorical Columns")
        for col in cat_cols:
            st.subheader(col)
            # .str.len() returns NaN for missing cells, where apply(len)
            # would raise TypeError; those cells are dropped before counting.
            words_per_cell = df[col].str.split().str.len().dropna().astype(int)
            word_count = words_per_cell.value_counts().sort_index()
            st.bar_chart(word_count)

        # Grouped count by each feature
        num_cols = df.select_dtypes(include=['float', 'int']).columns
        st.header("Grouped Count by Each Feature")
        for col in num_cols:
            st.subheader(col)
            # Keep the feature value as the index so the chart shows one bar
            # per value, rather than plotting the values as a second series.
            count_by_feature = df.groupby(col).size().rename('count')
            st.bar_chart(count_by_feature)
|
144 |
+
|
145 |
+
|
146 |
+
# Upload a CSV dataset
|
147 |
+
uploaded_file = st.file_uploader("Upload your dataset", type=["csv"])
if uploaded_file is not None:
    # Load the dataset and display the first 5 rows
    df = pd.read_csv(uploaded_file)
    st.dataframe(df.head())

    # Split columns by data type. numerical_cols is not used by the treemap;
    # it is kept for the commented-out alternative plots that follow this
    # section in the file.
    numerical_cols = df.select_dtypes(include=["float", "int"]).columns
    categorical_cols = df.select_dtypes(include=["object"]).columns

    if len(categorical_cols) == 0:
        # A treemap needs at least one column to build its hierarchy from.
        st.warning("No text columns found in the dataset; nothing to plot as a treemap.")
    else:
        path_cols = list(categorical_cols)
        # Plotly rejects hierarchies where a parent level is missing but a
        # child is present, so missing cells get an explicit placeholder.
        treemap_df = df[path_cols].fillna("(missing)")
        fig = px.treemap(treemap_df, path=path_cols)
        st.plotly_chart(fig)
|
159 |
+
|
160 |
+
#if len(numerical_cols) >= 2:
|
161 |
+
# fig = px.scatter_matrix(df, dimensions=numerical_cols)
|
162 |
+
# st.plotly_chart(fig)
|
163 |
+
#elif len(categorical_cols) >= 2:
|
164 |
+
# fig = px.treemap(df, path=categorical_cols)
|
165 |
+
# st.plotly_chart(fig)
|
166 |
+
#else:
|
167 |
+
# fig = px.sunburst(df, path=categorical_cols + numerical_cols)
|
168 |
+
# st.plotly_chart(fig)
|