Spaces:
Runtime error
Runtime error
File size: 7,556 Bytes
292e395 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
#pass="Leswhdc2023$!"
import streamlit as st
import pandas as pd
import plotly.express as px
import cx_Oracle as ora
import pandas as pd
from pandas_profiling import ProfileReport
QueryDatabase=False
if QueryDatabase:
dsn="jdbc:oracle:thin:@//ep15-scan01:1521/cdrpr03_4.uhc.com"
user="UHG_801117753"
passw="MiscPassword2023$!" # Fake Password - don't share this or run without changing to your ID
dsn_tns = ora.makedsn('ep15-scan01', '1521', service_name='cdrpr03_4.uhc.com')
# Create a connection object
conn = ora.connect(user=user, password=passw, dsn=dsn_tns)
# Create a cursor object
c = conn.cursor()
# Execute the SQL query and store the result in a pandas dataframe
query = """
select
count(*) as RecordCount,
--Age, SID, MBR_ID, -- Optional toggle - remove these to collapse across members with a record count.
TOPICID, TOPIC_DESC, TOPIC, PATHWAY, CATEGORY, INTERVENTION_DESC,
SYMPTOM_DESC, KNOWLEDGE_DESC, BEHAVIOR_DESC, STATUS_DESC, INT_CATEGORY_ID, RSALINEOFBUSINESS, TARGET,
CAREDESCRIPTOR, URGENCY, TOPICSOURCE, SIGNSSYMPTOMS, POC_SGN_SYMP_ID, SPOKENLANGUAGE, HEALTHTOPIC,
CATEGORYID, TARGETID, CAREID, CQM, Gender, Race, AgeGroup
from
(
select
aa.SBSCR_ID_TXT SID,
a.MBR_ID,
b.POC_PROB_ID TopicID,
b.POC_PROB_DESC Topic_Desc,
REPLACE(REPLACE(b.POC_PROB_NM,'/',''),' ','') Topic,
a.POC_PROB_SRC_DESC Pathway,
(select POC_INTRVN_CATGY_NM from POC_INTRVN_CATGY pig where pig.POC_INTRVN_CATGY_ID = d.POC_INTRVN_CATGY_ID) as Category,
c.ADD_DESC Intervention_Desc,
e.ADD_DESC Symptom_Desc,
a.KNW_OTCOME_RT_ADD_DESC KNOWLEDGE_DESC,
a.BHV_OTCOME_RT_ADD_DESC BEHAVIOR_DESC,
a.STS_OTCOME_RT_ADD_DESC STATUS_DESC,
d.POC_INTRVN_CATGY_ID Int_Category_ID,
RSA_POP_TYP_ID,
(select ref_desc from ref where ref_nm = 'rsaPopulationType' and ref_cd = RSA_POP_TYP_ID) as RSALineOfBusiness,
(select POC_INTRVN_TGT_NM from POC_INTRVN_TGT pit where pit.POC_INTRVN_TGT_ID = d.POC_INTRVN_TGT_ID) as Target,
(select POC_INTRVN_CARE_DESC from POC_INTRVN_CARE pic where pic.POC_INTRVN_CARE_ID = d.POC_INTRVN_CARE_ID) as CareDescriptor,
Case to_char(a.POC_PROB_URGNCY_MOD_ID) when '1' then 'Actual' when '3' then 'Potential' else 'Other' end as Urgency,
(select ref_desc from ref where ref_nm = 'pocProbSourceType' and ref_cd = a.POC_PROB_SRC_TYP_ID) as TopicSource,
(select POC_SGN_SYMP_NM from POC_SGN_SYMP pss where pss.POC_SGN_SYMP_ID = e.POC_SGN_SYMP_ID) as SignsSymptoms,
e.POC_SGN_SYMP_ID,
CALAP_SPOKEN_LANG_TYP_ID SpokenLanguage,
REPLACE(b.POC_PROB_NM,'/','') HealthTopic,
a.POC_PROB_ID HealthTopicID,
d.POC_INTRVN_CATGY_ID CategoryID,
d.POC_INTRVN_TGT_ID TargetID,
d.POC_INTRVN_CARE_ID CareID,
c.CQM_IND CQM,
aa.GDR_CD Gender,
aa.RACE_CD Race,
(2023 - EXTRACT(year FROM aa.BTH_DT)) Age,
Case --Five age groups: 0-18, 19-44, 45-64, 65-84, and 85 and over
when ((2023 - EXTRACT(year FROM aa.BTH_DT))>=0 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=18) then 'Age0to18'
when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 18 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=44) then 'Age19to44'
when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 44 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=64) then 'Age44to64'
when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 64 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=84) then 'Age64to84'
when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 85) then 'Age85andOver'
else 'Other' end as AgeGroup
from MBR_POC_PROB a -- select * from MBR_POC_PROB where MBR_ID=117179570
join MBR aa on a.MBR_ID = aa.MBR_ID --and a.POC_PROB_URGNCY_MOD_ID = 1 --actual
join STG_HSR.POC_PROB b on a.POC_PROB_ID = b.POC_PROB_ID
join MBR_POC_PROB_INTRVN c on
(a.MBR_POC_PROB_ID = c.MBR_POC_PROB_ID and c.REMV_FROM_PLN_LIST_IND=0)
join POC_INTRVN d on c.POC_INTRVN_ID = d.POC_INTRVN_ID
left outer join MBR_POC_PROB_SGN_SYMP e
on (e.MBR_POC_PROB_ID = c.MBR_POC_PROB_ID)
where a.POC_PROB_URGNCY_MOD_ID = 1 and
-- Optional toggle - 1 versus 120 days.
a.CHG_DTTM > sysdate - 1 -- 1 Day
-- a.CHG_DTTM > sysdate - 1 -- 182 Days = 6 Months
) i
group by
--Age, SID, MBR_ID, -- Optional toggle - remove these to collapse across members with a record count.
TOPICID, TOPIC_DESC, TOPIC, PATHWAY, CATEGORY, INTERVENTION_DESC, SYMPTOM_DESC, KNOWLEDGE_DESC, BEHAVIOR_DESC, STATUS_DESC,INT_CATEGORY_ID,RSALINEOFBUSINESS,
TARGET, CAREDESCRIPTOR, URGENCY, TOPICSOURCE, SIGNSSYMPTOMS, POC_SGN_SYMP_ID, SPOKENLANGUAGE, HEALTHTOPIC,
CATEGORYID, TARGETID, CAREID, CQM, Gender, Race, AgeGroup
-- Optional toggle:
--order by MBR_ID desc
order by TOPICID desc -- orders by Count
"""
df = pd.read_sql(query, con=conn)
# Close the cursor and connection
c.close()
conn.close()
# Show the dataframe in a streamlit grid
st.dataframe(df)
# automatic visualizer
# st.set_page_config(page_title="File Upload and Profiling", layout="wide")
st.title("File Upload and Profiling")
uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
RunProfiler=False
if uploaded_file is not None:
if RunProfiler:
# Load the data using pandas
df = pd.read_csv(uploaded_file)
# Generate the pandas profiling report
profile = ProfileReport(df, explorative=True)
# Display the pandas profiling report using streamlit
st.header("Data Profiling Report")
st.write(profile.to_html(), unsafe_allow_html=True)
# Display word statistics for each categorical string column
cat_cols = df.select_dtypes(include='object').columns
st.header("Word Statistics for Categorical Columns")
for col in cat_cols:
st.subheader(col)
word_count = df[col].str.split().apply(len).value_counts().sort_index()
st.bar_chart(word_count)
# Grouped count by each feature
num_cols = df.select_dtypes(include=['float', 'int']).columns
st.header("Grouped Count by Each Feature")
for col in num_cols:
st.subheader(col)
count_by_feature = df.groupby(col).size().reset_index(name='count')
st.bar_chart(count_by_feature)
# Upload a CSV dataset
uploaded_file = st.file_uploader("Upload your dataset", type=["csv"])
if uploaded_file is not None:
# Load the dataset and display the first 5 rows
df = pd.read_csv(uploaded_file)
st.dataframe(df.head())
# Generate a treemap or sunburst plot based on data types
numerical_cols = df.select_dtypes(include=["float", "int"]).columns
categorical_cols = df.select_dtypes(include=["object"]).columns
fig = px.treemap(df, path=categorical_cols)
st.plotly_chart(fig)
#if len(numerical_cols) >= 2:
# fig = px.scatter_matrix(df, dimensions=numerical_cols)
# st.plotly_chart(fig)
#elif len(categorical_cols) >= 2:
# fig = px.treemap(df, path=categorical_cols)
# st.plotly_chart(fig)
#else:
# fig = px.sunburst(df, path=categorical_cols + numerical_cols)
# st.plotly_chart(fig)
|