File size: 7,556 Bytes
292e395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#pass="Leswhdc2023$!"

import streamlit as st
import pandas as pd
import plotly.express as px
import cx_Oracle as ora
import pandas as pd
from pandas_profiling import ProfileReport


QueryDatabase=False
if QueryDatabase:
    dsn="jdbc:oracle:thin:@//ep15-scan01:1521/cdrpr03_4.uhc.com"
    user="UHG_801117753"
    passw="MiscPassword2023$!" # Fake Password - don't share this or run without changing to your ID
    dsn_tns = ora.makedsn('ep15-scan01', '1521', service_name='cdrpr03_4.uhc.com') 

    # Create a connection object
    conn = ora.connect(user=user, password=passw, dsn=dsn_tns)

    # Create a cursor object
    c = conn.cursor()

    # Execute the SQL query and store the result in a pandas dataframe
    query = """
    select
        count(*) as RecordCount, 
        --Age, SID, MBR_ID, -- Optional toggle - remove these to collapse across members with a record count.
        TOPICID, TOPIC_DESC, TOPIC, PATHWAY, CATEGORY, INTERVENTION_DESC, 
        SYMPTOM_DESC, KNOWLEDGE_DESC, BEHAVIOR_DESC, STATUS_DESC, INT_CATEGORY_ID, RSALINEOFBUSINESS, TARGET, 
        CAREDESCRIPTOR, URGENCY, TOPICSOURCE, SIGNSSYMPTOMS, POC_SGN_SYMP_ID, SPOKENLANGUAGE, HEALTHTOPIC, 
        CATEGORYID, TARGETID, CAREID, CQM, Gender, Race, AgeGroup  
        from 
        (
        select
        aa.SBSCR_ID_TXT SID,
        a.MBR_ID,
        b.POC_PROB_ID TopicID,
        b.POC_PROB_DESC Topic_Desc,
        REPLACE(REPLACE(b.POC_PROB_NM,'/',''),' ','') Topic,
        a.POC_PROB_SRC_DESC Pathway, 
        (select POC_INTRVN_CATGY_NM from POC_INTRVN_CATGY pig where pig.POC_INTRVN_CATGY_ID = d.POC_INTRVN_CATGY_ID) as Category, 
        c.ADD_DESC Intervention_Desc,
        e.ADD_DESC Symptom_Desc,
        a.KNW_OTCOME_RT_ADD_DESC KNOWLEDGE_DESC, 
        a.BHV_OTCOME_RT_ADD_DESC BEHAVIOR_DESC, 
        a.STS_OTCOME_RT_ADD_DESC STATUS_DESC, 
        d.POC_INTRVN_CATGY_ID Int_Category_ID,     
        RSA_POP_TYP_ID,
        (select ref_desc from ref where ref_nm = 'rsaPopulationType' and ref_cd = RSA_POP_TYP_ID) as RSALineOfBusiness,
        (select POC_INTRVN_TGT_NM from POC_INTRVN_TGT pit where pit.POC_INTRVN_TGT_ID = d.POC_INTRVN_TGT_ID) as Target, 
        (select POC_INTRVN_CARE_DESC from POC_INTRVN_CARE pic where pic.POC_INTRVN_CARE_ID = d.POC_INTRVN_CARE_ID) as CareDescriptor,       
        Case to_char(a.POC_PROB_URGNCY_MOD_ID) when '1' then 'Actual' when '3' then 'Potential' else 'Other' end as Urgency, 
        (select ref_desc from ref where ref_nm = 'pocProbSourceType' and ref_cd = a.POC_PROB_SRC_TYP_ID) as TopicSource,
        (select POC_SGN_SYMP_NM from POC_SGN_SYMP pss where pss.POC_SGN_SYMP_ID = e.POC_SGN_SYMP_ID) as SignsSymptoms,
        e.POC_SGN_SYMP_ID,
        CALAP_SPOKEN_LANG_TYP_ID SpokenLanguage,
        REPLACE(b.POC_PROB_NM,'/','') HealthTopic,
        a.POC_PROB_ID HealthTopicID,
        d.POC_INTRVN_CATGY_ID CategoryID,
        d.POC_INTRVN_TGT_ID TargetID,
        d.POC_INTRVN_CARE_ID CareID,
        c.CQM_IND CQM,

        aa.GDR_CD Gender, 
        aa.RACE_CD Race,
        (2023 - EXTRACT(year FROM aa.BTH_DT)) Age,
        Case --Five age groups: 0-18, 19-44, 45-64, 65-84, and 85 and over
                when ((2023 - EXTRACT(year FROM aa.BTH_DT))>=0 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=18) then 'Age0to18' 
                when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 18 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=44) then 'Age19to44' 
                when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 44 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=64) then 'Age44to64' 
                when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 64 and (2023 - EXTRACT(year FROM aa.BTH_DT))<=84) then 'Age64to84' 
                when ((2023 - EXTRACT(year FROM aa.BTH_DT))> 85) then 'Age85andOver' 
                else 'Other' end as AgeGroup
        
    from MBR_POC_PROB a   -- select * from MBR_POC_PROB where MBR_ID=117179570
        join MBR aa on a.MBR_ID = aa.MBR_ID  --and a.POC_PROB_URGNCY_MOD_ID = 1 --actual
        join STG_HSR.POC_PROB b on a.POC_PROB_ID = b.POC_PROB_ID 
        join MBR_POC_PROB_INTRVN c on 
            (a.MBR_POC_PROB_ID = c.MBR_POC_PROB_ID and c.REMV_FROM_PLN_LIST_IND=0)
        join POC_INTRVN d on c.POC_INTRVN_ID = d.POC_INTRVN_ID 
        left outer join MBR_POC_PROB_SGN_SYMP e 
            on (e.MBR_POC_PROB_ID = c.MBR_POC_PROB_ID) 
        where a.POC_PROB_URGNCY_MOD_ID = 1 and 

            -- Optional toggle - 1 versus 120 days.
            a.CHG_DTTM > sysdate - 1    -- 1 Day
            -- a.CHG_DTTM > sysdate - 1    -- 182 Days = 6 Months
            
        ) i
        group by 
        --Age, SID, MBR_ID, -- Optional toggle - remove these to collapse across members with a record count.
        TOPICID, TOPIC_DESC, TOPIC, PATHWAY, CATEGORY, INTERVENTION_DESC, SYMPTOM_DESC, KNOWLEDGE_DESC, BEHAVIOR_DESC, STATUS_DESC,INT_CATEGORY_ID,RSALINEOFBUSINESS, 
        TARGET, CAREDESCRIPTOR, URGENCY, TOPICSOURCE,   SIGNSSYMPTOMS, POC_SGN_SYMP_ID, SPOKENLANGUAGE, HEALTHTOPIC, 
        CATEGORYID, TARGETID, CAREID, CQM, Gender, Race, AgeGroup    
        -- Optional toggle:
        --order by MBR_ID desc
        order by TOPICID desc -- orders by Count
            """
    df = pd.read_sql(query, con=conn)
    # Close the cursor and connection
    c.close()
    conn.close()
    # Show the dataframe in a streamlit grid
    st.dataframe(df)

# automatic visualizer

# st.set_page_config(page_title="File Upload and Profiling", layout="wide")

st.title("File Upload and Profiling")

uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

RunProfiler=False
if uploaded_file is not None:
    if RunProfiler:

        # Load the data using pandas
        df = pd.read_csv(uploaded_file)

        # Generate the pandas profiling report
        profile = ProfileReport(df, explorative=True)

        # Display the pandas profiling report using streamlit
        st.header("Data Profiling Report")
        st.write(profile.to_html(), unsafe_allow_html=True)

        # Display word statistics for each categorical string column
        cat_cols = df.select_dtypes(include='object').columns
        st.header("Word Statistics for Categorical Columns")
        for col in cat_cols:
            st.subheader(col)
            word_count = df[col].str.split().apply(len).value_counts().sort_index()
            st.bar_chart(word_count)

    # Grouped count by each feature
    num_cols = df.select_dtypes(include=['float', 'int']).columns
    st.header("Grouped Count by Each Feature")
    for col in num_cols:
        st.subheader(col)
        count_by_feature = df.groupby(col).size().reset_index(name='count')
        st.bar_chart(count_by_feature)


# Upload a CSV dataset
uploaded_file = st.file_uploader("Upload your dataset", type=["csv"])
if uploaded_file is not None:
    # Load the dataset and display the first 5 rows
    df = pd.read_csv(uploaded_file)
    st.dataframe(df.head())

    # Generate a treemap or sunburst plot based on data types
    numerical_cols = df.select_dtypes(include=["float", "int"]).columns
    categorical_cols = df.select_dtypes(include=["object"]).columns

    fig = px.treemap(df, path=categorical_cols)
    st.plotly_chart(fig)
        
    #if len(numerical_cols) >= 2:
    #    fig = px.scatter_matrix(df, dimensions=numerical_cols)
    #    st.plotly_chart(fig)
    #elif len(categorical_cols) >= 2:
    #    fig = px.treemap(df, path=categorical_cols)
    #    st.plotly_chart(fig)
    #else:
    #    fig = px.sunburst(df, path=categorical_cols + numerical_cols)
    #    st.plotly_chart(fig)