File size: 3,720 Bytes
4bce033
 
 
 
 
 
af59780
4bce033
 
 
 
 
 
 
 
 
 
 
 
 
af59780
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4bce033
cab1fa3
 
4bce033
 
af59780
4bce033
 
 
 
 
 
 
 
 
 
 
af59780
725e817
 
 
 
 
 
 
af59780
725e817
af59780
 
 
 
 
 
 
4bce033
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af59780
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import streamlit as st
import json
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit.components.v1 as components

# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a pandas DataFrame.

    Every non-blank line of the file is parsed as one JSON document and
    becomes one row of the result, in file order.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` built from the parsed records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # encoding, which differs between operating systems.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank lines (typically a trailing newline at end of file) carry no
        # record and would make json.loads raise, so they are skipped.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword):
    """Return the rows of *df* in which any cell contains *keyword*.

    Each cell is converted to its string form before matching. The keyword is
    interpreted as a regular expression when it is a valid pattern; otherwise
    (for example a lone "(" typed into a search box) it is matched as a
    literal substring instead of raising. Matching is case-sensitive.

    Args:
        df: DataFrame to search.
        keyword: Search text (regular expression or plain substring).

    Returns:
        A DataFrame holding only the matching rows, with their original index.
    """
    import re  # local import: only needed to validate the pattern

    # Decide up front whether the keyword can be used as a regex, so that
    # invalid patterns degrade to a literal search rather than crashing.
    try:
        re.compile(keyword)
        use_regex = True
    except re.error:
        use_regex = False

    def row_matches(row):
        # Stringify the whole row so non-text cells can be searched too.
        return row.astype(str).str.contains(keyword, regex=use_regex).any()

    return df[df.apply(row_matches, axis=1)]

# Function to generate HTML5 code with embedded text
def generate_html(text):
    """Build a standalone HTML page that can read *text* aloud.

    The page contains a textarea pre-filled with *text* and a button whose
    click handler passes the textarea's current value to the browser's
    ``SpeechSynthesisUtterance`` / ``window.speechSynthesis.speak``.

    Args:
        text: Content to place in the textarea. It is converted with ``str``
            and HTML-escaped, so characters such as ``<`` and ``&`` or a
            literal ``</textarea>`` cannot terminate the element or inject
            markup; the browser decodes the entities back to the original
            characters when reading the textarea value.

    Returns:
        The complete HTML document as a string.
    """
    import html  # local import: only needed for escaping

    # quote=False: quotes are harmless inside element content, only
    # '&', '<' and '>' need escaping here.
    safe_text = html.escape(str(text), quote=False)
    return f'''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud() {{
            const text = document.getElementById("textArea").value;
            const speech = new SpeechSynthesisUtterance(text);
            window.speechSynthesis.speak(speech);
        }}
    </script>
</head>
<body>
    <h1>🔊 Read It Aloud</h1>
    <textarea id="textArea" rows="10" cols="80">
{safe_text}
    </textarea>
    <br>
    <button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
'''

# Load the data.
# The variable names follow the sizes embedded in the file names: the 2.08 MB
# file is the small dataset and the 16.2 MB file is the large one.
small_data = load_jsonl("usmle_2.08MB.jsonl")
large_data = load_jsonl("usmle_16.2MB.jsonl")

# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

# Dropdown for file selection
file_option = st.selectbox("Select file:", ["small_file.jsonl", "large_file.jsonl"])
st.write(f"You selected: {file_option}")

# Dataset backing both the search grid and the charts
data = small_data if file_option == "small_file.jsonl" else large_data

# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Button to trigger search.
# Streamlit re-executes this script on every widget interaction and a button
# is True only during the run caused by its own click, so the search result is
# stored in st.session_state to remain available on later runs.
if st.button("Search"):
    st.session_state["search_result"] = {
        "file": file_option,
        "keyword": search_keyword,
        "rows": filter_by_keyword(data, search_keyword),
    }

# Ignore a stored result that was produced from the other file.
search_result = st.session_state.get("search_result")
if search_result is not None and search_result["file"] != file_option:
    search_result = None

# Show filtered data grid and the row picker
filtered_data = None
selected_indices = []
if search_result is not None:
    filtered_data = search_result["rows"]
    searched_keyword = search_result["keyword"]
    st.write(f"Filtered Dataset by '{searched_keyword}'")
    st.dataframe(filtered_data)
    if not filtered_data.empty:
        # Rendered outside the button branch so the selection is kept when the
        # script reruns after "Read Selected Row" is clicked.
        selected_indices = st.multiselect(
            "Select the row you want to read:", filtered_data.index.tolist()
        )

# Button to read selected row aloud
if st.button("Read Selected Row"):
    if filtered_data is None:
        st.warning("Run a search before reading a row aloud.")
    elif not selected_indices:
        st.warning("Select a row to read aloud.")
    else:
        # Only the first selected row is read.
        selected_row_text = filtered_data.loc[selected_indices[0]].to_string()
        documentHTML5 = generate_html(selected_row_text)
        components.html(documentHTML5, width=1280, height=1024)

# Plotly and Seaborn charts for EDA
if st.button("Generate Charts"):
    if data.shape[1] < 2:
        # Every two-variable chart below plots the first column against the second.
        st.warning("At least two columns are required to generate the charts.")
    else:
        x_col, y_col = data.columns[0], data.columns[1]
        # Correlation, pair plots and regression are only defined on numeric columns.
        numeric_data = data.select_dtypes(include="number")

        st.subheader("Plotly Charts 📈")

        # 1. Scatter Plot
        fig = px.scatter(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        # 2. Line Plot
        fig = px.line(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        # 3. Bar Plot
        fig = px.bar(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        # 4. Histogram
        fig = px.histogram(data, x=x_col)
        st.plotly_chart(fig)

        # 5. Box Plot
        fig = px.box(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        st.subheader("Seaborn Charts 📊")

        # 6. Violin Plot
        fig, ax = plt.subplots()
        sns.violinplot(x=x_col, y=y_col, data=data, ax=ax)
        st.pyplot(fig)

        # 7. Swarm Plot
        fig, ax = plt.subplots()
        sns.swarmplot(x=x_col, y=y_col, data=data, ax=ax)
        st.pyplot(fig)

        if numeric_data.empty:
            st.warning("No numeric columns available for the pair plot and heatmap.")
        else:
            # 8. Pair Plot
            pair_grid = sns.pairplot(data)
            st.pyplot(pair_grid)

            # 9. Heatmap
            fig, ax = plt.subplots()
            sns.heatmap(numeric_data.corr(), annot=True, ax=ax)
            st.pyplot(fig)

        # 10. Regplot (Regression Plot)
        if x_col in numeric_data.columns and y_col in numeric_data.columns:
            fig, ax = plt.subplots()
            sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
            st.pyplot(fig)
        else:
            st.warning("The regression plot requires the first two columns to be numeric.")

        # pyplot keeps every figure registered until it is closed; release them
        # so repeated clicks do not accumulate open figures.
        plt.close("all")