RT-MLE / app.py
awacke1's picture
Update app.py
fefca88
raw
history blame
4 kB
import streamlit as st
import json
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit.components.v1 as components
# Module-level default for the search results: a frame with no rows and no
# columns, used until a search produces something.
filtered_data = pd.DataFrame(data=None)

# Placeholder for a selected row index; starts out unset (None).
# NOTE(review): nothing in the visible code reads this name - confirm it is still needed.
selected_row_index = None
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame.

    Every non-blank line of the file is parsed with ``json.loads`` and
    becomes one row of the result, in file order.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` built from the parsed records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # encoding, which differs between operating systems.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank lines (for example a trailing newline at the end of the file)
        # carry no record and would make json.loads raise, so skip them.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
def filter_by_keyword(df, keyword):
    """Return the rows of ``df`` in which at least one cell contains ``keyword``.

    Every cell is converted to text and searched for ``keyword`` as a literal,
    case-sensitive substring. The keyword is deliberately not interpreted as a
    regular expression, so input such as ``"("`` or ``"C++"`` is searched for
    verbatim instead of raising a pattern error.

    Args:
        df: DataFrame to search.
        keyword: Text to look for.

    Returns:
        A DataFrame holding the matching rows of ``df`` with their original
        index labels.
    """
    if df.empty:
        # Nothing to search; hand back an independent empty frame.
        return df.copy()
    as_text = df.astype(str)
    # Search column by column (vectorised string matching) and then collapse
    # the per-cell hits into one flag per row. na=False makes a missing cell
    # count as "no match" instead of propagating a missing value into the mask.
    hits = as_text.apply(
        lambda column: column.str.contains(keyword, regex=False, na=False)
    )
    return df[hits.any(axis=1)]
def generate_html(question_text, answer_text, idx):
    """Build an HTML page with a button that reads one question aloud.

    The page shows ``question_text`` in a paragraph and wires a button to a
    JavaScript function that passes the paragraph's text to the browser's
    ``SpeechSynthesisUtterance`` / ``speechSynthesis.speak`` API.

    Args:
        question_text: Text to display and to read aloud.
        answer_text: Accepted for call compatibility; not used in the markup.
        idx: Row label. Shown in the heading and used to give the paragraph
            id and the JavaScript function a per-row name, so several
            generated pages can be concatenated without name clashes.

    Returns:
        The HTML document as a string.
    """
    # Imported locally so this function does not depend on the file's import block.
    import html
    import re

    # The row label becomes part of a JavaScript function name and an element
    # id, so reduce it to identifier-safe characters.
    dom_suffix = re.sub(r'\W', '_', str(idx))
    # Escape the data so markup characters in it are displayed (and spoken) as
    # text instead of being parsed as HTML.
    safe_question = html.escape(str(question_text))
    safe_label = html.escape(str(idx))
    return f'''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud{dom_suffix}() {{
const text = document.getElementById("questionArea{dom_suffix}").innerText;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}}
</script>
</head>
<body>
<h1>🔊 Read It Aloud - Row {safe_label}</h1>
<p id="questionArea{dom_suffix}">{safe_question}</p>
<button onclick="readAloud{dom_suffix}()">🔊 Read Question Aloud</button>
</body>
</html>
'''
# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

# Dropdown for file selection
file_option = st.selectbox("Select file:", ["small_file.jsonl", "large_file.jsonl"])
st.write(f"You selected: {file_option}")

# Map each menu entry to the data file it stands for: "small" is the 2.08 MB
# file and "large" the 16.2 MB one. Only the selected file is loaded, because
# Streamlit re-executes this script on every widget interaction.
DATA_FILES = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}
data = load_jsonl(DATA_FILES[file_option])

# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Button to trigger search.
# A button reports True only during the rerun caused by its own click, so the
# result is kept in session state; otherwise it would be gone by the time
# "Read All Rows" is clicked.
if st.button("Search"):
    st.session_state["search_result"] = {
        "file": file_option,
        "keyword": search_keyword,
        "rows": filter_by_keyword(data, search_keyword),
    }

# Show filtered data grid, but only if the stored result belongs to the file
# that is currently selected.
search_result = st.session_state.get("search_result")
if search_result is not None and search_result["file"] == file_option:
    filtered_data = search_result["rows"]
    shown_keyword = search_result["keyword"]
    st.write(f"Filtered Dataset by '{shown_keyword}'")
    selected_data = st.dataframe(filtered_data)
else:
    filtered_data = pd.DataFrame()

# Button to read all filtered rows
if st.button("Read All Rows"):
    if filtered_data.empty:
        st.warning("No rows to read.")
    else:
        # One read-aloud page per row, concatenated into a single component.
        html_blocks = [
            generate_html(row.get("question", "No question field"), "", idx)
            for idx, row in filtered_data.iterrows()
        ]
        # scrolling=True keeps rows beyond the fixed height reachable.
        components.html(''.join(html_blocks), width=1280, height=1024, scrolling=True)
# Plotly and Seaborn charts for EDA.
# Every chart plots the first column of `data` on x and, where a y axis is
# used, the second column on y.
if st.button("Generate Charts"):
    if len(data.columns) < 2:
        # The charts below index columns 0 and 1; bail out instead of raising.
        st.warning("At least two columns are needed to generate charts.")
    else:
        x_col, y_col = data.columns[0], data.columns[1]
        # Correlation, pair and regression plots only make sense for numbers.
        numeric_data = data.select_dtypes(include="number")

        st.subheader("Plotly Charts 📈")
        # 1. Scatter, 2. Line, 3. Bar, 4. Histogram, 5. Box - in that order.
        plotly_specs = [
            (px.scatter, {"x": x_col, "y": y_col}),
            (px.line, {"x": x_col, "y": y_col}),
            (px.bar, {"x": x_col, "y": y_col}),
            (px.histogram, {"x": x_col}),
            (px.box, {"x": x_col, "y": y_col}),
        ]
        for make_chart, axes in plotly_specs:
            st.plotly_chart(make_chart(data, **axes))

        st.subheader("Seaborn Charts 📊")
        # 6. Violin Plot, 7. Swarm Plot
        for draw in (sns.violinplot, sns.swarmplot):
            fig, ax = plt.subplots()
            # Draw on the axes created above rather than on whatever
            # matplotlib currently considers the active axes.
            draw(x=x_col, y=y_col, data=data, ax=ax)
            st.pyplot(fig)
            # Release the figure; the script reruns often and open figures accumulate.
            plt.close(fig)

        if numeric_data.empty:
            st.info("No numeric columns available for the pair plot and heatmap.")
        else:
            # 8. Pair Plot
            pair_grid = sns.pairplot(numeric_data)
            # pairplot returns a grid object; hand its underlying figure to Streamlit.
            st.pyplot(pair_grid.figure)
            plt.close(pair_grid.figure)

            # 9. Heatmap
            fig, ax = plt.subplots()
            sns.heatmap(numeric_data.corr(), annot=True, ax=ax)
            st.pyplot(fig)
            plt.close(fig)

        # 10. Regplot (Regression Plot)
        both_numeric = (
            pd.api.types.is_numeric_dtype(data[x_col])
            and pd.api.types.is_numeric_dtype(data[y_col])
        )
        if both_numeric:
            fig, ax = plt.subplots()
            sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("The regression plot needs the first two columns to be numeric.")