Spaces:

awacke1
/

RT-MLE

Sleeping

App Files Files Community

RT-MLE / app.py

awacke1

Update app.py

fefca88 over 1 year ago

raw

history blame

4 kB

	import streamlit as st
	import json
	import pandas as pd
	import plotly.express as px
	import seaborn as sns
	import matplotlib.pyplot as plt
	import streamlit.components.v1 as components

	# Module-level state shared by the script below.
	# Start with an empty result table so later emptiness checks are always valid.
	filtered_data = pd.DataFrame()

	# Index of the currently selected row; None while nothing is selected.
	selected_row_index = None


	def load_jsonl(file_path):
	    """Read a JSON Lines file into a DataFrame, one row per non-blank line.

	    Args:
	        file_path: Path of the JSONL file to read.

	    Returns:
	        A ``pandas.DataFrame`` built from the decoded JSON values, in file
	        order. The frame is empty when the file holds no records.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    # Decode explicitly as UTF-8 so the result does not depend on the
	    # platform's default encoding.
	    with open(file_path, "r", encoding="utf-8") as f:
	        # Whitespace-only lines carry no record; skipping them avoids a
	        # JSONDecodeError on files with stray blank lines.
	        records = [json.loads(line) for line in f if line.strip()]
	    return pd.DataFrame(records)

	def filter_by_keyword(df, keyword):
	    """Return the rows of ``df`` in which any cell contains ``keyword``.

	    Every cell is converted to text before matching. The match is a
	    case-sensitive *literal* substring test: the keyword is not interpreted
	    as a regular expression, so input such as ``"("`` or ``"C++"`` cannot
	    raise a pattern error.

	    Args:
	        df: DataFrame to search.
	        keyword: Text to look for; converted with ``str()``. An empty
	            keyword matches every row.

	    Returns:
	        A DataFrame holding the matching rows with their original index
	        labels. An empty input yields an empty copy.
	    """
	    if df.empty:
	        # Nothing to search; hand back a copy so callers never alias the input.
	        return df.copy()

	    needle = str(keyword)
	    # Match column by column (vectorised) rather than building a Series per
	    # row, then keep the rows where at least one column matched.
	    matches = df.astype(str).apply(
	        lambda col: col.str.contains(needle, regex=False, na=False)
	    )
	    return df[matches.any(axis=1)]

	def generate_html(question_text, answer_text, idx):
	    """Build an HTML document with a button that reads a question aloud.

	    The page shows the question in a paragraph and wires a button to the
	    browser's ``speechSynthesis`` API. Element IDs and the JavaScript
	    function name are suffixed with ``idx`` so that several generated
	    documents can be concatenated without clashing.

	    Args:
	        question_text: Text to display and speak; converted with ``str()``
	            and HTML-escaped before embedding.
	        answer_text: Accepted for interface compatibility; not used in the
	            generated markup.
	        idx: Row label used in the heading and to make IDs unique.

	    Returns:
	        The HTML document as a string.
	    """
	    import re
	    from html import escape

	    # The original template interpolated the builtin ``id`` here instead of
	    # ``idx``, producing invalid element IDs and function names. Row labels
	    # may also contain characters that are illegal in a JS identifier, so
	    # anything that is not a word character is replaced by "_".
	    dom_id = re.sub(r"\W", "_", str(idx))
	    # Escape values that are dropped into markup so dataset text cannot
	    # inject HTML or script.
	    safe_question = escape(str(question_text))
	    safe_label = escape(str(idx))
	    return f'''
	<!DOCTYPE html>
	<html>
	<head>
	    <title>Read It Aloud</title>
	    <script type="text/javascript">
	        function readAloud{dom_id}() {{
	            const text = document.getElementById("questionArea{dom_id}").innerText;
	            const speech = new SpeechSynthesisUtterance(text);
	            window.speechSynthesis.speak(speech);
	        }}
	    </script>
	</head>
	<body>
	    <h1>🔊 Read It Aloud - Row {safe_label}</h1>
	    <p id="questionArea{dom_id}">{safe_question}</p>
	    <button onclick="readAloud{dom_id}()">🔊 Read Question Aloud</button>
	</body>
	</html>
	'''


	# Streamlit App
	st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

	# Dropdown for file selection
	file_option = st.selectbox("Select file:", ["small_file.jsonl", "large_file.jsonl"])
	st.write(f"You selected: {file_option}")

	# Map each menu label to the dataset it stands for. The earlier wiring was
	# swapped: the "small" option loaded the 16.2MB file and vice versa.
	dataset_paths = {
	    "small_file.jsonl": "usmle_2.08MB.jsonl",
	    "large_file.jsonl": "usmle_16.2MB.jsonl",
	}

	# Load only the selected file; Streamlit re-executes this script on every
	# widget interaction, so parsing both files each time is wasted work.
	data = load_jsonl(dataset_paths[file_option])

	# Text input for search keyword
	search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

	# Button to trigger search. st.button is True only during the rerun caused by
	# its own click, so the result is kept in session state; otherwise it would be
	# gone by the time "Read All Rows" is pressed.
	if st.button("Search"):
	    st.session_state["search_result"] = {
	        "file": file_option,
	        "keyword": search_keyword,
	        "rows": filter_by_keyword(data, search_keyword),
	    }

	# Reuse the stored result only while it belongs to the currently selected file.
	filtered_data = pd.DataFrame()
	if "search_result" in st.session_state:
	    stored_result = st.session_state["search_result"]
	    if stored_result["file"] == file_option:
	        filtered_data = stored_result["rows"]
	        shown_keyword = stored_result["keyword"]
	        st.write(f"Filtered Dataset by '{shown_keyword}'")
	        st.dataframe(filtered_data)

	# Button to read all filtered rows
	if st.button("Read All Rows"):
	    if not filtered_data.empty:
	        # One read-aloud document per row, concatenated into a single component.
	        all_html = ''.join(
	            generate_html(row.get("question", "No question field"), "", idx)
	            for idx, row in filtered_data.iterrows()
	        )
	        # Scrolling keeps rows beyond the fixed height reachable.
	        components.html(all_html, width=1280, height=1024, scrolling=True)
	    else:
	        st.warning("No rows to read.")

	# Plotly and Seaborn charts for EDA.
	# Every two-variable chart plots the first column of `data` against the second.
	# NOTE(review): several of these plots presumably need numeric columns to
	# render — confirm against the dataset schema.
	if st.button("Generate Charts"):
	    if len(data.columns) < 2:
	        # data.columns[1] would raise IndexError on a narrower frame.
	        st.warning("At least two columns are required to generate charts.")
	    else:
	        x_col, y_col = data.columns[0], data.columns[1]

	        st.subheader("Plotly Charts 📈")

	        # 1-3. Scatter, line and bar plots share the same x/y arguments.
	        for make_chart in (px.scatter, px.line, px.bar):
	            st.plotly_chart(make_chart(data, x=x_col, y=y_col))

	        # 4. Histogram (single variable)
	        st.plotly_chart(px.histogram(data, x=x_col))

	        # 5. Box Plot
	        st.plotly_chart(px.box(data, x=x_col, y=y_col))

	        st.subheader("Seaborn Charts 📊")

	        # 6-7. Violin and swarm plots. The axes are passed explicitly instead
	        # of relying on matplotlib's implicit "current axes", and each figure
	        # is closed after rendering so figures do not pile up across reruns.
	        for draw in (sns.violinplot, sns.swarmplot):
	            fig, ax = plt.subplots()
	            draw(x=x_col, y=y_col, data=data, ax=ax)
	            st.pyplot(fig)
	            plt.close(fig)

	        numeric_data = data.select_dtypes(include="number")
	        if numeric_data.shape[1] == 0:
	            # Neither a pair plot nor a correlation matrix can be built
	            # without numeric columns.
	            st.info("No numeric columns available for the pair plot and heatmap.")
	        else:
	            # 8. Pair Plot
	            grid = sns.pairplot(data)
	            st.pyplot(grid)
	            # Older seaborn releases expose the figure as `.fig` only.
	            pair_figure = grid.figure if hasattr(grid, "figure") else grid.fig
	            plt.close(pair_figure)

	            # 9. Heatmap. Correlate numeric columns only: recent pandas
	            # raises when corr() meets non-numeric data instead of dropping it.
	            fig, ax = plt.subplots()
	            sns.heatmap(numeric_data.corr(), annot=True, ax=ax)
	            st.pyplot(fig)
	            plt.close(fig)

	        # 10. Regplot (Regression Plot)
	        fig, ax = plt.subplots()
	        sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
	        st.pyplot(fig)
	        plt.close(fig)