# Scraped page banner (not part of the program): "Spaces: Runtime error"
import gradio as gr | |
import pandas as pd | |
import json | |
import os | |
import re | |
from PyPDF2 import PdfReader | |
from collections import defaultdict | |
# ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ========== | |
# First "Grade: 10" / "Year Junior" style header anywhere in the text; used as
# the default grade level for courses that appear before any per-section header.
_GRADE_HEADER_PATTERN = re.compile(
    r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)",
    re.IGNORECASE,
)

# One course entry per line. Whitespace inside the course name and between the
# course and its grade is restricted to spaces/tabs so a match never runs on
# into the following line.
_COURSE_LINE_PATTERN = re.compile(
    r"""
    ^
    # Optional section header directly before the course ('Grade 10', 'Year: 11').
    (?: (Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)\s*[\n-]* )?
    (
        [A-Z]{2,}[ \t]?\d{3}                  # course code, e.g. 'MATH 101'
        |
        [A-Z][a-z]+(?:[ \t][A-Z][a-z]+)*      # Title Case name, e.g. 'World History'
    )
    [ \t]*
    # Optional grade: a letter grade that is not the first letter of a word,
    # or a percentage such as '92%'.
    (?: [:\-]?[ \t]* ( [A-F][+-]?(?![A-Za-z]) | \d{2,3}% ) )?
    """,
    re.VERBOSE | re.MULTILINE,
)


def extract_courses_with_grade_levels(text):
    """Group the courses found in transcript text by grade level.

    Each line that starts with a course code (``MATH 101``) or a Title Case
    course name (``World History``) is recorded as a course, optionally with
    the letter or percentage grade that follows it on the same line. A
    ``Grade``/``Year`` header immediately before a course switches the grade
    level used for that course and the ones after it.

    Args:
        text: Plain text extracted from a transcript. ``None`` or an empty
            string yields an empty result.

    Returns:
        A dict mapping grade level (``"10"``, ``"Junior"``, or ``"Unknown"``
        when no header was found) to a list of ``{"course": ...}`` dicts, each
        with an additional ``"grade"`` key when a grade was found.
    """
    if not text:
        return {}

    header = _GRADE_HEADER_PATTERN.search(text)
    current_grade = header.group(2) if header else "Unknown"

    courses_by_grade = defaultdict(list)
    for match in _COURSE_LINE_PATTERN.finditer(text):
        grade_keyword, grade_level, course, grade = match.groups()
        if grade_keyword:
            # A section header preceded this course: switch grade level.
            current_grade = grade_level
        course_info = {"course": course.strip()}
        if grade:
            course_info["grade"] = grade.strip()
        courses_by_grade[current_grade].append(course_info)
    return dict(courses_by_grade)
# A single pattern covers every supported GPA label form, so a value is
# classified by the label actually attached to it ('Unweighted' contains the
# substring 'weighted', which is why separate substring checks misfire).
_GPA_PATTERN = re.compile(
    r"""
    (?: \b ((?:un)?weighted) \s+ )?      # optional leading label: 'Weighted GPA'
    GPA
    (?: \s* \( ((?:un)?weighted) \) )?   # optional trailing label: 'GPA (Weighted)'
    [\s:]*
    (\d\.\d{1,2})                        # the GPA value, e.g. 3.85
    """,
    re.IGNORECASE | re.VERBOSE,
)

_TRANSCRIPT_GRADE_PATTERN = re.compile(
    r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)',
    re.IGNORECASE,
)


def _to_native(value):
    """Convert a NumPy scalar to the equivalent built-in Python value."""
    converter = getattr(value, "item", None)
    return converter() if callable(converter) else value


def _extract_gpa(text):
    """Return ``{'weighted': ..., 'unweighted': ...}`` GPA strings found in text."""
    gpa_data = {'weighted': "N/A", 'unweighted': "N/A"}
    unlabelled = None
    for match in _GPA_PATTERN.finditer(text):
        prefix, suffix, value = match.groups()
        label = (prefix or suffix or "").lower()
        if label == "unweighted":
            gpa_data['unweighted'] = value
        elif label == "weighted":
            gpa_data['weighted'] = value
        elif unlabelled is None:
            # Remember only the first GPA that carries no label.
            unlabelled = value
    if unlabelled is not None:
        # An unlabelled GPA only fills whichever value is still missing.
        for key in ('unweighted', 'weighted'):
            if gpa_data[key] == "N/A":
                gpa_data[key] = unlabelled
    return gpa_data


def _parse_pdf_transcript(file):
    """Extract grade level, GPA and courses from a PDF transcript."""
    reader = PdfReader(file)
    page_texts = (page.extract_text() for page in reader.pages)
    # extract_text() may return nothing for a page; skip those pages.
    text = "".join(page_text + '\n' for page_text in page_texts if page_text)

    grade_match = _TRANSCRIPT_GRADE_PATTERN.search(text)
    grade_level = grade_match.group(2) if grade_match else "Unknown"
    gpa_data = _extract_gpa(text)
    courses_by_grade = extract_courses_with_grade_levels(text)

    parts = [f"Grade Level: {grade_level}\n\n"]
    if gpa_data['weighted'] != "N/A" or gpa_data['unweighted'] != "N/A":
        parts.append("GPA Information:\n")
        if gpa_data['unweighted'] != "N/A":
            parts.append(f"- Unweighted GPA: {gpa_data['unweighted']}\n")
        if gpa_data['weighted'] != "N/A":
            parts.append(f"- Weighted GPA: {gpa_data['weighted']}\n")
    else:
        parts.append("No GPA information found\n")
    parts.append("\nCourses by Grade Level:\n")
    for level, courses in courses_by_grade.items():
        parts.append(f"\nGrade {level}:\n")
        for course in courses:
            line = f"- {course['course']}"
            if 'grade' in course:
                line += f" (Grade: {course['grade']})"
            parts.append(line + "\n")

    return "".join(parts), {
        "gpa": gpa_data,
        "grade_level": grade_level,
        "courses": courses_by_grade
    }


def _first_column(df, candidates):
    """Return the first name in ``candidates`` that is a column of ``df``, else None."""
    return next((col for col in candidates if col in df.columns), None)


def _summarize_table(df):
    """Build the summary text and data record for a CSV/XLSX transcript table."""
    gpa = "N/A"
    gpa_col = _first_column(df, ['GPA', 'Grade Point Average', 'Cumulative GPA'])
    if gpa_col is not None and not df.empty:
        candidate = _to_native(df[gpa_col].iloc[0])
        is_number = isinstance(candidate, (int, float)) and not isinstance(candidate, bool)
        if is_number and not pd.isna(candidate):
            gpa = candidate

    grade_level = "N/A"
    grade_col = _first_column(df, ['Grade Level', 'Grade', 'Class', 'Year'])
    if grade_col is not None and not df.empty:
        candidate = _to_native(df[grade_col].iloc[0])
        if not pd.isna(candidate):
            grade_level = candidate

    course_col = _first_column(df, ['Course', 'Subject', 'Course Name', 'Class'])
    courses = df[course_col].tolist() if course_col is not None else []

    output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
    output_text += "\n".join(f"- {course}" for course in courses)
    return output_text, {
        "gpa": {"unweighted": gpa, "weighted": "N/A"},
        "grade_level": grade_level,
        "courses": courses
    }


def parse_transcript(file):
    """Parse an uploaded transcript into a text summary and a data record.

    Args:
        file: The uploaded transcript, either a path (``str`` or
            ``os.PathLike``) or a file-like object with a ``name`` attribute.
            ``None`` (no upload, or the upload was cleared) is accepted.
            The format is chosen from the file extension, case-insensitively:
            ``.csv``, ``.xlsx`` or ``.pdf``.

    Returns:
        A ``(summary_text, data)`` tuple. ``data`` is a dict with keys
        ``"gpa"`` (a dict with ``"weighted"`` and ``"unweighted"``),
        ``"grade_level"`` and ``"courses"``. For PDFs ``"courses"`` is a dict
        of course lists keyed by grade level; for CSV/XLSX it is a flat list.
        ``data`` is ``None`` when there is no file or the format is not
        supported. All values in ``data`` are built-in Python types.
    """
    if file is None:
        return "No file uploaded", None

    if isinstance(file, (str, os.PathLike)):
        filename = os.fspath(file)
    else:
        filename = getattr(file, "name", "") or ""
    extension = os.path.splitext(str(filename))[1].lower()

    if extension == '.pdf':
        return _parse_pdf_transcript(file)
    if extension == '.csv':
        df = pd.read_csv(file)
    elif extension == '.xlsx':
        df = pd.read_excel(file)
    else:
        return "Unsupported file format", None
    return _summarize_table(df)
# ========== LEARNING STYLE QUIZ FUNCTION ========== | |
def learning_style_quiz(*answers):
    """Score the learning-style quiz and name the dominant style(s).

    Args:
        *answers: The statement chosen for each question. Entries that match
            none of the three known statements (including ``None``) are
            ignored.

    Returns:
        The style with the most matching answers; tied styles are joined
        with ", " in the order Visual, Auditory, Reading/Writing.
    """
    statement_for_style = {
        "Visual": "I remember something better when I see it written down.",
        "Auditory": "I remember best by listening to a lecture or a recording.",
        "Reading/Writing": "I remember best by reading information on my own.",
    }
    tallies = {}
    for style, statement in statement_for_style.items():
        tallies[style] = sum(1 for answer in answers if answer == statement)

    best_score = max(tallies.values())
    winners = []
    for style, score in tallies.items():
        if score == best_score:
            winners.append(style)
    return ", ".join(winners)
# ========== SAVE STUDENT PROFILE FUNCTION ========== | |
def _profile_filename(name):
    """Build a safe ``<name>_profile.json`` file name from a user-entered name."""
    # Collapse everything except word characters and hyphens into underscores,
    # so path separators and dots in the name cannot escape the profile folder.
    stem = re.sub(r"[^\w-]+", "_", str(name or "").strip()).strip("_")
    return f"{stem or 'unnamed'}_profile.json"


def _json_default(value):
    """Serialize NumPy-style scalars via ``.item()``; reject anything else."""
    converter = getattr(value, "item", None)
    if callable(converter):
        return converter()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_profile(name, age, interests, transcript, learning_style, favorites, blog):
    """Write the student's profile to disk and return a Markdown summary.

    The profile is saved as JSON under ``student_profiles/``, in a file named
    after the student. The name is sanitized before it is used as a file
    name; an empty or missing name is saved as ``unnamed_profile.json``.

    Args:
        name: Student name as entered by the user.
        age: Student age.
        interests: Free-text interests.
        transcript: Parsed transcript data, or ``None`` if none was uploaded.
        learning_style: Result text of the learning-style quiz.
        favorites: Dict with the keys ``movie``, ``show``, ``book`` and
            ``character``, plus a matching ``*_reason`` key for each.
        blog: Optional blog text; empty or ``None`` shows a placeholder.

    Returns:
        A Markdown string summarizing the saved profile.

    Raises:
        KeyError: If ``favorites`` lacks one of the expected keys.
    """
    data = {
        "name": name,
        "age": age,
        "interests": interests,
        "transcript": transcript,
        "learning_style": learning_style,
        "favorites": favorites,
        "blog": blog
    }
    os.makedirs("student_profiles", exist_ok=True)
    json_path = os.path.join("student_profiles", _profile_filename(name))
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, default=_json_default)

    blog_section = blog if blog else "_No blog provided_"
    markdown_summary = f"""### Student Profile: {name}
**Age:** {age}
**Interests:** {interests}
**Learning Style:** {learning_style}
#### Transcript:
{transcript_display(transcript)}
#### Favorites:
- Movie: {favorites['movie']} ({favorites['movie_reason']})
- Show: {favorites['show']} ({favorites['show_reason']})
- Book: {favorites['book']} ({favorites['book_reason']})
- Character: {favorites['character']} ({favorites['character_reason']})
#### Blog:
{blog_section}
"""
    return markdown_summary
def _format_courses_by_grade(courses_by_grade):
    """Render a ``{grade_level: [course, ...]}`` mapping as Markdown text."""
    lines = []
    for grade_level, courses in courses_by_grade.items():
        lines.append(f"\n**Grade {grade_level}**")
        for course in courses:
            if isinstance(course, dict):
                entry = f"- {course['course']}"
                if 'grade' in course:
                    entry += f" (Grade: {course['grade']})"
            else:
                entry = f"- {course}"
            lines.append(entry)
    return "\n".join(lines) + "\n"


def _format_gpa(gpa):
    """Render a GPA value or a ``{'weighted': ..., 'unweighted': ...}`` dict."""
    if not isinstance(gpa, dict):
        return str(gpa)
    known = [f"{value} ({label})" for label, value in gpa.items() if value != "N/A"]
    return ", ".join(known) or "N/A"


def transcript_display(transcript_dict):
    """Render parsed transcript data as Markdown text.

    Two input shapes are accepted:

    * a mapping of grade level to course list, where every value is a list;
    * a full transcript record with the keys ``"courses"``, ``"grade_level"``
      and ``"gpa"``, where ``"courses"`` is either a flat list of course
      names or a mapping of grade level to course list.

    Args:
        transcript_dict: Transcript data in one of the shapes above, or a
            falsy value when nothing was uploaded.

    Returns:
        Markdown text listing the courses, followed by the grade level and
        GPA for a full record; ``"No transcript uploaded."`` for falsy input.
    """
    if not transcript_dict:
        return "No transcript uploaded."

    values = transcript_dict.values()
    if isinstance(transcript_dict, dict) and all(isinstance(v, list) for v in values):
        return _format_courses_by_grade(transcript_dict)

    courses = transcript_dict.get("courses") or []
    if isinstance(courses, dict):
        # Courses grouped by grade level: list the courses themselves rather
        # than the grade-level keys of the mapping.
        lines = [_format_courses_by_grade(courses)]
    else:
        lines = [f"- {course}" for course in courses]
    lines.append(f"Grade Level: {transcript_dict.get('grade_level', 'N/A')}")
    lines.append(f"GPA: {_format_gpa(transcript_dict.get('gpa', 'N/A'))}")
    return "\n".join(lines)
# ========== GRADIO INTERFACE ========== | |
# Four-tab Gradio UI: upload a transcript, take the learning-style quiz,
# answer personal questions, then save the profile and review its summary.
with gr.Blocks() as app:
    with gr.Tab("Step 1: Upload Transcript"):
        transcript_file = gr.File(label="Upload your transcript (CSV, Excel, or PDF)")
        transcript_output = gr.Textbox(label="Transcript Output")
        # Holds the second value returned by parse_transcript so the save
        # step can receive it as an input.
        transcript_data = gr.State()
        transcript_file.change(fn=parse_transcript, inputs=transcript_file, outputs=[transcript_output, transcript_data])
    with gr.Tab("Step 2: Learning Style Quiz"):
        q1 = gr.Radio(choices=[
            "I remember something better when I see it written down.",
            "I remember best by listening to a lecture or a recording.",
            "I remember best by reading information on my own."
        ], label="1. How do you best remember information?")
        # The remaining questions reuse q1's answer options; the option text
        # must stay in sync with the strings counted in learning_style_quiz.
        q2 = gr.Radio(choices=q1.choices, label="2. What’s your preferred study method?")
        q3 = gr.Radio(choices=q1.choices, label="3. What helps you understand new topics?")
        q4 = gr.Radio(choices=q1.choices, label="4. How do you prefer to take notes?")
        q5 = gr.Radio(choices=q1.choices, label="5. When you visualize concepts, what helps most?")
        learning_output = gr.Textbox(label="Learning Style Result")
        gr.Button("Submit Quiz").click(learning_style_quiz, inputs=[q1, q2, q3, q4, q5], outputs=learning_output)
    with gr.Tab("Step 3: Personal Questions"):
        name = gr.Textbox(label="What's your name?")
        age = gr.Number(label="How old are you?")
        interests = gr.Textbox(label="What are your interests?")
        movie = gr.Textbox(label="Favorite movie?")
        movie_reason = gr.Textbox(label="Why do you like that movie?")
        show = gr.Textbox(label="Favorite TV show?")
        show_reason = gr.Textbox(label="Why do you like that show?")
        book = gr.Textbox(label="Favorite book?")
        book_reason = gr.Textbox(label="Why do you like that book?")
        character = gr.Textbox(label="Favorite character?")
        character_reason = gr.Textbox(label="Why do you like that character?")
        blog_checkbox = gr.Checkbox(label="Do you want to write a blog?", value=False)
        # The blog box starts hidden; its visibility follows the checkbox.
        blog_text = gr.Textbox(label="Write your blog here", visible=False, lines=5)
        blog_checkbox.change(fn=lambda x: gr.update(visible=x), inputs=blog_checkbox, outputs=blog_text)
    with gr.Tab("Step 4: Save & Review"):
        output_summary = gr.Markdown()
        save_btn = gr.Button("Save Profile")

        def gather_and_save(name, age, interests, movie, movie_reason, show, show_reason,
                            book, book_reason, character, character_reason, blog, transcript, learning_style):
            """Pack the favorites answers into one dict and call save_profile."""
            favorites = {
                "movie": movie,
                "movie_reason": movie_reason,
                "show": show,
                "show_reason": show_reason,
                "book": book,
                "book_reason": book_reason,
                "character": character,
                "character_reason": character_reason,
            }
            return save_profile(name, age, interests, transcript, learning_style, favorites, blog)

        # The inputs list must stay in the same order as gather_and_save's
        # parameters: values are passed positionally.
        save_btn.click(fn=gather_and_save,
                       inputs=[name, age, interests, movie, movie_reason, show, show_reason,
                               book, book_reason, character, character_reason, blog_text,
                               transcript_data, learning_output],
                       outputs=output_summary)
# Starts the app as soon as this file is run.
app.launch()