# Final_project / app.py
# Page metadata captured with the file (not part of the program):
#   Dannyar608's picture
#   Update app.py
#   ffbc55b verified
#   raw
#   history blame
#   11 kB
import gradio as gr
import pandas as pd
import json
import os
import re
from PyPDF2 import PdfReader
from collections import defaultdict
# ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
def extract_courses_with_grade_levels(text):
    """Group course entries found in transcript text by grade level.

    Each line that starts with a course code (e.g. ``CHEM 101``) or a
    capitalised course name (e.g. ``World History``) is recorded, optionally
    with a letter grade (``A``-``F`` with ``+``/``-``) or a percentage that
    follows it on the same line. A ``Grade``/``Year`` header immediately
    before a course switches the grade level used for that course and the
    ones after it.

    Args:
        text: Plain text extracted from a transcript.

    Returns:
        A dict mapping grade level (as written in the text, or "Unknown" when
        no header is found anywhere) to a list of ``{"course": ...}`` dicts
        that also carry a ``"grade"`` key when a grade was found.
    """
    # Courses listed before any header are filed under the first grade level
    # mentioned anywhere in the text.
    first_header = re.search(
        r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)",
        text,
        re.IGNORECASE,
    )
    current_grade = first_header.group(2) if first_header else "Unknown"

    course_pattern = r"""
        (?:^|\n)
        # Optional header; case-insensitive like the initial search above.
        (?: ((?i:Grade|Year))\s*[:]?\s*((?i:\d+|Freshman|Sophomore|Junior|Senior))\s*[\n-]* )?
        (
            (?:[A-Z]{2,}[ \t]?\d{3})
            |
            # Only blanks/tabs between words, so a name never spans two lines.
            [A-Z][a-z]+(?:[ \t][A-Z][a-z]+)*
        )
        [ \t]*
        # The grade must sit on the same line, and a letter grade must not be
        # the first letter of a longer word.
        (?: [:\-]?[ \t]* ([A-F][+-]?(?![A-Za-z])|\d{2,3}%) )?
    """

    courses_by_grade = defaultdict(list)
    for match in re.finditer(course_pattern, text, re.VERBOSE | re.MULTILINE):
        header, header_level, course, grade = match.groups()
        if header:
            current_grade = header_level
        entry = {"course": course.strip()}
        if grade:
            entry["grade"] = grade.strip()
        courses_by_grade[current_grade].append(entry)
    return dict(courses_by_grade)
def _extract_gpa(text):
    """Return ``{'weighted': ..., 'unweighted': ...}`` GPA strings found in text."""
    number = r'[\s:]*(\d\.\d{1,2})'
    gpa_data = {'weighted': "N/A", 'unweighted': "N/A"}
    # \b keeps "Weighted GPA" from matching inside "Unweighted GPA"; the
    # "GPA (...)" forms also cover "Cumulative GPA (...)".
    labelled = (
        ('unweighted', (r'\bUnweighted GPA', r'\bGPA \(Unweighted\)')),
        ('weighted', (r'\bWeighted GPA', r'\bGPA \(Weighted\)')),
    )
    for kind, labels in labelled:
        for label in labels:
            for match in re.finditer(label + number, text, re.IGNORECASE):
                gpa_data[kind] = match.group(1)
    # An unlabelled "GPA: x.y" only fills values that are still missing. The
    # lookbehind skips GPAs already claimed by a (un)weighted label.
    for match in re.finditer(r'(?<!weighted\s)\bGPA' + number, text, re.IGNORECASE):
        for kind in ('unweighted', 'weighted'):
            if gpa_data[kind] == "N/A":
                gpa_data[kind] = match.group(1)
    return gpa_data


def _first_row_value(df, candidates, default):
    """Return the first-row value of the first candidate column present in df."""
    for col in candidates:
        if col in df.columns:
            if df.empty:
                return default
            value = df[col].iloc[0]
            if pd.isna(value):
                return default
            # Unwrap NumPy scalars so the result is a plain, JSON-friendly value.
            return value.item() if hasattr(value, "item") else value
    return default


def _parse_pdf_transcript(path):
    """Extract grade level, GPA and courses from the text of a PDF transcript."""
    reader = PdfReader(path)
    page_texts = (page.extract_text() for page in reader.pages)
    text = ''.join(page_text + '\n' for page_text in page_texts if page_text)

    grade_match = re.search(
        r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE
    )
    grade_level = grade_match.group(2) if grade_match else "Unknown"
    gpa_data = _extract_gpa(text)
    courses_by_grade = extract_courses_with_grade_levels(text)

    pieces = [f"Grade Level: {grade_level}\n\n"]
    if gpa_data['weighted'] != "N/A" or gpa_data['unweighted'] != "N/A":
        pieces.append("GPA Information:\n")
        if gpa_data['unweighted'] != "N/A":
            pieces.append(f"- Unweighted GPA: {gpa_data['unweighted']}\n")
        if gpa_data['weighted'] != "N/A":
            pieces.append(f"- Weighted GPA: {gpa_data['weighted']}\n")
    else:
        pieces.append("No GPA information found\n")
    pieces.append("\nCourses by Grade Level:\n")
    for level, courses in courses_by_grade.items():
        pieces.append(f"\nGrade {level}:\n")
        for course in courses:
            line = f"- {course['course']}"
            if 'grade' in course:
                line += f" (Grade: {course['grade']})"
            pieces.append(line + "\n")

    return ''.join(pieces), {
        "gpa": gpa_data,
        "grade_level": grade_level,
        "courses": courses_by_grade
    }


def parse_transcript(file):
    """Parse an uploaded transcript (CSV, XLSX or PDF) into a summary and data.

    Args:
        file: The uploaded file: either an object whose ``name`` attribute is
            the file path, or the path itself as a string. ``None`` means no
            file is currently selected.

    Returns:
        A ``(text, data)`` tuple. ``text`` is a human-readable summary and
        ``data`` is a dict with ``"gpa"``, ``"grade_level"`` and ``"courses"``
        keys. ``data`` is ``None`` when no file was given or the extension is
        not supported.
    """
    if file is None:
        return "No file uploaded", None

    # Always read from the path so file-like wrappers and plain path strings
    # are handled the same way.
    path = getattr(file, "name", file)
    extension = os.path.splitext(str(path))[1].lower()

    if extension == '.pdf':
        return _parse_pdf_transcript(path)
    if extension == '.csv':
        df = pd.read_csv(path)
    elif extension == '.xlsx':
        df = pd.read_excel(path)
    else:
        return "Unsupported file format", None

    # Tabular files: take GPA and grade level from the first row of the first
    # recognised column, and the course list from the first recognised column.
    gpa = _first_row_value(df, ['GPA', 'Grade Point Average', 'Cumulative GPA'], "N/A")
    if isinstance(gpa, bool) or not isinstance(gpa, (int, float)):
        gpa = "N/A"
    grade_level = _first_row_value(df, ['Grade Level', 'Grade', 'Class', 'Year'], "N/A")

    courses = []
    for col in ['Course', 'Subject', 'Course Name', 'Class']:
        if col in df.columns:
            courses = df[col].tolist()
            break

    output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses:\n"
    output_text += "\n".join(f"- {course}" for course in courses)
    return output_text, {
        "gpa": {"unweighted": gpa, "weighted": "N/A"},
        "grade_level": grade_level,
        "courses": courses
    }
# ========== LEARNING STYLE QUIZ FUNCTION ==========
def learning_style_quiz(*answers):
    """Score the quiz answers and name the learning style(s) chosen most often.

    Each answer is compared against the three fixed answer statements. Every
    style whose count equals the highest count is reported, so ties (including
    the case where nothing matched) list several styles.

    Args:
        *answers: The selected answer text for each question.

    Returns:
        The top style name(s) joined by ", ".
    """
    statement_for_style = (
        ("Visual", "I remember something better when I see it written down."),
        ("Auditory", "I remember best by listening to a lecture or a recording."),
        ("Reading/Writing", "I remember best by reading information on my own."),
    )
    tallies = {
        style: answers.count(statement) for style, statement in statement_for_style
    }
    best = max(tallies.values())
    winners = [style for style, tally in tallies.items() if tally == best]
    return ", ".join(winners)
# ========== SAVE STUDENT PROFILE FUNCTION ==========
def _json_default(value):
    """json.dump fallback: unwrap scalar wrappers that expose ``.item()``."""
    if hasattr(value, "item"):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_profile(name, age, interests, transcript, learning_style, favorites, blog):
    """Write the student's profile to a JSON file and return a Markdown summary.

    The profile is saved as ``student_profiles/<name>_profile.json``; the
    directory is created when missing.

    Args:
        name: Student name; also used (sanitised) as the file name stem.
        age: Student age.
        interests: Free-text interests.
        transcript: Parsed transcript data, or a falsy value when none exists.
        learning_style: Learning style result text.
        favorites: Dict with ``movie``, ``show``, ``book`` and ``character``
            keys plus a matching ``*_reason`` key for each.
        blog: Optional blog text.

    Returns:
        A Markdown string summarising the profile.
    """
    data = {
        "name": name,
        "age": age,
        "interests": interests,
        "transcript": transcript,
        "learning_style": learning_style,
        "favorites": favorites,
        "blog": blog
    }

    # The name comes straight from a form field: collapse anything that is not
    # a word character or hyphen so it cannot contain path separators or "..",
    # and fall back to a fixed stem when nothing usable is left.
    safe_name = re.sub(r"[^\w-]+", "_", str(name or "").strip()).strip("_") or "unnamed"

    os.makedirs("student_profiles", exist_ok=True)
    json_path = os.path.join("student_profiles", f"{safe_name}_profile.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, default=_json_default)

    summary_lines = [
        f"### Student Profile: {name}",
        f"**Age:** {age}",
        f"**Interests:** {interests}",
        f"**Learning Style:** {learning_style}",
        "#### Transcript:",
        f"{transcript_display(transcript)}",
        "#### Favorites:",
        f"- Movie: {favorites['movie']} ({favorites['movie_reason']})",
        f"- Show: {favorites['show']} ({favorites['show_reason']})",
        f"- Book: {favorites['book']} ({favorites['book_reason']})",
        f"- Character: {favorites['character']} ({favorites['character_reason']})",
        "#### Blog:",
        f"{blog if blog else '_No blog provided_'}",
    ]
    return "\n".join(summary_lines) + "\n"
def _is_courses_by_grade(value):
    """Return True for a dict whose values are all lists (courses per grade)."""
    return isinstance(value, dict) and all(isinstance(v, list) for v in value.values())


def _format_courses_by_grade(courses_by_grade):
    """Render a grade -> courses mapping as Markdown headings and bullets."""
    chunks = []
    for grade_level, courses in courses_by_grade.items():
        chunks.append(f"\n**Grade {grade_level}**\n")
        for course in courses:
            if isinstance(course, dict):
                line = f"- {course['course']}"
                if 'grade' in course:
                    line += f" (Grade: {course['grade']})"
            else:
                # Plain course names have no attached grade.
                line = f"- {course}"
            chunks.append(line + "\n")
    return "".join(chunks)


def _format_gpa(gpa):
    """Render a GPA value, spelling out each entry when it is a dict."""
    if isinstance(gpa, dict):
        return ", ".join(f"{value} ({kind})" for kind, value in gpa.items())
    return str(gpa)


def transcript_display(transcript_dict):
    """Render transcript data as Markdown text.

    Accepts either a bare grade -> course-list mapping, or a dict with
    ``"courses"``, ``"grade_level"`` and ``"gpa"`` keys, where ``"courses"``
    is a flat list of names or a grade -> course-list mapping.

    Args:
        transcript_dict: Transcript data, or a falsy value when none exists.

    Returns:
        The Markdown text, or "No transcript uploaded." for falsy input.
    """
    if not transcript_dict:
        return "No transcript uploaded."
    if _is_courses_by_grade(transcript_dict):
        return _format_courses_by_grade(transcript_dict)

    courses = transcript_dict.get("courses") or []
    if _is_courses_by_grade(courses):
        # Grouped courses: render the groups, not just the grade keys.
        parts = [_format_courses_by_grade(courses)]
    else:
        parts = [f"- {course}" for course in courses]
    parts.append(f"Grade Level: {transcript_dict.get('grade_level', 'N/A')}")
    parts.append(f"GPA: {_format_gpa(transcript_dict.get('gpa', 'N/A'))}")
    return "\n".join(parts)
# ========== GRADIO INTERFACE ==========
# Answer statements shared by every quiz question. Kept as a constant so the
# later questions do not depend on how the first Radio stores its choices.
LEARNING_STYLE_CHOICES = [
    "I remember something better when I see it written down.",
    "I remember best by listening to a lecture or a recording.",
    "I remember best by reading information on my own.",
]

with gr.Blocks() as app:
    # Step 1: parse the uploaded file; the parsed data is kept in session
    # state so it can be saved in step 4.
    with gr.Tab("Step 1: Upload Transcript"):
        transcript_file = gr.File(label="Upload your transcript (CSV, Excel, or PDF)")
        transcript_output = gr.Textbox(label="Transcript Output")
        transcript_data = gr.State()
        transcript_file.change(fn=parse_transcript, inputs=transcript_file, outputs=[transcript_output, transcript_data])

    # Step 2: five questions that all offer the same three answers.
    with gr.Tab("Step 2: Learning Style Quiz"):
        q1 = gr.Radio(choices=LEARNING_STYLE_CHOICES, label="1. How do you best remember information?")
        q2 = gr.Radio(choices=LEARNING_STYLE_CHOICES, label="2. What’s your preferred study method?")
        q3 = gr.Radio(choices=LEARNING_STYLE_CHOICES, label="3. What helps you understand new topics?")
        q4 = gr.Radio(choices=LEARNING_STYLE_CHOICES, label="4. How do you prefer to take notes?")
        q5 = gr.Radio(choices=LEARNING_STYLE_CHOICES, label="5. When you visualize concepts, what helps most?")
        learning_output = gr.Textbox(label="Learning Style Result")
        gr.Button("Submit Quiz").click(learning_style_quiz, inputs=[q1, q2, q3, q4, q5], outputs=learning_output)

    # Step 3: free-text personal questions.
    with gr.Tab("Step 3: Personal Questions"):
        name = gr.Textbox(label="What's your name?")
        age = gr.Number(label="How old are you?")
        interests = gr.Textbox(label="What are your interests?")
        movie = gr.Textbox(label="Favorite movie?")
        movie_reason = gr.Textbox(label="Why do you like that movie?")
        show = gr.Textbox(label="Favorite TV show?")
        show_reason = gr.Textbox(label="Why do you like that show?")
        book = gr.Textbox(label="Favorite book?")
        book_reason = gr.Textbox(label="Why do you like that book?")
        character = gr.Textbox(label="Favorite character?")
        character_reason = gr.Textbox(label="Why do you like that character?")
        blog_checkbox = gr.Checkbox(label="Do you want to write a blog?", value=False)
        blog_text = gr.Textbox(label="Write your blog here", visible=False, lines=5)
        # The blog box is only shown while the checkbox is ticked.
        blog_checkbox.change(fn=lambda x: gr.update(visible=x), inputs=blog_checkbox, outputs=blog_text)

    # Step 4: collect everything entered so far, save it, show the summary.
    with gr.Tab("Step 4: Save & Review"):
        output_summary = gr.Markdown()
        save_btn = gr.Button("Save Profile")

        def gather_and_save(name, age, interests, movie, movie_reason, show, show_reason,
                            book, book_reason, character, character_reason, blog, transcript, learning_style):
            """Bundle the favorites into one dict and hand off to save_profile."""
            favorites = {
                "movie": movie,
                "movie_reason": movie_reason,
                "show": show,
                "show_reason": show_reason,
                "book": book,
                "book_reason": book_reason,
                "character": character,
                "character_reason": character_reason,
            }
            return save_profile(name, age, interests, transcript, learning_style, favorites, blog)

        save_btn.click(fn=gather_and_save,
                       inputs=[name, age, interests, movie, movie_reason, show, show_reason,
                               book, book_reason, character, character_reason, blog_text,
                               transcript_data, learning_output],
                       outputs=output_summary)

# Only start the server when run as a script, so importing this module (for
# example from tests) does not launch it.
if __name__ == "__main__":
    app.launch()