Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,15 @@ import sys
|
|
3 |
import subprocess
|
4 |
import importlib
|
5 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
required_packages = {
|
8 |
'gradio': 'gradio>=3.0',
|
@@ -26,18 +35,6 @@ def check_and_install_packages():
|
|
26 |
|
27 |
check_and_install_packages()
|
28 |
|
29 |
-
# ========== MAIN IMPORTS ==========
|
30 |
-
import gradio as gr
|
31 |
-
import pandas as pd
|
32 |
-
import json
|
33 |
-
import os
|
34 |
-
import re
|
35 |
-
from PyPDF2 import PdfReader
|
36 |
-
from collections import defaultdict
|
37 |
-
from transformers import pipeline
|
38 |
-
from typing import List, Dict, Union
|
39 |
-
import pdfplumber
|
40 |
-
|
41 |
# ========== TRANSCRIPT PARSING ==========
|
42 |
class UniversalTranscriptParser:
|
43 |
def __init__(self):
|
@@ -53,7 +50,6 @@ class UniversalTranscriptParser:
|
|
53 |
}
|
54 |
|
55 |
def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
56 |
-
"""Determine transcript type and parse accordingly"""
|
57 |
transcript_type = self._identify_transcript_type(text)
|
58 |
|
59 |
if transcript_type == 'homeschool':
|
@@ -64,7 +60,6 @@ class UniversalTranscriptParser:
|
|
64 |
return self._parse_miami_dade(text)
|
65 |
|
66 |
def _identify_transcript_type(self, text: str) -> str:
|
67 |
-
"""Identify which type of transcript we're processing"""
|
68 |
if re.search(r'Sample OFFICIAL HIGH SCHOOL TRANSCRIPT', text):
|
69 |
return 'homeschool'
|
70 |
elif re.search(r'DORAL ACADEMY HIGH SCHOOL', text):
|
@@ -72,27 +67,22 @@ class UniversalTranscriptParser:
|
|
72 |
return 'miami_dade'
|
73 |
|
74 |
def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
75 |
-
"""Parse homeschool transcript format"""
|
76 |
courses = []
|
77 |
current_grade = None
|
78 |
current_year = None
|
79 |
|
80 |
-
# Extract student info
|
81 |
student_info = {}
|
82 |
name_match = re.search(r'Student Name:\s*(.+)\s*SSN:', text)
|
83 |
if name_match:
|
84 |
student_info['name'] = name_match.group(1).strip()
|
85 |
|
86 |
-
# Process each line
|
87 |
for line in text.split('\n'):
|
88 |
-
# Check for grade level header
|
89 |
grade_match = re.match(r'^\|?\s*(\d+th Grade)\s*\|.*(\d{4}-\d{4})', line)
|
90 |
if grade_match:
|
91 |
current_grade = grade_match.group(1)
|
92 |
current_year = grade_match.group(2)
|
93 |
continue
|
94 |
|
95 |
-
# Course line pattern
|
96 |
course_match = re.match(
|
97 |
r'^\|?\s*([^\|]+?)\s*\|\s*([A-Z][+*]?)\s*\|\s*([^\|]+)\s*\|\s*(\d+\.?\d*)\s*\|\s*(\d+)',
|
98 |
line
|
@@ -100,7 +90,6 @@ class UniversalTranscriptParser:
|
|
100 |
|
101 |
if course_match and current_grade:
|
102 |
course_name = course_match.group(1).strip()
|
103 |
-
# Clean course names that start with | or have extra spaces
|
104 |
course_name = re.sub(r'^\|?\s*', '', course_name)
|
105 |
|
106 |
courses.append({
|
@@ -114,14 +103,7 @@ class UniversalTranscriptParser:
|
|
114 |
'transcript_type': 'homeschool'
|
115 |
})
|
116 |
|
117 |
-
|
118 |
-
gpa_data = {}
|
119 |
-
gpa_match = re.search(r'Cum\. GPA\s*\|\s*([\d\.]+)', text)
|
120 |
-
if gpa_match:
|
121 |
-
gpa_value = gpa_match.group(1)
|
122 |
-
gpa_data['unweighted'] = gpa_value
|
123 |
-
gpa_data['weighted'] = gpa_value # Homeschool often has same weighted/unweighted
|
124 |
-
|
125 |
return {
|
126 |
'student_info': student_info,
|
127 |
'courses': {'All': courses},
|
@@ -130,25 +112,19 @@ class UniversalTranscriptParser:
|
|
130 |
}
|
131 |
|
132 |
def _parse_doral_academy(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
133 |
-
"""Parse Doral Academy specific format"""
|
134 |
courses = []
|
135 |
-
|
136 |
-
# Extract student info
|
137 |
student_info = {}
|
138 |
name_match = re.search(r'LEGAL NAME:\s*([^\n]+)', text)
|
139 |
if name_match:
|
140 |
student_info['name'] = name_match.group(1).strip()
|
141 |
|
142 |
-
# Extract school year information
|
143 |
year_pattern = re.compile(r'YEAR:\s*(\d{4}-\d{4})\s*GRADE LEVEL:\s*(\d{2})', re.MULTILINE)
|
144 |
year_matches = year_pattern.finditer(text)
|
145 |
|
146 |
-
# Create mapping of grade levels to years
|
147 |
grade_year_map = {}
|
148 |
for match in year_matches:
|
149 |
grade_year_map[match.group(2)] = match.group(1)
|
150 |
|
151 |
-
# Course pattern for Doral Academy
|
152 |
course_pattern = re.compile(
|
153 |
r'(\d)\s+(\d{7})\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+(\d\.\d{2})\s+(\d\.\d{2})',
|
154 |
re.MULTILINE
|
@@ -176,17 +152,7 @@ class UniversalTranscriptParser:
|
|
176 |
|
177 |
courses_by_grade[grade_level_num].append(course_info)
|
178 |
|
179 |
-
|
180 |
-
gpa_data = {}
|
181 |
-
unweighted_match = re.search(r'Un-weighted GPA\s*([\d\.]+)', text)
|
182 |
-
weighted_match = re.search(r'Weighted GPA\s*([\d\.]+)', text)
|
183 |
-
|
184 |
-
if unweighted_match:
|
185 |
-
gpa_data['unweighted'] = unweighted_match.group(1)
|
186 |
-
if weighted_match:
|
187 |
-
gpa_data['weighted'] = weighted_match.group(1)
|
188 |
-
|
189 |
-
# Extract current grade level
|
190 |
grade_level = "12" if re.search(r'GRADE LEVEL:\s*12', text) else "Unknown"
|
191 |
|
192 |
return {
|
@@ -197,17 +163,14 @@ class UniversalTranscriptParser:
|
|
197 |
}
|
198 |
|
199 |
def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
200 |
-
"""Parse standard Miami-Dade format"""
|
201 |
courses = []
|
202 |
courses_by_grade = defaultdict(list)
|
203 |
|
204 |
-
# Extract student info
|
205 |
student_info = {}
|
206 |
name_match = re.search(r'0783977 - ([^,]+),\s*([^\n]+)', text)
|
207 |
if name_match:
|
208 |
student_info['name'] = f"{name_match.group(2)} {name_match.group(1)}"
|
209 |
|
210 |
-
# Course pattern for Miami-Dade
|
211 |
course_pattern = re.compile(
|
212 |
r'([A-Z]-[A-Za-z\s&]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([A-Z]?)\s*\|\s*([A-Z]?)\s*\|\s*([^\|]+)',
|
213 |
re.MULTILINE
|
@@ -233,13 +196,7 @@ class UniversalTranscriptParser:
|
|
233 |
|
234 |
courses_by_grade[match.group(3)].append(course_info)
|
235 |
|
236 |
-
|
237 |
-
gpa_data = {
|
238 |
-
'weighted': self._extract_gpa(text, 'Weighted GPA'),
|
239 |
-
'unweighted': self._extract_gpa(text, 'Un-weighted GPA')
|
240 |
-
}
|
241 |
-
|
242 |
-
# Extract current grade level
|
243 |
grade_level = re.search(r'Current Grade:\s*(\d+)', text).group(1) if re.search(r'Current Grade:\s*(\d+)', text) else "Unknown"
|
244 |
|
245 |
return {
|
@@ -249,38 +206,50 @@ class UniversalTranscriptParser:
|
|
249 |
'grade_level': grade_level
|
250 |
}
|
251 |
|
252 |
-
def
|
253 |
-
"""
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
'
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
)
|
282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
|
|
|
284 |
def parse_transcript(file):
|
285 |
parser = UniversalTranscriptParser()
|
286 |
|
@@ -292,15 +261,19 @@ def parse_transcript(file):
|
|
292 |
|
293 |
parsed_data = parser.parse_transcript(text)
|
294 |
|
295 |
-
#
|
296 |
gpa_data = parsed_data.get('gpa', {})
|
297 |
-
weighted_gpa = gpa_data.get('weighted', 'Not
|
298 |
-
unweighted_gpa = gpa_data.get('unweighted', 'Not
|
299 |
|
300 |
-
output_text = "
|
301 |
-
output_text += "GPA
|
302 |
-
output_text += f"
|
303 |
-
output_text += f"
|
|
|
|
|
|
|
|
|
304 |
|
305 |
return output_text, parsed_data
|
306 |
else:
|
@@ -309,48 +282,12 @@ def parse_transcript(file):
|
|
309 |
# ========== LEARNING STYLE QUIZ ==========
|
310 |
learning_style_questions = [
|
311 |
"When you study for a test, you prefer to:",
|
312 |
-
|
313 |
-
"When you learn a new skill, you prefer to:",
|
314 |
-
"When you're trying to concentrate, you:",
|
315 |
-
"When you meet new people, you remember them by:",
|
316 |
-
"When you're assembling furniture or a gadget, you:",
|
317 |
-
"When choosing a restaurant, you rely most on:",
|
318 |
-
"When you're in a waiting room, you typically:",
|
319 |
-
"When giving someone instructions, you tend to:",
|
320 |
-
"When you're trying to recall information, you:",
|
321 |
-
"When you're at a museum or exhibit, you:",
|
322 |
-
"When you're learning a new language, you prefer:",
|
323 |
-
"When you're taking notes in class, you:",
|
324 |
-
"When you're explaining something complex, you:",
|
325 |
-
"When you're at a party, you enjoy:",
|
326 |
-
"When you're trying to remember a phone number, you:",
|
327 |
-
"When you're relaxing, you prefer to:",
|
328 |
-
"When you're learning to use new software, you:",
|
329 |
-
"When you're giving a presentation, you rely on:",
|
330 |
-
"When you're solving a difficult problem, you:"
|
331 |
]
|
332 |
|
333 |
learning_style_options = [
|
334 |
["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"],
|
335 |
-
|
336 |
-
["Read instructions (Reading/Writing)", "Have someone show you (Visual)", "Listen to explanations (Auditory)", "Try it yourself (Kinesthetic)"],
|
337 |
-
["Need quiet (Reading/Writing)", "Need background noise (Auditory)", "Need to move around (Kinesthetic)", "Need visual stimulation (Visual)"],
|
338 |
-
["Their face (Visual)", "Their name (Auditory)", "What you talked about (Reading/Writing)", "What you did together (Kinesthetic)"],
|
339 |
-
["Read the instructions carefully (Reading/Writing)", "Look at the diagrams (Visual)", "Ask someone to explain (Auditory)", "Start putting pieces together (Kinesthetic)"],
|
340 |
-
["Online photos of the food (Visual)", "Recommendations from friends (Auditory)", "Reading the menu online (Reading/Writing)", "Remembering how it felt to eat there (Kinesthetic)"],
|
341 |
-
["Read magazines (Reading/Writing)", "Listen to music (Auditory)", "Watch TV (Visual)", "Fidget or move around (Kinesthetic)"],
|
342 |
-
["Write them down (Reading/Writing)", "Explain verbally (Auditory)", "Demonstrate (Visual)", "Guide them physically (Kinesthetic)"],
|
343 |
-
["See written words in your mind (Visual)", "Hear the information in your head (Auditory)", "Write it down to remember (Reading/Writing)", "Associate it with physical actions (Kinesthetic)"],
|
344 |
-
["Read all the descriptions (Reading/Writing)", "Listen to audio guides (Auditory)", "Look at the displays (Visual)", "Touch interactive exhibits (Kinesthetic)"],
|
345 |
-
["Study grammar rules (Reading/Writing)", "Listen to native speakers (Auditory)", "Use flashcards with images (Visual)", "Practice conversations (Kinesthetic)"],
|
346 |
-
["Write detailed paragraphs (Reading/Writing)", "Record the lecture (Auditory)", "Draw diagrams and charts (Visual)", "Doodle while listening (Kinesthetic)"],
|
347 |
-
["Write detailed steps (Reading/Writing)", "Explain verbally with examples (Auditory)", "Draw diagrams (Visual)", "Use physical objects to demonstrate (Kinesthetic)"],
|
348 |
-
["Conversations with people (Auditory)", "Watching others or the environment (Visual)", "Writing notes or texting (Reading/Writing)", "Dancing or physical activities (Kinesthetic)"],
|
349 |
-
["See the numbers in your mind (Visual)", "Say them aloud (Auditory)", "Write them down (Reading/Writing)", "Dial them on a keypad (Kinesthetic)"],
|
350 |
-
["Read a book (Reading/Writing)", "Listen to music (Auditory)", "Watch TV/movies (Visual)", "Do something physical (Kinesthetic)"],
|
351 |
-
["Read the manual (Reading/Writing)", "Ask someone to show you (Visual)", "Call tech support (Auditory)", "Experiment with the software (Kinesthetic)"],
|
352 |
-
["Detailed notes (Reading/Writing)", "Verbal explanations (Auditory)", "Visual slides (Visual)", "Physical demonstrations (Kinesthetic)"],
|
353 |
-
["Write out possible solutions (Reading/Writing)", "Talk through it with someone (Auditory)", "Draw diagrams (Visual)", "Build a model or prototype (Kinesthetic)"]
|
354 |
]
|
355 |
|
356 |
def learning_style_quiz(*answers):
|
@@ -374,48 +311,19 @@ def learning_style_quiz(*answers):
|
|
374 |
max_score = max(scores.values())
|
375 |
total_questions = len(learning_style_questions)
|
376 |
|
377 |
-
# Calculate percentages
|
378 |
percentages = {style: (score/total_questions)*100 for style, score in scores.items()}
|
379 |
-
|
380 |
-
# Sort styles by score (descending)
|
381 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
382 |
|
383 |
-
# Prepare detailed results
|
384 |
result = "Your Learning Style Results:\n\n"
|
385 |
for style, score in sorted_styles:
|
386 |
result += f"{style}: {score}/{total_questions} ({percentages[style]:.1f}%)\n"
|
387 |
|
388 |
result += "\n"
|
389 |
-
|
390 |
-
# Determine primary and secondary styles
|
391 |
primary_styles = [style for style, score in scores.items() if score == max_score]
|
392 |
|
393 |
if len(primary_styles) == 1:
|
394 |
result += f"Your primary learning style is: {primary_styles[0]}\n\n"
|
395 |
-
|
396 |
-
result += "Tips for Visual Learners:\n"
|
397 |
-
result += "- Use color coding in your notes\n"
|
398 |
-
result += "- Create mind maps and diagrams\n"
|
399 |
-
result += "- Watch educational videos\n"
|
400 |
-
result += "- Use flashcards with images\n"
|
401 |
-
elif primary_styles[0] == "Auditory":
|
402 |
-
result += "Tips for Auditory Learners:\n"
|
403 |
-
result += "- Record lectures and listen to them\n"
|
404 |
-
result += "- Participate in study groups\n"
|
405 |
-
result += "- Explain concepts out loud to yourself\n"
|
406 |
-
result += "- Use rhymes or songs to remember information\n"
|
407 |
-
elif primary_styles[0] == "Reading/Writing":
|
408 |
-
result += "Tips for Reading/Writing Learners:\n"
|
409 |
-
result += "- Write detailed notes\n"
|
410 |
-
result += "- Create summaries in your own words\n"
|
411 |
-
result += "- Read textbooks and articles\n"
|
412 |
-
result += "- Make lists to organize information\n"
|
413 |
-
else: # Kinesthetic
|
414 |
-
result += "Tips for Kinesthetic Learners:\n"
|
415 |
-
result += "- Use hands-on activities\n"
|
416 |
-
result += "- Take frequent movement breaks\n"
|
417 |
-
result += "- Create physical models\n"
|
418 |
-
result += "- Associate information with physical actions\n"
|
419 |
else:
|
420 |
result += f"You have multiple strong learning styles: {', '.join(primary_styles)}\n\n"
|
421 |
result += "You may benefit from combining different learning approaches.\n"
|
@@ -426,7 +334,6 @@ def learning_style_quiz(*answers):
|
|
426 |
def save_profile(name, age, interests, transcript, learning_style,
|
427 |
movie, movie_reason, show, show_reason,
|
428 |
book, book_reason, character, character_reason, blog):
|
429 |
-
# Convert age to int if it's a numpy number (from gradio Number input)
|
430 |
age = int(age) if age else 0
|
431 |
|
432 |
favorites = {
|
@@ -455,7 +362,6 @@ def save_profile(name, age, interests, transcript, learning_style,
|
|
455 |
with open(json_path, "w") as f:
|
456 |
json.dump(data, f, indent=2)
|
457 |
|
458 |
-
# Create profile summary with clear GPA display
|
459 |
gpa = transcript.get('gpa', {})
|
460 |
markdown_summary = f"""### Student Profile: {name}
|
461 |
**Age:** {age}
|
@@ -492,30 +398,11 @@ def generate_response(message, history):
|
|
492 |
if not profile:
|
493 |
return "Please complete and save your profile first using the previous tabs."
|
494 |
|
495 |
-
# Get profile data
|
496 |
-
learning_style = profile.get("learning_style", "")
|
497 |
transcript = profile.get("transcript", {})
|
498 |
gpa = transcript.get("gpa", {})
|
499 |
-
courses = []
|
500 |
-
|
501 |
-
# Flatten all courses from all grades
|
502 |
-
if 'courses' in transcript:
|
503 |
-
if isinstance(transcript['courses'], dict):
|
504 |
-
for grade_courses in transcript['courses'].values():
|
505 |
-
courses.extend(grade_courses)
|
506 |
-
elif isinstance(transcript['courses'], list):
|
507 |
-
courses = transcript['courses']
|
508 |
-
|
509 |
-
# Common responses
|
510 |
-
greetings = ["hi", "hello", "hey"]
|
511 |
-
study_help = ["study", "learn", "prepare", "exam"]
|
512 |
-
grade_help = ["gpa", "grade", "weighted", "unweighted", "grades"]
|
513 |
-
course_help = ["courses", "classes", "subjects"]
|
514 |
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
elif any(word in message.lower() for word in grade_help):
|
519 |
response = "Your GPA Information:\n"
|
520 |
response += f"- Weighted GPA: {gpa.get('weighted', 'Not Available')}\n"
|
521 |
response += f"- Unweighted GPA: {gpa.get('unweighted', 'Not Available')}\n"
|
@@ -533,61 +420,7 @@ def generate_response(message, history):
|
|
533 |
|
534 |
return response
|
535 |
|
536 |
-
|
537 |
-
# Analyze course performance to give personalized advice
|
538 |
-
strong_subjects = [c['name'] for c in courses if 'grade' in c and c['grade'] in ['A', 'A+', 'B+']]
|
539 |
-
weak_subjects = [c['name'] for c in courses if 'grade' in c and c['grade'] in ['D', 'F']]
|
540 |
-
|
541 |
-
response = "Here are some personalized study tips:\n"
|
542 |
-
|
543 |
-
if strong_subjects:
|
544 |
-
response += f"\nYou're doing well in: {', '.join(strong_subjects[:3])}\n"
|
545 |
-
response += "→ Keep up the good work in these areas!\n"
|
546 |
-
|
547 |
-
if weak_subjects:
|
548 |
-
response += f"\nYou might want to focus more on: {', '.join(weak_subjects[:3])}\n"
|
549 |
-
response += "→ Consider getting extra help or tutoring\n"
|
550 |
-
|
551 |
-
# Add learning style specific tips
|
552 |
-
if "Visual" in learning_style:
|
553 |
-
response += "\nVisual Learner Tip: Try creating diagrams or mind maps\n"
|
554 |
-
elif "Auditory" in learning_style:
|
555 |
-
response += "\nAuditory Learner Tip: Record yourself explaining concepts\n"
|
556 |
-
elif "Reading/Writing" in learning_style:
|
557 |
-
response += "\nReading/Writing Tip: Write summaries in your own words\n"
|
558 |
-
elif "Kinesthetic" in learning_style:
|
559 |
-
response += "\nKinesthetic Tip: Use physical objects to demonstrate concepts\n"
|
560 |
-
|
561 |
-
return response
|
562 |
-
|
563 |
-
elif any(word in message.lower() for word in course_help):
|
564 |
-
if not courses:
|
565 |
-
return "No course information available."
|
566 |
-
|
567 |
-
# Group by subject area
|
568 |
-
subjects = defaultdict(list)
|
569 |
-
for course in courses:
|
570 |
-
if 'name' in course:
|
571 |
-
# Extract first word as subject area
|
572 |
-
subject = course['name'].split()[0]
|
573 |
-
subjects[subject].append(course)
|
574 |
-
|
575 |
-
response = "Your course subjects:\n"
|
576 |
-
for subject, subject_courses in subjects.items():
|
577 |
-
response += f"\n{subject} ({len(subject_courses)} courses)"
|
578 |
-
|
579 |
-
return response
|
580 |
-
|
581 |
-
elif "help" in message.lower():
|
582 |
-
return ("I can help with:\n"
|
583 |
-
"- Your GPA information\n"
|
584 |
-
"- Personalized study tips\n"
|
585 |
-
"- Course information\n"
|
586 |
-
"- Learning style recommendations")
|
587 |
-
|
588 |
-
else:
|
589 |
-
return ("I'm your personalized teaching assistant. "
|
590 |
-
"Ask me about your GPA, courses, or study tips!")
|
591 |
|
592 |
# ========== GRADIO INTERFACE ==========
|
593 |
with gr.Blocks() as app:
|
|
|
3 |
import subprocess
|
4 |
import importlib
|
5 |
from datetime import datetime
|
6 |
+
import re
|
7 |
+
import os
|
8 |
+
import json
|
9 |
+
import pdfplumber
|
10 |
+
from collections import defaultdict
|
11 |
+
from typing import List, Dict, Union
|
12 |
+
import gradio as gr
|
13 |
+
from PyPDF2 import PdfReader
|
14 |
+
from transformers import pipeline
|
15 |
|
16 |
required_packages = {
|
17 |
'gradio': 'gradio>=3.0',
|
|
|
35 |
|
36 |
check_and_install_packages()
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# ========== TRANSCRIPT PARSING ==========
|
39 |
class UniversalTranscriptParser:
|
40 |
def __init__(self):
|
|
|
50 |
}
|
51 |
|
52 |
def parse_transcript(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
53 |
transcript_type = self._identify_transcript_type(text)
|
54 |
|
55 |
if transcript_type == 'homeschool':
|
|
|
60 |
return self._parse_miami_dade(text)
|
61 |
|
62 |
def _identify_transcript_type(self, text: str) -> str:
|
|
|
63 |
if re.search(r'Sample OFFICIAL HIGH SCHOOL TRANSCRIPT', text):
|
64 |
return 'homeschool'
|
65 |
elif re.search(r'DORAL ACADEMY HIGH SCHOOL', text):
|
|
|
67 |
return 'miami_dade'
|
68 |
|
69 |
def _parse_homeschool(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
70 |
courses = []
|
71 |
current_grade = None
|
72 |
current_year = None
|
73 |
|
|
|
74 |
student_info = {}
|
75 |
name_match = re.search(r'Student Name:\s*(.+)\s*SSN:', text)
|
76 |
if name_match:
|
77 |
student_info['name'] = name_match.group(1).strip()
|
78 |
|
|
|
79 |
for line in text.split('\n'):
|
|
|
80 |
grade_match = re.match(r'^\|?\s*(\d+th Grade)\s*\|.*(\d{4}-\d{4})', line)
|
81 |
if grade_match:
|
82 |
current_grade = grade_match.group(1)
|
83 |
current_year = grade_match.group(2)
|
84 |
continue
|
85 |
|
|
|
86 |
course_match = re.match(
|
87 |
r'^\|?\s*([^\|]+?)\s*\|\s*([A-Z][+*]?)\s*\|\s*([^\|]+)\s*\|\s*(\d+\.?\d*)\s*\|\s*(\d+)',
|
88 |
line
|
|
|
90 |
|
91 |
if course_match and current_grade:
|
92 |
course_name = course_match.group(1).strip()
|
|
|
93 |
course_name = re.sub(r'^\|?\s*', '', course_name)
|
94 |
|
95 |
courses.append({
|
|
|
103 |
'transcript_type': 'homeschool'
|
104 |
})
|
105 |
|
106 |
+
gpa_data = self._extract_gpa_data(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
return {
|
108 |
'student_info': student_info,
|
109 |
'courses': {'All': courses},
|
|
|
112 |
}
|
113 |
|
114 |
def _parse_doral_academy(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
115 |
courses = []
|
|
|
|
|
116 |
student_info = {}
|
117 |
name_match = re.search(r'LEGAL NAME:\s*([^\n]+)', text)
|
118 |
if name_match:
|
119 |
student_info['name'] = name_match.group(1).strip()
|
120 |
|
|
|
121 |
year_pattern = re.compile(r'YEAR:\s*(\d{4}-\d{4})\s*GRADE LEVEL:\s*(\d{2})', re.MULTILINE)
|
122 |
year_matches = year_pattern.finditer(text)
|
123 |
|
|
|
124 |
grade_year_map = {}
|
125 |
for match in year_matches:
|
126 |
grade_year_map[match.group(2)] = match.group(1)
|
127 |
|
|
|
128 |
course_pattern = re.compile(
|
129 |
r'(\d)\s+(\d{7})\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+(\d\.\d{2})\s+(\d\.\d{2})',
|
130 |
re.MULTILINE
|
|
|
152 |
|
153 |
courses_by_grade[grade_level_num].append(course_info)
|
154 |
|
155 |
+
gpa_data = self._extract_gpa_data(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
grade_level = "12" if re.search(r'GRADE LEVEL:\s*12', text) else "Unknown"
|
157 |
|
158 |
return {
|
|
|
163 |
}
|
164 |
|
165 |
def _parse_miami_dade(self, text: str) -> Dict[str, Union[Dict, List[Dict]]]:
|
|
|
166 |
courses = []
|
167 |
courses_by_grade = defaultdict(list)
|
168 |
|
|
|
169 |
student_info = {}
|
170 |
name_match = re.search(r'0783977 - ([^,]+),\s*([^\n]+)', text)
|
171 |
if name_match:
|
172 |
student_info['name'] = f"{name_match.group(2)} {name_match.group(1)}"
|
173 |
|
|
|
174 |
course_pattern = re.compile(
|
175 |
r'([A-Z]-[A-Za-z\s&]+)\s*\|\s*(\d{4}-\d{4})\s*\|\s*(\d{2})\s*\|\s*([A-Z0-9]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([^\|]+)\s*\|\s*([A-Z]?)\s*\|\s*([A-Z]?)\s*\|\s*([^\|]+)',
|
176 |
re.MULTILINE
|
|
|
196 |
|
197 |
courses_by_grade[match.group(3)].append(course_info)
|
198 |
|
199 |
+
gpa_data = self._extract_gpa_data(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
grade_level = re.search(r'Current Grade:\s*(\d+)', text).group(1) if re.search(r'Current Grade:\s*(\d+)', text) else "Unknown"
|
201 |
|
202 |
return {
|
|
|
206 |
'grade_level': grade_level
|
207 |
}
|
208 |
|
209 |
+
def _extract_gpa_data(self, text: str) -> Dict[str, str]:
|
210 |
+
"""Improved GPA extraction with multiple pattern matching"""
|
211 |
+
gpa_data = {}
|
212 |
+
|
213 |
+
# Weighted GPA patterns
|
214 |
+
weighted_patterns = [
|
215 |
+
r'Weighted GPA\s*:\s*([\d\.]+)',
|
216 |
+
r'Weighted GPA\s*([\d\.]+)',
|
217 |
+
r'GPA WTD\s*:\s*([\d\.]+)',
|
218 |
+
r'Weighted\s*:\s*([\d\.]+)'
|
219 |
+
]
|
220 |
+
|
221 |
+
# Unweighted GPA patterns
|
222 |
+
unweighted_patterns = [
|
223 |
+
r'Un-weighted GPA\s*:\s*([\d\.]+)',
|
224 |
+
r'Unweighted GPA\s*([\d\.]+)',
|
225 |
+
r'GPA UNWTD\s*:\s*([\d\.]+)',
|
226 |
+
r'Unweighted\s*:\s*([\d\.]+)'
|
227 |
+
]
|
228 |
+
|
229 |
+
# Try all weighted patterns
|
230 |
+
for pattern in weighted_patterns:
|
231 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
232 |
+
if match:
|
233 |
+
gpa_data['weighted'] = match.group(1)
|
234 |
+
break
|
235 |
+
|
236 |
+
# Try all unweighted patterns
|
237 |
+
for pattern in unweighted_patterns:
|
238 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
239 |
+
if match:
|
240 |
+
gpa_data['unweighted'] = match.group(1)
|
241 |
+
break
|
242 |
+
|
243 |
+
# Fallback to cumulative GPA if not found
|
244 |
+
if not gpa_data:
|
245 |
+
cumulative_match = re.search(r'Cumulative GPA\s*:\s*([\d\.]+)', text, re.IGNORECASE)
|
246 |
+
if cumulative_match:
|
247 |
+
gpa_data['weighted'] = cumulative_match.group(1)
|
248 |
+
gpa_data['unweighted'] = cumulative_match.group(1)
|
249 |
+
|
250 |
+
return gpa_data
|
251 |
|
252 |
+
# ========== TRANSCRIPT PROCESSING ==========
|
253 |
def parse_transcript(file):
|
254 |
parser = UniversalTranscriptParser()
|
255 |
|
|
|
261 |
|
262 |
parsed_data = parser.parse_transcript(text)
|
263 |
|
264 |
+
# Enhanced GPA display
|
265 |
gpa_data = parsed_data.get('gpa', {})
|
266 |
+
weighted_gpa = gpa_data.get('weighted', 'Not Found (Please check transcript)')
|
267 |
+
unweighted_gpa = gpa_data.get('unweighted', 'Not Found (Please check transcript)')
|
268 |
|
269 |
+
output_text = "=== TRANSCRIPT ANALYSIS RESULTS ===\n\n"
|
270 |
+
output_text += "GPA INFORMATION:\n"
|
271 |
+
output_text += f"🔹 Weighted GPA: {weighted_gpa}\n"
|
272 |
+
output_text += f"🔹 Unweighted GPA: {unweighted_gpa}\n\n"
|
273 |
+
|
274 |
+
if 'Not Found' in weighted_gpa or 'Not Found' in unweighted_gpa:
|
275 |
+
output_text += "NOTE: Could not automatically locate GPA information.\n"
|
276 |
+
output_text += "Please check your transcript for GPA details and enter them manually if needed.\n"
|
277 |
|
278 |
return output_text, parsed_data
|
279 |
else:
|
|
|
282 |
# ========== LEARNING STYLE QUIZ ==========
|
283 |
learning_style_questions = [
|
284 |
"When you study for a test, you prefer to:",
|
285 |
+
# ... [rest of your questions] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
]
|
287 |
|
288 |
learning_style_options = [
|
289 |
["Read the textbook (Reading/Writing)", "Listen to lectures (Auditory)", "Use diagrams/charts (Visual)", "Practice problems (Kinesthetic)"],
|
290 |
+
# ... [rest of your options] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
]
|
292 |
|
293 |
def learning_style_quiz(*answers):
|
|
|
311 |
max_score = max(scores.values())
|
312 |
total_questions = len(learning_style_questions)
|
313 |
|
|
|
314 |
percentages = {style: (score/total_questions)*100 for style, score in scores.items()}
|
|
|
|
|
315 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
316 |
|
|
|
317 |
result = "Your Learning Style Results:\n\n"
|
318 |
for style, score in sorted_styles:
|
319 |
result += f"{style}: {score}/{total_questions} ({percentages[style]:.1f}%)\n"
|
320 |
|
321 |
result += "\n"
|
|
|
|
|
322 |
primary_styles = [style for style, score in scores.items() if score == max_score]
|
323 |
|
324 |
if len(primary_styles) == 1:
|
325 |
result += f"Your primary learning style is: {primary_styles[0]}\n\n"
|
326 |
+
# ... [rest of your learning style tips] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
else:
|
328 |
result += f"You have multiple strong learning styles: {', '.join(primary_styles)}\n\n"
|
329 |
result += "You may benefit from combining different learning approaches.\n"
|
|
|
334 |
def save_profile(name, age, interests, transcript, learning_style,
|
335 |
movie, movie_reason, show, show_reason,
|
336 |
book, book_reason, character, character_reason, blog):
|
|
|
337 |
age = int(age) if age else 0
|
338 |
|
339 |
favorites = {
|
|
|
362 |
with open(json_path, "w") as f:
|
363 |
json.dump(data, f, indent=2)
|
364 |
|
|
|
365 |
gpa = transcript.get('gpa', {})
|
366 |
markdown_summary = f"""### Student Profile: {name}
|
367 |
**Age:** {age}
|
|
|
398 |
if not profile:
|
399 |
return "Please complete and save your profile first using the previous tabs."
|
400 |
|
|
|
|
|
401 |
transcript = profile.get("transcript", {})
|
402 |
gpa = transcript.get("gpa", {})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
|
404 |
+
# When user asks about GPA
|
405 |
+
if any(word in message.lower() for word in ["gpa", "grade", "weighted", "unweighted"]):
|
|
|
|
|
406 |
response = "Your GPA Information:\n"
|
407 |
response += f"- Weighted GPA: {gpa.get('weighted', 'Not Available')}\n"
|
408 |
response += f"- Unweighted GPA: {gpa.get('unweighted', 'Not Available')}\n"
|
|
|
420 |
|
421 |
return response
|
422 |
|
423 |
+
# ... [rest of your AI assistant logic] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
# ========== GRADIO INTERFACE ==========
|
426 |
with gr.Blocks() as app:
|