Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,113 +5,129 @@ import os
|
|
5 |
import re
|
6 |
from PyPDF2 import PdfReader
|
7 |
from collections import defaultdict
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
)
|
25 |
-
\s*
|
26 |
-
(
|
27 |
-
\s*
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
def parse_transcript(file):
|
54 |
-
if file.name.endswith('.
|
55 |
-
df = pd.read_csv(file)
|
56 |
-
elif file.name.endswith('.xlsx'):
|
57 |
-
df = pd.read_excel(file)
|
58 |
-
elif file.name.endswith('.pdf'):
|
59 |
text = ''
|
60 |
reader = PdfReader(file)
|
61 |
for page in reader.pages:
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
output_text += "GPA Information:\n"
|
99 |
-
if gpa_data['unweighted'] != "N/A":
|
100 |
-
output_text += f"- Unweighted GPA: {gpa_data['unweighted']}\n"
|
101 |
-
if gpa_data['weighted'] != "N/A":
|
102 |
-
output_text += f"- Weighted GPA: {gpa_data['weighted']}\n"
|
103 |
else:
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
106 |
return output_text, {
|
107 |
"gpa": gpa_data,
|
108 |
-
"
|
109 |
-
"courses": courses_by_grade
|
110 |
}
|
|
|
|
|
|
|
|
|
111 |
else:
|
112 |
return "Unsupported file format", None
|
113 |
|
114 |
-
#
|
115 |
gpa = "N/A"
|
116 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
117 |
if col in df.columns:
|
@@ -136,31 +152,6 @@ def parse_transcript(file):
|
|
136 |
"courses": courses
|
137 |
}
|
138 |
|
139 |
-
# ... [keep all other functions the same until transcript_display] ...
|
140 |
-
|
141 |
-
def transcript_display(transcript_dict):
|
142 |
-
if not transcript_dict:
|
143 |
-
return "No transcript uploaded."
|
144 |
-
|
145 |
-
if isinstance(transcript_dict, dict) and "courses" in transcript_dict:
|
146 |
-
if isinstance(transcript_dict["courses"], dict):
|
147 |
-
display = "### Course History\n\n"
|
148 |
-
for grade_level, courses in transcript_dict["courses"].items():
|
149 |
-
display += f"**Grade {grade_level}**\n"
|
150 |
-
for course in courses:
|
151 |
-
display += f"- {course.get('course', 'N/A')}"
|
152 |
-
if 'grade' in course:
|
153 |
-
display += f" (Grade: {course['grade']})"
|
154 |
-
if 'credits' in course:
|
155 |
-
display += f" | Credits: {course['credits']}"
|
156 |
-
display += "\n"
|
157 |
-
display += "\n"
|
158 |
-
return display
|
159 |
-
elif isinstance(transcript_dict["courses"], list):
|
160 |
-
return "### Courses\n" + "\n".join([f"- {course}" for course in transcript_dict["courses"]])
|
161 |
-
|
162 |
-
return "No course information available in the expected format."
|
163 |
-
|
164 |
# ========== LEARNING STYLE QUIZ ==========
|
165 |
learning_style_questions = [
|
166 |
"When you study for a test, you prefer to:",
|
@@ -278,8 +269,10 @@ def learning_style_quiz(*answers):
|
|
278 |
|
279 |
return result
|
280 |
|
281 |
-
# ========== SAVE STUDENT PROFILE
|
282 |
-
def save_profile(name, age, interests, transcript, learning_style,
|
|
|
|
|
283 |
# Convert age to int if it's a numpy number (from gradio Number input)
|
284 |
age = int(age) if age else 0
|
285 |
|
@@ -326,22 +319,43 @@ def save_profile(name, age, interests, transcript, learning_style, movie, movie_
|
|
326 |
return markdown_summary
|
327 |
|
328 |
def transcript_display(transcript_dict):
|
329 |
-
if not transcript_dict:
|
330 |
-
return "No
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
|
|
|
|
|
|
338 |
if 'grade' in course:
|
339 |
display += f" (Grade: {course['grade']})"
|
|
|
|
|
340 |
display += "\n"
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
|
346 |
# ========== AI TEACHING ASSISTANT ==========
|
347 |
def load_profile():
|
@@ -432,22 +446,25 @@ def generate_response(message, history):
|
|
432 |
# ========== GRADIO INTERFACE ==========
|
433 |
with gr.Blocks() as app:
|
434 |
with gr.Tab("Step 1: Upload Transcript"):
|
435 |
-
|
436 |
-
|
|
|
437 |
transcript_data = gr.State()
|
438 |
-
transcript_file.change(
|
|
|
|
|
|
|
|
|
439 |
|
440 |
with gr.Tab("Step 2: Learning Style Quiz"):
|
441 |
gr.Markdown("### Learning Style Quiz (20 Questions)")
|
442 |
quiz_components = []
|
443 |
for i, (question, options) in enumerate(zip(learning_style_questions, learning_style_options)):
|
444 |
-
quiz_components.append(
|
445 |
-
gr.Radio(options, label=f"{i+1}. {question}")
|
446 |
-
)
|
447 |
|
448 |
-
learning_output = gr.Textbox(label="Learning Style
|
449 |
gr.Button("Submit Quiz").click(
|
450 |
-
learning_style_quiz,
|
451 |
inputs=quiz_components,
|
452 |
outputs=learning_output
|
453 |
)
|
@@ -471,7 +488,6 @@ with gr.Blocks() as app:
|
|
471 |
with gr.Tab("Step 4: Save & Review"):
|
472 |
output_summary = gr.Markdown()
|
473 |
save_btn = gr.Button("Save Profile")
|
474 |
-
|
475 |
save_btn.click(
|
476 |
fn=save_profile,
|
477 |
inputs=[name, age, interests, transcript_data, learning_output,
|
@@ -480,7 +496,6 @@ with gr.Blocks() as app:
|
|
480 |
outputs=output_summary
|
481 |
)
|
482 |
|
483 |
-
# AI Teaching Assistant Tab
|
484 |
with gr.Tab("🤖 AI Teaching Assistant"):
|
485 |
gr.Markdown("## Your Personalized Learning Assistant")
|
486 |
chatbot = gr.ChatInterface(
|
@@ -494,5 +509,5 @@ with gr.Blocks() as app:
|
|
494 |
)
|
495 |
|
496 |
if __name__ == "__main__":
|
497 |
-
app.launch()
|
498 |
|
|
|
5 |
import re
|
6 |
from PyPDF2 import PdfReader
|
7 |
from collections import defaultdict
|
8 |
+
# Initialize the NER model as an optional feature.
# The import lives inside the ``try`` so that a missing or broken
# ``transformers`` installation disables NER instead of crashing the whole
# app at import time; callers check ``ner_pipeline`` for ``None`` before use.
try:
    from transformers import pipeline

    ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
except Exception as e:  # ImportError, model download/load failures, ...
    print(f"Could not load NER model: {e}")
    ner_pipeline = None
|
16 |
+
|
17 |
+
# ========== IMPROVED TRANSCRIPT PARSING ==========
|
18 |
+
def extract_gpa(text):
    """Pull weighted and unweighted GPA values out of transcript text.

    Labelled values ("Weighted GPA: 4.2", "GPA (Unweighted): 3.8", ...) are
    read first; when several labelled values of the same kind occur, the last
    one found wins.  An unlabelled "GPA: 3.5" is then used only to fill
    whichever kind is still missing.

    Args:
        text: Raw transcript text to search (matching is case-insensitive).

    Returns:
        A dict with the keys 'weighted' and 'unweighted'.  Each value is the
        matched GPA as a string (one digit, a dot, one or two decimals), or
        "N/A" when nothing was found.
    """
    gpa_data = {'weighted': "N/A", 'unweighted': "N/A"}
    value = r'[\s:]*(\d\.\d{1,2})'

    # Each pattern carries its kind explicitly.  Testing the pattern text for
    # the substring 'weighted' cannot work because 'unweighted' contains it.
    labelled_patterns = [
        # The lookbehind stops "Weighted GPA" from matching inside
        # "Unweighted GPA" when IGNORECASE is active.
        ('weighted', r'(?<![A-Za-z])Weighted GPA' + value),
        ('weighted', r'GPA \(Weighted\)' + value),
        ('weighted', r'Cumulative GPA \(Weighted\)' + value),
        ('unweighted', r'Unweighted GPA' + value),
        ('unweighted', r'GPA \(Unweighted\)' + value),
        ('unweighted', r'Cumulative GPA \(Unweighted\)' + value),
    ]
    for kind, pattern in labelled_patterns:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            gpa_data[kind] = match.group(1)

    if "N/A" in gpa_data.values():
        # Generic fallback: skip "GPA" occurrences that belong to a
        # "Weighted GPA"/"Unweighted GPA" label, otherwise a labelled value
        # would be copied into the other field.
        generic = re.search(r'(?<!weighted )GPA' + value, text, re.IGNORECASE)
        if generic:
            for kind in ('unweighted', 'weighted'):
                if gpa_data[kind] == "N/A":
                    gpa_data[kind] = generic.group(1)

    return gpa_data
|
42 |
+
|
43 |
+
def extract_courses_with_regex(text):
    """Find course entries in transcript text using regular expressions.

    Three line-anchored patterns are tried in turn and all their matches are
    collected, in pattern order:

    1. a course code such as "MATH 101" or "CS-201A", optionally followed by
       a letter grade or a percentage;
    2. a title-case multi-word course name, optionally followed by a grade;
    3. a bare department code followed by a three-digit number (only the
       department code is captured, and no grade).

    Args:
        text: Transcript text; each course is expected at the start of a line.

    Returns:
        A list of dicts with the keys 'name' (str) and 'grade' (str or None
        when the pattern captured no grade).
    """
    patterns = [
        r'(?:^|\n)([A-Z]{2,}\s*-?\s*\d{3}[A-Z]?\b)\s*([A-F][+-]?|\d{2,3}%)?',
        r'(?:^|\n)([A-Z][a-z]+(?:\s+[A-Z]?[a-z]+)+)\s*[:\-]?\s*([A-F][+-]?|\d{2,3}%)?',
        r'(?:^|\n)([A-Z]{2,})\s*\d{3}\b'
    ]
    courses = []
    for pattern in patterns:
        for match in re.finditer(pattern, text, re.MULTILINE):
            course_name = match.group(1).strip()
            # The third pattern has no grade group; asking for group(2) on it
            # raises IndexError, so only read it when the pattern defines it.
            raw_grade = match.group(2) if match.re.groups >= 2 else None
            grade = raw_grade.strip() if raw_grade else None
            courses.append({'name': course_name, 'grade': grade})
    return courses
|
56 |
+
|
57 |
+
def extract_grade_levels(text):
    """Split transcript text into sections, one per grade-level heading.

    A heading is "Grade", "Year" or "Term" (any letter case), an optional
    colon, and then either a number or one of Freshman/Sophomore/Junior/
    Senior.  Each section runs from its heading up to the next heading, or to
    the end of the text for the last one.  Text before the first heading is
    not part of any section.

    Args:
        text: Full transcript text.

    Returns:
        A list of dicts in document order, each with:
        'grade' - the heading's level; named levels are normalised to
                  "Freshman"-style capitalisation so that differently cased
                  headings group under the same key (numbers are unchanged);
        'text'  - the slice of ``text`` belonging to that section, heading
                  included.
    """
    grade_pattern = r'(?:Grade|Year|Term)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)\b'
    headings = list(re.finditer(grade_pattern, text, re.IGNORECASE))

    # Section i ends where heading i+1 starts; the last one ends at len(text).
    boundaries = [heading.start() for heading in headings] + [len(text)]

    return [
        {
            # The match is case-insensitive, so normalise the label before it
            # is used as a grouping key by callers.
            'grade': heading.group(1).capitalize(),
            'text': text[boundaries[i]:boundaries[i + 1]],
        }
        for i, heading in enumerate(headings)
    ]
|
69 |
|
70 |
def parse_transcript(file):
|
71 |
+
if file.name.endswith('.pdf'):
|
|
|
|
|
|
|
|
|
72 |
text = ''
|
73 |
reader = PdfReader(file)
|
74 |
for page in reader.pages:
|
75 |
+
text += page.extract_text() + '\n'
|
76 |
+
|
77 |
+
# Try both NER and regex approaches
|
78 |
+
courses = []
|
79 |
+
if ner_pipeline:
|
80 |
+
try:
|
81 |
+
entities = ner_pipeline(text)
|
82 |
+
current_course = {}
|
83 |
+
for entity in entities:
|
84 |
+
if entity['word'].startswith('##'):
|
85 |
+
current_course['name'] = current_course.get('name', '') + entity['word'][2:]
|
86 |
+
elif entity['entity'] in ['B-ORG', 'I-ORG']: # Using ORG as proxy for courses
|
87 |
+
if 'name' in current_course:
|
88 |
+
courses.append(current_course)
|
89 |
+
current_course = {'name': entity['word']}
|
90 |
+
elif entity['entity'] == 'GRADE' and current_course:
|
91 |
+
current_course['grade'] = entity['word']
|
92 |
+
if current_course:
|
93 |
+
courses.append(current_course)
|
94 |
+
except Exception as e:
|
95 |
+
print(f"NER failed: {e}")
|
96 |
+
|
97 |
+
# Fallback to regex if NER didn't find courses
|
98 |
+
if not courses:
|
99 |
+
courses = extract_courses_with_regex(text)
|
100 |
+
|
101 |
+
# Organize by grade level
|
102 |
+
grade_sections = extract_grade_levels(text)
|
103 |
+
courses_by_grade = defaultdict(list)
|
104 |
+
|
105 |
+
if grade_sections:
|
106 |
+
for section in grade_sections:
|
107 |
+
section_courses = extract_courses_with_regex(section['text'])
|
108 |
+
for course in section_courses:
|
109 |
+
course['term'] = section['grade']
|
110 |
+
courses_by_grade[section['grade']].append(course)
|
|
|
|
|
|
|
|
|
|
|
111 |
else:
|
112 |
+
courses_by_grade["All"] = courses
|
113 |
+
|
114 |
+
gpa_data = extract_gpa(text)
|
115 |
+
|
116 |
+
output_text = "Transcript parsed successfully\n"
|
117 |
+
output_text += f"Found {len(courses)} courses across {len(courses_by_grade)} grade levels\n"
|
118 |
+
|
119 |
return output_text, {
|
120 |
"gpa": gpa_data,
|
121 |
+
"courses": dict(courses_by_grade)
|
|
|
122 |
}
|
123 |
+
elif file.name.endswith('.csv'):
|
124 |
+
df = pd.read_csv(file)
|
125 |
+
elif file.name.endswith('.xlsx'):
|
126 |
+
df = pd.read_excel(file)
|
127 |
else:
|
128 |
return "Unsupported file format", None
|
129 |
|
130 |
+
# Fallback for CSV/Excel
|
131 |
gpa = "N/A"
|
132 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
133 |
if col in df.columns:
|
|
|
152 |
"courses": courses
|
153 |
}
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
# ========== LEARNING STYLE QUIZ ==========
|
156 |
learning_style_questions = [
|
157 |
"When you study for a test, you prefer to:",
|
|
|
269 |
|
270 |
return result
|
271 |
|
272 |
+
# ========== SAVE STUDENT PROFILE ==========
|
273 |
+
def save_profile(name, age, interests, transcript, learning_style,
|
274 |
+
movie, movie_reason, show, show_reason,
|
275 |
+
book, book_reason, character, character_reason, blog):
|
276 |
# Convert age to int if it's a numpy number (from gradio Number input)
|
277 |
age = int(age) if age else 0
|
278 |
|
|
|
319 |
return markdown_summary
|
320 |
|
321 |
def _format_course_line(course, include_term=False):
    """Return one markdown bullet line (with trailing newline) for a course."""
    if not isinstance(course, dict):
        return f"- {str(course)}\n"

    line = f"- {course.get('name', 'N/A')}"
    # Regex-extracted courses always carry a 'grade' key, possibly None;
    # only show the grade when there actually is one.
    grade = course.get('grade')
    if grade not in (None, ""):
        line += f" (Grade: {grade})"
    if include_term and 'term' in course:
        line += f" | Term: {course['term']}"
    return line + "\n"


def transcript_display(transcript_dict):
    """Render parsed transcript data as a markdown summary.

    Args:
        transcript_dict: Parsed transcript data.  Expected keys:
            'courses' - either a dict mapping a grade level to a list of
                        courses, or a flat list of courses.  A course is a
                        dict ('name', optional 'grade' and 'term') or any
                        other value, which is shown via ``str()``.
            'gpa'     - optional; a dict with 'unweighted'/'weighted' keys,
                        or a single plain value.

    Returns:
        A markdown string, or "No course information available" when the
        input is empty or has no 'courses' key.
    """
    if not transcript_dict or "courses" not in transcript_dict:
        return "No course information available"

    parts = ["### Course History\n\n"]
    courses_by_grade = transcript_dict["courses"]

    if isinstance(courses_by_grade, dict):
        for grade, courses in courses_by_grade.items():
            parts.append(f"**{grade}**\n")
            parts.extend(
                _format_course_line(course, include_term=True)
                for course in courses
            )
            parts.append("\n")
    elif isinstance(courses_by_grade, list):
        parts.extend(_format_course_line(course) for course in courses_by_grade)

    if 'gpa' in transcript_dict:
        gpa = transcript_dict['gpa']
        parts.append("\n**GPA Information**\n")
        if isinstance(gpa, dict):
            parts.append(f"- Unweighted: {gpa.get('unweighted', 'N/A')}\n")
            parts.append(f"- Weighted: {gpa.get('weighted', 'N/A')}\n")
        else:
            # The GPA may arrive as a single plain value rather than a dict;
            # calling .get() on it would raise AttributeError.
            parts.append(f"- GPA: {gpa}\n")

    return "".join(parts)
|
359 |
|
360 |
# ========== AI TEACHING ASSISTANT ==========
|
361 |
def load_profile():
|
|
|
446 |
# ========== GRADIO INTERFACE ==========
|
447 |
with gr.Blocks() as app:
|
448 |
with gr.Tab("Step 1: Upload Transcript"):
|
449 |
+
gr.Markdown("### Upload your transcript (PDF recommended for best results)")
|
450 |
+
transcript_file = gr.File(label="Transcript file", file_types=[".pdf", ".csv", ".xlsx"])
|
451 |
+
transcript_output = gr.Textbox(label="Parsing Results")
|
452 |
transcript_data = gr.State()
|
453 |
+
transcript_file.change(
|
454 |
+
fn=parse_transcript,
|
455 |
+
inputs=transcript_file,
|
456 |
+
outputs=[transcript_output, transcript_data]
|
457 |
+
)
|
458 |
|
459 |
with gr.Tab("Step 2: Learning Style Quiz"):
|
460 |
gr.Markdown("### Learning Style Quiz (20 Questions)")
|
461 |
quiz_components = []
|
462 |
for i, (question, options) in enumerate(zip(learning_style_questions, learning_style_options)):
|
463 |
+
quiz_components.append(gr.Radio(options, label=f"{i+1}. {question}"))
|
|
|
|
|
464 |
|
465 |
+
learning_output = gr.Textbox(label="Your Learning Style", lines=10)
|
466 |
gr.Button("Submit Quiz").click(
|
467 |
+
fn=learning_style_quiz,
|
468 |
inputs=quiz_components,
|
469 |
outputs=learning_output
|
470 |
)
|
|
|
488 |
with gr.Tab("Step 4: Save & Review"):
|
489 |
output_summary = gr.Markdown()
|
490 |
save_btn = gr.Button("Save Profile")
|
|
|
491 |
save_btn.click(
|
492 |
fn=save_profile,
|
493 |
inputs=[name, age, interests, transcript_data, learning_output,
|
|
|
496 |
outputs=output_summary
|
497 |
)
|
498 |
|
|
|
499 |
with gr.Tab("🤖 AI Teaching Assistant"):
|
500 |
gr.Markdown("## Your Personalized Learning Assistant")
|
501 |
chatbot = gr.ChatInterface(
|
|
|
509 |
)
|
510 |
|
511 |
if __name__ == "__main__":
|
512 |
+
app.launch()
|
513 |
|