Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -85,94 +85,122 @@ def extract_text_with_ocr(file_path: str) -> str:
|
|
85 |
|
86 |
# ========== TRANSCRIPT PARSING ==========
|
87 |
def extract_gpa(text: str, gpa_type: str) -> str:
|
88 |
-
"""
|
89 |
-
|
90 |
-
rf'{gpa_type}\s*GPA\s
|
91 |
-
rf'{gpa_type}\s*GPA\s
|
92 |
-
rf'{gpa_type}\s
|
93 |
-
rf'{gpa_type}\s*([
|
|
|
|
|
94 |
]
|
95 |
|
96 |
-
for pattern in
|
97 |
match = re.search(pattern, text, re.IGNORECASE)
|
98 |
if match:
|
99 |
gpa_value = match.group(1)
|
100 |
try:
|
101 |
gpa_float = float(gpa_value)
|
102 |
-
if not 0.0 <= gpa_float <= 5.0:
|
103 |
return "Invalid GPA"
|
104 |
-
return
|
105 |
except ValueError:
|
106 |
continue
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
return "N/A"
|
109 |
|
110 |
def extract_courses_from_table(text: str) -> Dict[str, List[Dict]]:
|
111 |
-
"""
|
112 |
-
#
|
|
|
|
|
|
|
|
|
113 |
patterns = [
|
114 |
-
# Pattern
|
115 |
re.compile(
|
116 |
-
r'(\
|
117 |
-
r'
|
118 |
-
r'
|
119 |
-
r'
|
120 |
-
r'(
|
121 |
-
r'
|
122 |
-
r'(?:\|\s*[^\|]*)' # Skip Incl column
|
123 |
-
r'\|\s*([\d\.]+|inProgress)' # Credits
|
124 |
),
|
125 |
-
# Pattern
|
126 |
re.compile(
|
127 |
-
r'(\d{4}-\d{4})\s+' #
|
128 |
-
r'(\d+)\s+'
|
129 |
-
r'([A-
|
130 |
-
r'(.+?)\s+'
|
131 |
-
r'([A-
|
132 |
-
r'(
|
133 |
),
|
134 |
-
#
|
135 |
re.compile(
|
136 |
-
r'(
|
137 |
-
r'(
|
138 |
-
r'([A-
|
139 |
-
r'(
|
140 |
-
r'([A-FW][+-]?)\s*' # Grade
|
141 |
-
r'([\d\.]+)' # Credits
|
142 |
)
|
143 |
]
|
144 |
|
145 |
courses_by_grade = defaultdict(list)
|
|
|
146 |
|
147 |
for pattern in patterns:
|
148 |
for match in re.finditer(pattern, text):
|
149 |
if len(match.groups()) == 6:
|
150 |
-
|
151 |
-
term = None
|
152 |
else:
|
153 |
-
|
154 |
-
|
|
|
|
|
155 |
|
156 |
-
#
|
157 |
-
|
158 |
-
if
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
course_info = {
|
164 |
-
'
|
165 |
-
'
|
166 |
-
'
|
|
|
|
|
167 |
}
|
168 |
|
169 |
-
if grade
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
return courses_by_grade
|
178 |
|
@@ -237,7 +265,7 @@ def parse_transcript(file_obj) -> Tuple[str, Optional[Dict]]:
|
|
237 |
for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
|
238 |
output_text += f"\nGrade {grade}:\n{'-'*30}\n"
|
239 |
for course in courses_by_grade[grade]:
|
240 |
-
output_text += f"- {course['name']}"
|
241 |
if 'grade' in course and course['grade']:
|
242 |
output_text += f" (Grade: {course['grade']})"
|
243 |
if 'credits' in course:
|
@@ -614,7 +642,7 @@ class ProfileManager:
|
|
614 |
for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
|
615 |
display += f"\n**Grade {grade}**\n"
|
616 |
for course in courses_by_grade[grade]:
|
617 |
-
display += f"- {course.get('name', 'Unnamed course')}"
|
618 |
if 'grade' in course and course['grade']:
|
619 |
display += f" (Grade: {course['grade']})"
|
620 |
if 'credits' in course:
|
@@ -787,7 +815,7 @@ class TeachingAssistant:
|
|
787 |
for grade_level, course_list in courses.items():
|
788 |
for course in course_list:
|
789 |
if course.get('grade', '').upper() in ['D', 'F']:
|
790 |
-
weak_subjects.append(course.get('name', 'Unknown course'))
|
791 |
|
792 |
if weak_subjects:
|
793 |
response += ("**Areas for Improvement**:\n"
|
@@ -823,7 +851,7 @@ class TeachingAssistant:
|
|
823 |
for grade in sorted(courses.keys(), key=lambda x: int(x) if x.isdigit() else x):
|
824 |
response += f"\n**Grade {grade}**:\n"
|
825 |
for course in courses[grade]:
|
826 |
-
response += f"- {course.get('name', 'Unnamed course')}"
|
827 |
if 'grade' in course:
|
828 |
response += f" (Grade: {course['grade']})"
|
829 |
response += "\n"
|
@@ -1100,6 +1128,9 @@ def create_interface():
|
|
1100 |
placeholder="e.g., Science, Music, Sports, Art..."
|
1101 |
)
|
1102 |
|
|
|
|
|
|
|
1103 |
gr.Markdown("### Favorites")
|
1104 |
with gr.Group():
|
1105 |
movie = gr.Textbox(label="Favorite Movie")
|
@@ -1130,29 +1161,17 @@ def create_interface():
|
|
1130 |
outputs=blog_text
|
1131 |
)
|
1132 |
|
1133 |
-
|
1134 |
-
def check_personal_info_complete(name, age, interests, current_tab_status):
|
1135 |
if name.strip() and age and interests.strip():
|
1136 |
new_status = current_tab_status.copy()
|
1137 |
new_status[2] = True
|
1138 |
-
return new_status, gr.update(elem_classes="completed-tab"), gr.update(interactive=True), gr.update(visible=False)
|
1139 |
-
return current_tab_status, gr.update(), gr.update(), gr.update()
|
1140 |
|
1141 |
-
|
1142 |
-
|
1143 |
-
fn=check_personal_info_complete,
|
1144 |
-
inputs=[name, age, interests, tab_completed],
|
1145 |
-
outputs=[tab_completed, step3, step4, nav_message]
|
1146 |
-
)
|
1147 |
-
age.change(
|
1148 |
-
fn=check_personal_info_complete,
|
1149 |
inputs=[name, age, interests, tab_completed],
|
1150 |
-
outputs=[tab_completed, step3, step4, nav_message]
|
1151 |
-
)
|
1152 |
-
interests.change(
|
1153 |
-
fn=check_personal_info_complete,
|
1154 |
-
inputs=[name, age, interests, tab_completed],
|
1155 |
-
outputs=[tab_completed, step3, step4, nav_message]
|
1156 |
)
|
1157 |
|
1158 |
# ===== TAB 4: Save & Review =====
|
@@ -1251,7 +1270,7 @@ def create_interface():
|
|
1251 |
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
|
1252 |
outputs=load_btn
|
1253 |
).then(
|
1254 |
-
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
|
1255 |
outputs=delete_btn
|
1256 |
)
|
1257 |
|
@@ -1301,34 +1320,35 @@ def create_interface():
|
|
1301 |
# Check if current tab is completed
|
1302 |
if not tab_completed_status.get(current_tab, False):
|
1303 |
return gr.Tabs(selected=current_tab), \
|
1304 |
-
gr.update(value=f"<div class='nav-message'>Please complete the current tab before proceeding to tab {tab_index + 1}</div>", visible=True)
|
|
|
1305 |
|
1306 |
-
return gr.Tabs(selected=tab_index), gr.update(visible=False)
|
1307 |
|
1308 |
step1.click(
|
1309 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1310 |
inputs=[gr.State(0), tab_completed],
|
1311 |
-
outputs=[tabs, nav_message]
|
1312 |
)
|
1313 |
step2.click(
|
1314 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1315 |
inputs=[gr.State(1), tab_completed],
|
1316 |
-
outputs=[tabs, nav_message]
|
1317 |
)
|
1318 |
step3.click(
|
1319 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1320 |
inputs=[gr.State(2), tab_completed],
|
1321 |
-
outputs=[tabs, nav_message]
|
1322 |
)
|
1323 |
step4.click(
|
1324 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1325 |
inputs=[gr.State(3), tab_completed],
|
1326 |
-
outputs=[tabs, nav_message]
|
1327 |
)
|
1328 |
step5.click(
|
1329 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1330 |
inputs=[gr.State(4), tab_completed],
|
1331 |
-
outputs=[tabs, nav_message]
|
1332 |
)
|
1333 |
|
1334 |
return app
|
|
|
85 |
|
86 |
# ========== TRANSCRIPT PARSING ==========
|
87 |
def extract_gpa(text: str, gpa_type: str) -> str:
    """Extract the GPA of the given type from transcript text.

    Several labelled layouts are tried in order ("<type> GPA: 3.50",
    "<type>: 3.50", "GPA (<type>): 3.50"); if none matches, the first
    GPA-like number that follows the label on the same line is used.

    Args:
        text: Raw transcript text to search.
        gpa_type: Label that qualifies the GPA, e.g. "Weighted". It is
            matched literally and case-insensitively.

    Returns:
        The GPA formatted with two decimals, "Invalid GPA" when the number
        found lies outside 0.0-5.0, or "N/A" when nothing is found.
    """
    if not text or not gpa_type:
        return "N/A"

    def _validated(raw: str) -> str:
        # Every capture below is digits-dot-digits, so float() cannot fail.
        value = float(raw)
        return f"{value:.2f}" if 0.0 <= value <= 5.0 else "Invalid GPA"

    escaped_type = re.escape(gpa_type)
    # Forbid a preceding word character so that "Weighted" is not found
    # inside "Unweighted".
    label = rf'(?<!\w){escaped_type}'
    number = r'([0-5]\.\d{2}|\d\.\d)'

    gpa_patterns = [
        rf'{label}\s*GPA\s*[:=]?\s*{number}',           # Weighted GPA: 3.50
        rf'{label}\s*[:=]?\s*{number}',                 # Weighted: 3.50
        rf'GPA\s*\({escaped_type}\)\s*[:=]?\s*{number}',  # GPA (Weighted): 3.50
    ]

    for pattern in gpa_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return _validated(match.group(1))

    # Fallback: any GPA-like number after the label. The pattern is passed
    # as a string because re.search() rejects a flags argument together
    # with an already compiled pattern. The look-arounds keep it from
    # cutting "2.5" out of a larger number such as "12.5".
    match = re.search(
        rf'{label}.*?(?<![\d.])([0-5]\.\d{{1,2}})(?!\d)',
        text,
        re.IGNORECASE,
    )
    if match:
        return _validated(match.group(1))

    return "N/A"
|
117 |
|
118 |
def _normalize_course_code(code: str) -> str:
    """Strip *code* and put a space after a leading "AP" or "DE" prefix."""
    code = code.strip()
    for prefix in ('AP', 'DE'):
        remainder = code[len(prefix):]
        # Only a prefix at the very start is split, so codes that merely
        # contain the letters (e.g. "MAP101") are left untouched.
        # NOTE(review): a subject abbreviation that starts with these letters
        # (e.g. "DESIGN101") is still split - confirm real code formats.
        if code.startswith(prefix) and remainder and not remainder[0].isspace():
            return f'{prefix} {remainder}'
    return code


def _course_record(code, name, grade_mark, credits, year) -> Dict:
    """Build the per-course dict stored under each grade level."""
    return {
        'code': code,
        'name': name,
        'grade': grade_mark.strip() if grade_mark else None,
        'credits': credits if credits else '0',
        'year': year,
    }


def extract_courses_from_table(text: str) -> Dict[str, List[Dict]]:
    """Extract course rows from transcript text, grouped by grade level.

    Three layouts are tried in turn: labelled fields ("Year: ... Grade: ..."),
    a condensed row ("2020-2021 9 MATH101 Algebra I A 1.0") and a row without
    year or grade level ("MATH101 Algebra I A 1.0"). If none of them yields a
    course, a last-resort pattern collects "<code> <name> [grade] [credits]"
    runs.

    Args:
        text: Raw transcript text; all whitespace runs are collapsed first.

    Returns:
        A defaultdict mapping grade level ("9", "10", ... or "Unknown") to a
        list of dicts with the keys 'code', 'name', 'grade', 'credits' and
        'year'.
    """
    courses_by_grade = defaultdict(list)
    if not text:
        return courses_by_grade

    # \s+ already covers newlines, so one substitution normalizes everything.
    text = re.sub(r'\s+', ' ', text)

    patterns = [
        # Labelled fields
        re.compile(
            r'(?:Year|Term|Semester)[\s:]*(.*?)\s*'      # Year/Semester
            r'(?:Grade|Level)[\s:]*(.*?)\s*'             # Grade level
            r'(?:Course\s*Code|Code)[\s:]*(.*?)\s*'      # Course code
            r'(?:Course\s*Name|Title)[\s:]*(.*?)\s*'     # Course name
            r'(?:Grade|Mark)[\s:]*(.*?)\s*'              # Grade
            r'(?:Credits|Units)[\s:]*(.*?)(?:\s|$)'      # Credits
        ),
        # Condensed row
        re.compile(
            r'(\d{4}-\d{4}|\w+\s\d{4})\s+'               # Year range or Semester Year
            r'(\d+)\s+'                                  # Grade level
            r'([A-Z]+\s*\d+[A-Z]*)\s+'                   # Course code
            r'(.+?)\s+'                                  # Course name
            r'([A-F][+-]?|[PWI])\s+'                     # Grade
            r'(\d+\.?\d*)'                               # Credits
        ),
        # Row without year / grade level
        re.compile(
            r'([A-Z]+\s*\d+[A-Z]*)\s+'                   # Course code
            r'(.+?)\s+'                                  # Course name
            r'(?:Grade\s*:\s*)?([A-F][+-]?|[PWI])\s*'    # Grade
            r'(?:Credits\s*:\s*)?(\d+\.?\d*)'            # Credits
        ),
    ]

    seen = set()            # (code, name, year) triples already stored
    seen_code_name = set()  # (code, name) pairs already stored with any year

    for pattern in patterns:
        for match in pattern.finditer(text):
            groups = match.groups()
            if len(groups) == 6:
                year, grade, code, name, grade_mark, credits = groups
            else:
                code, name, grade_mark, credits = groups
                year = grade = 'Unknown'

            code = _normalize_course_code(code)
            name = name.strip()
            if not code or not name:
                # The labelled pattern also matches a bare header row
                # ("Year Grade Code Title ..."), which carries no course.
                continue
            year = year.strip() or 'Unknown'
            grade = grade.strip() or 'Unknown'

            if (code, name, year) in seen:
                continue
            # The year-less pattern re-matches rows already captured with a
            # year; without this check each course would be stored twice.
            if year == 'Unknown' and (code, name) in seen_code_name:
                continue
            seen.add((code, name, year))
            seen_code_name.add((code, name))

            courses_by_grade[grade].append(
                _course_record(code, name, grade_mark, credits, year)
            )

    if not courses_by_grade:
        # Last resort: a course code followed by a description. The name is
        # lazy, so it needs an explicit terminator (the next course code or
        # the end of the text); otherwise it would always be one character.
        # The length cap keeps a stray code-like token from swallowing the
        # rest of the document as its "name".
        fallback_pattern = re.compile(
            r'([A-Z]+\s*\d+[A-Z]*)\s+'                   # Course code
            r'(.{1,80}?)'                                # Course name
            r'(?:\s+([A-F][+-]?|[PWI]))?'                # Optional grade
            r'(?:\s+(\d+\.?\d*))?'                       # Optional credits
            r'(?=\s+[A-Z]+\s*\d+[A-Z]*(?:\s|$)|\s*$)'    # Next code or end
        )
        for match in fallback_pattern.finditer(text):
            code, name, grade_mark, credits = match.groups()
            code = _normalize_course_code(code)
            name = name.strip()
            if not code or not name:
                continue
            courses_by_grade['Unknown'].append(
                _course_record(code, name, grade_mark, credits, 'Unknown')
            )

    return courses_by_grade
|
206 |
|
|
|
265 |
for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
|
266 |
output_text += f"\nGrade {grade}:\n{'-'*30}\n"
|
267 |
for course in courses_by_grade[grade]:
|
268 |
+
output_text += f"- {course['code']} {course['name']}"
|
269 |
if 'grade' in course and course['grade']:
|
270 |
output_text += f" (Grade: {course['grade']})"
|
271 |
if 'credits' in course:
|
|
|
642 |
for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
|
643 |
display += f"\n**Grade {grade}**\n"
|
644 |
for course in courses_by_grade[grade]:
|
645 |
+
display += f"- {course.get('code', '')} {course.get('name', 'Unnamed course')}"
|
646 |
if 'grade' in course and course['grade']:
|
647 |
display += f" (Grade: {course['grade']})"
|
648 |
if 'credits' in course:
|
|
|
815 |
for grade_level, course_list in courses.items():
|
816 |
for course in course_list:
|
817 |
if course.get('grade', '').upper() in ['D', 'F']:
|
818 |
+
weak_subjects.append(f"{course.get('code', '')} {course.get('name', 'Unknown course')}")
|
819 |
|
820 |
if weak_subjects:
|
821 |
response += ("**Areas for Improvement**:\n"
|
|
|
851 |
for grade in sorted(courses.keys(), key=lambda x: int(x) if x.isdigit() else x):
|
852 |
response += f"\n**Grade {grade}**:\n"
|
853 |
for course in courses[grade]:
|
854 |
+
response += f"- {course.get('code', '')} {course.get('name', 'Unnamed course')}"
|
855 |
if 'grade' in course:
|
856 |
response += f" (Grade: {course['grade']})"
|
857 |
response += "\n"
|
|
|
1128 |
placeholder="e.g., Science, Music, Sports, Art..."
|
1129 |
)
|
1130 |
|
1131 |
+
save_personal_btn = gr.Button("Save Information", variant="primary")
|
1132 |
+
save_confirmation = gr.HTML(visible=False)
|
1133 |
+
|
1134 |
gr.Markdown("### Favorites")
|
1135 |
with gr.Group():
|
1136 |
movie = gr.Textbox(label="Favorite Movie")
|
|
|
1161 |
outputs=blog_text
|
1162 |
)
|
1163 |
|
1164 |
+
def save_personal_info(name, age, interests, current_tab_status):
    """Mark the personal-info step complete once all three fields are filled.

    Returns five values, in the order the save button's click handler lists
    its outputs: tab status, step3, step4, save_confirmation, nav_message.
    The incoming status mapping is copied, never mutated.
    """
    form_is_filled = bool(name.strip() and age and interests.strip())

    if not form_is_filled:
        # Nothing is saved: keep the status, hide the confirmation and make
        # the navigation message visible.
        return (
            current_tab_status,
            gr.update(),
            gr.update(),
            gr.update(visible=False),
            gr.update(visible=True),
        )

    updated_status = current_tab_status.copy()
    updated_status[2] = True
    return (
        updated_status,
        gr.update(elem_classes="completed-tab"),
        gr.update(interactive=True),
        gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True),
        gr.update(visible=False),
    )
|
1170 |
|
1171 |
+
save_personal_btn.click(
|
1172 |
+
fn=save_personal_info,
|
|
|
|
|
|
|
|
|
|
|
|
|
1173 |
inputs=[name, age, interests, tab_completed],
|
1174 |
+
outputs=[tab_completed, step3, step4, save_confirmation, nav_message]
|
|
|
|
|
|
|
|
|
|
|
1175 |
)
|
1176 |
|
1177 |
# ===== TAB 4: Save & Review =====
|
|
|
1270 |
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
|
1271 |
outputs=load_btn
|
1272 |
).then(
|
1273 |
+
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value)))),
|
1274 |
outputs=delete_btn
|
1275 |
)
|
1276 |
|
|
|
1320 |
# Check if current tab is completed
|
1321 |
if not tab_completed_status.get(current_tab, False):
|
1322 |
return gr.Tabs(selected=current_tab), \
|
1323 |
+
gr.update(value=f"<div class='nav-message'>Please complete the current tab before proceeding to tab {tab_index + 1}</div>", visible=True), \
|
1324 |
+
gr.update(visible=False)
|
1325 |
|
1326 |
+
return gr.Tabs(selected=tab_index), gr.update(visible=False), gr.update(visible=False)
|
1327 |
|
1328 |
step1.click(
|
1329 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1330 |
inputs=[gr.State(0), tab_completed],
|
1331 |
+
outputs=[tabs, nav_message, quiz_alert]
|
1332 |
)
|
1333 |
step2.click(
|
1334 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1335 |
inputs=[gr.State(1), tab_completed],
|
1336 |
+
outputs=[tabs, nav_message, quiz_alert]
|
1337 |
)
|
1338 |
step3.click(
|
1339 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1340 |
inputs=[gr.State(2), tab_completed],
|
1341 |
+
outputs=[tabs, nav_message, quiz_alert]
|
1342 |
)
|
1343 |
step4.click(
|
1344 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1345 |
inputs=[gr.State(3), tab_completed],
|
1346 |
+
outputs=[tabs, nav_message, quiz_alert]
|
1347 |
)
|
1348 |
step5.click(
|
1349 |
fn=lambda idx, status: navigate_to_tab(idx, status),
|
1350 |
inputs=[gr.State(4), tab_completed],
|
1351 |
+
outputs=[tabs, nav_message, quiz_alert]
|
1352 |
)
|
1353 |
|
1354 |
return app
|