Dannyar608 committed on
Commit
5e10c69
·
verified ·
1 Parent(s): 9cf39ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -84
app.py CHANGED
@@ -85,94 +85,122 @@ def extract_text_with_ocr(file_path: str) -> str:
85
 
86
  # ========== TRANSCRIPT PARSING ==========
87
  def extract_gpa(text: str, gpa_type: str) -> str:
88
- """Extract GPA information from text with validation."""
89
- patterns = [
90
- rf'{gpa_type}\s*GPA\s*:\s*([\d\.]+)', # "Weighted GPA: 3.5"
91
- rf'{gpa_type}\s*GPA\s*([\d\.]+)', # "Weighted GPA 3.5"
92
- rf'{gpa_type}\s*:\s*([\d\.]+)', # "Weighted: 3.5"
93
- rf'{gpa_type}\s*([\d\.]+)' # "Weighted 3.5"
 
 
94
  ]
95
 
96
- for pattern in patterns:
97
  match = re.search(pattern, text, re.IGNORECASE)
98
  if match:
99
  gpa_value = match.group(1)
100
  try:
101
  gpa_float = float(gpa_value)
102
- if not 0.0 <= gpa_float <= 5.0: # Assuming 5.0 is max for weighted GPA
103
  return "Invalid GPA"
104
- return gpa_value
105
  except ValueError:
106
  continue
107
 
 
 
 
 
 
 
108
  return "N/A"
109
 
110
  def extract_courses_from_table(text: str) -> Dict[str, List[Dict]]:
111
- """Extract course information with multiple pattern fallbacks."""
112
- # Enhanced patterns to handle more transcript formats
 
 
 
 
113
  patterns = [
114
- # Pattern 1: Structured table format
115
  re.compile(
116
- r'(\d{4}-\d{4})\s*' # School year
117
- r'\|?\s*(\d+)\s*' # Grade level
118
- r'\|?\s*([A-Z0-9]+)\s*' # Course code
119
- r'\|?\s*([^\|]+?)\s*' # Course name
120
- r'(?:\|\s*[^\|]*){2}' # Skip Term and DstNumber
121
- r'\|\s*([A-FW][+-]?)\s*' # Grade (FG column)
122
- r'(?:\|\s*[^\|]*)' # Skip Incl column
123
- r'\|\s*([\d\.]+|inProgress)' # Credits
124
  ),
125
- # Pattern 2: Less structured format
126
  re.compile(
127
- r'(\d{4}-\d{4})\s+' # School year
128
- r'(\d+)\s+' # Grade level
129
- r'([A-Z0-9]+)\s+' # Course code
130
- r'(.+?)\s+' # Course name
131
- r'([A-FW][+-]?)\s*' # Grade
132
- r'([\d\.]+|inProgress)' # Credits
133
  ),
134
- # Pattern 3: Semester-based format
135
  re.compile(
136
- r'(Fall|Spring|Summer)\s+(\d{4})\s+' # Term and year
137
- r'(\d+)\s+' # Grade level
138
- r'([A-Z0-9]+)\s+' # Course code
139
- r'(.+?)\s+' # Course name
140
- r'([A-FW][+-]?)\s*' # Grade
141
- r'([\d\.]+)' # Credits
142
  )
143
  ]
144
 
145
  courses_by_grade = defaultdict(list)
 
146
 
147
  for pattern in patterns:
148
  for match in re.finditer(pattern, text):
149
  if len(match.groups()) == 6:
150
- year_range, grade_level, course_code, course_name, grade, credits = match.groups()
151
- term = None
152
  else:
153
- term, year, grade_level, course_code, course_name, grade, credits = match.groups()
154
- year_range = f"{term} {year}"
 
 
155
 
156
- # Clean and format course information
157
- course_name = course_name.strip()
158
- if 'DE:' in course_name:
159
- course_name = course_name.replace('DE:', 'Dual Enrollment:')
160
- if 'AP' in course_name and 'AP ' not in course_name:
161
- course_name = course_name.replace('AP', 'AP ')
 
 
 
 
 
 
 
162
 
163
  course_info = {
164
- 'name': f"{course_code} {course_name}",
165
- 'year': year_range,
166
- 'credits': credits if credits != 'inProgress' else 'In Progress'
 
 
167
  }
168
 
169
- if grade and grade.strip():
170
- course_info['grade'] = grade.strip()
171
-
172
- courses_by_grade[grade_level].append(course_info)
173
-
174
- if courses_by_grade: # If we found matches with this pattern, stop
175
- break
 
 
 
 
 
 
 
 
 
176
 
177
  return courses_by_grade
178
 
@@ -237,7 +265,7 @@ def parse_transcript(file_obj) -> Tuple[str, Optional[Dict]]:
237
  for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
238
  output_text += f"\nGrade {grade}:\n{'-'*30}\n"
239
  for course in courses_by_grade[grade]:
240
- output_text += f"- {course['name']}"
241
  if 'grade' in course and course['grade']:
242
  output_text += f" (Grade: {course['grade']})"
243
  if 'credits' in course:
@@ -614,7 +642,7 @@ class ProfileManager:
614
  for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
615
  display += f"\n**Grade {grade}**\n"
616
  for course in courses_by_grade[grade]:
617
- display += f"- {course.get('name', 'Unnamed course')}"
618
  if 'grade' in course and course['grade']:
619
  display += f" (Grade: {course['grade']})"
620
  if 'credits' in course:
@@ -787,7 +815,7 @@ class TeachingAssistant:
787
  for grade_level, course_list in courses.items():
788
  for course in course_list:
789
  if course.get('grade', '').upper() in ['D', 'F']:
790
- weak_subjects.append(course.get('name', 'Unknown course'))
791
 
792
  if weak_subjects:
793
  response += ("**Areas for Improvement**:\n"
@@ -823,7 +851,7 @@ class TeachingAssistant:
823
  for grade in sorted(courses.keys(), key=lambda x: int(x) if x.isdigit() else x):
824
  response += f"\n**Grade {grade}**:\n"
825
  for course in courses[grade]:
826
- response += f"- {course.get('name', 'Unnamed course')}"
827
  if 'grade' in course:
828
  response += f" (Grade: {course['grade']})"
829
  response += "\n"
@@ -1100,6 +1128,9 @@ def create_interface():
1100
  placeholder="e.g., Science, Music, Sports, Art..."
1101
  )
1102
 
 
 
 
1103
  gr.Markdown("### Favorites")
1104
  with gr.Group():
1105
  movie = gr.Textbox(label="Favorite Movie")
@@ -1130,29 +1161,17 @@ def create_interface():
1130
  outputs=blog_text
1131
  )
1132
 
1133
- # Check if required fields are filled to mark as complete
1134
- def check_personal_info_complete(name, age, interests, current_tab_status):
1135
  if name.strip() and age and interests.strip():
1136
  new_status = current_tab_status.copy()
1137
  new_status[2] = True
1138
- return new_status, gr.update(elem_classes="completed-tab"), gr.update(interactive=True), gr.update(visible=False)
1139
- return current_tab_status, gr.update(), gr.update(), gr.update()
1140
 
1141
- # Monitor changes to required fields
1142
- name.change(
1143
- fn=check_personal_info_complete,
1144
- inputs=[name, age, interests, tab_completed],
1145
- outputs=[tab_completed, step3, step4, nav_message]
1146
- )
1147
- age.change(
1148
- fn=check_personal_info_complete,
1149
  inputs=[name, age, interests, tab_completed],
1150
- outputs=[tab_completed, step3, step4, nav_message]
1151
- )
1152
- interests.change(
1153
- fn=check_personal_info_complete,
1154
- inputs=[name, age, interests, tab_completed],
1155
- outputs=[tab_completed, step3, step4, nav_message]
1156
  )
1157
 
1158
  # ===== TAB 4: Save & Review =====
@@ -1251,7 +1270,7 @@ def create_interface():
1251
  fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
1252
  outputs=load_btn
1253
  ).then(
1254
- fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
1255
  outputs=delete_btn
1256
  )
1257
 
@@ -1301,34 +1320,35 @@ def create_interface():
1301
  # Check if current tab is completed
1302
  if not tab_completed_status.get(current_tab, False):
1303
  return gr.Tabs(selected=current_tab), \
1304
- gr.update(value=f"<div class='nav-message'>Please complete the current tab before proceeding to tab {tab_index + 1}</div>", visible=True)
 
1305
 
1306
- return gr.Tabs(selected=tab_index), gr.update(visible=False)
1307
 
1308
  step1.click(
1309
  fn=lambda idx, status: navigate_to_tab(idx, status),
1310
  inputs=[gr.State(0), tab_completed],
1311
- outputs=[tabs, nav_message]
1312
  )
1313
  step2.click(
1314
  fn=lambda idx, status: navigate_to_tab(idx, status),
1315
  inputs=[gr.State(1), tab_completed],
1316
- outputs=[tabs, nav_message]
1317
  )
1318
  step3.click(
1319
  fn=lambda idx, status: navigate_to_tab(idx, status),
1320
  inputs=[gr.State(2), tab_completed],
1321
- outputs=[tabs, nav_message]
1322
  )
1323
  step4.click(
1324
  fn=lambda idx, status: navigate_to_tab(idx, status),
1325
  inputs=[gr.State(3), tab_completed],
1326
- outputs=[tabs, nav_message]
1327
  )
1328
  step5.click(
1329
  fn=lambda idx, status: navigate_to_tab(idx, status),
1330
  inputs=[gr.State(4), tab_completed],
1331
- outputs=[tabs, nav_message]
1332
  )
1333
 
1334
  return app
 
85
 
86
  # ========== TRANSCRIPT PARSING ==========
87
  def extract_gpa(text: str, gpa_type: str) -> str:
88
+ """More robust GPA extraction with multiple patterns."""
89
+ gpa_patterns = [
90
+ rf'{gpa_type}\s*GPA\s*[:=]?\s*([0-5]\.\d{{2}}|\d\.\d)', # Weighted GPA: 3.50
91
+ rf'{gpa_type}\s*GPA\s+([0-5]\.\d{{2}}|\d\.\d)', # Weighted GPA 3.50
92
+ rf'{gpa_type}\s*[:=]?\s*([0-5]\.\d{{2}}|\d\.\d)', # Weighted: 3.50
93
+ rf'GPA\s*\({gpa_type}\)\s*[:=]?\s*([0-5]\.\d{{2}}|\d\.\d)', # GPA (Weighted): 3.50
94
+ rf'{gpa_type}\s*[=:]?\s*([0-5]\.\d{{2}}|\d\.\d)', # Weighted=3.50
95
+ rf'{gpa_type}\s*[=:]?\s*(\d\.\d{{2}})' # Weighted:3.50
96
  ]
97
 
98
+ for pattern in gpa_patterns:
99
  match = re.search(pattern, text, re.IGNORECASE)
100
  if match:
101
  gpa_value = match.group(1)
102
  try:
103
  gpa_float = float(gpa_value)
104
+ if not 0.0 <= gpa_float <= 5.0:
105
  return "Invalid GPA"
106
+ return f"{gpa_float:.2f}"
107
  except ValueError:
108
  continue
109
 
110
+ # Fallback to looking for any GPA-like number near the term
111
+ fallback_pattern = re.compile(rf'(?:{gpa_type}.*?)([0-5]\.\d{{1,2}})(?!\d)')
112
+ match = re.search(fallback_pattern, text, re.IGNORECASE)
113
+ if match:
114
+ return match.group(1)
115
+
116
  return "N/A"
117
 
118
  def extract_courses_from_table(text: str) -> Dict[str, List[Dict]]:
119
+ """Enhanced course extraction with better pattern matching."""
120
+ # Normalize text for better matching
121
+ text = re.sub(r'\s+', ' ', text) # Replace multiple spaces
122
+ text = text.replace('\n', ' ') # Replace newlines
123
+
124
+ # More robust patterns
125
  patterns = [
126
+ # Pattern for standard table format
127
  re.compile(
128
+ r'(?:Year|Term|Semester)[\s:]*(.*?)\s*' # Year/Semester
129
+ r'(?:Grade|Level)[\s:]*(.*?)\s*' # Grade level
130
+ r'(?:Course\s*Code|Code)[\s:]*(.*?)\s*' # Course code
131
+ r'(?:Course\s*Name|Title)[\s:]*(.*?)\s*' # Course name
132
+ r'(?:Grade|Mark)[\s:]*(.*?)\s*' # Grade
133
+ r'(?:Credits|Units)[\s:]*(.*?)(?:\s|$)' # Credits
 
 
134
  ),
135
+ # Pattern for condensed format
136
  re.compile(
137
+ r'(\d{4}-\d{4}|\w+\s\d{4})\s+' # Year range or Semester Year
138
+ r'(\d+)\s+' # Grade level
139
+ r'([A-Z]+\s*\d+[A-Z]*)\s+' # Course code
140
+ r'(.+?)\s+' # Course name
141
+ r'([A-F][+-]?|P|F|W|I)\s+' # Grade
142
+ r'(\d+\.?\d*)' # Credits
143
  ),
144
+ # Fallback pattern for less structured data
145
  re.compile(
146
+ r'([A-Z]+\s*\d+[A-Z]*)\s+' # Course code
147
+ r'(.+?)\s+' # Course name
148
+ r'(?:Grade\s*:\s*)?([A-F][+-]?|P|F|W|I)\s*' # Grade
149
+ r'(?:Credits\s*:\s*)?(\d+\.?\d*)' # Credits
 
 
150
  )
151
  ]
152
 
153
  courses_by_grade = defaultdict(list)
154
+ extracted_courses = set() # To avoid duplicates
155
 
156
  for pattern in patterns:
157
  for match in re.finditer(pattern, text):
158
  if len(match.groups()) == 6:
159
+ year, grade, code, name, grade_mark, credits = match.groups()
 
160
  else:
161
+ # Handle shorter patterns
162
+ code, name, grade_mark, credits = match.groups()[:4]
163
+ year = "Unknown"
164
+ grade = "Unknown"
165
 
166
+ # Create unique identifier to avoid duplicates
167
+ course_id = f"{code}_{name}_{year}"
168
+ if course_id in extracted_courses:
169
+ continue
170
+ extracted_courses.add(course_id)
171
+
172
+ # Clean and format data
173
+ code = code.strip()
174
+ name = name.strip()
175
+ if 'AP' in code and 'AP ' not in code:
176
+ code = code.replace('AP', 'AP ')
177
+ if 'DE' in code and 'DE ' not in code:
178
+ code = code.replace('DE', 'DE ')
179
 
180
  course_info = {
181
+ 'code': code,
182
+ 'name': name,
183
+ 'grade': grade_mark.strip() if grade_mark else None,
184
+ 'credits': credits if credits else '0',
185
+ 'year': year.strip() if year else 'Unknown'
186
  }
187
 
188
+ courses_by_grade[grade.strip() if grade else 'Unknown'].append(course_info)
189
+
190
+ # If no courses found with patterns, try a more aggressive approach
191
+ if not courses_by_grade:
192
+ # Look for anything that looks like a course code followed by description
193
+ fallback_pattern = re.compile(r'([A-Z]+\s*\d+[A-Z]*)\s+(.+?)(?:\s+([A-F][+-]?|P|F|W|I))?(?:\s+(\d+\.?\d*))?')
194
+ for match in re.finditer(fallback_pattern, text):
195
+ code, name, grade_mark, credits = match.groups()
196
+ course_info = {
197
+ 'code': code.strip(),
198
+ 'name': name.strip(),
199
+ 'grade': grade_mark.strip() if grade_mark else None,
200
+ 'credits': credits if credits else '0',
201
+ 'year': 'Unknown'
202
+ }
203
+ courses_by_grade['Unknown'].append(course_info)
204
 
205
  return courses_by_grade
206
 
 
265
  for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
266
  output_text += f"\nGrade {grade}:\n{'-'*30}\n"
267
  for course in courses_by_grade[grade]:
268
+ output_text += f"- {course['code']} {course['name']}"
269
  if 'grade' in course and course['grade']:
270
  output_text += f" (Grade: {course['grade']})"
271
  if 'credits' in course:
 
642
  for grade in sorted(courses_by_grade.keys(), key=lambda x: int(x) if x.isdigit() else x):
643
  display += f"\n**Grade {grade}**\n"
644
  for course in courses_by_grade[grade]:
645
+ display += f"- {course.get('code', '')} {course.get('name', 'Unnamed course')}"
646
  if 'grade' in course and course['grade']:
647
  display += f" (Grade: {course['grade']})"
648
  if 'credits' in course:
 
815
  for grade_level, course_list in courses.items():
816
  for course in course_list:
817
  if course.get('grade', '').upper() in ['D', 'F']:
818
+ weak_subjects.append(f"{course.get('code', '')} {course.get('name', 'Unknown course')}")
819
 
820
  if weak_subjects:
821
  response += ("**Areas for Improvement**:\n"
 
851
  for grade in sorted(courses.keys(), key=lambda x: int(x) if x.isdigit() else x):
852
  response += f"\n**Grade {grade}**:\n"
853
  for course in courses[grade]:
854
+ response += f"- {course.get('code', '')} {course.get('name', 'Unnamed course')}"
855
  if 'grade' in course:
856
  response += f" (Grade: {course['grade']})"
857
  response += "\n"
 
1128
  placeholder="e.g., Science, Music, Sports, Art..."
1129
  )
1130
 
1131
+ save_personal_btn = gr.Button("Save Information", variant="primary")
1132
+ save_confirmation = gr.HTML(visible=False)
1133
+
1134
  gr.Markdown("### Favorites")
1135
  with gr.Group():
1136
  movie = gr.Textbox(label="Favorite Movie")
 
1161
  outputs=blog_text
1162
  )
1163
 
1164
+ def save_personal_info(name, age, interests, current_tab_status):
 
1165
  if name.strip() and age and interests.strip():
1166
  new_status = current_tab_status.copy()
1167
  new_status[2] = True
1168
+ return new_status, gr.update(elem_classes="completed-tab"), gr.update(interactive=True), gr.update(value="<div class='alert-box'>Information saved!</div>", visible=True), gr.update(visible=False)
1169
+ return current_tab_status, gr.update(), gr.update(), gr.update(visible=False), gr.update(visible=True)
1170
 
1171
+ save_personal_btn.click(
1172
+ fn=save_personal_info,
 
 
 
 
 
 
1173
  inputs=[name, age, interests, tab_completed],
1174
+ outputs=[tab_completed, step3, step4, save_confirmation, nav_message]
 
 
 
 
 
1175
  )
1176
 
1177
  # ===== TAB 4: Save & Review =====
 
1270
  fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
1271
  outputs=load_btn
1272
  ).then(
1273
+ fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
1274
  outputs=delete_btn
1275
  )
1276
 
 
1320
  # Check if current tab is completed
1321
  if not tab_completed_status.get(current_tab, False):
1322
  return gr.Tabs(selected=current_tab), \
1323
+ gr.update(value=f"<div class='nav-message'>Please complete the current tab before proceeding to tab {tab_index + 1}</div>", visible=True), \
1324
+ gr.update(visible=False)
1325
 
1326
+ return gr.Tabs(selected=tab_index), gr.update(visible=False), gr.update(visible=False)
1327
 
1328
  step1.click(
1329
  fn=lambda idx, status: navigate_to_tab(idx, status),
1330
  inputs=[gr.State(0), tab_completed],
1331
+ outputs=[tabs, nav_message, quiz_alert]
1332
  )
1333
  step2.click(
1334
  fn=lambda idx, status: navigate_to_tab(idx, status),
1335
  inputs=[gr.State(1), tab_completed],
1336
+ outputs=[tabs, nav_message, quiz_alert]
1337
  )
1338
  step3.click(
1339
  fn=lambda idx, status: navigate_to_tab(idx, status),
1340
  inputs=[gr.State(2), tab_completed],
1341
+ outputs=[tabs, nav_message, quiz_alert]
1342
  )
1343
  step4.click(
1344
  fn=lambda idx, status: navigate_to_tab(idx, status),
1345
  inputs=[gr.State(3), tab_completed],
1346
+ outputs=[tabs, nav_message, quiz_alert]
1347
  )
1348
  step5.click(
1349
  fn=lambda idx, status: navigate_to_tab(idx, status),
1350
  inputs=[gr.State(4), tab_completed],
1351
+ outputs=[tabs, nav_message, quiz_alert]
1352
  )
1353
 
1354
  return app