Dannyar608 committed on
Commit
ffbc55b
·
verified ·
1 Parent(s): ea801f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -12
app.py CHANGED
@@ -54,37 +54,64 @@ def parse_transcript(file):
54
  if page_text:
55
  text += page_text + '\n'
56
 
57
- # GPA extraction
58
- gpa_match = re.search(r'GPA[:\s]*(\d\.\d{1,2})', text, re.IGNORECASE)
59
- gpa = gpa_match.group(1) if gpa_match else "N/A"
60
-
61
  # Grade level extraction
62
  grade_match = re.search(r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE)
63
  grade_level = grade_match.group(2) if grade_match else "Unknown"
64
 
65
- # Courses grouped by grade
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  courses_by_grade = extract_courses_with_grade_levels(text)
67
 
68
- output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses by Grade Level:\n\n"
 
 
 
 
 
 
 
 
 
 
69
  for level, courses in courses_by_grade.items():
70
- output_text += f"Grade {level}:\n"
71
  for course in courses:
72
  output_text += f"- {course['course']}"
73
  if 'grade' in course:
74
  output_text += f" (Grade: {course['grade']})"
75
  output_text += "\n"
76
- output_text += "\n"
77
 
78
  return output_text, {
79
- "gpa": gpa,
80
  "grade_level": grade_level,
81
  "courses": courses_by_grade
82
  }
83
-
84
  else:
85
  return "Unsupported file format", None
86
 
87
- # For CSV/XLSX:
88
  gpa = "N/A"
89
  for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
90
  if col in df.columns:
@@ -107,7 +134,7 @@ def parse_transcript(file):
107
  output_text += "\n".join(f"- {course}" for course in courses)
108
 
109
  return output_text, {
110
- "gpa": gpa,
111
  "grade_level": grade_level,
112
  "courses": courses
113
  }
 
54
  if page_text:
55
  text += page_text + '\n'
56
 
 
 
 
 
57
  # Grade level extraction
58
  grade_match = re.search(r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE)
59
  grade_level = grade_match.group(2) if grade_match else "Unknown"
60
 
61
+ # Enhanced GPA extraction
62
+ gpa_data = {'weighted': "N/A", 'unweighted': "N/A"}
63
+ gpa_patterns = [
64
+ r'Weighted GPA[\s:]*(\d\.\d{1,2})',
65
+ r'GPA \(Weighted\)[\s:]*(\d\.\d{1,2})',
66
+ r'Cumulative GPA \(Weighted\)[\s:]*(\d\.\d{1,2})',
67
+ r'Unweighted GPA[\s:]*(\d\.\d{1,2})',
68
+ r'GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})',
69
+ r'Cumulative GPA \(Unweighted\)[\s:]*(\d\.\d{1,2})',
70
+ r'GPA[\s:]*(\d\.\d{1,2})'
71
+ ]
72
+ for pattern in gpa_patterns:
73
+ for match in re.finditer(pattern, text, re.IGNORECASE):
74
+ gpa_value = match.group(1)
75
+ if 'weighted' in pattern.lower():
76
+ gpa_data['weighted'] = gpa_value
77
+ elif 'unweighted' in pattern.lower():
78
+ gpa_data['unweighted'] = gpa_value
79
+ else:
80
+ if gpa_data['unweighted'] == "N/A":
81
+ gpa_data['unweighted'] = gpa_value
82
+ if gpa_data['weighted'] == "N/A":
83
+ gpa_data['weighted'] = gpa_value
84
+
85
  courses_by_grade = extract_courses_with_grade_levels(text)
86
 
87
+ output_text = f"Grade Level: {grade_level}\n\n"
88
+ if gpa_data['weighted'] != "N/A" or gpa_data['unweighted'] != "N/A":
89
+ output_text += "GPA Information:\n"
90
+ if gpa_data['unweighted'] != "N/A":
91
+ output_text += f"- Unweighted GPA: {gpa_data['unweighted']}\n"
92
+ if gpa_data['weighted'] != "N/A":
93
+ output_text += f"- Weighted GPA: {gpa_data['weighted']}\n"
94
+ else:
95
+ output_text += "No GPA information found\n"
96
+
97
+ output_text += "\nCourses by Grade Level:\n"
98
  for level, courses in courses_by_grade.items():
99
+ output_text += f"\nGrade {level}:\n"
100
  for course in courses:
101
  output_text += f"- {course['course']}"
102
  if 'grade' in course:
103
  output_text += f" (Grade: {course['grade']})"
104
  output_text += "\n"
 
105
 
106
  return output_text, {
107
+ "gpa": gpa_data,
108
  "grade_level": grade_level,
109
  "courses": courses_by_grade
110
  }
 
111
  else:
112
  return "Unsupported file format", None
113
 
114
+ # For CSV/XLSX fallback
115
  gpa = "N/A"
116
  for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
117
  if col in df.columns:
 
134
  output_text += "\n".join(f"- {course}" for course in courses)
135
 
136
  return output_text, {
137
+ "gpa": {"unweighted": gpa, "weighted": "N/A"},
138
  "grade_level": grade_level,
139
  "courses": courses
140
  }