Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,6 @@ from PyPDF2 import PdfReader
|
|
7 |
from collections import defaultdict
|
8 |
|
9 |
# ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
|
10 |
-
|
11 |
def extract_courses_with_grade_levels(text):
|
12 |
grade_level_pattern = r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)"
|
13 |
grade_match = re.search(grade_level_pattern, text, re.IGNORECASE)
|
@@ -51,12 +50,24 @@ def parse_transcript(file):
|
|
51 |
text = ''
|
52 |
reader = PdfReader(file)
|
53 |
for page in reader.pages:
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
courses_by_grade = extract_courses_with_grade_levels(text)
|
56 |
|
57 |
-
output_text = "
|
58 |
-
for
|
59 |
-
output_text += f"Grade {
|
60 |
for course in courses:
|
61 |
output_text += f"- {course['course']}"
|
62 |
if 'grade' in course:
|
@@ -64,10 +75,16 @@ def parse_transcript(file):
|
|
64 |
output_text += "\n"
|
65 |
output_text += "\n"
|
66 |
|
67 |
-
return output_text,
|
|
|
|
|
|
|
|
|
|
|
68 |
else:
|
69 |
return "Unsupported file format", None
|
70 |
|
|
|
71 |
gpa = "N/A"
|
72 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
73 |
if col in df.columns:
|
|
|
7 |
from collections import defaultdict
|
8 |
|
9 |
# ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
|
|
|
10 |
def extract_courses_with_grade_levels(text):
|
11 |
grade_level_pattern = r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)"
|
12 |
grade_match = re.search(grade_level_pattern, text, re.IGNORECASE)
|
|
|
50 |
text = ''
|
51 |
reader = PdfReader(file)
|
52 |
for page in reader.pages:
|
53 |
+
page_text = page.extract_text()
|
54 |
+
if page_text:
|
55 |
+
text += page_text + '\n'
|
56 |
+
|
57 |
+
# GPA extraction
|
58 |
+
gpa_match = re.search(r'GPA[:\s]*(\d\.\d{1,2})', text, re.IGNORECASE)
|
59 |
+
gpa = gpa_match.group(1) if gpa_match else "N/A"
|
60 |
+
|
61 |
+
# Grade level extraction
|
62 |
+
grade_match = re.search(r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE)
|
63 |
+
grade_level = grade_match.group(2) if grade_match else "Unknown"
|
64 |
+
|
65 |
+
# Courses grouped by grade
|
66 |
courses_by_grade = extract_courses_with_grade_levels(text)
|
67 |
|
68 |
+
output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses by Grade Level:\n\n"
|
69 |
+
for level, courses in courses_by_grade.items():
|
70 |
+
output_text += f"Grade {level}:\n"
|
71 |
for course in courses:
|
72 |
output_text += f"- {course['course']}"
|
73 |
if 'grade' in course:
|
|
|
75 |
output_text += "\n"
|
76 |
output_text += "\n"
|
77 |
|
78 |
+
return output_text, {
|
79 |
+
"gpa": gpa,
|
80 |
+
"grade_level": grade_level,
|
81 |
+
"courses": courses_by_grade
|
82 |
+
}
|
83 |
+
|
84 |
else:
|
85 |
return "Unsupported file format", None
|
86 |
|
87 |
+
# For CSV/XLSX:
|
88 |
gpa = "N/A"
|
89 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
90 |
if col in df.columns:
|