Dannyar608 committed on
Commit
ea801f3
·
verified ·
1 Parent(s): 3957ec0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -7,7 +7,6 @@ from PyPDF2 import PdfReader
7
  from collections import defaultdict
8
 
9
  # ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
10
-
11
  def extract_courses_with_grade_levels(text):
12
  grade_level_pattern = r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)"
13
  grade_match = re.search(grade_level_pattern, text, re.IGNORECASE)
@@ -51,12 +50,24 @@ def parse_transcript(file):
51
  text = ''
52
  reader = PdfReader(file)
53
  for page in reader.pages:
54
- text += page.extract_text() + '\n'
 
 
 
 
 
 
 
 
 
 
 
 
55
  courses_by_grade = extract_courses_with_grade_levels(text)
56
 
57
- output_text = "Courses by Grade Level:\n\n"
58
- for grade_level, courses in courses_by_grade.items():
59
- output_text += f"Grade {grade_level}:\n"
60
  for course in courses:
61
  output_text += f"- {course['course']}"
62
  if 'grade' in course:
@@ -64,10 +75,16 @@ def parse_transcript(file):
64
  output_text += "\n"
65
  output_text += "\n"
66
 
67
- return output_text, courses_by_grade
 
 
 
 
 
68
  else:
69
  return "Unsupported file format", None
70
 
 
71
  gpa = "N/A"
72
  for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
73
  if col in df.columns:
 
7
  from collections import defaultdict
8
 
9
  # ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
 
10
  def extract_courses_with_grade_levels(text):
11
  grade_level_pattern = r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)"
12
  grade_match = re.search(grade_level_pattern, text, re.IGNORECASE)
 
50
  text = ''
51
  reader = PdfReader(file)
52
  for page in reader.pages:
53
+ page_text = page.extract_text()
54
+ if page_text:
55
+ text += page_text + '\n'
56
+
57
+ # GPA extraction
58
+ gpa_match = re.search(r'GPA[:\s]*(\d\.\d{1,2})', text, re.IGNORECASE)
59
+ gpa = gpa_match.group(1) if gpa_match else "N/A"
60
+
61
+ # Grade level extraction
62
+ grade_match = re.search(r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE)
63
+ grade_level = grade_match.group(2) if grade_match else "Unknown"
64
+
65
+ # Courses grouped by grade
66
  courses_by_grade = extract_courses_with_grade_levels(text)
67
 
68
+ output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses by Grade Level:\n\n"
69
+ for level, courses in courses_by_grade.items():
70
+ output_text += f"Grade {level}:\n"
71
  for course in courses:
72
  output_text += f"- {course['course']}"
73
  if 'grade' in course:
 
75
  output_text += "\n"
76
  output_text += "\n"
77
 
78
+ return output_text, {
79
+ "gpa": gpa,
80
+ "grade_level": grade_level,
81
+ "courses": courses_by_grade
82
+ }
83
+
84
  else:
85
  return "Unsupported file format", None
86
 
87
+ # For CSV/XLSX:
88
  gpa = "N/A"
89
  for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
90
  if col in df.columns: