Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,15 +8,34 @@ from collections import defaultdict
|
|
8 |
from typing import Dict, List, Optional, Tuple, Union
|
9 |
import html
|
10 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# ========== CONFIGURATION ==========
|
13 |
PROFILES_DIR = "student_profiles"
|
14 |
-
ALLOWED_FILE_TYPES = [".pdf"]
|
15 |
MAX_FILE_SIZE_MB = 5
|
16 |
MIN_AGE = 5
|
17 |
MAX_AGE = 120
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# ========== UTILITY FUNCTIONS ==========
|
|
|
|
|
|
|
|
|
|
|
20 |
def sanitize_input(text: str) -> str:
|
21 |
"""Sanitize user input to prevent XSS and injection attacks."""
|
22 |
return html.escape(text.strip())
|
@@ -55,52 +74,84 @@ def validate_file(file_obj) -> None:
|
|
55 |
if file_size > MAX_FILE_SIZE_MB:
|
56 |
raise gr.Error(f"File too large. Max size: {MAX_FILE_SIZE_MB}MB")
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# ========== TRANSCRIPT PARSING ==========
|
59 |
def extract_gpa(text: str, gpa_type: str) -> str:
|
60 |
"""Extract GPA information from text with validation."""
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
def extract_courses_from_table(text: str) -> Dict[str, List[Dict]]:
|
76 |
"""Extract course information with multiple pattern fallbacks."""
|
77 |
-
#
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
courses_by_grade = defaultdict(list)
|
100 |
|
101 |
-
for pattern in
|
102 |
for match in re.finditer(pattern, text):
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
# Clean and format course information
|
106 |
course_name = course_name.strip()
|
@@ -126,7 +177,7 @@ def extract_courses_from_table(text: str) -> Dict[str, List[Dict]]:
|
|
126 |
return courses_by_grade
|
127 |
|
128 |
def parse_transcript(file_obj) -> Tuple[str, Optional[Dict]]:
|
129 |
-
"""Parse transcript file with robust error handling."""
|
130 |
try:
|
131 |
if not file_obj:
|
132 |
raise gr.Error("Please upload a file first")
|
@@ -134,28 +185,44 @@ def parse_transcript(file_obj) -> Tuple[str, Optional[Dict]]:
|
|
134 |
validate_file(file_obj)
|
135 |
|
136 |
text = ''
|
|
|
|
|
137 |
try:
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
except Exception as e:
|
144 |
-
raise gr.Error(f"Error processing
|
145 |
|
146 |
if not text.strip():
|
147 |
-
raise gr.Error("No text could be extracted from the
|
148 |
|
149 |
-
#
|
150 |
gpa_data = {
|
151 |
-
'weighted': extract_gpa(text, 'Weighted
|
152 |
-
'unweighted': extract_gpa(text, '
|
153 |
}
|
154 |
|
155 |
-
# Extract grade level with fallback
|
156 |
-
grade_match =
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
159 |
grade_level = grade_match.group(1) if grade_match else "Unknown"
|
160 |
|
161 |
courses_by_grade = extract_courses_from_table(text)
|
@@ -244,6 +311,10 @@ class LearningStyleQuiz:
|
|
244 |
"Watch educational videos",
|
245 |
"Use flashcards with images",
|
246 |
"Highlight important information in different colors"
|
|
|
|
|
|
|
|
|
247 |
]
|
248 |
},
|
249 |
"Auditory": {
|
@@ -254,6 +325,10 @@ class LearningStyleQuiz:
|
|
254 |
"Explain concepts out loud to yourself",
|
255 |
"Use rhymes or songs to remember information",
|
256 |
"Listen to educational podcasts"
|
|
|
|
|
|
|
|
|
257 |
]
|
258 |
},
|
259 |
"Reading/Writing": {
|
@@ -264,6 +339,10 @@ class LearningStyleQuiz:
|
|
264 |
"Read textbooks and articles",
|
265 |
"Make lists to organize information",
|
266 |
"Rewrite your notes to reinforce learning"
|
|
|
|
|
|
|
|
|
267 |
]
|
268 |
},
|
269 |
"Kinesthetic": {
|
@@ -274,12 +353,16 @@ class LearningStyleQuiz:
|
|
274 |
"Create physical models",
|
275 |
"Associate information with physical actions",
|
276 |
"Study while walking or pacing"
|
|
|
|
|
|
|
|
|
277 |
]
|
278 |
}
|
279 |
}
|
280 |
|
281 |
def evaluate_quiz(self, *answers) -> str:
|
282 |
-
"""Evaluate quiz answers and generate results."""
|
283 |
answers = list(answers) # Convert tuple to list
|
284 |
if len(answers) != len(self.questions):
|
285 |
raise gr.Error("Not all questions were answered")
|
@@ -302,7 +385,7 @@ class LearningStyleQuiz:
|
|
302 |
percentages = {style: (score/total_answered)*100 for style, score in scores.items()}
|
303 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
304 |
|
305 |
-
# Generate results report
|
306 |
result = "## Your Learning Style Results\n\n"
|
307 |
result += "### Scores:\n"
|
308 |
for style, score in sorted_styles:
|
@@ -324,6 +407,10 @@ class LearningStyleQuiz:
|
|
324 |
for tip in style_info['tips']:
|
325 |
result += f"- {tip}\n"
|
326 |
|
|
|
|
|
|
|
|
|
327 |
# Add complementary strategies
|
328 |
complementary = [s for s in sorted_styles if s[0] != primary_style][0][0]
|
329 |
result += f"\nYou might also benefit from some **{complementary}** strategies:\n"
|
@@ -340,6 +427,10 @@ class LearningStyleQuiz:
|
|
340 |
result += f"\n**{style}** techniques:\n"
|
341 |
for tip in self.learning_styles[style]['tips'][:2]:
|
342 |
result += f"- {tip}\n"
|
|
|
|
|
|
|
|
|
343 |
|
344 |
return result
|
345 |
|
@@ -351,6 +442,17 @@ class ProfileManager:
|
|
351 |
def __init__(self):
|
352 |
self.profiles_dir = Path(PROFILES_DIR)
|
353 |
self.profiles_dir.mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
|
355 |
def save_profile(self, name: str, age: Union[int, str], interests: str,
|
356 |
transcript: Dict, learning_style: str,
|
@@ -384,34 +486,67 @@ class ProfileManager:
|
|
384 |
"transcript": transcript if transcript else {},
|
385 |
"learning_style": learning_style if learning_style else "Not assessed",
|
386 |
"favorites": favorites,
|
387 |
-
"blog": sanitize_input(blog) if blog else ""
|
|
|
388 |
}
|
389 |
|
390 |
# Save to JSON file
|
391 |
-
|
392 |
-
filepath = self.profiles_dir / filename
|
393 |
|
394 |
with open(filepath, "w", encoding='utf-8') as f:
|
395 |
json.dump(data, f, indent=2, ensure_ascii=False)
|
396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
return self._generate_profile_summary(data)
|
398 |
|
399 |
except Exception as e:
|
400 |
raise gr.Error(f"Error saving profile: {str(e)}")
|
401 |
|
402 |
-
def load_profile(self, name: str = None) -> Dict:
|
403 |
"""Load profile by name or return the first one found."""
|
404 |
try:
|
405 |
-
|
|
|
|
|
|
|
|
|
|
|
406 |
if not profiles:
|
407 |
return {}
|
408 |
|
409 |
if name:
|
410 |
# Find profile by name
|
411 |
name = name.replace(" ", "_")
|
412 |
-
|
|
|
|
|
|
|
|
|
413 |
if not profile_file.exists():
|
414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
else:
|
416 |
# Load the first profile found
|
417 |
profile_file = profiles[0]
|
@@ -423,26 +558,36 @@ class ProfileManager:
|
|
423 |
print(f"Error loading profile: {str(e)}")
|
424 |
return {}
|
425 |
|
426 |
-
def list_profiles(self) -> List[str]:
|
427 |
-
"""List all available profile names."""
|
428 |
-
|
429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
430 |
|
431 |
def _generate_profile_summary(self, data: Dict) -> str:
|
432 |
"""Generate markdown summary of the profile."""
|
433 |
transcript = data.get("transcript", {})
|
434 |
favorites = data.get("favorites", {})
|
|
|
435 |
|
436 |
markdown = f"""## Student Profile: {data['name']}
|
437 |
-
|
438 |
### Basic Information
|
439 |
- **Age:** {data['age']}
|
440 |
- **Interests:** {data['interests']}
|
441 |
-
- **Learning Style:** {
|
442 |
-
|
443 |
### Academic Information
|
444 |
{self._format_transcript(transcript)}
|
445 |
-
|
446 |
### Favorites
|
447 |
- **Movie:** {favorites.get('movie', 'Not specified')}
|
448 |
*Reason:* {favorites.get('movie_reason', 'Not specified')}
|
@@ -452,7 +597,6 @@ class ProfileManager:
|
|
452 |
*Reason:* {favorites.get('book_reason', 'Not specified')}
|
453 |
- **Character:** {favorites.get('character', 'Not specified')}
|
454 |
*Reason:* {favorites.get('character_reason', 'Not specified')}
|
455 |
-
|
456 |
### Personal Blog
|
457 |
{data.get('blog', '_No blog provided_')}
|
458 |
"""
|
@@ -494,11 +638,11 @@ class TeachingAssistant:
|
|
494 |
self.context_history = []
|
495 |
self.max_context_length = 5 # Keep last 5 exchanges for context
|
496 |
|
497 |
-
def generate_response(self, message: str, history: List[List[Union[str, None]]]) -> str:
|
498 |
"""Generate personalized response based on student profile and context."""
|
499 |
try:
|
500 |
-
# Load profile
|
501 |
-
profile = profile_manager.load_profile()
|
502 |
if not profile:
|
503 |
return "Please complete and save your profile first using the previous tabs."
|
504 |
|
@@ -725,7 +869,6 @@ class TeachingAssistant:
|
|
725 |
- **Course advice**: "Show me my course history"
|
726 |
- **Interest suggestions**: "What clubs match my interests?"
|
727 |
- **General advice**: "How can I improve my grades?"
|
728 |
-
|
729 |
Try asking about any of these topics!""")
|
730 |
|
731 |
# Initialize teaching assistant
|
@@ -734,6 +877,10 @@ teaching_assistant = TeachingAssistant()
|
|
734 |
# ========== GRADIO INTERFACE ==========
|
735 |
def create_interface():
|
736 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
|
|
|
|
|
|
|
|
737 |
# Custom CSS for better styling
|
738 |
app.css = """
|
739 |
.gradio-container {
|
@@ -758,6 +905,16 @@ def create_interface():
|
|
758 |
background: #f5f5f5;
|
759 |
border-radius: 5px;
|
760 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
761 |
"""
|
762 |
|
763 |
gr.Markdown("""
|
@@ -786,17 +943,20 @@ def create_interface():
|
|
786 |
with gr.Row():
|
787 |
with gr.Column(scale=1):
|
788 |
gr.Markdown("### Step 1: Upload Your Transcript")
|
789 |
-
gr.Markdown("Upload a PDF of your academic transcript to analyze your courses and GPA.")
|
790 |
|
791 |
with gr.Group():
|
792 |
transcript_file = gr.File(
|
793 |
-
label="Transcript PDF",
|
794 |
file_types=ALLOWED_FILE_TYPES,
|
795 |
type="filepath"
|
796 |
)
|
797 |
upload_btn = gr.Button("Upload & Analyze", variant="primary")
|
798 |
|
799 |
-
gr.Markdown("
|
|
|
|
|
|
|
800 |
|
801 |
with gr.Column(scale=2):
|
802 |
transcript_output = gr.Textbox(
|
@@ -913,12 +1073,18 @@ def create_interface():
|
|
913 |
gr.Markdown("Verify your information before saving. You can return to previous steps to make changes.")
|
914 |
|
915 |
save_btn = gr.Button("Save Profile", variant="primary")
|
916 |
-
|
917 |
-
|
918 |
-
|
919 |
-
|
920 |
-
|
921 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
922 |
clear_btn = gr.Button("Clear Form")
|
923 |
|
924 |
with gr.Column(scale=2):
|
@@ -927,6 +1093,7 @@ def create_interface():
|
|
927 |
label="Profile Summary"
|
928 |
)
|
929 |
|
|
|
930 |
save_btn.click(
|
931 |
fn=profile_manager.save_profile,
|
932 |
inputs=[
|
@@ -935,14 +1102,55 @@ def create_interface():
|
|
935 |
book, book_reason, character, character_reason, blog_text
|
936 |
],
|
937 |
outputs=output_summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
938 |
)
|
939 |
|
|
|
940 |
load_btn.click(
|
941 |
-
fn=lambda name: profile_manager.load_profile(name),
|
942 |
inputs=load_profile_dropdown,
|
943 |
outputs=output_summary
|
944 |
)
|
945 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
946 |
clear_btn.click(
|
947 |
fn=lambda: [gr.update(value="") for _ in range(12)],
|
948 |
outputs=[
|
@@ -958,8 +1166,9 @@ def create_interface():
|
|
958 |
gr.Markdown("## Your Personalized Learning Assistant")
|
959 |
gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
|
960 |
|
|
|
961 |
chatbot = gr.ChatInterface(
|
962 |
-
fn=teaching_assistant.generate_response,
|
963 |
examples=[
|
964 |
"How should I study for my next math test?",
|
965 |
"What's my current GPA?",
|
@@ -967,7 +1176,10 @@ def create_interface():
|
|
967 |
"How can I improve my grades in science?",
|
968 |
"What study methods match my learning style?"
|
969 |
],
|
970 |
-
title=""
|
|
|
|
|
|
|
971 |
)
|
972 |
|
973 |
# Tab navigation logic
|
|
|
8 |
from typing import Dict, List, Optional, Tuple, Union
|
9 |
import html
|
10 |
from pathlib import Path
|
11 |
+
import fitz # PyMuPDF for better PDF text extraction
|
12 |
+
import pytesseract
|
13 |
+
from PIL import Image
|
14 |
+
import io
|
15 |
+
import secrets
|
16 |
+
import string
|
17 |
+
from huggingface_hub import HfApi, HfFolder
|
18 |
|
19 |
# ========== CONFIGURATION ==========
|
20 |
PROFILES_DIR = "student_profiles"
|
21 |
+
ALLOWED_FILE_TYPES = [".pdf", ".png", ".jpg", ".jpeg"] # Added image support
|
22 |
MAX_FILE_SIZE_MB = 5
|
23 |
MIN_AGE = 5
|
24 |
MAX_AGE = 120
|
25 |
+
SESSION_TOKEN_LENGTH = 32
|
26 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
27 |
+
|
28 |
+
# Initialize Hugging Face API
|
29 |
+
if HF_TOKEN:
|
30 |
+
hf_api = HfApi(token=HF_TOKEN)
|
31 |
+
HfFolder.save_token(HF_TOKEN)
|
32 |
|
33 |
# ========== UTILITY FUNCTIONS ==========
|
34 |
+
def generate_session_token() -> str:
    """Build a random alphanumeric token used to identify a user session.

    The token is SESSION_TOKEN_LENGTH characters long and every character is
    drawn with the `secrets` module from ASCII letters and digits.
    """
    charset = string.ascii_letters + string.digits
    picked = []
    for _ in range(SESSION_TOKEN_LENGTH):
        picked.append(secrets.choice(charset))
    return ''.join(picked)
|
38 |
+
|
39 |
def sanitize_input(text: str) -> str:
    """Return *text* trimmed of surrounding whitespace and HTML-escaped.

    Escaping covers ``&``, ``<``, ``>`` and both quote characters, so the
    result can be embedded in rendered markup without being interpreted.
    """
    trimmed = text.strip()
    escaped = html.escape(trimmed)
    return escaped
|
|
|
74 |
if file_size > MAX_FILE_SIZE_MB:
|
75 |
raise gr.Error(f"File too large. Max size: {MAX_FILE_SIZE_MB}MB")
|
76 |
|
77 |
+
def extract_text_with_ocr(file_path: str) -> str:
    """Extract text from an image file using OCR.

    Args:
        file_path: Path of the image to read.

    Returns:
        The text reported by ``pytesseract.image_to_string`` for the image.

    Raises:
        gr.Error: If the image cannot be opened or OCR fails; the original
            exception is attached as the cause.
    """
    try:
        # Use the image as a context manager so its file handle is released
        # even when OCR raises, instead of leaking until garbage collection.
        with Image.open(file_path) as image:
            return pytesseract.image_to_string(image)
    except Exception as e:
        raise gr.Error(f"OCR processing failed: {str(e)}") from e
|
85 |
+
|
86 |
# ========== TRANSCRIPT PARSING ==========
|
87 |
def extract_gpa(text: str, gpa_type: str) -> str:
    """Extract a GPA value of the given type from transcript text.

    Several label layouts are tried in order, from most to least specific:
    "Weighted GPA: 3.5", "Weighted GPA 3.5", "Weighted: 3.5", "Weighted 3.5".
    Matching is case-insensitive.

    Args:
        text: Full transcript text to search.
        gpa_type: Label preceding the value, e.g. "Weighted" or "Unweighted".

    Returns:
        The matched number as it appears in the text, "Invalid GPA" when the
        first match lies outside the 0.0-5.0 range, or "N/A" when no layout
        matches.
    """
    # Escape the label so regex metacharacters in it are matched literally,
    # and forbid a preceding word character so that "Weighted" cannot match
    # inside "Unweighted" and return the wrong GPA.
    label = rf'(?<!\w){re.escape(gpa_type)}'
    # A well-formed decimal only: a trailing sentence period ("3.5.") or a
    # lone "." is no longer captured, so float() below cannot fail.
    number = r'(\d+(?:\.\d+)?|\.\d+)'
    patterns = [
        rf'{label}\s*GPA\s*:\s*{number}',  # "Weighted GPA: 3.5"
        rf'{label}\s*GPA\s*{number}',      # "Weighted GPA 3.5"
        rf'{label}\s*:\s*{number}',        # "Weighted: 3.5"
        rf'{label}\s*{number}',            # "Weighted 3.5"
    ]

    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if not match:
            continue
        gpa_value = match.group(1)
        if not 0.0 <= float(gpa_value) <= 5.0:  # Assuming 5.0 is max for weighted GPA
            return "Invalid GPA"
        return gpa_value

    return "N/A"
|
109 |
|
110 |
def extract_courses_from_table(text: str) -> Dict[str, List[Dict]]:
|
111 |
"""Extract course information with multiple pattern fallbacks."""
|
112 |
+
# Enhanced patterns to handle more transcript formats
|
113 |
+
patterns = [
|
114 |
+
# Pattern 1: Structured table format
|
115 |
+
re.compile(
|
116 |
+
r'(\d{4}-\d{4})\s*' # School year
|
117 |
+
r'\|?\s*(\d+)\s*' # Grade level
|
118 |
+
r'\|?\s*([A-Z0-9]+)\s*' # Course code
|
119 |
+
r'\|?\s*([^\|]+?)\s*' # Course name
|
120 |
+
r'(?:\|\s*[^\|]*){2}' # Skip Term and DstNumber
|
121 |
+
r'\|\s*([A-FW][+-]?)\s*' # Grade (FG column)
|
122 |
+
r'(?:\|\s*[^\|]*)' # Skip Incl column
|
123 |
+
r'\|\s*([\d\.]+|inProgress)' # Credits
|
124 |
+
),
|
125 |
+
# Pattern 2: Less structured format
|
126 |
+
re.compile(
|
127 |
+
r'(\d{4}-\d{4})\s+' # School year
|
128 |
+
r'(\d+)\s+' # Grade level
|
129 |
+
r'([A-Z0-9]+)\s+' # Course code
|
130 |
+
r'(.+?)\s+' # Course name
|
131 |
+
r'([A-FW][+-]?)\s*' # Grade
|
132 |
+
r'([\d\.]+|inProgress)' # Credits
|
133 |
+
),
|
134 |
+
# Pattern 3: Semester-based format
|
135 |
+
re.compile(
|
136 |
+
r'(Fall|Spring|Summer)\s+(\d{4})\s+' # Term and year
|
137 |
+
r'(\d+)\s+' # Grade level
|
138 |
+
r'([A-Z0-9]+)\s+' # Course code
|
139 |
+
r'(.+?)\s+' # Course name
|
140 |
+
r'([A-FW][+-]?)\s*' # Grade
|
141 |
+
r'([\d\.]+)' # Credits
|
142 |
+
)
|
143 |
+
]
|
144 |
|
145 |
courses_by_grade = defaultdict(list)
|
146 |
|
147 |
+
for pattern in patterns:
|
148 |
for match in re.finditer(pattern, text):
|
149 |
+
if len(match.groups()) == 6:
|
150 |
+
year_range, grade_level, course_code, course_name, grade, credits = match.groups()
|
151 |
+
term = None
|
152 |
+
else:
|
153 |
+
term, year, grade_level, course_code, course_name, grade, credits = match.groups()
|
154 |
+
year_range = f"{term} {year}"
|
155 |
|
156 |
# Clean and format course information
|
157 |
course_name = course_name.strip()
|
|
|
177 |
return courses_by_grade
|
178 |
|
179 |
def parse_transcript(file_obj) -> Tuple[str, Optional[Dict]]:
|
180 |
+
"""Parse transcript file with robust error handling and OCR support."""
|
181 |
try:
|
182 |
if not file_obj:
|
183 |
raise gr.Error("Please upload a file first")
|
|
|
185 |
validate_file(file_obj)
|
186 |
|
187 |
text = ''
|
188 |
+
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
189 |
+
|
190 |
try:
|
191 |
+
if file_ext == '.pdf':
|
192 |
+
# Try PyMuPDF first for better text extraction
|
193 |
+
try:
|
194 |
+
doc = fitz.open(file_obj.name)
|
195 |
+
for page in doc:
|
196 |
+
text += page.get_text() + '\n'
|
197 |
+
except:
|
198 |
+
# Fallback to PyPDF2
|
199 |
+
reader = PdfReader(file_obj.name)
|
200 |
+
for page in reader.pages:
|
201 |
+
page_text = page.extract_text()
|
202 |
+
if page_text:
|
203 |
+
text += page_text + '\n'
|
204 |
+
elif file_ext in ['.png', '.jpg', '.jpeg']:
|
205 |
+
text = extract_text_with_ocr(file_obj.name)
|
206 |
except Exception as e:
|
207 |
+
raise gr.Error(f"Error processing file: {str(e)}")
|
208 |
|
209 |
if not text.strip():
|
210 |
+
raise gr.Error("No text could be extracted from the file")
|
211 |
|
212 |
+
# Enhanced GPA extraction
|
213 |
gpa_data = {
|
214 |
+
'weighted': extract_gpa(text, 'Weighted'),
|
215 |
+
'unweighted': extract_gpa(text, 'Unweighted')
|
216 |
}
|
217 |
|
218 |
+
# Extract grade level with multiple fallback patterns
|
219 |
+
grade_match = (
|
220 |
+
re.search(r'Current Grade:\s*(\d+)', text) or
|
221 |
+
re.search(r'Grade\s*:\s*(\d+)', text) or
|
222 |
+
re.search(r'Grade\s+(\d+)', text) or
|
223 |
+
re.search(r'Grade\s+Level:\s*(\d+)', text) or
|
224 |
+
re.search(r'Grade\s*\(?\s*(\d+)\s*\)?', text)
|
225 |
+
)
|
226 |
grade_level = grade_match.group(1) if grade_match else "Unknown"
|
227 |
|
228 |
courses_by_grade = extract_courses_from_table(text)
|
|
|
311 |
"Watch educational videos",
|
312 |
"Use flashcards with images",
|
313 |
"Highlight important information in different colors"
|
314 |
+
],
|
315 |
+
"careers": [
|
316 |
+
"Graphic Designer", "Architect", "Photographer",
|
317 |
+
"Engineer", "Surgeon", "Pilot"
|
318 |
]
|
319 |
},
|
320 |
"Auditory": {
|
|
|
325 |
"Explain concepts out loud to yourself",
|
326 |
"Use rhymes or songs to remember information",
|
327 |
"Listen to educational podcasts"
|
328 |
+
],
|
329 |
+
"careers": [
|
330 |
+
"Musician", "Journalist", "Lawyer",
|
331 |
+
"Psychologist", "Teacher", "Customer Service"
|
332 |
]
|
333 |
},
|
334 |
"Reading/Writing": {
|
|
|
339 |
"Read textbooks and articles",
|
340 |
"Make lists to organize information",
|
341 |
"Rewrite your notes to reinforce learning"
|
342 |
+
],
|
343 |
+
"careers": [
|
344 |
+
"Writer", "Researcher", "Editor",
|
345 |
+
"Accountant", "Programmer", "Historian"
|
346 |
]
|
347 |
},
|
348 |
"Kinesthetic": {
|
|
|
353 |
"Create physical models",
|
354 |
"Associate information with physical actions",
|
355 |
"Study while walking or pacing"
|
356 |
+
],
|
357 |
+
"careers": [
|
358 |
+
"Athlete", "Chef", "Mechanic",
|
359 |
+
"Dancer", "Physical Therapist", "Carpenter"
|
360 |
]
|
361 |
}
|
362 |
}
|
363 |
|
364 |
def evaluate_quiz(self, *answers) -> str:
|
365 |
+
"""Evaluate quiz answers and generate enhanced results."""
|
366 |
answers = list(answers) # Convert tuple to list
|
367 |
if len(answers) != len(self.questions):
|
368 |
raise gr.Error("Not all questions were answered")
|
|
|
385 |
percentages = {style: (score/total_answered)*100 for style, score in scores.items()}
|
386 |
sorted_styles = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
387 |
|
388 |
+
# Generate enhanced results report
|
389 |
result = "## Your Learning Style Results\n\n"
|
390 |
result += "### Scores:\n"
|
391 |
for style, score in sorted_styles:
|
|
|
407 |
for tip in style_info['tips']:
|
408 |
result += f"- {tip}\n"
|
409 |
|
410 |
+
result += "\n**Potential Career Paths**:\n"
|
411 |
+
for career in style_info['careers'][:6]:
|
412 |
+
result += f"- {career}\n"
|
413 |
+
|
414 |
# Add complementary strategies
|
415 |
complementary = [s for s in sorted_styles if s[0] != primary_style][0][0]
|
416 |
result += f"\nYou might also benefit from some **{complementary}** strategies:\n"
|
|
|
427 |
result += f"\n**{style}** techniques:\n"
|
428 |
for tip in self.learning_styles[style]['tips'][:2]:
|
429 |
result += f"- {tip}\n"
|
430 |
+
|
431 |
+
result += f"\n**{style}** career suggestions:\n"
|
432 |
+
for career in self.learning_styles[style]['careers'][:3]:
|
433 |
+
result += f"- {career}\n"
|
434 |
|
435 |
return result
|
436 |
|
|
|
442 |
def __init__(self):
    """Prepare the profile storage directory and start with no session.

    The directory named by PROFILES_DIR is created (with parents) if it does
    not exist yet; the current session token starts out unset.
    """
    storage_root = Path(PROFILES_DIR)
    self.profiles_dir = storage_root
    storage_root.mkdir(parents=True, exist_ok=True)
    self.current_session = None
|
446 |
+
|
447 |
+
def set_session(self, session_token: str) -> None:
    """Remember *session_token* as the manager's current session.

    The stored token is read by get_profile_path when building file names
    and is written into saved profile data.
    """
    token = session_token
    self.current_session = token
|
450 |
+
|
451 |
+
def get_profile_path(self, name: str) -> Path:
    """Return the JSON file path used to store the profile called *name*.

    Spaces in the name become underscores. When a session token is set, it is
    inserted between the name and the ``_profile.json`` suffix.
    """
    stem = name.replace(' ', '_')
    if self.current_session:
        stem = f"{stem}_{self.current_session}"
    return self.profiles_dir / f"{stem}_profile.json"
|
456 |
|
457 |
def save_profile(self, name: str, age: Union[int, str], interests: str,
|
458 |
transcript: Dict, learning_style: str,
|
|
|
486 |
"transcript": transcript if transcript else {},
|
487 |
"learning_style": learning_style if learning_style else "Not assessed",
|
488 |
"favorites": favorites,
|
489 |
+
"blog": sanitize_input(blog) if blog else "",
|
490 |
+
"session_token": self.current_session
|
491 |
}
|
492 |
|
493 |
# Save to JSON file
|
494 |
+
filepath = self.get_profile_path(name)
|
|
|
495 |
|
496 |
with open(filepath, "w", encoding='utf-8') as f:
|
497 |
json.dump(data, f, indent=2, ensure_ascii=False)
|
498 |
|
499 |
+
# Upload to HF Hub if token is available
|
500 |
+
if HF_TOKEN:
|
501 |
+
try:
|
502 |
+
hf_api.upload_file(
|
503 |
+
path_or_fileobj=filepath,
|
504 |
+
path_in_repo=f"profiles/{filepath.name}",
|
505 |
+
repo_id="your-username/student-learning-assistant",
|
506 |
+
repo_type="dataset"
|
507 |
+
)
|
508 |
+
except Exception as e:
|
509 |
+
print(f"Failed to upload to HF Hub: {str(e)}")
|
510 |
+
|
511 |
return self._generate_profile_summary(data)
|
512 |
|
513 |
except Exception as e:
|
514 |
raise gr.Error(f"Error saving profile: {str(e)}")
|
515 |
|
516 |
+
def load_profile(self, name: str = None, session_token: str = None) -> Dict:
|
517 |
"""Load profile by name or return the first one found."""
|
518 |
try:
|
519 |
+
if session_token:
|
520 |
+
profile_pattern = f"*{session_token}_profile.json"
|
521 |
+
else:
|
522 |
+
profile_pattern = "*.json"
|
523 |
+
|
524 |
+
profiles = list(self.profiles_dir.glob(profile_pattern))
|
525 |
if not profiles:
|
526 |
return {}
|
527 |
|
528 |
if name:
|
529 |
# Find profile by name
|
530 |
name = name.replace(" ", "_")
|
531 |
+
if session_token:
|
532 |
+
profile_file = self.profiles_dir / f"{name}_{session_token}_profile.json"
|
533 |
+
else:
|
534 |
+
profile_file = self.profiles_dir / f"{name}_profile.json"
|
535 |
+
|
536 |
if not profile_file.exists():
|
537 |
+
# Try loading from HF Hub
|
538 |
+
if HF_TOKEN:
|
539 |
+
try:
|
540 |
+
hf_api.download_file(
|
541 |
+
path_in_repo=f"profiles/{profile_file.name}",
|
542 |
+
repo_id="your-username/student-learning-assistant",
|
543 |
+
repo_type="dataset",
|
544 |
+
local_dir=self.profiles_dir
|
545 |
+
)
|
546 |
+
except:
|
547 |
+
raise gr.Error(f"No profile found for {name}")
|
548 |
+
else:
|
549 |
+
raise gr.Error(f"No profile found for {name}")
|
550 |
else:
|
551 |
# Load the first profile found
|
552 |
profile_file = profiles[0]
|
|
|
558 |
print(f"Error loading profile: {str(e)}")
|
559 |
return {}
|
560 |
|
561 |
+
def list_profiles(self, session_token: str = None) -> List[str]:
    """List the display names of the stored profiles.

    Args:
        session_token: When given, only files ending in
            ``<session_token>_profile.json`` are listed and the token is
            removed from each name. When omitted, every ``*.json`` file in
            the profiles directory is listed.

    Returns:
        Profile names with underscores turned back into spaces, ordered by
        file path so the result is stable between calls.
    """
    pattern = f"*{session_token}_profile.json" if session_token else "*.json"
    file_suffix = "_profile"
    token_suffix = f"_{session_token}" if session_token else ""

    profile_names = []
    # glob() yields files in arbitrary order; sort for a stable listing.
    for path in sorted(self.profiles_dir.glob(pattern)):
        name_part = path.stem
        # Strip only the trailing markers. A blanket str.replace would also
        # remove "_profile" or the token from the middle of a student's name.
        if name_part.endswith(file_suffix):
            name_part = name_part[:-len(file_suffix)]
        if token_suffix and name_part.endswith(token_suffix):
            name_part = name_part[:-len(token_suffix)]
        profile_names.append(name_part.replace("_", " "))

    return profile_names
|
577 |
|
578 |
def _generate_profile_summary(self, data: Dict) -> str:
|
579 |
"""Generate markdown summary of the profile."""
|
580 |
transcript = data.get("transcript", {})
|
581 |
favorites = data.get("favorites", {})
|
582 |
+
learning_style = data.get("learning_style", "Not assessed")
|
583 |
|
584 |
markdown = f"""## Student Profile: {data['name']}
|
|
|
585 |
### Basic Information
|
586 |
- **Age:** {data['age']}
|
587 |
- **Interests:** {data['interests']}
|
588 |
+
- **Learning Style:** {learning_style.split('##')[0].strip()}
|
|
|
589 |
### Academic Information
|
590 |
{self._format_transcript(transcript)}
|
|
|
591 |
### Favorites
|
592 |
- **Movie:** {favorites.get('movie', 'Not specified')}
|
593 |
*Reason:* {favorites.get('movie_reason', 'Not specified')}
|
|
|
597 |
*Reason:* {favorites.get('book_reason', 'Not specified')}
|
598 |
- **Character:** {favorites.get('character', 'Not specified')}
|
599 |
*Reason:* {favorites.get('character_reason', 'Not specified')}
|
|
|
600 |
### Personal Blog
|
601 |
{data.get('blog', '_No blog provided_')}
|
602 |
"""
|
|
|
638 |
self.context_history = []
|
639 |
self.max_context_length = 5 # Keep last 5 exchanges for context
|
640 |
|
641 |
+
def generate_response(self, message: str, history: List[List[Union[str, None]]], session_token: str) -> str:
|
642 |
"""Generate personalized response based on student profile and context."""
|
643 |
try:
|
644 |
+
# Load profile with session token
|
645 |
+
profile = profile_manager.load_profile(session_token=session_token)
|
646 |
if not profile:
|
647 |
return "Please complete and save your profile first using the previous tabs."
|
648 |
|
|
|
869 |
- **Course advice**: "Show me my course history"
|
870 |
- **Interest suggestions**: "What clubs match my interests?"
|
871 |
- **General advice**: "How can I improve my grades?"
|
|
|
872 |
Try asking about any of these topics!""")
|
873 |
|
874 |
# Initialize teaching assistant
|
|
|
877 |
# ========== GRADIO INTERFACE ==========
|
878 |
def create_interface():
|
879 |
with gr.Blocks(theme=gr.themes.Soft(), title="Student Learning Assistant") as app:
|
880 |
+
# Session state
|
881 |
+
session_token = gr.State(value=generate_session_token())
|
882 |
+
profile_manager.set_session(session_token.value)
|
883 |
+
|
884 |
# Custom CSS for better styling
|
885 |
app.css = """
|
886 |
.gradio-container {
|
|
|
905 |
background: #f5f5f5;
|
906 |
border-radius: 5px;
|
907 |
}
|
908 |
+
.profile-card {
|
909 |
+
border: 1px solid #e0e0e0;
|
910 |
+
border-radius: 8px;
|
911 |
+
padding: 15px;
|
912 |
+
margin-bottom: 15px;
|
913 |
+
background: white;
|
914 |
+
}
|
915 |
+
.chatbot {
|
916 |
+
min-height: 500px;
|
917 |
+
}
|
918 |
"""
|
919 |
|
920 |
gr.Markdown("""
|
|
|
943 |
with gr.Row():
|
944 |
with gr.Column(scale=1):
|
945 |
gr.Markdown("### Step 1: Upload Your Transcript")
|
946 |
+
gr.Markdown("Upload a PDF or image of your academic transcript to analyze your courses and GPA.")
|
947 |
|
948 |
with gr.Group():
|
949 |
transcript_file = gr.File(
|
950 |
+
label="Transcript (PDF or Image)",
|
951 |
file_types=ALLOWED_FILE_TYPES,
|
952 |
type="filepath"
|
953 |
)
|
954 |
upload_btn = gr.Button("Upload & Analyze", variant="primary")
|
955 |
|
956 |
+
gr.Markdown("""
|
957 |
+
**Supported Formats**: PDF, PNG, JPG
|
958 |
+
**Note**: Your file is processed locally and not stored permanently.
|
959 |
+
""")
|
960 |
|
961 |
with gr.Column(scale=2):
|
962 |
transcript_output = gr.Textbox(
|
|
|
1073 |
gr.Markdown("Verify your information before saving. You can return to previous steps to make changes.")
|
1074 |
|
1075 |
save_btn = gr.Button("Save Profile", variant="primary")
|
1076 |
+
|
1077 |
+
# Profile management section
|
1078 |
+
with gr.Group():
|
1079 |
+
load_profile_dropdown = gr.Dropdown(
|
1080 |
+
label="Load Existing Profile",
|
1081 |
+
choices=profile_manager.list_profiles(session_token.value),
|
1082 |
+
visible=bool(profile_manager.list_profiles(session_token.value))
|
1083 |
+
)
|
1084 |
+
with gr.Row():
|
1085 |
+
load_btn = gr.Button("Load", visible=bool(profile_manager.list_profiles(session_token.value)))
|
1086 |
+
delete_btn = gr.Button("Delete", variant="stop", visible=bool(profile_manager.list_profiles(session_token.value)))
|
1087 |
+
|
1088 |
clear_btn = gr.Button("Clear Form")
|
1089 |
|
1090 |
with gr.Column(scale=2):
|
|
|
1093 |
label="Profile Summary"
|
1094 |
)
|
1095 |
|
1096 |
+
# Save profile
|
1097 |
save_btn.click(
|
1098 |
fn=profile_manager.save_profile,
|
1099 |
inputs=[
|
|
|
1102 |
book, book_reason, character, character_reason, blog_text
|
1103 |
],
|
1104 |
outputs=output_summary
|
1105 |
+
).then(
|
1106 |
+
fn=lambda: profile_manager.list_profiles(session_token.value),
|
1107 |
+
outputs=load_profile_dropdown
|
1108 |
+
).then(
|
1109 |
+
fn=lambda: gr.update(visible=True),
|
1110 |
+
outputs=load_btn
|
1111 |
+
).then(
|
1112 |
+
fn=lambda: gr.update(visible=True),
|
1113 |
+
outputs=delete_btn
|
1114 |
)
|
1115 |
|
1116 |
+
# Load profile
|
1117 |
load_btn.click(
|
1118 |
+
fn=lambda name: profile_manager.load_profile(name, session_token.value),
|
1119 |
inputs=load_profile_dropdown,
|
1120 |
outputs=output_summary
|
1121 |
)
|
1122 |
|
1123 |
+
# Delete profile
|
1124 |
+
def delete_profile(name, session_token):
|
1125 |
+
if not name:
|
1126 |
+
raise gr.Error("Please select a profile to delete")
|
1127 |
+
try:
|
1128 |
+
profile_path = profile_manager.get_profile_path(name)
|
1129 |
+
if profile_path.exists():
|
1130 |
+
profile_path.unlink()
|
1131 |
+
return "Profile deleted successfully", ""
|
1132 |
+
except Exception as e:
|
1133 |
+
raise gr.Error(f"Error deleting profile: {str(e)}")
|
1134 |
+
|
1135 |
+
delete_btn.click(
|
1136 |
+
fn=delete_profile,
|
1137 |
+
inputs=[load_profile_dropdown, session_token],
|
1138 |
+
outputs=[output_summary, load_profile_dropdown]
|
1139 |
+
).then(
|
1140 |
+
fn=lambda: gr.update(
|
1141 |
+
choices=profile_manager.list_profiles(session_token.value),
|
1142 |
+
visible=bool(profile_manager.list_profiles(session_token.value))
|
1143 |
+
),
|
1144 |
+
outputs=load_profile_dropdown
|
1145 |
+
).then(
|
1146 |
+
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
|
1147 |
+
outputs=load_btn
|
1148 |
+
).then(
|
1149 |
+
fn=lambda: gr.update(visible=bool(profile_manager.list_profiles(session_token.value))),
|
1150 |
+
outputs=delete_btn
|
1151 |
+
)
|
1152 |
+
|
1153 |
+
# Clear form
|
1154 |
clear_btn.click(
|
1155 |
fn=lambda: [gr.update(value="") for _ in range(12)],
|
1156 |
outputs=[
|
|
|
1166 |
gr.Markdown("## Your Personalized Learning Assistant")
|
1167 |
gr.Markdown("Ask me anything about studying, your courses, grades, or learning strategies.")
|
1168 |
|
1169 |
+
# Chat interface with session token
|
1170 |
chatbot = gr.ChatInterface(
|
1171 |
+
fn=lambda msg, hist: teaching_assistant.generate_response(msg, hist, session_token.value),
|
1172 |
examples=[
|
1173 |
"How should I study for my next math test?",
|
1174 |
"What's my current GPA?",
|
|
|
1176 |
"How can I improve my grades in science?",
|
1177 |
"What study methods match my learning style?"
|
1178 |
],
|
1179 |
+
title="",
|
1180 |
+
retry_btn=None,
|
1181 |
+
undo_btn=None,
|
1182 |
+
clear_btn="Clear Chat"
|
1183 |
)
|
1184 |
|
1185 |
# Tab navigation logic
|