cuneytkaya commited on
Commit
6494950
·
verified ·
1 Parent(s): bd63e0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +391 -230
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import json
3
  import gradio as gr
 
4
  import google.generativeai as genai
5
  from PIL import Image
6
  import numpy as np
@@ -9,30 +10,56 @@ from dotenv import load_dotenv
9
  import traceback
10
  import pytesseract
11
  import cv2
12
- import time
13
 
14
- # Load environment variables
15
  load_dotenv()
16
-
17
- # Set API key for Gemini
18
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or HfFolder.get_token("GEMINI_API_KEY")
19
  if not GEMINI_API_KEY:
20
- raise ValueError("Gemini API key not found. Please set the GEMINI_API_KEY environment variable.")
 
 
 
 
 
 
 
 
 
21
  genai.configure(api_key=GEMINI_API_KEY)
22
 
23
- # Define model names - using latest models
24
- CLASSIFICATION_MODEL = "gemini-1.5-flash" # For classification
25
- SOLUTION_MODEL = "gemini-1.5-pro-latest" # For solution generation
26
- EXPLANATION_MODEL = "gemini-1.5-pro-latest" # For explanation generation
27
- SIMILAR_MODEL = "gemini-1.5-pro-latest" # For similar problems generation
28
 
29
  print(f"Using models: Classification: {CLASSIFICATION_MODEL}, Solution: {SOLUTION_MODEL}, Explanation: {EXPLANATION_MODEL}, Similar: {SIMILAR_MODEL}")
30
 
31
- # Set up Gemini for image analysis
32
- MODEL_IMAGE = "gemini-1.5-pro-latest" # Use Gemini for OCR as well
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- # Set Tesseract path
35
- pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'
36
 
37
  # Extract text using Gemini directly (with Tesseract as fallback)
38
  def extract_text_with_gemini(image):
@@ -40,7 +67,7 @@ def extract_text_with_gemini(image):
40
  try:
41
  if isinstance(image, np.ndarray):
42
  image = Image.fromarray(image)
43
-
44
  model = genai.GenerativeModel(MODEL_IMAGE)
45
  prompt = """
46
  Extract ALL text, numbers, and mathematical equations from this image precisely.
@@ -48,10 +75,10 @@ def extract_text_with_gemini(image):
48
  Format any equations properly and maintain their layout.
49
  Don't explain the content, just extract the text verbatim.
50
  """
51
-
52
  response = model.generate_content([prompt, image])
53
  extracted_text = response.text.strip()
54
-
55
  # If Gemini returns a very short result, try Tesseract as fallback
56
  if len(extracted_text) < 10:
57
  print("Gemini returned limited text, trying Tesseract as fallback")
@@ -59,21 +86,21 @@ def extract_text_with_gemini(image):
59
  image_array = np.array(image)
60
  else:
61
  image_array = image
62
-
63
  if len(image_array.shape) == 3:
64
  gray = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
65
  else:
66
  gray = image_array
67
-
68
  custom_config = r'--oem 1 --psm 6'
69
  tesseract_text = pytesseract.image_to_string(gray, config=custom_config)
70
-
71
  if len(tesseract_text) > len(extracted_text):
72
  extracted_text = tesseract_text
73
-
74
  print(f"Extracted text: {extracted_text[:100]}...")
75
  return extracted_text
76
-
77
  except Exception as e:
78
  print(f"Extraction Error: {e}")
79
  print(traceback.format_exc())
@@ -82,12 +109,12 @@ def extract_text_with_gemini(image):
82
  image_array = np.array(image)
83
  else:
84
  image_array = image
85
-
86
  if len(image_array.shape) == 3:
87
  gray = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
88
  else:
89
  gray = image_array
90
-
91
  return pytesseract.image_to_string(gray, config=r'--oem 1 --psm 6')
92
  except Exception as e2:
93
  print(f"Fallback OCR Error: {e2}")
@@ -102,45 +129,44 @@ def classify_with_gemini_flash(math_problem):
102
  generation_config={
103
  "temperature": 0.1,
104
  "top_p": 0.95,
105
- "max_output_tokens": 150,
106
  "response_mime_type": "application/json",
107
  }
108
  )
109
-
110
  prompt = f"""
111
- Task: Classify the following math problem.
112
 
113
- PROBLEM: {math_problem}
114
 
115
- Classify this math problem according to:
116
- 1. Primary category (e.g., Algebra, Calculus, Geometry, Trigonometry, Statistics, Number Theory)
117
- 2. Specific subtopic (e.g., Linear Equations, Derivatives, Integrals, Probability)
118
- 3. Difficulty level (Basic, Intermediate, Advanced)
119
- 4. Key concepts involved
120
 
121
  Format the response as a JSON object with the fields: "category", "subtopic", "difficulty", "key_concepts".
122
  """
123
-
124
  response = model.generate_content(prompt)
125
  try:
126
- classification = json.loads(response.text)
 
 
127
  return classification
128
- except json.JSONDecodeError:
129
- print(f"JSON Decode Error: Unable to parse response: {response.text}")
 
130
  return {
131
- "category": "Unknown",
132
- "subtopic": "Unknown",
133
- "difficulty": "Unknown",
134
- "key_concepts": ["Unknown"]
135
  }
136
  except Exception as e:
137
  print(f"Classification Error: {e}")
138
  print(traceback.format_exc())
139
  return {
140
- "category": "Error",
141
- "subtopic": "Error",
142
- "difficulty": "Error",
143
- "key_concepts": [f"Error: {str(e)}"]
144
  }
145
 
146
  # Solve the math problem using Gemini model
@@ -152,54 +178,52 @@ def solve_with_gemini_pro(math_problem, classification):
152
  generation_config={
153
  "temperature": 0.2,
154
  "top_p": 0.9,
155
- "max_output_tokens": 1000,
156
  }
157
  )
158
-
159
- # Ensure classification has the required fields with fallbacks
160
  if not isinstance(classification, dict):
161
  classification = {
162
- "category": "Unknown",
163
- "subtopic": "Unknown",
164
- "difficulty": "Unknown",
165
- "key_concepts": ["Unknown"]
166
  }
167
-
168
  for field in ["category", "subtopic", "difficulty"]:
169
  if field not in classification or not classification[field]:
170
  classification[field] = "Unknown"
171
-
172
  if "key_concepts" not in classification or not classification["key_concepts"]:
173
  classification["key_concepts"] = ["Unknown"]
174
-
175
- # Format key concepts as a string
176
  if isinstance(classification["key_concepts"], list):
177
  key_concepts = ", ".join(classification["key_concepts"])
178
  else:
179
  key_concepts = str(classification["key_concepts"])
180
-
181
  prompt = f"""
182
- Task: Solve the following math problem with clear step-by-step explanations.
183
 
184
- PROBLEM: {math_problem}
185
 
186
- CLASSIFICATION:
187
- - Category: {classification["category"]}
188
- - Subtopic: {classification["subtopic"]}
189
- - Difficulty: {classification["difficulty"]}
190
- - Key Concepts: {key_concepts}
191
 
192
- Provide a complete solution following these guidelines:
193
- 1. Start with an overview of the approach
194
- 2. Break down the problem into clear, logical steps
195
- 3. Explain each step thoroughly, mentioning the mathematical principles applied
196
- 4. Show all work and calculations
197
- 5. Verify the answer if possible
198
- 6. Summarize the key takeaway from this problem
199
 
200
  Format the solution to be readable on a mobile device, with appropriate spacing between steps.
201
  """
202
-
203
  response = model.generate_content(prompt)
204
  return response.text
205
  except Exception as e:
@@ -212,33 +236,33 @@ def explain_solution(math_problem, solution):
212
  """Provide a more detailed explanation of the solution"""
213
  try:
214
  print(f"Generating detailed explanation...")
215
-
216
  model = genai.GenerativeModel(
217
  model_name=EXPLANATION_MODEL,
218
  generation_config={
219
  "temperature": 0.3,
220
  "top_p": 0.95,
221
- "max_output_tokens": 1500,
222
  }
223
  )
224
-
225
  prompt = f"""
226
- Task: Provide a more detailed explanation of the solution to this math problem.
227
 
228
- PROBLEM: {math_problem}
229
- SOLUTION: {solution}
230
 
231
- Provide a more comprehensive explanation that:
232
- 1. Breaks down complex steps into simpler components
233
- 2. Explains the underlying mathematical principles in depth
234
- 3. Connects this problem to fundamental concepts
235
- 4. Offers visual or intuitive ways to understand the concepts
236
- 5. Highlights common mistakes students make with this type of problem
237
- 6. Suggests alternative solution approaches if applicable
238
 
239
  Make the explanation accessible to a student who is struggling with this topic.
240
  """
241
-
242
  response = model.generate_content(prompt)
243
  return response.text
244
  except Exception as e:
@@ -251,39 +275,39 @@ def generate_similar_problems(math_problem, classification):
251
  """Generate similar practice math problems"""
252
  try:
253
  print(f"Generating similar problems...")
254
-
255
  model = genai.GenerativeModel(
256
  model_name=SIMILAR_MODEL,
257
  generation_config={
258
  "temperature": 0.7,
259
  "top_p": 0.95,
260
- "max_output_tokens": 1000,
261
  }
262
  )
263
-
264
- # Prepare classification string
265
  classification_str = json.dumps(classification, indent=2)
266
-
267
  prompt = f"""
268
- Task: Generate similar practice math problems based on the following problem.
269
 
270
- ORIGINAL PROBLEM: {math_problem}
271
- CLASSIFICATION: {classification_str}
272
 
273
- Generate 3 similar practice problems that:
274
- 1. Cover the same mathematical concepts and principles
275
- 2. Vary in difficulty (one easier, one similar, one harder)
276
- 3. Use different numerical values or variables
277
- 4. Test the same underlying skills
278
 
279
- For each problem:
280
- - Provide the complete problem statement
281
- - Include a brief hint for solving it
282
- - Provide the correct answer (but not the full solution)
283
 
284
  Format as three separate problems with clear numbering.
285
  """
286
-
287
  response = model.generate_content(prompt)
288
  return response.text
289
  except Exception as e:
@@ -291,150 +315,287 @@ def generate_similar_problems(math_problem, classification):
291
  print(traceback.format_exc())
292
  return f"Error generating similar problems: {str(e)}"
293
 
294
- # Main function for processing images
295
- def process_image(image, progress=gr.Progress()):
296
- """Main processing pipeline for the NerdAI app"""
 
 
297
  try:
298
  if image is None:
299
- return None, "No image uploaded", "No image uploaded", "No image uploaded", "No image uploaded"
300
-
301
- progress(0, desc="Starting processing...")
302
-
303
- # Step 1: Extract text with Gemini model
304
- progress(0.4, desc="Extracting text with Gemini Pro Vision...")
 
305
  extracted_text = extract_text_with_gemini(image)
306
-
307
- if not extracted_text or extracted_text.strip() == "":
308
- return image, "No text was extracted from the image. Please try a clearer image.", "No text extracted", "No text was extracted from the image.", ""
309
-
310
- # Step 2: Classify with Gemini model
311
- progress(0.6, desc=f"Classifying problem with {CLASSIFICATION_MODEL}...")
 
 
312
  classification = classify_with_gemini_flash(extracted_text)
313
- classification_json = json.dumps(classification, indent=2)
314
-
315
- # Step 3: Solve with Gemini model
316
- progress(0.8, desc=f"Solving problem with {SOLUTION_MODEL}...")
317
  solution = solve_with_gemini_pro(extracted_text, classification)
318
-
319
- # Complete
320
- progress(1.0, desc="Processing complete")
321
-
322
- return image, extracted_text, classification_json, solution, extracted_text
323
-
 
 
324
  except Exception as e:
325
  print(f"Process Image Error: {e}")
326
  print(traceback.format_exc())
327
- return None, f"Error processing image: {str(e)}", "Error", "Error", ""
328
-
329
- # Create the Gradio interface
330
- with gr.Blocks(title="NerdAI Math Problem Solver") as demo:
331
- gr.Markdown("# NerdAI Math Problem Solver")
332
- gr.Markdown("Upload an image of a math problem to get a step-by-step solution")
333
-
334
- # Store state variables
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  extracted_text_state = gr.State("")
336
-
337
- with gr.Row():
338
- with gr.Column(scale=1):
339
- # Input section
340
- input_image = gr.Image(label="Upload Math Problem Image", type="pil")
341
- process_btn = gr.Button("Process Image", variant="primary")
342
-
343
- with gr.Column(scale=1):
344
- # Processed image output
345
- processed_image = gr.Image(label="Processed Image")
346
-
347
- with gr.Row():
348
- # Text extraction output
349
- extracted_text = gr.Textbox(label="Extracted Text", lines=3)
350
-
351
- with gr.Row():
352
- # Classification output
353
- classification = gr.Textbox(label=f"Problem Classification", lines=6)
354
-
355
- with gr.Row():
356
- # Solution output
357
- solution = gr.Markdown(label="Solution")
358
-
359
- with gr.Row():
360
- explain_btn = gr.Button("Explain It", variant="secondary")
361
- similar_btn = gr.Button("Similar Questions", variant="secondary")
362
-
363
  with gr.Row():
364
- # Additional outputs
365
- with gr.Tabs():
366
- with gr.TabItem("Detailed Explanation"):
367
- explanation = gr.Markdown()
368
- with gr.TabItem("Similar Practice Problems"):
369
- similar_problems = gr.Markdown()
370
-
371
- # Event handlers for the buttons
372
- def explain_button_handler(math_problem, solution_text):
373
- """Handler for Explain It button"""
374
- print(f"Explain button clicked")
375
- if not math_problem or math_problem == "No image uploaded":
376
- return "Please process an image first"
377
- return explain_solution(math_problem, solution_text)
378
-
379
- def similar_button_handler(math_problem, classification_json):
380
- """Handler for Similar Questions button"""
381
- print(f"Similar button clicked")
382
- if not math_problem or math_problem == "No image uploaded":
383
- return "Please process an image first"
384
- try:
385
- # Parse classification JSON
386
- try:
387
- classification = json.loads(classification_json)
388
- except:
389
- classification = {
390
- "category": "Unknown",
391
- "subtopic": "Unknown",
392
- "difficulty": "Unknown",
393
- "key_concepts": ["Unknown"]
394
- }
395
-
396
- # Validate classification
397
- if not isinstance(classification, dict):
398
- classification = {
399
- "category": "Unknown",
400
- "subtopic": "Unknown",
401
- "difficulty": "Unknown",
402
- "key_concepts": ["Unknown"]
403
- }
404
-
405
- # Ensure fields exist
406
- for field in ["category", "subtopic", "difficulty"]:
407
- if field not in classification or not classification[field]:
408
- classification[field] = "Unknown"
409
-
410
- if "key_concepts" not in classification or not classification["key_concepts"]:
411
- classification["key_concepts"] = ["Unknown"]
412
-
413
- return generate_similar_problems(math_problem, classification)
414
- except Exception as e:
415
- print(f"Error in similar_button_handler: {e}")
416
- print(traceback.format_exc())
417
- return f"Error generating similar problems: {str(e)}"
418
-
419
- # Set up event handlers
420
  process_btn.click(
421
- fn=process_image,
422
  inputs=[input_image],
423
- outputs=[processed_image, extracted_text, classification, solution, extracted_text_state]
 
 
 
 
 
 
 
 
424
  )
425
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  explain_btn.click(
427
  fn=explain_button_handler,
428
- inputs=[extracted_text_state, solution],
429
- outputs=explanation
430
  )
431
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  similar_btn.click(
433
  fn=similar_button_handler,
434
- inputs=[extracted_text_state, classification],
435
- outputs=similar_problems
436
  )
437
 
438
- # Launch the app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  if __name__ == "__main__":
440
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
  import gradio as gr
4
+ import gradio.themes as gr_themes # Import themes for UI
5
  import google.generativeai as genai
6
  from PIL import Image
7
  import numpy as np
 
10
  import traceback
11
  import pytesseract
12
  import cv2
13
+ import time # Keep time import for potential use later, though not in original process_image
14
 
15
+ # --- Load Environment Variables (As per Original Script) ---
16
  load_dotenv()
 
 
17
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or HfFolder.get_token("GEMINI_API_KEY")
18
  if not GEMINI_API_KEY:
19
+ # Try to get it from Gradio secrets if running on Spaces (Added for robustness)
20
+ try:
21
+ import secrets
22
+ GEMINI_API_KEY = secrets.GEMINI_API_KEY
23
+ except (ImportError, AttributeError):
24
+ raise ValueError("Gemini API key not found. Please set the GEMINI_API_KEY environment variable or add it as a Secret if running on Hugging Face Spaces.")
25
+
26
+ if not GEMINI_API_KEY:
27
+ raise ValueError("Gemini API key not found. Please set the GEMINI_API_KEY environment variable.")
28
+
29
  genai.configure(api_key=GEMINI_API_KEY)
30
 
31
+ # --- Define Model Names (As per Original Script) ---
32
+ CLASSIFICATION_MODEL = "gemini-1.5-flash"
33
+ SOLUTION_MODEL = "gemini-1.5-pro-latest"
34
+ EXPLANATION_MODEL = "gemini-1.5-pro-latest"
35
+ SIMILAR_MODEL = "gemini-1.5-pro-latest"
36
 
37
  print(f"Using models: Classification: {CLASSIFICATION_MODEL}, Solution: {SOLUTION_MODEL}, Explanation: {EXPLANATION_MODEL}, Similar: {SIMILAR_MODEL}")
38
 
39
+ # --- Set up Gemini for image analysis (As per Original Script) ---
40
+ MODEL_IMAGE = "gemini-1.5-pro-latest" # Use Gemini for OCR as well
41
+
42
+ # --- Set Tesseract Path (As per Original Script, with robustness check) ---
43
+ # Make sure this path is correct for your deployment environment
44
+ try:
45
+ # Check common paths
46
+ if os.path.exists('/opt/homebrew/bin/tesseract'): # Original path check
47
+ pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'
48
+ elif os.path.exists('/usr/bin/tesseract'): # Added common Linux path
49
+ pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
50
+ else:
51
+ # Attempt to find Tesseract in PATH (might work in some environments)
52
+ from shutil import which
53
+ tesseract_path = which('tesseract')
54
+ if tesseract_path:
55
+ pytesseract.pytesseract.tesseract_cmd = tesseract_path
56
+ else:
57
+ print("Warning: Tesseract command not found at specified paths or in PATH. Fallback OCR might fail.")
58
+ except Exception as e:
59
+ print(f"Warning: Error setting Tesseract path: {e}. Fallback OCR might fail.")
60
+
61
 
62
+ # --- Backend Functions (Copied *EXACTLY* from Original User Script) ---
 
63
 
64
  # Extract text using Gemini directly (with Tesseract as fallback)
65
  def extract_text_with_gemini(image):
 
67
  try:
68
  if isinstance(image, np.ndarray):
69
  image = Image.fromarray(image)
70
+
71
  model = genai.GenerativeModel(MODEL_IMAGE)
72
  prompt = """
73
  Extract ALL text, numbers, and mathematical equations from this image precisely.
 
75
  Format any equations properly and maintain their layout.
76
  Don't explain the content, just extract the text verbatim.
77
  """
78
+
79
  response = model.generate_content([prompt, image])
80
  extracted_text = response.text.strip()
81
+
82
  # If Gemini returns a very short result, try Tesseract as fallback
83
  if len(extracted_text) < 10:
84
  print("Gemini returned limited text, trying Tesseract as fallback")
 
86
  image_array = np.array(image)
87
  else:
88
  image_array = image
89
+
90
  if len(image_array.shape) == 3:
91
  gray = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
92
  else:
93
  gray = image_array
94
+
95
  custom_config = r'--oem 1 --psm 6'
96
  tesseract_text = pytesseract.image_to_string(gray, config=custom_config)
97
+
98
  if len(tesseract_text) > len(extracted_text):
99
  extracted_text = tesseract_text
100
+
101
  print(f"Extracted text: {extracted_text[:100]}...")
102
  return extracted_text
103
+
104
  except Exception as e:
105
  print(f"Extraction Error: {e}")
106
  print(traceback.format_exc())
 
109
  image_array = np.array(image)
110
  else:
111
  image_array = image
112
+
113
  if len(image_array.shape) == 3:
114
  gray = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
115
  else:
116
  gray = image_array
117
+
118
  return pytesseract.image_to_string(gray, config=r'--oem 1 --psm 6')
119
  except Exception as e2:
120
  print(f"Fallback OCR Error: {e2}")
 
129
  generation_config={
130
  "temperature": 0.1,
131
  "top_p": 0.95,
132
+ "max_output_tokens": 150, # Original value
133
  "response_mime_type": "application/json",
134
  }
135
  )
136
+
137
  prompt = f"""
138
+ Task: Classify the following math problem.
139
 
140
+ PROBLEM: {math_problem}
141
 
142
+ Classify this math problem according to:
143
+ 1. Primary category (e.g., Algebra, Calculus, Geometry, Trigonometry, Statistics, Number Theory)
144
+ 2. Specific subtopic (e.g., Linear Equations, Derivatives, Integrals, Probability)
145
+ 3. Difficulty level (Basic, Intermediate, Advanced)
146
+ 4. Key concepts involved
147
 
148
  Format the response as a JSON object with the fields: "category", "subtopic", "difficulty", "key_concepts".
149
  """
150
+
151
  response = model.generate_content(prompt)
152
  try:
153
+ # Clean potential markdown code fences before parsing (Added robustness)
154
+ cleaned_text = response.text.strip().replace("```json", "").replace("```", "").strip()
155
+ classification = json.loads(cleaned_text)
156
  return classification
157
+ except (json.JSONDecodeError, AttributeError): # Added AttributeError check
158
+ print(f"JSON Decode/Attribute Error: Unable to parse response: {response.text}")
159
+ # Return default structure on failure, as per original implicit behavior
160
  return {
161
+ "category": "Unknown", "subtopic": "Unknown",
162
+ "difficulty": "Unknown", "key_concepts": ["Unknown"]
 
 
163
  }
164
  except Exception as e:
165
  print(f"Classification Error: {e}")
166
  print(traceback.format_exc())
167
  return {
168
+ "category": "Error", "subtopic": "Error",
169
+ "difficulty": "Error", "key_concepts": [f"Error: {str(e)}"]
 
 
170
  }
171
 
172
  # Solve the math problem using Gemini model
 
178
  generation_config={
179
  "temperature": 0.2,
180
  "top_p": 0.9,
181
+ "max_output_tokens": 1000, # Original value
182
  }
183
  )
184
+
185
+ # Ensure classification has the required fields with fallbacks (As per Original Script)
186
  if not isinstance(classification, dict):
187
  classification = {
188
+ "category": "Unknown", "subtopic": "Unknown",
189
+ "difficulty": "Unknown", "key_concepts": ["Unknown"]
 
 
190
  }
191
+
192
  for field in ["category", "subtopic", "difficulty"]:
193
  if field not in classification or not classification[field]:
194
  classification[field] = "Unknown"
195
+
196
  if "key_concepts" not in classification or not classification["key_concepts"]:
197
  classification["key_concepts"] = ["Unknown"]
198
+
199
+ # Format key concepts as a string (As per Original Script)
200
  if isinstance(classification["key_concepts"], list):
201
  key_concepts = ", ".join(classification["key_concepts"])
202
  else:
203
  key_concepts = str(classification["key_concepts"])
204
+
205
  prompt = f"""
206
+ Task: Solve the following math problem with clear step-by-step explanations.
207
 
208
+ PROBLEM: {math_problem}
209
 
210
+ CLASSIFICATION:
211
+ - Category: {classification["category"]}
212
+ - Subtopic: {classification["subtopic"]}
213
+ - Difficulty: {classification["difficulty"]}
214
+ - Key Concepts: {key_concepts}
215
 
216
+ Provide a complete solution following these guidelines:
217
+ 1. Start with an overview of the approach
218
+ 2. Break down the problem into clear, logical steps
219
+ 3. Explain each step thoroughly, mentioning the mathematical principles applied
220
+ 4. Show all work and calculations
221
+ 5. Verify the answer if possible
222
+ 6. Summarize the key takeaway from this problem
223
 
224
  Format the solution to be readable on a mobile device, with appropriate spacing between steps.
225
  """
226
+
227
  response = model.generate_content(prompt)
228
  return response.text
229
  except Exception as e:
 
236
  """Provide a more detailed explanation of the solution"""
237
  try:
238
  print(f"Generating detailed explanation...")
239
+
240
  model = genai.GenerativeModel(
241
  model_name=EXPLANATION_MODEL,
242
  generation_config={
243
  "temperature": 0.3,
244
  "top_p": 0.95,
245
+ "max_output_tokens": 1500, # Original value
246
  }
247
  )
248
+
249
  prompt = f"""
250
+ Task: Provide a more detailed explanation of the solution to this math problem.
251
 
252
+ PROBLEM: {math_problem}
253
+ SOLUTION: {solution}
254
 
255
+ Provide a more comprehensive explanation that:
256
+ 1. Breaks down complex steps into simpler components
257
+ 2. Explains the underlying mathematical principles in depth
258
+ 3. Connects this problem to fundamental concepts
259
+ 4. Offers visual or intuitive ways to understand the concepts
260
+ 5. Highlights common mistakes students make with this type of problem
261
+ 6. Suggests alternative solution approaches if applicable
262
 
263
  Make the explanation accessible to a student who is struggling with this topic.
264
  """
265
+
266
  response = model.generate_content(prompt)
267
  return response.text
268
  except Exception as e:
 
275
  """Generate similar practice math problems"""
276
  try:
277
  print(f"Generating similar problems...")
278
+
279
  model = genai.GenerativeModel(
280
  model_name=SIMILAR_MODEL,
281
  generation_config={
282
  "temperature": 0.7,
283
  "top_p": 0.95,
284
+ "max_output_tokens": 1000, # Original value
285
  }
286
  )
287
+
288
+ # Prepare classification string (As per Original Script)
289
  classification_str = json.dumps(classification, indent=2)
290
+
291
  prompt = f"""
292
+ Task: Generate similar practice math problems based on the following problem.
293
 
294
+ ORIGINAL PROBLEM: {math_problem}
295
+ CLASSIFICATION: {classification_str}
296
 
297
+ Generate 3 similar practice problems that:
298
+ 1. Cover the same mathematical concepts and principles
299
+ 2. Vary in difficulty (one easier, one similar, one harder)
300
+ 3. Use different numerical values or variables
301
+ 4. Test the same underlying skills
302
 
303
+ For each problem:
304
+ - Provide the complete problem statement
305
+ - Include a brief hint for solving it
306
+ - Provide the correct answer (but not the full solution)
307
 
308
  Format as three separate problems with clear numbering.
309
  """
310
+
311
  response = model.generate_content(prompt)
312
  return response.text
313
  except Exception as e:
 
315
  print(traceback.format_exc())
316
  return f"Error generating similar problems: {str(e)}"
317
 
318
+
319
+ # Main function for processing images (As per Original Script)
320
+ # Note: The original function didn't use gr.Progress. We will call the backend functions directly.
321
+ def process_image(image):
322
+ """Main processing pipeline for the NerdAI app (Original Logic)"""
323
  try:
324
  if image is None:
325
+ # Return values matching the expected outputs for the UI structure
326
+ return None, "No image uploaded", "{}", "No image uploaded", "", "{}" # Added empty state values
327
+
328
+ print("Starting processing...") # Simple print instead of progress
329
+
330
+ # Step 1: Extract text
331
+ print("Extracting text...")
332
  extracted_text = extract_text_with_gemini(image)
333
+
334
+ if not extracted_text or extracted_text.strip() == "" or extracted_text.startswith("Error"):
335
+ err_msg = extracted_text if extracted_text.startswith("Error") else "No text was extracted from the image. Please try a clearer image."
336
+ img_display = image if isinstance(image, Image.Image) else Image.fromarray(image)
337
+ return img_display, err_msg, "{}", err_msg, "", "{}" # Return error state
338
+
339
+ # Step 2: Classify
340
+ print("Classifying problem...")
341
  classification = classify_with_gemini_flash(extracted_text)
342
+ classification_json = json.dumps(classification, indent=2) # Ensure it's JSON string
343
+
344
+ # Step 3: Solve
345
+ print("Solving problem...")
346
  solution = solve_with_gemini_pro(extracted_text, classification)
347
+
348
+ print("Processing complete")
349
+
350
+ # Return values needed by the UI components and state variables
351
+ # We need 6 values for: processed_image, extracted_text_output, classification_output, solution_output, extracted_text_state, classification_state
352
+ img_display = image if isinstance(image, Image.Image) else Image.fromarray(image)
353
+ return img_display, extracted_text, classification_json, solution, extracted_text, classification_json
354
+
355
  except Exception as e:
356
  print(f"Process Image Error: {e}")
357
  print(traceback.format_exc())
358
+ error_message = f"Error processing image: {str(e)}"
359
+ img_display = None
360
+ if image is not None:
361
+ img_display = image if isinstance(image, Image.Image) else Image.fromarray(image)
362
+ # Return error message and empty states
363
+ return img_display, error_message, "{}", error_message, "", "{}"
364
+
365
+
366
+ # --- Gradio Interface (Modern UI from Previous Refactoring) ---
367
+
368
+ # Custom CSS for styling
369
+ css = """
370
+ body { font-family: 'Inter', sans-serif; } /* Modern font */
371
+ .gradio-container { background-color: #f8f9fa; } /* Light background */
372
+
373
+ #title_markdown h1 {
374
+ text-align: center;
375
+ color: #4A90E2; /* Theme color */
376
+ font-weight: 600;
377
+ margin-bottom: 0px; /* Adjust spacing */
378
+ }
379
+ #subtitle_markdown p {
380
+ text-align: center;
381
+ color: #555;
382
+ margin-top: 5px; /* Adjust spacing */
383
+ margin-bottom: 20px;
384
+ }
385
+
386
+ /* Input/Output Image Area */
387
+ #input_col, #output_col { padding: 10px; }
388
+ #input_image, #processed_image {
389
+ border-radius: 8px; /* Rounded corners for images */
390
+ border: 1px solid #dee2e6;
391
+ overflow: hidden; /* Ensure border radius applies */
392
+ height: 350px; /* Fixed height */
393
+ object-fit: contain; /* Scale image nicely */
394
+ }
395
+ #input_image div[data-testid="image"], #processed_image div[data-testid="image"] {
396
+ height: 100%; /* Make inner div fill height */
397
+ }
398
+ #input_image img, #processed_image img {
399
+ height: 100%; object-fit: contain; /* Control image scaling */
400
+ }
401
+
402
+
403
+ /* Main button */
404
+ #process_button { margin-top: 15px; }
405
+
406
+ /* Output sections */
407
+ #results_group {
408
+ border: 1px solid #e9ecef;
409
+ border-radius: 8px;
410
+ padding: 15px;
411
+ background-color: #ffffff; /* White background for results */
412
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05); /* Subtle shadow */
413
+ margin-top: 20px;
414
+ }
415
+
416
+ #extracted_text_output textarea, #classification_output textarea {
417
+ background-color: #f1f3f4 !important; /* Light grey background for text boxes */
418
+ border-radius: 4px;
419
+ }
420
+
421
+ #solution_output { margin-top: 15px; }
422
+
423
+ /* Action buttons below solution */
424
+ #action_buttons { margin-top: 15px; margin-bottom: 15px; }
425
+
426
+ /* Accordion styling */
427
+ .gradio-accordion > button { /* Target the accordion header button */
428
+ background-color: #eef2f6; /* Lighter header background */
429
+ border-radius: 5px 5px 0 0; /* Rounded top corners */
430
+ font-weight: 500;
431
+ }
432
+ .gradio-accordion .gradio-markdown { /* Content inside accordion */
433
+ border: 1px solid #dee2e6;
434
+ border-top: none; /* Remove top border as header has it */
435
+ padding: 15px;
436
+ border-radius: 0 0 5px 5px; /* Rounded bottom corners */
437
+ background-color: #fff; /* White background */
438
+ }
439
+
440
+ footer { visibility: hidden } /* Hide default Gradio footer */
441
+ """
442
+
443
+ # Define a theme
444
+ theme = gr_themes.Default(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky).set(
445
+ button_primary_background_fill="#4A90E2",
446
+ button_primary_background_fill_hover="#357ABD",
447
+ button_secondary_background_fill="#E1E8ED",
448
+ button_secondary_background_fill_hover="#CED9E0",
449
+ block_radius="8px",
450
+ )
451
+
452
+
453
with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:

    # --- State shared across event handlers ---
    extracted_text_state = gr.State("")    # raw problem text extracted from the image
    classification_state = gr.State("{}")  # classification result kept as a JSON string

    # --- Header ---
    gr.Markdown("# 🧠 NerdAI Math Problem Solver", elem_id="title_markdown")
    gr.Markdown("Upload a clear image of a math problem. NerdAI will extract the text, classify it, solve it step-by-step, and offer further help!", elem_id="subtitle_markdown")

    # --- Input / processed image, side by side ---
    with gr.Row():
        with gr.Column(scale=1, elem_id="input_col"):
            input_image = gr.Image(label="Upload Math Problem", type="pil", elem_id="input_image", height=350)
            process_btn = gr.Button(" Process Image and Solve", variant="primary", elem_id="process_button")
        with gr.Column(scale=1, elem_id="output_col"):
            processed_image = gr.Image(label="Processed Image", interactive=False, elem_id="processed_image", height=350)

    # --- Results area ---
    # NOTE(review): gr.Box was deprecated and later removed in Gradio 4.x —
    # if the app is upgraded, these wrappers must become gr.Group (confirm
    # the installed gradio version).
    with gr.Group(elem_id="results_group"):
        gr.Markdown("### Results")
        with gr.Box():
            extracted_text_output = gr.Textbox(label="📝 Extracted Text", lines=3, interactive=False, placeholder="Text from the image will appear here...", elem_id="extracted_text_output")
        with gr.Box():
            # Label advertises which model performs the classification.
            classification_output = gr.Textbox(label=f"📊 Problem Classification ({CLASSIFICATION_MODEL})", lines=5, interactive=False, placeholder="Problem type analysis will appear here...", elem_id="classification_output")

        solution_output = gr.Markdown(label="✅ Solution Steps", value="*Solution steps will appear here after processing...*", elem_id="solution_output")

    # --- Follow-up action buttons ---
    with gr.Row(elem_id="action_buttons"):
        explain_btn = gr.Button("🤔 Explain Further", variant="secondary")
        similar_btn = gr.Button("📚 Similar Questions", variant="secondary")

    # --- Collapsible detail panels ---
    with gr.Accordion("Detailed Explanation", open=False):
        explanation_output = gr.Markdown(value="*Click 'Explain Further' above to get a detailed breakdown.*")

    with gr.Accordion("Similar Practice Problems", open=False):
        similar_problems_output = gr.Markdown(value="*Click 'Similar Questions' above to generate practice problems.*")

    # Main pipeline wiring: delegates to the original process_image backend,
    # which fills both the visible outputs and the two state variables.
    process_btn.click(
        fn=process_image,
        inputs=[input_image],
        outputs=[
            processed_image,
            extracted_text_output,
            classification_output,
            solution_output,
            extracted_text_state,
            classification_state,
        ],
    )
509
+
510
+ # Explain button click handler (Calls original explain_solution)
511
+ def explain_button_handler(current_problem_text, current_solution_md):
512
+ """Handler for Explain It button using state and original backend"""
513
+ print("Explain button clicked (using original backend).")
514
+ # Basic input validation
515
+ if not current_problem_text or current_problem_text.startswith("Error:") or current_problem_text == "No image uploaded":
516
+ return "Please successfully process an image first to get text and a solution."
517
+ if not current_solution_md or current_solution_md.startswith("Error") or "will appear here" in current_solution_md:
518
+ return "Cannot explain: A valid solution needs to be generated first."
519
+
520
+ # Add simple feedback, yield not applicable without queue/progress
521
+ explanation_output.value = "*Generating detailed explanation... please wait.*" # Direct update attempt
522
+ explanation_result = explain_solution(current_problem_text, current_solution_md) # Call original function
523
+ return explanation_result # Return result to update the Markdown output
524
+
525
  explain_btn.click(
526
  fn=explain_button_handler,
527
+ inputs=[extracted_text_state, solution_output], # Use state and current solution output
528
+ outputs=explanation_output # Target the Markdown inside the Accordion
529
  )
530
+
531
+ # Similar problems button click handler (Calls original generate_similar_problems)
532
+ def similar_button_handler(current_problem_text, current_classification_json):
533
+ """Handler for Similar Questions button using state and original backend"""
534
+ print("Similar button clicked (using original backend).")
535
+ # Basic input validation
536
+ if not current_problem_text or current_problem_text.startswith("Error:") or current_problem_text == "No image uploaded":
537
+ return "Please successfully process an image first to get the problem text and classification."
538
+
539
+ # Add simple feedback
540
+ similar_problems_output.value = "*Generating similar problems... please wait.*" # Direct update attempt
541
+
542
+ try:
543
+ # Parse classification JSON from state
544
+ classification_dict = json.loads(current_classification_json)
545
+ # Minimal validation (check if it's a dictionary)
546
+ if not isinstance(classification_dict, dict):
547
+ raise ValueError("Invalid classification data format.")
548
+ except (json.JSONDecodeError, ValueError, TypeError) as e: # Added TypeError
549
+ print(f"Error parsing classification state for similar problems: {e}")
550
+ print(f"Classification JSON received: {current_classification_json}")
551
+ # Use the original classification function's error structure for consistency
552
+ return f"Error: Could not use problem classification data ({e}). Please ensure the problem was classified correctly (should be JSON)."
553
+
554
+ # Call original function
555
+ similar_result = generate_similar_problems(current_problem_text, classification_dict)
556
+ return similar_result # Return result to update the Markdown output
557
+
558
  similar_btn.click(
559
  fn=similar_button_handler,
560
+ inputs=[extracted_text_state, classification_state], # Use state
561
+ outputs=similar_problems_output # Target the Markdown inside the Accordion
562
  )
563
 
564
+ # Add an example image (optional, as in refactored UI)
565
+ gr.Examples(
566
+ examples=[
567
+ # Add paths to example images accessible by the script
568
+ ["examples/algebra_problem.png"],
569
+ ["examples/calculus_problem.jpg"],
570
+ ["examples/geometry_problem.png"],
571
+ ],
572
+ inputs=input_image,
573
+ # outputs=[processed_image, extracted_text_output, classification_output, solution_output, extracted_text_state, classification_state], # Outputs for examples if you want to auto-run them
574
+ # fn=process_image, # Function to run when example is clicked
575
+ cache_examples=False, # Better to re-run for dynamic models
576
+ label="Example Math Problems"
577
+ )
578
+
579
+
580
# --- Launch the App ---
if __name__ == "__main__":
    # Create placeholder example images for local testing if they are missing.
    from PIL import ImageDraw  # hoisted: was imported inside the per-file loop

    # exist_ok=True replaces the racy exists()-then-makedirs check.
    os.makedirs("examples", exist_ok=True)
    for fname in ["algebra_problem.png", "calculus_problem.jpg", "geometry_problem.png"]:
        fpath = os.path.join("examples", fname)
        if os.path.exists(fpath):
            continue  # guard clause: only generate what is missing
        try:
            img = Image.new('RGB', (200, 100), color=(73, 109, 137))
            d = ImageDraw.Draw(img)
            d.text((10, 10), f"Placeholder for\n{fname}", fill=(255, 255, 0))
            img.save(fpath)
            print(f"Created placeholder example: {fpath}")
        except Exception as e:
            # Best-effort: a failed placeholder must not block app startup.
            print(f"Could not create placeholder image {fpath}: {e}")

    # Queue improves handling of concurrent users / long-running model calls.
    demo.queue().launch(debug=True)