Update app.py
app.py CHANGED
@@ -1,16 +1,16 @@
 import os
 import json
 import gradio as gr
-import gradio.themes as gr_themes
+import gradio.themes as gr_themes
 import google.generativeai as genai
 from PIL import Image
 import numpy as np
 from huggingface_hub import HfFolder
 from dotenv import load_dotenv
 import traceback
-import time
+import time
+
 
-# --- Load Environment Variables ---
 load_dotenv()
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or HfFolder.get_token("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
@@ -25,22 +25,20 @@ if not GEMINI_API_KEY:
 
 genai.configure(api_key=GEMINI_API_KEY)
 
-
+
 CLASSIFICATION_MODEL = "gemini-1.5-flash"
 SOLUTION_MODEL = "gemini-1.5-pro-latest"
 EXPLANATION_MODEL = "gemini-1.5-pro-latest"
 SIMILAR_MODEL = "gemini-1.5-pro-latest"
-MODEL_IMAGE = "gemini-1.5-pro-latest"
+MODEL_IMAGE = "gemini-1.5-pro-latest"
 
 print(f"Using models: Classification: {CLASSIFICATION_MODEL}, Solution: {SOLUTION_MODEL}, Explanation: {EXPLANATION_MODEL}, Similar: {SIMILAR_MODEL}, Image Analysis: {MODEL_IMAGE}")
 
-# --- Tesseract Related Code Removed ---
-print("Tesseract OCR dependency and fallback have been removed.")
 
 
-# --- Backend Functions (Reverted to Original Parameters & Prompts, Tesseract Removed) ---
 
-
+
+
 def extract_text_with_gemini(image):
     """Extract text from image using Gemini Pro Vision ONLY"""
     try:
@@ -51,7 +49,7 @@ def extract_text_with_gemini(image):
 
         print("Attempting text extraction with Gemini Pro Vision...")
         model = genai.GenerativeModel(MODEL_IMAGE)
-
+
         prompt = """
         Extract ALL text, numbers, and mathematical equations from this image precisely.
         Include ALL symbols, numbers, letters, and mathematical notation exactly as they appear.
@@ -73,7 +71,7 @@ def extract_text_with_gemini(image):
         print(traceback.format_exc())
         return f"Error during text extraction with Gemini: {str(e)}"
 
-
+
 def classify_with_gemini_flash(math_problem):
     """Classify the math problem using Gemini model (Original settings)"""
     if not math_problem or math_problem.startswith("Error:"):
@@ -89,7 +87,7 @@ def classify_with_gemini_flash(math_problem):
             "response_mime_type": "application/json",
         }
     )
-
+
     prompt = f"""
     Task: Classify the following math problem.
 
@@ -107,7 +105,7 @@ def classify_with_gemini_flash(math_problem):
     try:
         cleaned_text = response.text.strip().replace("```json", "").replace("```", "").strip()
         classification = json.loads(cleaned_text)
-
+
         keys_needed = ["category", "subtopic", "difficulty", "key_concepts"]
         for key in keys_needed:
             classification.setdefault(key, "Unknown")
@@ -122,7 +120,7 @@ def classify_with_gemini_flash(math_problem):
         print(traceback.format_exc())
         return { "category": "Error", "subtopic": "API Error", "difficulty": "Error", "key_concepts": [f"Classification failed: {str(e)}"] }
 
-
+
 def solve_with_gemini_pro(math_problem, classification):
     """Solve the math problem using Gemini model (Original settings)"""
     if not math_problem or math_problem.startswith("Error:"):
@@ -136,7 +134,7 @@ def solve_with_gemini_pro(math_problem, classification):
             "max_output_tokens": 1000, # Original value
         }
     )
-
+
     if not isinstance(classification, dict):
         classification = { "category": "Unknown", "subtopic": "Unknown", "difficulty": "Unknown", "key_concepts": ["Unknown"] }
     for field in ["category", "subtopic", "difficulty"]:
@@ -149,7 +147,7 @@ def solve_with_gemini_pro(math_problem, classification):
     else:
         key_concepts_str = str(classification["key_concepts"])
 
-
+
     prompt = f"""
     Task: Solve the following math problem with clear step-by-step explanations.
 
@@ -180,7 +178,7 @@ def solve_with_gemini_pro(math_problem, classification):
         print(traceback.format_exc())
         return f"Error generating solution: {str(e)}"
 
-
+
 def explain_solution(math_problem, solution):
     """Provide a more detailed explanation of the solution (Original settings)"""
     if not math_problem or math_problem.startswith("Error:"): return "Error: Cannot explain problem due to invalid input text."
@@ -195,7 +193,7 @@ def explain_solution(math_problem, solution):
             "max_output_tokens": 1500, # Original value
         }
     )
-
+
     prompt = f"""
     Task: Provide a more detailed explanation of the solution to this math problem.
 
@@ -220,7 +218,7 @@ def explain_solution(math_problem, solution):
         print(traceback.format_exc())
         return f"Error generating explanation: {str(e)}"
 
-
+
 def generate_similar_problems(math_problem, classification):
     """Generate similar practice math problems (Original settings)"""
     if not math_problem or math_problem.startswith("Error:"): return "Error: Cannot generate problems due to invalid input text."
@@ -235,9 +233,9 @@ def generate_similar_problems(math_problem, classification):
             "max_output_tokens": 1000, # Original value
         }
     )
-
+
     classification_str = json.dumps(classification, indent=2)
-
+
     prompt = f"""
     Task: Generate similar practice math problems based on the following problem.
 
@@ -266,7 +264,7 @@ def generate_similar_problems(math_problem, classification):
         return f"Error generating similar problems: {str(e)}"
 
 
-
+
 def process_image(image):
     """Main processing pipeline for the NerdAI app (No Tesseract, No Progress)"""
     start_time = time.time() # Keep start time
@@ -274,37 +272,37 @@ def process_image(image):
     if image is None:
         return None, "Error: No image uploaded.", "{}", "Error: No image uploaded.", "", "{}"
 
-
+
     print("π Starting processing...")
     # time.sleep(0.5) # Removed sleep associated with progress update
 
-
-    print("π Extracting text with Gemini...")
-    extracted_text = extract_text_with_gemini(image)
+
+    print("π Extracting text with Gemini...")
+    extracted_text = extract_text_with_gemini(image)
 
     if extracted_text.startswith("Error:"):
         print(f"Text extraction failed: {extracted_text}")
-
+
         img_display = None
         if image is not None:
             try:
                 img_display = image if isinstance(image, Image.Image) else Image.fromarray(image)
-            except Exception: pass
+            except Exception: pass
         return img_display, extracted_text, "{}", extracted_text, "", "{}"
 
-
-    print(f"π Classifying problem ({CLASSIFICATION_MODEL})...")
-    classification = classify_with_gemini_flash(extracted_text)
+
+    print(f"π Classifying problem ({CLASSIFICATION_MODEL})...")
+    classification = classify_with_gemini_flash(extracted_text)
     classification_json = json.dumps(classification, indent=2)
 
-
-    print(f"π‘ Solving problem ({SOLUTION_MODEL})...")
-    solution = solve_with_gemini_pro(extracted_text, classification)
+
+    print(f"π‘ Solving problem ({SOLUTION_MODEL})...")
+    solution = solve_with_gemini_pro(extracted_text, classification)
 
     end_time = time.time() # Keep end time
-    print(f"β Done in {end_time - start_time:.2f}s!")
+    print(f"β Done in {end_time - start_time:.2f}s!")
 
-
+
     img_display = image if isinstance(image, Image.Image) else Image.fromarray(image)
     return img_display, extracted_text, classification_json, solution, extracted_text, classification_json
 
@@ -320,9 +318,7 @@ def process_image(image):
         return img_display, error_message, "{}", error_message, "", "{}"
 
 
-# --- Gradio Interface (Modern UI, No gr.Box) ---
 
-# Custom CSS (Kept as is)
 css = """
 body { font-family: 'Inter', sans-serif; }
 .gradio-container { background-color: #f8f9fa; }
@@ -343,7 +339,7 @@ body { font-family: 'Inter', sans-serif; }
 footer { visibility: hidden }
 """
 
-
+
 theme = gr_themes.Default(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky).set(
     button_primary_background_fill="#4A90E2", button_primary_background_fill_hover="#357ABD",
     button_secondary_background_fill="#E1E8ED", button_secondary_background_fill_hover="#CED9E0",
@@ -353,11 +349,11 @@ theme = gr_themes.Default(primary_hue=gr.themes.colors.blue, secondary_hue=gr.th
 
 with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:
 
-
+
     extracted_text_state = gr.State("")
     classification_state = gr.State("{}")
 
-
+
     gr.Markdown("# π§ NerdAI Math Problem Solver", elem_id="title_markdown")
     gr.Markdown("Upload a clear image of a math problem. NerdAI will extract the text, classify it, solve it step-by-step, and offer further help!", elem_id="subtitle_markdown")
 
@@ -368,27 +364,25 @@ with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:
         with gr.Column(scale=1, elem_id="output_col"):
             processed_image = gr.Image(label="Processed Image", interactive=False, elem_id="processed_image", height=350)
 
-
+
     with gr.Group(elem_id="results_group"):
         gr.Markdown("### Results")
         extracted_text_output = gr.Textbox(label="π Extracted Text", lines=3, interactive=False, placeholder="Text from the image will appear here...", elem_id="extracted_text_output")
         classification_output = gr.Textbox(label=f"π Problem Classification ({CLASSIFICATION_MODEL})", lines=5, interactive=False, placeholder="Problem type analysis will appear here...", elem_id="classification_output")
         solution_output = gr.Markdown(label="β Solution Steps", value="*Solution steps will appear here after processing...*", elem_id="solution_output")
 
-
+
     with gr.Row(elem_id="action_buttons"):
         explain_btn = gr.Button("π€ Explain Further", variant="secondary")
         similar_btn = gr.Button("π Similar Questions", variant="secondary")
 
-
+
     with gr.Accordion("Detailed Explanation", open=False):
         explanation_output = gr.Markdown(value="*Click 'Explain Further' above to get a detailed breakdown.*")
     with gr.Accordion("Similar Practice Problems", open=False):
         similar_problems_output = gr.Markdown(value="*Click 'Similar Questions' above to generate practice problems.*")
 
-
-
-    # Main process button click (No progress tracking passed)
+
     process_btn.click(
         fn=process_image,
         inputs=[input_image],
@@ -398,11 +392,10 @@ with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:
         ],
     )
 
-
-    # Using yield for feedback requires queue enabled
+
     def explain_button_handler(current_problem_text, current_solution_md):
         print("Explain button clicked.")
-
+
         if not current_problem_text or current_problem_text.startswith("Error:") : yield "Please process an image successfully first." ; return
         if not current_solution_md or current_solution_md.startswith("Error:") or "will appear here" in current_solution_md: yield "A valid solution needs to be generated first." ; return
 
@@ -416,14 +409,13 @@ with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:
         outputs=explanation_output
     )
 
-
-    # Using yield for feedback requires queue enabled
+
     def similar_button_handler(current_problem_text, current_classification_json):
         print("Similar button clicked.")
-
+
         if not current_problem_text or current_problem_text.startswith("Error:") : yield "Please process an image successfully first." ; return
 
-        yield "*Generating similar problems... please wait.*"
+        yield "*Generating similar problems... please wait.*"
         classification_dict = {}
        try:
            if isinstance(current_classification_json, str) and current_classification_json.strip():
@@ -437,7 +429,7 @@ with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:
        except (json.JSONDecodeError, ValueError, TypeError) as e:
            print(f"Error parsing/validating classification state: {e}")
            yield f"Error: Could not use classification data ({e}). Cannot generate similar problems."
-            return
+            return
 
        similar_result = generate_similar_problems(current_problem_text, classification_dict)
        yield similar_result
@@ -448,21 +440,11 @@ with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:
        outputs=similar_problems_output
    )
 
-
-
-    examples=[
-        ["examples/algebra_problem.png"],
-        ["examples/calculus_problem.jpg"],
-        ["examples/geometry_problem.png"],
-    ],
-    inputs=input_image,
-    cache_examples=False,
-    label="Example Math Problems"
-    )
+
+
 
-# --- Launch the App ---
 if __name__ == "__main__":
-
+
     if not os.path.exists("examples"): os.makedirs("examples")
     for fname in ["algebra_problem.png", "calculus_problem.jpg", "geometry_problem.png"]:
         fpath = os.path.join("examples", fname)
@@ -474,5 +456,5 @@ if __name__ == "__main__":
                 print(f"Created placeholder example: {fpath}")
         except Exception as e: print(f"Could not create placeholder image {fpath}: {e}")
 
-
-    demo.queue().launch(debug=True)
+
+    demo.queue().launch(debug=True)
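
Note on the removed example gallery: the deleted `examples=[...]` lines were bare keyword arguments with no enclosing call, so they could not have run as written. If the gallery is wanted back, a minimal sketch (an assumption, not part of this commit) would wrap those same arguments in Gradio's `gr.Examples` helper inside the `with gr.Blocks(...) as demo:` block, for example after the action buttons:

    # Hypothetical restoration of the example gallery (not part of this commit).
    # Assumes `input_image` is the image-upload component referenced by
    # process_btn.click (presumably defined in the input column, not shown in the
    # hunks) and that the placeholder files under examples/ exist; the __main__
    # block above creates them if missing.
    gr.Examples(
        examples=[
            ["examples/algebra_problem.png"],
            ["examples/calculus_problem.jpg"],
            ["examples/geometry_problem.png"],
        ],
        inputs=input_image,
        cache_examples=False,
        label="Example Math Problems",
    )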