Hammedalmodel committed
Commit 8d587d1 · verified · 1 parent: 100fd2d

Update app.py

Files changed (1): app.py (+22, -21)
app.py CHANGED
@@ -1,13 +1,10 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
 from transformers import MllamaForConditionalGeneration, AutoProcessor
 from PIL import Image
 import torch
+import gradio as gr
 import requests
 from io import BytesIO
 
-app = FastAPI()
-
 # Initialize model and processor
 ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
 model = MllamaForConditionalGeneration.from_pretrained(
@@ -16,19 +13,15 @@ model = MllamaForConditionalGeneration.from_pretrained(
 ).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
 
-class ImageRequest(BaseModel):
-    image_path: str
-
-@app.post("/extract_text")
-async def extract_text(request: ImageRequest):
+def extract_text(image_input):
     try:
-        # Download image from URL
-        response = requests.get(request.image_path)
-        if response.status_code != 200:
-            raise HTTPException(status_code=400, detail="Failed to fetch image from URL")
-
-        # Open image from bytes
-        image = Image.open(BytesIO(response.content)).convert("RGB")
+        # Handle URL input
+        if isinstance(image_input, str):
+            response = requests.get(image_input)
+            image = Image.open(BytesIO(response.content)).convert("RGB")
+        # Handle direct file upload
+        else:
+            image = Image.open(image_input).convert("RGB")
 
         # Create message structure
         messages = [
@@ -55,11 +48,19 @@ async def extract_text(request: ImageRequest):
 
         result = result.replace("user", "").replace("Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output", "").strip()
 
-        return {"text": f"\n{result}\n"}
+        return f"\n{result}\n"
 
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        return f"Error: {str(e)}"
+
+# Create Gradio interface
+demo = gr.Interface(
+    fn=extract_text,
+    inputs=gr.Text(label="Image URL or Upload"),  # Changed to accept both URL and file
+    outputs=gr.Textbox(label="Extracted Text"),
+    title="Handwritten Text Extractor",
+    description="Enter an image URL or upload an image to extract handwritten text.",
+)
 
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+# Launch the app
+demo.launch()
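
Note on the new version: extract_text branches on isinstance(image_input, str) so it can handle either a URL string or a file object, but the interface as committed wires only a gr.Text input, so only the URL branch is reachable from the UI despite the inline comment. A minimal sketch (not part of this commit; route_input and the component labels are illustrative) that exposes both inputs:

import gradio as gr

def route_input(url, file_path):
    # Prefer an uploaded image; gr.Image(type="filepath") passes it as a temp-file path.
    if file_path:
        with open(file_path, "rb") as f:
            return extract_text(f)  # non-str input -> Image.open(file object) branch
    return extract_text(url)        # str input -> requests.get(URL) branch

demo = gr.Interface(
    fn=route_input,
    inputs=[
        gr.Text(label="Image URL"),
        gr.Image(type="filepath", label="Or upload an image"),
    ],
    outputs=gr.Textbox(label="Extracted Text"),
    title="Handwritten Text Extractor",
)
demo.launch()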
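
For reference, the removed FastAPI version exposed the same logic as a POST endpoint. A minimal client call, assuming the server had been started via the removed __main__ block (uvicorn on 0.0.0.0:7860) and using a placeholder image URL, would have looked roughly like this:

import requests

# Sketch: calling the removed /extract_text endpoint (URL is a placeholder).
resp = requests.post(
    "http://localhost:7860/extract_text",
    json={"image_path": "https://example.com/handwritten-note.jpg"},
)
resp.raise_for_status()
print(resp.json()["text"])  # the removed handler returned {"text": "..."}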
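
The hunks above elide the prompt construction and generation code between messages = [ and the post-processing of result. For context only, not as a reconstruction of the actual file, the usual transformers pattern for this model family looks like the following; max_new_tokens is a placeholder and the prompt text is the string stripped out by the replace() call in the diff:

# Illustrative Mllama generation pattern (assumed, not taken from app.py).
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
    ],
}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(image, prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=512)  # placeholder token budget
result = processor.decode(output[0], skip_special_tokens=True)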