Hammedalmodel committed on
Commit
3387487
·
verified ·
1 Parent(s): f43cd25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -47
app.py CHANGED
@@ -2,8 +2,7 @@ from transformers import MllamaForConditionalGeneration, AutoProcessor
2
  from PIL import Image
3
  import torch
4
  import gradio as gr
5
- import requests
6
- from io import BytesIO
7
 
8
  # Initialize model and processor
9
  ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
@@ -13,57 +12,52 @@ model = MllamaForConditionalGeneration.from_pretrained(
13
  ).to("cuda")
14
  processor = AutoProcessor.from_pretrained(ckpt)
15
 
16
def extract_text(image_input):
    """Extract handwritten text from the given image.

    Args:
        image_input: A local file path (or file-like object accepted by
            ``PIL.Image.open``), or an http(s) URL to download the image from.

    Returns:
        The text the model read from the image, or an ``"Error: ..."``
        string on failure (returned rather than raised so the Gradio UI
        shows the message instead of crashing).
    """
    try:
        # Accept either a URL or a local file; normalize to RGB so the
        # processor always sees a 3-channel image.
        if isinstance(image_input, str) and image_input.startswith("http"):
            response = requests.get(image_input)
            response.raise_for_status()  # surface HTTP errors early
            image = Image.open(BytesIO(response.content)).convert("RGB")
        else:
            image = Image.open(image_input).convert("RGB")

        # Chat-style message; the image itself is passed separately below.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
                    {"type": "image"},
                ],
            }
        ]

        # Render the chat template and tokenize text + image together.
        texts = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")

        outputs = model.generate(**inputs, max_new_tokens=250)

        # Decode ONLY the newly generated tokens. The previous approach
        # decoded the full sequence (prompt included) and then stripped it
        # back out with find("assistant") / replace("user", ""), which leaks
        # prompt text and corrupts any extracted handwriting that happens to
        # contain the word "user". Slicing off the prompt tokens is exact.
        prompt_len = inputs["input_ids"].shape[-1]
        result = processor.decode(outputs[0][prompt_len:], skip_special_tokens=True)

        return result.strip()

    except Exception as e:
        return f"Error: {str(e)}"
58
 
59
# Gradio UI: one textbox in (image URL or local path), one textbox out
# (the transcription produced by extract_text).
input_box = gr.Textbox(label="Image URL or Upload Image")
output_box = gr.Textbox(label="Extracted Text")

demo = gr.Interface(
    fn=extract_text,
    inputs=input_box,
    outputs=output_box,
    title="Handwritten Text Extractor",
    description="Provide an image URL or upload an image containing handwritten text to extract its content.",
)

# Launch the app; debug=True surfaces tracebacks in the console.
demo.launch(debug=True)
 
2
  from PIL import Image
3
  import torch
4
  import gradio as gr
5
+ import spaces
 
6
 
7
  # Initialize model and processor
8
  ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
 
12
  ).to("cuda")
13
  processor = AutoProcessor.from_pretrained(ckpt)
14
 
15
@spaces.GPU
def extract_text(image):
    """Extract handwritten text from an uploaded image.

    Args:
        image: Path to the image file (Gradio supplies a filepath string
            when the input component is ``gr.Image(type="filepath")``).

    Returns:
        The text the model read from the image, with no prompt text or
        chat-template markers included.
    """
    # Normalize to RGB so the processor always sees a 3-channel image.
    image = Image.open(image).convert("RGB")

    # Chat-style message; the image itself is passed separately below.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
                {"type": "image"},
            ],
        }
    ]

    # Render the chat template and tokenize text + image together.
    texts = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens=250)

    # Decode ONLY the newly generated tokens. The previous approach decoded
    # the full sequence (prompt included) and then tried to strip it back out
    # with find("assistant"), replace("user", ""), and replacing the literal
    # prompt string — which leaks template text and corrupts any extracted
    # handwriting that happens to contain those words. Slicing off the
    # prompt tokens is exact, and the leftover debug print()s are removed.
    prompt_len = inputs["input_ids"].shape[-1]
    result = processor.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return result.strip()
 
 
 
 
 
52
 
53
# Gradio UI: an image-upload component in (handed to extract_text as a
# filepath), a textbox out (the transcription).
image_in = gr.Image(type="filepath", label="Upload Image")
text_out = gr.Textbox(label="Extracted Text")

demo = gr.Interface(
    fn=extract_text,
    inputs=image_in,
    outputs=text_out,
    title="Handwritten Text Extractor",
    description="Upload an image containing handwritten text to extract its content.",
)

# Launch the app; debug=True surfaces tracebacks in the console.
demo.launch(debug=True)