Hammedalmodel committed
Commit 8d587d1 · verified · 1 parent: 100fd2d

Update app.py

Files changed (1): app.py (+22, -21)
app.py CHANGED
@@ -1,13 +1,10 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
 from transformers import MllamaForConditionalGeneration, AutoProcessor
 from PIL import Image
 import torch
+import gradio as gr
 import requests
 from io import BytesIO
 
-app = FastAPI()
-
 # Initialize model and processor
 ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
 model = MllamaForConditionalGeneration.from_pretrained(
@@ -16,19 +13,15 @@ model = MllamaForConditionalGeneration.from_pretrained(
 ).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
 
-class ImageRequest(BaseModel):
-    image_path: str
-
-@app.post("/extract_text")
-async def extract_text(request: ImageRequest):
+def extract_text(image_input):
     try:
-        # Download image from URL
-        response = requests.get(request.image_path)
-        if response.status_code != 200:
-            raise HTTPException(status_code=400, detail="Failed to fetch image from URL")
-
-        # Open image from bytes
-        image = Image.open(BytesIO(response.content)).convert("RGB")
+        # Handle URL input
+        if isinstance(image_input, str):
+            response = requests.get(image_input)
+            image = Image.open(BytesIO(response.content)).convert("RGB")
+        # Handle direct file upload
+        else:
+            image = Image.open(image_input).convert("RGB")
 
         # Create message structure
         messages = [
@@ -55,11 +48,19 @@ async def extract_text(request: ImageRequest):
 
         result = result.replace("user", "").replace("Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output", "").strip()
 
-        return {"text": f"\n{result}\n"}
+        return f"\n{result}\n"
 
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        return f"Error: {str(e)}"
+
+# Create Gradio interface
+demo = gr.Interface(
+    fn=extract_text,
+    inputs=gr.Text(label="Image URL or Upload"),  # Changed to accept both URL and file
+    outputs=gr.Textbox(label="Extracted Text"),
+    title="Handwritten Text Extractor",
+    description="Enter an image URL or upload an image to extract handwritten text.",
+)
 
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+# Launch the app
+demo.launch()
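
Note on the new version: extract_text branches on isinstance(image_input, str) so it can handle either a URL string or a file object, but the interface as committed wires only a gr.Text input, so only the URL branch is reachable from the UI despite the inline comment. A minimal sketch (not part of this commit; route_input and the component labels are illustrative) that exposes both inputs:

import gradio as gr

def route_input(url, file_path):
    # Prefer an uploaded image; gr.Image(type="filepath") passes it as a temp-file path.
    if file_path:
        with open(file_path, "rb") as f:
            return extract_text(f)  # non-str input -> Image.open(file object) branch
    return extract_text(url)        # str input -> requests.get(URL) branch

demo = gr.Interface(
    fn=route_input,
    inputs=[
        gr.Text(label="Image URL"),
        gr.Image(type="filepath", label="Or upload an image"),
    ],
    outputs=gr.Textbox(label="Extracted Text"),
    title="Handwritten Text Extractor",
)
demo.launch()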
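
For reference, the removed FastAPI version exposed the same logic as a POST endpoint. A minimal client call, assuming the server had been started via the removed __main__ block (uvicorn on 0.0.0.0:7860) and using a placeholder image URL, would have looked roughly like this:

import requests

# Sketch: calling the removed /extract_text endpoint (URL is a placeholder).
resp = requests.post(
    "http://localhost:7860/extract_text",
    json={"image_path": "https://example.com/handwritten-note.jpg"},
)
resp.raise_for_status()
print(resp.json()["text"])  # the removed handler returned {"text": "..."}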
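
The hunks above elide the prompt construction and generation code between messages = [ and the post-processing of result. For context only, not as a reconstruction of the actual file, the usual transformers pattern for this model family looks like the following; max_new_tokens is a placeholder and the prompt text is the string stripped out by the replace() call in the diff:

# Illustrative Mllama generation pattern (assumed, not taken from app.py).
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
    ],
}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(image, prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=512)  # placeholder token budget
result = processor.decode(output[0], skip_special_tokens=True)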