Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,7 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
23 |
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
|
24 |
|
25 |
@spaces.GPU # Decorate the function for GPU management
|
26 |
-
def ocr_and_extract(image, text_query):
|
27 |
try:
|
28 |
# Save the uploaded image temporarily
|
29 |
temp_image_path = "temp_image.jpg"
|
@@ -41,7 +41,7 @@ def ocr_and_extract(image, text_query):
|
|
41 |
)
|
42 |
|
43 |
# Perform the search query on the indexed image
|
44 |
-
results = rag_model.search(text_query, k=1)
|
45 |
|
46 |
# Prepare the input for Qwen2-VL
|
47 |
image_data = Image.open(temp_image_path)
|
@@ -51,7 +51,6 @@ def ocr_and_extract(image, text_query):
|
|
51 |
"role": "user",
|
52 |
"content": [
|
53 |
{"type": "image", "image": image_data},
|
54 |
-
{"type": "text", "text": text_query},
|
55 |
],
|
56 |
}
|
57 |
]
|
@@ -90,10 +89,7 @@ def ocr_and_extract(image, text_query):
|
|
90 |
# Gradio interface for image input
|
91 |
iface = gr.Interface(
|
92 |
fn=ocr_and_extract,
|
93 |
-
inputs=[
|
94 |
-
gr.Image(type="pil"),
|
95 |
-
gr.Textbox(label="Enter your query (optional)"),
|
96 |
-
],
|
97 |
outputs="text",
|
98 |
title="Image OCR with Byaldi + Qwen2-VL",
|
99 |
description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
|
|
|
23 |
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
|
24 |
|
25 |
@spaces.GPU # Decorate the function for GPU management
|
26 |
+
def ocr_and_extract(image):
|
27 |
try:
|
28 |
# Save the uploaded image temporarily
|
29 |
temp_image_path = "temp_image.jpg"
|
|
|
41 |
)
|
42 |
|
43 |
# Perform the search query on the indexed image
|
44 |
+
results = rag_model.search("", k=1)
|
45 |
|
46 |
# Prepare the input for Qwen2-VL
|
47 |
image_data = Image.open(temp_image_path)
|
|
|
51 |
"role": "user",
|
52 |
"content": [
|
53 |
{"type": "image", "image": image_data},
|
|
|
54 |
],
|
55 |
}
|
56 |
]
|
|
|
89 |
# Gradio interface for image input
|
90 |
iface = gr.Interface(
|
91 |
fn=ocr_and_extract,
|
92 |
+
inputs=gr.Image(type="pil"), # Only the image input
|
|
|
|
|
|
|
93 |
outputs="text",
|
94 |
title="Image OCR with Byaldi + Qwen2-VL",
|
95 |
description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
|