Spaces:

gauri-sharan
/

test-two

Sleeping

gauri-sharan committed on Sep 29, 2024

Commit

c5b6958

verified ·

1 Parent(s): 200c7bb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
 @spaces.GPU  # Decorate the function for GPU management
-def ocr_and_extract(image, text_query):
     try:
         # Save the uploaded image temporarily
         temp_image_path = "temp_image.jpg"
@@ -41,7 +41,7 @@ def ocr_and_extract(image, text_query):
         )
         # Perform the search query on the indexed image
-        results = rag_model.search(text_query, k=1)
         # Prepare the input for Qwen2-VL
         image_data = Image.open(temp_image_path)
@@ -51,7 +51,6 @@ def ocr_and_extract(image, text_query):
                 "role": "user",
                 "content": [
                     {"type": "image", "image": image_data},
-                    {"type": "text", "text": text_query},
                 ],
             }
         ]
@@ -90,10 +89,7 @@ def ocr_and_extract(image, text_query):
 # Gradio interface for image input
 iface = gr.Interface(
     fn=ocr_and_extract,
-    inputs=[
-        gr.Image(type="pil"),
-        gr.Textbox(label="Enter your query (optional)"),
-    ],
     outputs="text",
     title="Image OCR with Byaldi + Qwen2-VL",
     description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",

 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
 @spaces.GPU  # Decorate the function for GPU management
+def ocr_and_extract(image):
     try:
         # Save the uploaded image temporarily
         temp_image_path = "temp_image.jpg"
         )
         # Perform the search query on the indexed image
+        results = rag_model.search("", k=1)
         # Prepare the input for Qwen2-VL
         image_data = Image.open(temp_image_path)
                 "role": "user",
                 "content": [
                     {"type": "image", "image": image_data},
                 ],
             }
         ]
 # Gradio interface for image input
 iface = gr.Interface(
     fn=ocr_and_extract,
+    inputs=gr.Image(type="pil"),  # Only the image input
     outputs="text",
     title="Image OCR with Byaldi + Qwen2-VL",
     description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",