gauri-sharan committed on
Commit
c5b6958
·
verified ·
1 Parent(s): 200c7bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -7
app.py CHANGED
@@ -23,7 +23,7 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
23
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
24
 
25
  @spaces.GPU # Decorate the function for GPU management
26
- def ocr_and_extract(image, text_query):
27
  try:
28
  # Save the uploaded image temporarily
29
  temp_image_path = "temp_image.jpg"
@@ -41,7 +41,7 @@ def ocr_and_extract(image, text_query):
41
  )
42
 
43
  # Perform the search query on the indexed image
44
- results = rag_model.search(text_query, k=1)
45
 
46
  # Prepare the input for Qwen2-VL
47
  image_data = Image.open(temp_image_path)
@@ -51,7 +51,6 @@ def ocr_and_extract(image, text_query):
51
  "role": "user",
52
  "content": [
53
  {"type": "image", "image": image_data},
54
- {"type": "text", "text": text_query},
55
  ],
56
  }
57
  ]
@@ -90,10 +89,7 @@ def ocr_and_extract(image, text_query):
90
  # Gradio interface for image input
91
  iface = gr.Interface(
92
  fn=ocr_and_extract,
93
- inputs=[
94
- gr.Image(type="pil"),
95
- gr.Textbox(label="Enter your query (optional)"),
96
- ],
97
  outputs="text",
98
  title="Image OCR with Byaldi + Qwen2-VL",
99
  description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
 
23
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
24
 
25
  @spaces.GPU # Decorate the function for GPU management
26
+ def ocr_and_extract(image):
27
  try:
28
  # Save the uploaded image temporarily
29
  temp_image_path = "temp_image.jpg"
 
41
  )
42
 
43
  # Perform the search query on the indexed image
44
+ results = rag_model.search("", k=1)
45
 
46
  # Prepare the input for Qwen2-VL
47
  image_data = Image.open(temp_image_path)
 
51
  "role": "user",
52
  "content": [
53
  {"type": "image", "image": image_data},
 
54
  ],
55
  }
56
  ]
 
89
  # Gradio interface for image input
90
  iface = gr.Interface(
91
  fn=ocr_and_extract,
92
+ inputs=gr.Image(type="pil"), # Only the image input
 
 
 
93
  outputs="text",
94
  title="Image OCR with Byaldi + Qwen2-VL",
95
  description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",