Spaces:

gauri-sharan
/

test-two

Sleeping

App Files Files Community

gauri-sharan committed on Sep 29, 2024

Commit

cacc570

verified ·

1 Parent(s): d7c725d

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -10

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from PIL import Image
 import os
 import traceback
 import spaces
 # Check if CUDA is available
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -21,8 +22,12 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
 # Processor for Qwen2-VL
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
 @spaces.GPU  # Decorate the function for GPU management
 def ocr_and_extract(image):
     try:
         # Save the uploaded image temporarily
         temp_image_path = "temp_image.jpg"
@@ -71,25 +76,48 @@ def ocr_and_extract(image):
         # Filter out "You are a helpful assistant" and "assistant" labels
         filtered_output = [line for line in output_text[0].split("\n") if not any(kw in line.lower() for kw in ["you are a helpful assistant", "assistant", "user", "system"])]
         # Clean up the temporary file
         os.remove(temp_image_path)
-        return "\n".join(filtered_output).strip()
     except Exception as e:
         error_message = str(e)
         traceback.print_exc()
         return f"Error: {error_message}"
-# Gradio interface for image input
-iface = gr.Interface(
-    fn=ocr_and_extract,
-    inputs=gr.Image(type="pil"),  # Only the image input
-    outputs="text",
-    title="Image OCR with Byaldi + Qwen2-VL",
-    description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
-)
-# Launch the Gradio app
 iface.launch()

 import os
 import traceback
 import spaces
+import re
 # Check if CUDA is available
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Processor for Qwen2-VL
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
+# Global variable to store extracted text
+extracted_text = ""
 @spaces.GPU  # Decorate the function for GPU management
 def ocr_and_extract(image):
+    global extracted_text
     try:
         # Save the uploaded image temporarily
         temp_image_path = "temp_image.jpg"
         # Filter out "You are a helpful assistant" and "assistant" labels
         filtered_output = [line for line in output_text[0].split("\n") if not any(kw in line.lower() for kw in ["you are a helpful assistant", "assistant", "user", "system"])]
+        extracted_text = "\n".join(filtered_output).strip()
         # Clean up the temporary file
         os.remove(temp_image_path)
+        return extracted_text
     except Exception as e:
         error_message = str(e)
         traceback.print_exc()
         return f"Error: {error_message}"
def search_keywords(keywords):
    """Return the last extracted text as HTML with matching keywords highlighted.

    Reads the module-level ``extracted_text`` set by ``ocr_and_extract`` and
    wraps every case-insensitive occurrence of any whitespace-separated
    keyword in ``<mark>`` tags. The result is meant for an HTML output
    component, so all text taken from the OCR result is HTML-escaped.

    Args:
        keywords: Whitespace-separated search terms. Empty or ``None`` means
            "no highlighting".

    Returns:
        An HTML string, or a plain hint message when no text has been
        extracted yet.
    """
    import html  # local import: only this function needs it

    if not extracted_text:
        return "No text extracted yet. Please upload an image."

    # De-duplicate while keeping order, then try longer terms first so that
    # "marker" wins over "mark" when both are given.
    terms = sorted(dict.fromkeys((keywords or "").split()), key=len, reverse=True)
    if not terms:
        return html.escape(extracted_text, quote=False)

    # One combined pattern, matched against the RAW text in a single pass.
    # Substituting keyword-by-keyword would let a later keyword (e.g. "mark"
    # or "a") match inside the <mark> tags inserted for an earlier one.
    pattern = re.compile("|".join(re.escape(term) for term in terms), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(extracted_text):
        # Escape the untouched text between matches so OCR output containing
        # "<", ">" or "&" cannot break or inject markup.
        pieces.append(html.escape(extracted_text[cursor:match.start()], quote=False))
        pieces.append(f"<mark>{html.escape(match.group(0), quote=False)}</mark>")
        cursor = match.end()
    pieces.append(html.escape(extracted_text[cursor:], quote=False))
    return "".join(pieces)
# Two-part Gradio UI: run OCR on an uploaded image, then search the result.
with gr.Blocks() as iface:
    # --- Layout: upload + extraction widgets ---
    with gr.Column():
        img_input = gr.Image(label="Upload an Image", type="pil")
        extracted_output = gr.Textbox(interactive=False, label="Extracted Text")
        img_button = gr.Button("Extract Text")

    # --- Layout: keyword search widgets ---
    with gr.Column():
        search_input = gr.Textbox(label="Enter keywords to search")
        search_output = gr.HTML(label="Search Results")
        search_button = gr.Button("Search")

    # --- Event wiring (registered in the same order as the widgets appear) ---
    # Clicking "Extract Text" feeds the image to the OCR function and shows
    # its return value in the read-only textbox.
    img_button.click(
        fn=ocr_and_extract,
        inputs=img_input,
        outputs=extracted_output,
    )
    # Clicking "Search" passes the keyword string to the search function and
    # renders the returned markup in the HTML component.
    search_button.click(
        fn=search_keywords,
        inputs=search_input,
        outputs=search_output,
    )
 iface.launch()