gauri-sharan committed on
Commit
cacc570
·
verified ·
1 Parent(s): d7c725d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -10
app.py CHANGED
@@ -7,6 +7,7 @@ from PIL import Image
7
  import os
8
  import traceback
9
  import spaces
 
10
 
11
  # Check if CUDA is available
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -21,8 +22,12 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
21
  # Processor for Qwen2-VL
22
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
23
 
 
 
 
24
  @spaces.GPU # Decorate the function for GPU management
25
  def ocr_and_extract(image):
 
26
  try:
27
  # Save the uploaded image temporarily
28
  temp_image_path = "temp_image.jpg"
@@ -71,25 +76,48 @@ def ocr_and_extract(image):
71
 
72
  # Filter out "You are a helpful assistant" and "assistant" labels
73
  filtered_output = [line for line in output_text[0].split("\n") if not any(kw in line.lower() for kw in ["you are a helpful assistant", "assistant", "user", "system"])]
 
74
 
75
  # Clean up the temporary file
76
  os.remove(temp_image_path)
77
 
78
- return "\n".join(filtered_output).strip()
79
 
80
  except Exception as e:
81
  error_message = str(e)
82
  traceback.print_exc()
83
  return f"Error: {error_message}"
84
 
85
- # Gradio interface for image input
86
- iface = gr.Interface(
87
- fn=ocr_and_extract,
88
- inputs=gr.Image(type="pil"), # Only the image input
89
- outputs="text",
90
- title="Image OCR with Byaldi + Qwen2-VL",
91
- description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
92
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- # Launch the Gradio app
95
  iface.launch()
 
7
  import os
8
  import traceback
9
  import spaces
10
+ import re
11
 
12
  # Check if CUDA is available
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
22
  # Processor for Qwen2-VL
23
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", trust_remote_code=True)
24
 
25
+ # Global variable to store extracted text
26
+ extracted_text = ""
27
+
28
  @spaces.GPU # Decorate the function for GPU management
29
  def ocr_and_extract(image):
30
+ global extracted_text
31
  try:
32
  # Save the uploaded image temporarily
33
  temp_image_path = "temp_image.jpg"
 
76
 
77
  # Filter out "You are a helpful assistant" and "assistant" labels
78
  filtered_output = [line for line in output_text[0].split("\n") if not any(kw in line.lower() for kw in ["you are a helpful assistant", "assistant", "user", "system"])]
79
+ extracted_text = "\n".join(filtered_output).strip()
80
 
81
  # Clean up the temporary file
82
  os.remove(temp_image_path)
83
 
84
+ return extracted_text
85
 
86
  except Exception as e:
87
  error_message = str(e)
88
  traceback.print_exc()
89
  return f"Error: {error_message}"
90
 
91
def search_keywords(keywords):
    """Highlight every occurrence of the given keywords in the extracted text.

    Reads the module-level ``extracted_text`` set by the OCR step and returns
    an HTML fragment in which each case-insensitive keyword match is wrapped
    in ``<mark>...</mark>``.

    Args:
        keywords: Whitespace-separated search terms typed by the user.

    Returns:
        A prompt message when no text has been extracted yet; otherwise the
        extracted text, HTML-escaped, with matches wrapped in ``<mark>`` tags.
    """
    import html  # local import: only this function needs it

    # NOTE(review): extracted_text is a module-level global, so it is shared by
    # every caller in this process -- confirm that is acceptable for this app.
    if not extracted_text:
        return "No text extracted yet. Please upload an image."

    # De-duplicate terms and try longer ones first so that e.g. "catalog"
    # wins over "cat" when both are searched.
    terms = sorted(dict.fromkeys(keywords.split()), key=len, reverse=True)
    if not terms:
        return html.escape(extracted_text, quote=False)

    # One combined pattern, applied in a single pass over the *raw* text.
    # Substituting keyword by keyword would let later keywords match inside
    # the "<mark>" tags inserted by earlier ones and corrupt the markup.
    pattern = re.compile("|".join(re.escape(term) for term in terms), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(extracted_text):
        # Escape text segments individually: the result is rendered as HTML,
        # and the OCR output may contain '<', '>' or '&'.
        pieces.append(html.escape(extracted_text[cursor:match.start()], quote=False))
        pieces.append(f"<mark>{html.escape(match.group(0), quote=False)}</mark>")
        cursor = match.end()
    pieces.append(html.escape(extracted_text[cursor:], quote=False))

    return "".join(pieces)
102
+
103
# Two-panel Gradio UI: run OCR on an uploaded image, then search the result
with gr.Blocks() as iface:

    # Panel 1: image upload and text extraction
    with gr.Column():
        img_input = gr.Image(type="pil", label="Upload an Image")
        extracted_output = gr.Textbox(label="Extracted Text", interactive=False)

        # Clicking the button runs OCR on the image and shows the text
        img_button = gr.Button("Extract Text")
        img_button.click(
            fn=ocr_and_extract,
            inputs=img_input,
            outputs=extracted_output,
        )

    # Panel 2: keyword search over the previously extracted text
    with gr.Column():
        search_input = gr.Textbox(label="Enter keywords to search")
        search_output = gr.HTML(label="Search Results")

        # Clicking the button renders the text with matches highlighted
        search_button = gr.Button("Search")
        search_button.click(
            fn=search_keywords,
            inputs=search_input,
            outputs=search_output,
        )
122
 
 
123
  iface.launch()