Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,13 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import pipeline
|
3 |
import re
|
4 |
|
5 |
-
#
|
6 |
-
|
7 |
|
8 |
def perform_ocr(image):
|
9 |
-
text =
|
10 |
return text
|
11 |
|
12 |
def search_first_keyword_in_text(text, keyword):
|
@@ -23,6 +24,8 @@ def search_first_keyword_in_text(text, keyword):
|
|
23 |
|
24 |
def ocr_and_search(image, keyword):
|
25 |
try:
|
|
|
|
|
26 |
extracted_text = perform_ocr(image)
|
27 |
search_result = search_first_keyword_in_text(extracted_text, keyword)
|
28 |
return extracted_text, search_result
|
|
|
1 |
+
import pytesseract
|
2 |
+
from PIL import Image
|
3 |
import gradio as gr
|
|
|
4 |
import re
|
5 |
|
6 |
+
# Configure Tesseract path (update if needed)
|
7 |
+
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
8 |
|
9 |
def perform_ocr(image):
|
10 |
+
text = pytesseract.image_to_string(image, lang='hin+eng')
|
11 |
return text
|
12 |
|
13 |
def search_first_keyword_in_text(text, keyword):
|
|
|
24 |
|
25 |
def ocr_and_search(image, keyword):
|
26 |
try:
|
27 |
+
# Resize the image to a manageable size for processing
|
28 |
+
image = image.resize((800, 600)) # Adjust size as needed
|
29 |
extracted_text = perform_ocr(image)
|
30 |
search_result = search_first_keyword_in_text(extracted_text, keyword)
|
31 |
return extracted_text, search_result
|