Sakshiw1 committed on
Commit
340c40c
·
verified ·
1 Parent(s): 1388e60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -48
app.py CHANGED
@@ -1,48 +1,48 @@
1
- import pytesseract
2
- from PIL import Image
3
- import gradio as gr
4
- import re
5
-
6
- pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
7
-
8
- def perform_ocr(image):
9
- text = pytesseract.image_to_string(image, lang='hin+eng')
10
- return text
11
-
12
- def search_first_keyword_in_text(text, keyword):
13
- if keyword:
14
- text = text.replace('\n', ' ')
15
- sentences = re.split(r'(?<=[.!?]) +', text)
16
- for sentence in sentences:
17
- if re.search(keyword, sentence, re.IGNORECASE):
18
- highlighted_sentence = re.sub(f'({re.escape(keyword)})', r'<b>\1</b>', sentence, flags=re.IGNORECASE)
19
- return highlighted_sentence.strip()
20
- return "No matching sentence found."
21
- else:
22
- return "Please enter a keyword to search."
23
-
24
- def ocr_and_search(image, keyword):
25
- try:
26
- extracted_text = perform_ocr(image)
27
- search_result = search_first_keyword_in_text(extracted_text, keyword)
28
- return extracted_text, search_result
29
- except Exception as e:
30
- return str(e), str(e)
31
-
32
- def web_app():
33
- interface = gr.Interface(
34
- fn=ocr_and_search,
35
- inputs=[
36
- gr.Image(type="pil", label="Upload Image"),
37
- gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")
38
- ],
39
- outputs=[
40
- gr.Textbox(label="Extracted Text", lines=10),
41
- gr.HTML(label="Search Result (First Matching Sentence)")
42
- ],
43
- title="OCR and Keyword Search Application"
44
- )
45
- interface.launch()
46
-
47
- if __name__ == "__main__":
48
- web_app()
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import re
4
+
5
# Build the shared image-to-text (OCR) pipeline once at import time so that
# every request reuses the same pipeline object.
# NOTE(review): confirm that the "microsoft/vision-ocr" checkpoint really is
# available on the Hugging Face Hub -- presumably loading fails otherwise.
ocr_pipeline = pipeline(task="image-to-text", model="microsoft/vision-ocr")
7
+
8
def perform_ocr(image):
    """Run the module-level OCR pipeline on *image* and return its text.

    The pipeline result is indexed as a sequence of mappings; the text is
    read from the ``generated_text`` entry of the first element.
    """
    predictions = ocr_pipeline(image)
    first_prediction = predictions[0]
    return first_prediction["generated_text"]
11
+
12
def search_first_keyword_in_text(text, keyword):
    """Return the first sentence of *text* containing *keyword*, as HTML.

    The keyword is matched literally and case-insensitively. Every
    occurrence inside the first matching sentence is wrapped in ``<b>`` tags,
    and all other characters are HTML-escaped so the result can be rendered
    as markup safely.

    Args:
        text: The text to search. Newlines are treated as spaces.
        keyword: The literal string to look for. Regex metacharacters such
            as ``+``, ``(`` or ``.`` carry no special meaning.

    Returns:
        The highlighted sentence, ``"No matching sentence found."`` when no
        sentence contains the keyword, or
        ``"Please enter a keyword to search."`` when *keyword* is empty or
        ``None``.
    """
    # Local import keeps this function self-contained; ``re`` is already
    # imported at module level.
    import html

    if not keyword:
        return "Please enter a keyword to search."

    # Escape once and reuse the same pattern for both detection and
    # highlighting. Previously the raw keyword was used as a regex for
    # detection, so "c++" raised re.error and "a.b" could select a sentence
    # that was then never highlighted.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    flattened = text.replace('\n', ' ')
    for sentence in re.split(r'(?<=[.!?]) +', flattened):
        if not pattern.search(sentence):
            continue

        # Rebuild the sentence piece by piece so the surrounding text and the
        # matched text are escaped, while the <b> tags we add stay as markup.
        pieces = []
        cursor = 0
        for match in pattern.finditer(sentence):
            pieces.append(html.escape(sentence[cursor:match.start()], quote=False))
            pieces.append(f"<b>{html.escape(match.group(0), quote=False)}</b>")
            cursor = match.end()
        pieces.append(html.escape(sentence[cursor:], quote=False))
        return "".join(pieces).strip()

    return "No matching sentence found."
23
+
24
def ocr_and_search(image, keyword):
    """Extract text from *image* and look up *keyword* in it.

    Returns a pair ``(extracted_text, search_result)``. If either step
    raises, the exception message is returned in both positions so the UI
    shows the error instead of crashing.
    """
    try:
        ocr_text = perform_ocr(image)
        match_html = search_first_keyword_in_text(ocr_text, keyword)
    except Exception as error:
        # UI boundary: surface the failure text in both output widgets.
        message = str(error)
        return message, message
    else:
        return ocr_text, match_html
31
+
32
def web_app():
    """Assemble the Gradio interface for OCR + keyword search and launch it."""
    # Input widgets: the image to read and the keyword to look for.
    image_input = gr.Image(type="pil", label="Upload Image")
    keyword_input = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")

    # Output widgets: the raw extracted text and the highlighted sentence.
    text_output = gr.Textbox(label="Extracted Text", lines=10)
    result_output = gr.HTML(label="Search Result (First Matching Sentence)")

    gr.Interface(
        fn=ocr_and_search,
        inputs=[image_input, keyword_input],
        outputs=[text_output, result_output],
        title="OCR and Keyword Search Application",
    ).launch()


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    web_app()