Sakshiw1 committed on
Commit
f60c847
·
verified ·
1 Parent(s): c2b1a45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -1,12 +1,13 @@
 
 
1
  import gradio as gr
2
- from transformers import pipeline
3
  import re
4
 
5
- # Load the OCR pipeline
6
- ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-stage1")
7
 
8
  def perform_ocr(image):
9
- text = ocr_pipeline(image)[0]['generated_text']
10
  return text
11
 
12
  def search_first_keyword_in_text(text, keyword):
@@ -23,6 +24,8 @@ def search_first_keyword_in_text(text, keyword):
23
 
24
  def ocr_and_search(image, keyword):
25
  try:
 
 
26
  extracted_text = perform_ocr(image)
27
  search_result = search_first_keyword_in_text(extracted_text, keyword)
28
  return extracted_text, search_result
 
1
+ import pytesseract
2
+ from PIL import Image
3
  import gradio as gr
 
4
  import re
5
 
6
+ # Configure Tesseract path (update if needed)
7
+ pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
8
 
9
  def perform_ocr(image):
10
+ text = pytesseract.image_to_string(image, lang='hin+eng')
11
  return text
12
 
13
  def search_first_keyword_in_text(text, keyword):
 
24
 
25
  def ocr_and_search(image, keyword):
26
  try:
27
+ # Resize the image to a manageable size for processing
28
+ image = image.resize((800, 600)) # Adjust size as needed
29
  extracted_text = perform_ocr(image)
30
  search_result = search_first_keyword_in_text(extracted_text, keyword)
31
  return extracted_text, search_result