Vishwas1 committed on
Commit
9453fd8
·
verified ·
1 Parent(s): 1170f39

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pytesseract
3
+ from pdf2image import convert_from_bytes
4
+ import os
5
+
6
def ocr_marathi_from_pdf(pdf_file):
    """Run Marathi OCR over every page of an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF. Accepted forms are a binary file-like
            object exposing ``read()``, a filesystem path (``str`` or
            ``os.PathLike``), raw ``bytes``/``bytearray``, or ``None`` when
            nothing was uploaded.

    Returns:
        The recognized text of all pages joined with newlines, or an empty
        string when ``pdf_file`` is ``None``.
    """
    # Nothing uploaded: return empty text instead of failing on None.read().
    if pdf_file is None:
        return ""

    # Normalize the supported input shapes down to the raw PDF bytes.
    if isinstance(pdf_file, (bytes, bytearray)):
        pdf_bytes = bytes(pdf_file)
    elif isinstance(pdf_file, (str, os.PathLike)):
        with open(pdf_file, "rb") as handle:
            pdf_bytes = handle.read()
    else:
        pdf_bytes = pdf_file.read()

    # Convert the PDF to one image per page.
    page_images = convert_from_bytes(pdf_bytes)

    # OCR each page with the Marathi language data and join the pages.
    return "\n".join(
        pytesseract.image_to_string(page, lang="mar") for page in page_images
    )
19
+
20
# Prefer the top-level File component; fall back to the legacy
# ``gr.inputs`` namespace only when the installed Gradio lacks ``gr.File``.
file_component = getattr(gr, "File", None) or gr.inputs.File

# Web UI: a single PDF upload in, the recognized text out.
iface = gr.Interface(
    fn=ocr_marathi_from_pdf,
    inputs=file_component(label="Upload PDF"),
    outputs="text",
    title="Marathi OCR",
)

if __name__ == "__main__":
    iface.launch()