DocTRv1 / app.py
hantech's picture
Update app.py
89fdbfc verified
raw
history blame
2.81 kB
import os
os.environ['USE_TORCH'] = '1'
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import gradio as gr
from PIL import Image
import base64
from utils import HocrParser
import google.generativeai as genai
# Gemini credentials are read from the environment so that no secret lives in
# the source tree. Set GOOGLE_API_KEY in the host / Space secrets.
# SECURITY: a literal key used to be committed on this line; it was published
# together with the source and should be revoked and rotated.
api_key = os.environ.get('GOOGLE_API_KEY')
genai.configure(api_key=api_key)
# Text model used by greet() to reformat the OCR output.
geminiModel = genai.GenerativeModel(model_name='gemini-pro')
# OCR pipeline built once at import time: a detection model plus a recognition
# model, both loaded with pretrained weights.
predictor = ocr_predictor(
    det_arch='db_mobilenet_v3_large',
    reco_arch='crnn_vgg16_bn',
    pretrained=True,
)

# Overwrite the vocabulary entry in the recognition model's config with digits,
# Latin letters, punctuation / currency symbols and Vietnamese accented letters.
# NOTE(review): this is assigned after the model has been constructed; confirm
# that changing cfg['vocab'] at this point actually affects decoding.
predictor.reco_predictor.model.cfg['vocab'] = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!”#$%&’()*+,-./:;<=>?@[]^_`{|}~°£€¥¢฿áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ'

# Heading and help text displayed by the Gradio interface.
title = "DocTR OCR (PDL Demo)"
description = "Upload an image to get the OCR results !"
def _ocr_text(output):
    """Flatten an OCR result into plain text.

    Walks pages -> blocks -> lines -> words. Every word is prefixed with a
    single space, every line is terminated by a newline, and every block is
    followed by one additional newline.
    """
    pieces = []
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                pieces.extend(" " + word.value for word in line.words)
                pieces.append("\n")
            pieces.append("\n")
    # One join instead of repeated string concatenation inside four loops.
    return "".join(pieces)


def greet(img):
    """Run OCR on an image, have Gemini reformat the text, and export files.

    Args:
        img: PIL image handed over by the Gradio image input.

    Returns:
        A tuple ``(text, txt_path, pdf_path)``: the text of the Gemini
        response, the path of the text file that text was written to, and the
        path passed to ``HocrParser.export_pdfa`` for the PDF export.

    Raises:
        gr.Error: if no image was supplied.
    """
    if img is None:
        # An empty submission would otherwise fail on img.save with an
        # unhelpful AttributeError.
        raise gr.Error("Please upload an image before submitting.")

    # NOTE(review): the input/output file names are fixed paths in the working
    # directory, so overlapping requests would overwrite each other's files.
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = predictor(doc)
    xml_outputs = output.export_as_xml()
    parser = HocrParser()

    # generate text
    prompt = "take this peace of information and give all the information in point wise better format also give some recomendation related to them: " + _ocr_text(output)
    response = geminiModel.generate_content(prompt)
    print(response)
    res = response.text

    _output_name = "RESULT_OCR.txt"
    _output_name_pdf = "RESULT_OCR.pdf"

    # Mode "w" already truncates the file, so no separate "clear" step is needed.
    with open(_output_name, "w", encoding="utf-8", errors="ignore") as f:
        f.write(res)
        print("Writing into file")

    # Every iteration writes to the same PDF path, so the file left on disk
    # comes from the last (xml, image) pair.
    for xml, page_image in zip(xml_outputs, doc):
        hocr_tree = xml[1]  # second item of each export_as_xml() entry
        parser.export_pdfa(_output_name_pdf, hocr=hocr_tree, image=page_image)

    return res, _output_name, _output_name_pdf
# Sample inputs offered in the UI; each inner list holds the value for the
# single image input.
_example_inputs = [
    ["Examples/Book.png"],
    ["Examples/News.png"],
    ["Examples/Manuscript.jpg"],
    ["Examples/Files.jpg"],
]

# One PIL image in; three outputs: the text plus the two file paths that
# greet() returns.
demo = gr.Interface(
    fn=greet,
    inputs=gr.Image(type="pil"),
    outputs=["text", "file", "file"],
    title=title,
    description=description,
    examples=_example_inputs,
)

# Start the web app with debug mode enabled.
demo.launch(debug=True)