import base64
import os

# docTR selects its deep-learning backend at import time, so this flag has to
# be set before anything from `doctr` is imported.
os.environ['USE_TORCH'] = '1'

from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import gradio as gr
from PIL import Image
from utils import HocrParser
import google.generativeai as genai

# SECURITY: the API key used to be committed in this file. Secrets must not
# live in source control; the key is now read from the environment. Any key
# that was previously committed should be treated as leaked and revoked.
api_key = os.environ.get('GOOGLE_API_KEY')
genai.configure(api_key=api_key)
geminiModel = genai.GenerativeModel(model_name='gemini-pro')

# OCR pipeline: DB-MobileNetV3 text detection + CRNN-VGG16 text recognition,
# both loaded with pretrained weights.
predictor = ocr_predictor(det_arch='db_mobilenet_v3_large', reco_arch='crnn_vgg16_bn', pretrained=True)
# Replace the vocab entry of the recognition model's config with an extended
# character set (digits, ASCII letters, punctuation, currency symbols and
# accented letters — these look like Vietnamese diacritics).
predictor.reco_predictor.model.cfg['vocab'] = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!”#$%&’()*+,-./:;<=>?@[]^_`{|}~°£€¥¢฿áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ'

title = "DocTR OCR (PDL Demo)"
description = "Upload an image to get the OCR results !"


def _collect_text(output):
    """Flatten an OCR result into plain text.

    Walks ``output.pages`` -> ``blocks`` -> ``lines`` -> ``words``. Every word
    value is prefixed with a single space, every line is terminated with a
    newline, and every block is followed by one additional newline.
    """
    pieces = []
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                pieces.extend(" " + word.value for word in line.words)
                pieces.append("\n")
            pieces.append("\n")
    # One join instead of repeated `res = res + ...` concatenation.
    return "".join(pieces)


def greet(img):
    """Run OCR on an image, rewrite the text with Gemini and export the results.

    Args:
        img: The uploaded image; it must provide a ``save(path)`` method (the
            Gradio interface is configured to pass a PIL image).

    Returns:
        A ``(text, txt_path, pdf_path)`` tuple: the text returned by the
        Gemini model, the path of the text file that text was written to, and
        the path of the PDF produced from the OCR result.

    Raises:
        ValueError: If ``img`` is ``None`` (nothing was uploaded).
    """
    if img is None:
        raise ValueError("No image was provided; upload an image to run OCR.")

    # The image is round-tripped through disk so docTR can load it by path.
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = predictor(doc)
    xml_outputs = output.export_as_xml()
    parser = HocrParser()

    res = _collect_text(output)

    # generate text
    prompt = "take this peace of information and give all the information in point wise better format also give some recomendation related to them: " + res
    response = geminiModel.generate_content(prompt)
    print(response)
    res = response.text

    _output_name = "RESULT_OCR.txt"
    _output_name_pdf = "RESULT_OCR.pdf"
    # Mode "w" already truncates an existing file, so no separate
    # open/close step is needed to clear it first.
    with open(_output_name, "w", encoding="utf-8", errors="ignore") as f:
        f.write(res)
    print("Writing into file")

    # Each exported entry is indexed with [1] to get the tree handed to the
    # hOCR parser. Every page is written to the same PDF path, so the file
    # reflects the last page processed (a single uploaded image yields one).
    for xml, page_image in zip(xml_outputs, doc):
        xml_element_tree = xml[1]
        parser.export_pdfa(_output_name_pdf, hocr=xml_element_tree, image=page_image)

    return res, _output_name, _output_name_pdf
# Sample images offered below the upload widget; Gradio expects one inner
# list per example row, holding one value per input component.
_example_files = (
    "Examples/Book.png",
    "Examples/News.png",
    "Examples/Manuscript.jpg",
    "Examples/Files.jpg",
)

# Web UI: one PIL image in; the generated text plus two downloadable files out.
demo = gr.Interface(
    fn=greet,
    inputs=gr.Image(type="pil"),
    outputs=["text", "file", "file"],
    title=title,
    description=description,
    examples=[[path] for path in _example_files],
)

demo.launch(debug=True)