File size: 2,792 Bytes
dbdbddf
 
 
 
 
 
 
1cfd79c
 
ac800d9
dbdbddf
ac800d9
 
 
dc813d0
4733ddf
8b21536
 
 
dbdbddf
 
 
 
1cfd79c
 
 
 
dbdbddf
 
 
 
 
 
 
 
ac800d9
 
 
 
 
fe10d73
1cfd79c
 
fe10d73
 
 
 
1cfd79c
 
 
 
 
 
 
 
 
dbdbddf
fe10d73
 
1cfd79c
fe10d73
3d45b3a
2c62fa3
fe10d73
dbdbddf
fe10d73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
os.environ['USE_TORCH'] = '1'

from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import gradio as gr
from PIL import Image
import base64
from utils import HocrParser
import google.generativeai as genai

# Gemini credentials come from the environment so that no secret is stored in
# the source file. Export GOOGLE_API_KEY before starting the app (on a hosted
# deployment, use the platform's secret store).
# NOTE: a key was previously committed in this file; treat it as compromised
# and revoke it.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    # Fail at start-up with an actionable message instead of on the first
    # request with an opaque authentication error.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before launching the app."
    )
genai.configure(api_key=api_key)
# Text-only Gemini model used to reformat the OCR output.
geminiModel = genai.GenerativeModel(model_name='gemini-pro')
# Pretrained docTR pipeline: 'db_mobilenet_v3_large' for text detection and
# 'crnn_vgg16_bn' for text recognition.
predictor = ocr_predictor(
    det_arch='db_mobilenet_v3_large',
    reco_arch='crnn_vgg16_bn',
    pretrained=True,
)

# Replace the vocabulary entry in the recognition model's config with digits,
# ASCII letters, punctuation, currency symbols and Vietnamese accented letters.
# NOTE(review): only the config entry is replaced here; confirm that the
# pretrained recognition weights actually match this vocabulary.
predictor.reco_predictor.model.cfg['vocab'] = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!”#$%&’()*+,-./:;<=>?@[]^_`{|}~°£€¥¢฿áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ'

# Heading and sub-heading passed to the Gradio interface.
title = "DocTR OCR (PDL Demo)"
description = "Upload an image to get the OCR results !"

def _ocr_text(result):
    """Flatten an OCR result into plain text.

    Every recognised word is prefixed with one space, a newline is appended
    after each block and one more after each page.
    """
    chunks = []
    for page in result.pages:
        for block in page.blocks:
            chunks.extend(
                " " + word.value
                for line in block.lines
                for word in line.words
            )
            chunks.append("\n")
        chunks.append("\n")
    # A single join avoids rebuilding the string once per word.
    return "".join(chunks)


def greet(img):
    """Run OCR on an uploaded image and have Gemini reformat the text.

    Args:
        img: The uploaded picture as a PIL image.

    Returns:
        A tuple ``(text, txt_path, pdf_path)``: the Gemini response text, the
        path of the text file that text was written to, and the path of the
        PDF produced from the OCR layout.

    Raises:
        ValueError: If no image was supplied.
    """
    if img is None:
        raise ValueError("No image was provided.")
    # JPEG cannot store alpha or palette modes, so normalise before saving.
    if img.mode != "RGB":
        img = img.convert("RGB")
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = predictor(doc)

    xml_outputs = output.export_as_xml()
    parser = HocrParser()

    # generate text
    prompt = "take this peace of information and give all the information in point wise better format also give some recomendation related to them: " + _ocr_text(output)
    response = geminiModel.generate_content(prompt)
    # NOTE(review): accessing `.text` may raise when the model returns no
    # usable candidate (e.g. a blocked prompt) -- confirm the desired handling.
    res = response.text

    _output_name = "RESULT_OCR.txt"
    _output_name_pdf = "RESULT_OCR.pdf"

    # Mode "w" already truncates the file, so no separate clearing step is needed.
    with open(_output_name, "w", encoding="utf-8", errors="ignore") as f:
        f.write(res)
        print("Writing into file")

    # Each iteration writes to the same PDF path, so with several pages only
    # the last one would remain; a single uploaded image yields one page.
    for xml, page_image in zip(xml_outputs, doc):
        # The second item of each exported entry is passed on as the hOCR tree.
        parser.export_pdfa(_output_name_pdf, hocr=xml[1], image=page_image)
    return res, _output_name, _output_name_pdf

# Web UI: one PIL image in; the reformatted text plus the generated text and
# PDF files out. Each example row holds the single image input for that row.
demo = gr.Interface(
    fn=greet,
    inputs=gr.Image(type="pil"),
    outputs=["text", "file", "file"],
    title=title,
    description=description,
    examples=[
        [example_path]
        for example_path in (
            "Examples/Book.png",
            "Examples/News.png",
            "Examples/Manuscript.jpg",
            "Examples/Files.jpg",
        )
    ],
)

# Start the Gradio server with debug output enabled.
demo.launch(debug=True)