File size: 1,802 Bytes
dbdbddf
 
ac800d9
2d19186
dbdbddf
ac800d9
 
2d19186
8b21536
 
 
2d19186
 
 
 
dbdbddf
ac800d9
fd6bd9e
ac800d9
2d19186
 
89fdbfc
ac800d9
fe10d73
1cfd79c
 
fe10d73
 
 
 
1cfd79c
 
 
 
 
 
 
 
 
dbdbddf
fe10d73
 
1cfd79c
fe10d73
a6094b4
fe10d73
dbdbddf
fe10d73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import textwrap

import google.generativeai as genai
import gradio as gr
from IPython.display import Markdown
from PIL import Image

# Read the Gemini API key from the environment instead of committing it to
# source control. Any key that was previously hard-coded in this file is
# exposed in the file's history and should be revoked and replaced.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    # Fail at start-up with an actionable message rather than on the first
    # request made through the UI.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export your Gemini API key before "
        "starting the app."
    )
genai.configure(api_key=api_key)

# Vision-capable Gemini model shared by every request handled by `greet`.
geminiModel = genai.GenerativeModel(model_name='gemini-pro-vision')

# Text shown in the Gradio interface header.
title = "DocTR OCR (PDL Demo)"
description = "Upload an image to get the OCR results !"

def to_markdown(text: str) -> "Markdown":
    """Wrap ``text`` in an IPython ``Markdown`` object as a block quote.

    Every "•" character is replaced with the Markdown list marker "  *",
    then every line is prefixed with "> ".

    Args:
        text: The plain-text string to convert.

    Returns:
        A ``Markdown`` display object containing the quoted text.
    """
    text = text.replace('•', '  *')
    # The always-true predicate makes `indent` prefix blank lines as well,
    # which it would otherwise skip.
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))
    
def greet(img):
    """Send the uploaded image to Gemini and save the reply to a text file.

    Args:
        img: The image passed in by the Gradio ``Image`` input, or ``None``
            when the form is submitted without an upload.

    Returns:
        A 3-tuple ``(text, text_file_path, pdf_file_path)``, one value per
        output component of the interface. ``pdf_file_path`` is always
        ``None``: the earlier PDF export relied on ``xml_outputs``, ``doc``
        and ``parser`` objects that are not defined anywhere in this file,
        so it could never run.

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        raise gr.Error("Please upload an image first.")

    # The prompt used to have an unassigned local `res` appended to it, which
    # raised UnboundLocalError on every call. The image itself is passed to
    # the model alongside the prompt, so no extra text is needed here.
    prompt = (
        "the input text in vietnamese, please add accend and take this peace "
        "of information and give all the information in point wise better "
        "format also give some recomendation related to them: "
    )
    response = geminiModel.generate_content([prompt, img], stream=True)
    # Block until the streamed response is complete so `.text` is available.
    response.resolve()
    print(response)
    res = response.text

    _output_name = "RESULT_OCR.txt"
    # Mode "w" truncates any previous result, so no separate clearing step
    # is required.
    with open(_output_name, "w", encoding="utf-8", errors="ignore") as f:
        f.write(res)
        print("Writing into file")

    return res, _output_name, None

# Build the UI: one PIL image in; one text box and two file slots out, in the
# same order as the tuple returned by `greet`.
_image_input = gr.Image(type="pil")
_output_components = ["text", "file", "file"]

demo = gr.Interface(
    fn=greet,
    inputs=_image_input,
    outputs=_output_components,
    title=title,
    description=description,
)

# Start the app with Gradio's debug flag enabled.
demo.launch(debug=True)