# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: liekkaskono@163.com
"""Gradio demo: upload a PDF, pick a page, and show the RapidOCRPDF output."""
import gradio as gr
from gradio_pdf import PDF
from rapidocr_pdf import RapidOCRPDF

# Built once at import time and shared by every request handled by the demo.
pdf_extracter = RapidOCRPDF()


def get_pdf_result(pdf_path: str, page_num: int = 0):
    """Run the PDF extractor on a single page of the uploaded file.

    Args:
        pdf_path: Path of the uploaded PDF as delivered by the ``PDF``
            component; empty/``None`` when nothing has been uploaded yet.
        page_num: Page to extract. ``None`` (a cleared number field) falls
            back to the default page ``0``.

    Returns:
        Whatever ``RapidOCRPDF`` returns for ``page_num_list=[page]``; it is
        rendered as-is by the results dataframe.

    Raises:
        gr.Error: If no PDF has been uploaded, so the UI shows a readable
            message instead of an internal traceback.
    """
    if not pdf_path:
        raise gr.Error("Please upload a PDF before running the extraction.")

    # gr.Number may hand over a float or None; the extractor is given an int.
    page_index = 0 if page_num is None else int(page_num)
    return pdf_extracter(pdf_path, page_num_list=[page_index])


with gr.Blocks(title="RapidOCR 📄 PDF", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the banner below contains no HTML tags in this view of
    # the file -- confirm against upstream whether markup was stripped.
    gr.HTML(
        """

RapidOCR 📄 PDF

PyPI SemVer2.0 GitHub
"""
    )

    # Inputs: the document itself and the page to extract.
    pdf_path = PDF(label="Upload a PDF", interactive=True)
    page_num = gr.Number(
        value=0,
        label="Select Extract Page",
        minimum=0,
        maximum=5,
        precision=0,  # round to an integer so the value is a valid page number
    )
    btn = gr.Button("Run")

    # Output table; column names and types follow the headers declared here.
    results = gr.Dataframe(
        label="PDF Extract Results",
        headers=["Index", "Txt", "Score"],
        datatype=["number", "str", "number"],
        show_copy_button=True,
        show_fullscreen_button=True,
    )

    btn.click(get_pdf_result, inputs=[pdf_path, page_num], outputs=[results])

    # Bundled sample documents; results are computed on click, not cached.
    examples = gr.Examples(
        examples=[
            ["./files/direct_and_image.pdf", 0],
            ["./files/direct_extract.pdf", 0],
        ],
        examples_per_page=5,
        inputs=[pdf_path, page_num],
        fn=get_pdf_result,
        outputs=[results],
        cache_examples=False,
    )


if __name__ == "__main__":
    demo.launch()