Spaces:

vuvko
/

from_pdf

Sleeping

File size: 4,230 Bytes

77eac00
 
 
 
 
 
 
 
d15e89e
77eac00
8ae9c70
77eac00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d15e89e
 
 
 
 
1482457
d15e89e
 
 
 
 
 
77eac00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d5d236
 
23c0f1b
 
 
 
 
77eac00
 
caccbf6
9fc3c5a
 
caccbf6
9fc3c5a
 
 
 
d15e89e
9fc3c5a
 
caccbf6
9fc3c5a
 
 
 
77eac00
9fc3c5a
 
 
 
 
 
 
 
 
 
77eac00
9fc3c5a
 
 
 
 
 
 
caccbf6
 
 
 
 
 
 
 
 
 
 
 
 
77eac00
caccbf6

import gradio as gr
from time import time
from pathlib import Path
from gradio_pdf import PDF
from pdf2image import convert_from_path
import shutil
import tempfile
from transformers import pipeline
import subprocess as sp

# Maximum age difference between the newest archive entry and an older one
# before the older one is purged; handle_files compares it against
# differences of time() stamps, so the unit is seconds.
FILE_TIMEOUT = 1_000
# Maximum number of archive entries handle_files keeps in a list.
MAX_FILES = 10

# Document question-answering pipeline, built once at import time and
# called by interact_with_pdf for every rendered page.
p = pipeline(
    task="document-question-answering",
    model="impira/layoutlm-document-qa",
)

def handle_files(cur_files):
    """
    Prune stale and surplus archive entries and delete them from disk.

    ``cur_files`` is a list of ``(timestamp, path)`` tuples whose last entry
    is treated as the newest one. Every earlier entry whose timestamp differs
    from the newest by more than ``FILE_TIMEOUT`` is dropped; after that only
    the last ``MAX_FILES`` surviving entries are kept. For each dropped entry
    the directory containing its file (``path.parent``) is removed.

    Returns a new list with the surviving entries in their original order;
    the list passed in is not modified. An empty list yields an empty list.
    """
    if not cur_files:
        # There is no newest entry to compare against.
        return []

    newest_time = cur_files[-1][0]
    kept, expired = [], []
    for entry in cur_files[:-1]:
        is_stale = abs(newest_time - entry[0]) > FILE_TIMEOUT
        (expired if is_stale else kept).append(entry)
    # The newest entry is never subject to the age check.
    kept.append(cur_files[-1])

    if len(kept) > MAX_FILES:
        expired.extend(kept[:-MAX_FILES])
        kept = kept[-MAX_FILES:]

    for _, stale_file in expired:
        # Cleanup is best-effort: a directory that has already disappeared
        # must not make the caller's request fail.
        shutil.rmtree(stale_file.parent, ignore_errors=True)
    return kept


def extract_text(pdf_file):
    """
    Return the text of a PDF file as a single string.

    Runs the external ``pdftotext -layout`` command on ``pdf_file`` and
    returns everything the command writes to standard output; its standard
    error output is discarded. When no file is given (``None`` or an empty
    path) an empty string is returned without running the command.

    Raises ``subprocess.CalledProcessError`` if ``pdftotext`` exits with a
    non-zero status.
    """
    if not pdf_file:
        # Nothing was uploaded; passing None to subprocess would raise.
        return ''
    args = ['pdftotext', '-layout', pdf_file, '-']
    cp = sp.run(
        args, stdout=sp.PIPE, stderr=sp.DEVNULL,
        check=True, text=True
    )
    return cp.stdout

# Convert a PDF to page images and bundle them into a ZIP archive on disk.
def process_pdf(pdf_file, cur_files):
    """
    Render the pages of ``pdf_file`` to images and zip them.

    The archive is written into a fresh temporary directory and named after
    the PDF's stem. A ``(timestamp, archive_path)`` entry is appended to
    ``cur_files`` and the list is then pruned by ``handle_files``.

    Returns a tuple ``(archive_path_as_str, pruned_cur_files)``.
    """
    archive_dir = Path(tempfile.mkdtemp())
    try:
        with tempfile.TemporaryDirectory() as pages_dir:
            # The page images land in pages_dir, which is what gets zipped.
            convert_from_path(pdf_file, output_folder=str(pages_dir))
            # Use the path make_archive reports instead of deriving it with
            # Path.with_suffix, which replaces everything after the last dot
            # of the stem ("report.v2" -> "report.zip", a file that does
            # not exist; the real archive is "report.v2.zip").
            zip_output = Path(shutil.make_archive(
                str(archive_dir / Path(pdf_file).stem), 'zip', pages_dir
            ))
    except Exception:
        # Do not leave an orphaned output directory behind on failure.
        shutil.rmtree(archive_dir, ignore_errors=True)
        raise

    cur_files.append((time(), zip_output))
    cur_files = handle_files(cur_files)

    return str(zip_output), cur_files


def interact_with_pdf(doc, question):
    """
    Answer ``question`` about the PDF ``doc`` using the QA pipeline ``p``.

    Every page is rendered to an image, the pipeline is run on each page and
    the answer with the highest ``score`` across all pages is returned. If
    no candidate answers are produced at all, an empty string is returned.
    """
    candidates = []
    with tempfile.TemporaryDirectory() as path:
        # Pages are processed inside the ``with`` block so the temporary
        # folder handed to the converter still exists while they are used.
        for img in convert_from_path(doc, output_folder=path):
            candidates.extend(p(img, question))
    if not candidates:
        # Avoid an IndexError when there is nothing to rank.
        return ''
    # max() returns the first of equally scored candidates, the same element
    # a stable descending sort would put first.
    return max(candidates, key=lambda c: c["score"])['answer']

'''
text_interface = gr.Interface(
    fn=extract_text,
    inputs=PDF(label="Загрузить PDF"),
    outputs=gr.Textbox(label="Полученный текст"),
    title="PDF в текст",
    description="Сервис вынимает содержащийся в PDF контейнере текст."
)

pdf_interface = gr.Interface(
    fn=process_pdf,
    inputs=[PDF(label="Загрузить PDF"), out_files],
    outputs=[gr.File(label="Скачать ZIP архив"), out_files],
    title="PDF в картинки",
    description="Переводит постранично файл в изображения, и предлагает бскачать в виде ZIP архива."
)

image_interface = gr.Interface(
    fn=interact_with_pdf,
    inputs=[
        PDF(label="Загрузить PDF"),
        gr.Textbox(label="Что хотите найти?")
    ],
    outputs=gr.Textbox(label="Возможный ответ"),
    title="Спроси PDF",
    description="Сервис через методы OCR сканирует загруженный файл для нахождения ответа на заданный вопрос."
)

# Create a tabbed interface
tabbed_interface = gr.TabbedInterface(
    [text_interface, pdf_interface, image_interface],
    title="Взаимодействие с PDF",
    tab_names=["В текст", "В картинки", "Задай вопрос"],
    # description="Choose a tab to perform the desired task."
)
'''

# Tabbed UI. Only the text-extraction tab is wired to a handler so far.
with gr.Blocks() as tabbed_interface:
    # State holding a list; not connected to any event yet. The disabled
    # interface code above passes it to process_pdf as the archive list.
    out_files = gr.State([])
    with gr.Tab("В текст"):
        inp1 = PDF(label="Загрузить PDF")
        out1 = gr.Textbox(label="Полученный текст")
        btn1 = gr.Button("Запустить")
        btn1.click(fn=extract_text, inputs=inp1, outputs=out1)
    # Fixed: this line used the undefined name `gt`, which raised NameError
    # as soon as the module was executed.
    with gr.Tab("В картинки"):
        pass  # TODO: wire up process_pdf
    with gr.Tab("Задай вопрос"):
        pass  # TODO: wire up interact_with_pdf

tabbed_interface.launch(show_api=False, max_threads=8)