|
import gradio as gr |
|
import pandas as pd |
|
import os |
|
from utils import pdf_to_text, align_text |
|
|
|
def process_files(source_file, target_file, lang1, lang2):
    """Align the text of two uploaded PDFs and export the result to Excel.

    Args:
        source_file: Uploaded source PDF. Either an object exposing its
            on-disk path as ``.name`` or the path string itself.
        target_file: Uploaded target PDF, in the same form as
            ``source_file``.
        lang1: Language code selected for the source document.
        lang2: Language code selected for the target document.

    Returns:
        A ``(html, excel_path)`` tuple. On success ``html`` is the aligned
        table rendered as HTML and ``excel_path`` is the path of the
        exported workbook. When validation fails, ``html`` carries a
        message for the user and ``excel_path`` is ``None``.
    """
    # Local import so the function does not depend on an edit to the
    # file-level import block.
    import tempfile

    if source_file is None or target_file is None:
        return "Please upload both PDF files.", None

    # A dropdown without a default yields None until the user picks a value;
    # reject that here instead of passing None on to the aligner.
    if not lang1 or not lang2:
        return "Please select both languages.", None

    if lang1 == lang2:
        return "Please select different languages.", None

    # Accept both upload objects carrying ``.name`` and plain path strings.
    source_path = getattr(source_file, "name", source_file)
    target_path = getattr(target_file, "name", target_file)

    source_text = pdf_to_text(source_path)
    target_text = pdf_to_text(target_path)

    aligned_df = align_text(source_text, target_text, lang1, lang2)
    aligned_html = aligned_df.to_html(index=False)

    # Write each export into its own temporary directory: a single fixed
    # path in the working directory would be overwritten by concurrent
    # requests. The file name is kept so the download is still called
    # "aligned_data.xlsx".
    # NOTE(review): these directories are not removed here; rely on OS temp
    # cleanup or add explicit housekeeping if disk usage matters.
    export_dir = tempfile.mkdtemp(prefix="aligned_")
    excel_path = os.path.join(export_dir, "aligned_data.xlsx")
    aligned_df.to_excel(excel_path, index=False)

    return aligned_html, excel_path
|
|
|
|
|
# Language codes offered by both dropdowns.
_LANGUAGE_CODES = ("en", "es", "fr", "zh", "ar", "ru", "pt")

with gr.Blocks() as interface:
    # Page header.
    gr.Markdown(
        "# PDF Text Aligner\nUpload two PDF files and select languages to align the text."
    )

    # Inputs: the two PDFs and the language of each.
    source_file = gr.File(label="Upload Source PDF")
    target_file = gr.File(label="Upload Target PDF")
    lang1 = gr.Dropdown(
        choices=list(_LANGUAGE_CODES),
        label="Select Language 1",
    )
    lang2 = gr.Dropdown(
        choices=list(_LANGUAGE_CODES),
        label="Select Language 2",
        value="es",
    )

    start_button = gr.Button(value="Start")

    # Outputs: the rendered alignment table and the Excel download.
    aligned_html = gr.HTML(label="Aligned DataFrame")
    download_button = gr.File(label="Download Aligned Data as Excel")

    # Run the alignment when the button is pressed; the two return values
    # of process_files fill the HTML view and the download slot, in order.
    start_button.click(
        fn=process_files,
        inputs=[source_file, target_file, lang1, lang2],
        outputs=[aligned_html, download_button],
    )


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()