"""Gradio app that aligns the text of two PDFs and offers the result as Excel."""

import os
import tempfile

import gradio as gr
import pandas as pd

from utils import pdf_to_text, align_text

# Language codes offered in both dropdowns.
LANGUAGE_CHOICES = ["en", "es", "fr", "zh", "ar", "ru", "pt"]

# File name presented to the user for the generated spreadsheet.
EXCEL_FILENAME = "aligned_data.xlsx"


def _upload_path(uploaded):
    """Return the filesystem path for a value produced by a ``gr.File`` input.

    Depending on the Gradio version/configuration the component may hand over
    either an object exposing ``.name`` or a plain path string; both are
    accepted here.
    """
    return getattr(uploaded, "name", uploaded)


def process_files(source_file, target_file, lang1, lang2):
    """Align the text of two uploaded PDFs and export the result.

    Args:
        source_file: Upload from the "source" ``gr.File`` input, or ``None``.
        target_file: Upload from the "target" ``gr.File`` input, or ``None``.
        lang1: Language code selected for the source document.
        lang2: Language code selected for the target document.

    Returns:
        A ``(html, excel_path)`` tuple. On success ``html`` is the aligned
        table rendered via ``to_html`` and ``excel_path`` is the path of the
        written ``.xlsx`` file. On a validation failure ``html`` is a plain
        message and ``excel_path`` is ``None``.
    """
    if source_file is None or target_file is None:
        return "Please upload both PDF files.", None

    # The first dropdown has no default value, so a language may be unset.
    # NOTE(review): assumes align_text requires both languages -- confirm.
    if not lang1 or not lang2:
        return "Please select both languages.", None

    if lang1 == lang2:
        return "Please select different languages.", None

    # Convert PDFs to text
    text_content1 = pdf_to_text(_upload_path(source_file))
    text_content2 = pdf_to_text(_upload_path(target_file))

    # Align the texts
    aligned_df = align_text(text_content1, text_content2, lang1, lang2)

    # Convert DataFrame to HTML
    aligned_html = aligned_df.to_html(index=False)

    # Save the Excel file in a fresh temporary directory so that concurrent
    # requests never overwrite each other's output, while the download keeps
    # the same file name as before.
    output_dir = tempfile.mkdtemp(prefix="pdf_align_")
    excel_path = os.path.join(output_dir, EXCEL_FILENAME)
    aligned_df.to_excel(excel_path, index=False)

    return aligned_html, excel_path


# Define the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("# PDF Text Aligner\nUpload two PDF files and select languages to align the text.")

    source_file = gr.File(label="Upload Source PDF")
    target_file = gr.File(label="Upload Target PDF")

    lang1 = gr.Dropdown(choices=LANGUAGE_CHOICES, label="Select Language 1")
    lang2 = gr.Dropdown(choices=LANGUAGE_CHOICES, label="Select Language 2", value="es")

    start_button = gr.Button(value="Start")

    aligned_html = gr.HTML(label="Aligned DataFrame")
    download_button = gr.File(label="Download Aligned Data as Excel")

    start_button.click(
        fn=process_files,
        inputs=[source_file, target_file, lang1, lang2],
        outputs=[aligned_html, download_button],
    )


if __name__ == "__main__":
    interface.launch()