import os
import tempfile
from typing import Optional, Tuple

import docx
import fitz


def _write_text_to_temp_file(text_content: str) -> str:
    """Write *text_content* to a new UTF-8 ``.txt`` temp file and return its path.

    The file is created atomically by ``tempfile.NamedTemporaryFile`` (unlike
    the deprecated ``tempfile.mktemp``, which only generates a name and leaves
    a window for another process to claim it). It is created with
    ``delete=False`` so it persists after this function returns; nothing in
    this module removes it afterwards.

    Raises:
        Any exception raised while creating or writing the file. If the write
        fails, the partially written file is removed before re-raising.
    """
    tmp = tempfile.NamedTemporaryFile(
        "w", suffix=".txt", encoding="utf-8", delete=False
    )
    try:
        with tmp:
            tmp.write(text_content)
    except Exception:
        # Do not leave a half-written file behind on failure.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise
    return tmp.name


def process_pdf_file(pdf_file) -> Tuple[Optional[str], str]:
    """Extract the text of a PDF and save it to a temporary ``.txt`` file.

    Args:
        pdf_file: Passed unchanged to ``fitz.open``.

    Returns:
        ``(text_path, text_content)`` on success, where ``text_path`` is the
        path of the temp file holding the extracted text.
        ``(None, error_message)`` if any step raises; the exception is not
        propagated.
    """
    try:
        doc = fitz.open(pdf_file)
        try:
            # Page texts are concatenated with no separator between pages.
            text_content = "".join(
                doc.load_page(page_num).get_text()
                for page_num in range(len(doc))
            )
        finally:
            # Release the document handle even if extraction fails.
            doc.close()

        # The temp file is only created once extraction has succeeded, so a
        # failed extraction leaves nothing on disk.
        text_path = _write_text_to_temp_file(text_content)
        return text_path, text_content
    except Exception as e:
        # Callers rely on the (None, message) tuple rather than an exception.
        error_message = f"Error processing PDF file: {str(e)}"
        return None, error_message


def process_docx_file(docx_file) -> Tuple[Optional[str], str]:
    """Extract the paragraph text of a Word document into a temp ``.txt`` file.

    Only ``doc.paragraphs`` is read; each paragraph's text is followed by a
    newline.

    Args:
        docx_file: Passed unchanged to ``docx.Document``.

    Returns:
        ``(text_path, text_content)`` on success, where ``text_path`` is the
        path of the temp file holding the extracted text.
        ``(None, error_message)`` if any step raises; the exception is not
        propagated.
    """
    try:
        doc = docx.Document(docx_file)
        text_content = "".join(f"{para.text}\n" for para in doc.paragraphs)

        # The temp file is only created once extraction has succeeded, so a
        # failed extraction leaves nothing on disk.
        text_path = _write_text_to_temp_file(text_content)
        return text_path, text_content
    except Exception as e:
        # Callers rely on the (None, message) tuple rather than an exception.
        error_message = f"Error processing Word document: {str(e)}"
        return None, error_message