import io

import pandas as pd
from pypdf import PdfReader

# MIME types this module knows how to handle.
_PDF_MIME_TYPE = "application/pdf"
_XLSX_MIME_TYPE = (
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)


def process_uploaded_file(uploaded_file) -> str:
    """Extract text from an uploaded PDF or Excel (.xlsx) file.

    Dispatches on ``uploaded_file.type`` (a MIME type string).

    Args:
        uploaded_file: Object exposing a ``type`` attribute and accepted as
            input by ``PdfReader`` / ``pandas.read_excel``.
            NOTE(review): presumably a web-framework upload object (e.g. a
            Streamlit ``UploadedFile``) -- confirm against the caller.

    Returns:
        The extracted text, or the literal string
        ``"Unsupported file format."`` when the MIME type is neither PDF
        nor .xlsx.
    """
    if uploaded_file.type == _PDF_MIME_TYPE:
        return extract_text_from_pdf(uploaded_file)
    if uploaded_file.type == _XLSX_MIME_TYPE:
        return extract_text_from_excel(uploaded_file)
    return "Unsupported file format."


def extract_text_from_pdf(pdf_file) -> str:
    """Extract the text of every page of a PDF, joined by newlines.

    Pages for which extraction yields nothing (empty/falsy result) are
    skipped, so they contribute no blank line to the output.

    Args:
        pdf_file: Anything ``pypdf.PdfReader`` accepts as its source.

    Returns:
        The page texts joined with ``"\\n"``; an empty string if no page
        produced any text.
    """
    reader = PdfReader(pdf_file)
    # Extract each page exactly once: text extraction is the expensive step,
    # so avoid calling it a second time just to filter out empty pages.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(page_text for page_text in page_texts if page_text)


def extract_text_from_excel(excel_file) -> str:
    """Render every sheet of an Excel workbook as plain text.

    Each sheet is emitted as ``"\\nSheet: <name>\\n"`` followed by the sheet's
    ``DataFrame.to_string(index=False)`` rendering; the per-sheet sections
    are concatenated in workbook order with no extra separator.

    Args:
        excel_file: Anything ``pandas.read_excel`` accepts as its source.

    Returns:
        The concatenated sheet renderings; an empty string if the workbook
        yields no sheets.
    """
    # sheet_name=None makes read_excel return a dict of {sheet name: DataFrame}
    # covering all sheets instead of a single DataFrame.
    sheets = pd.read_excel(excel_file, sheet_name=None)
    return "".join(
        f"\nSheet: {sheet_name}\n" + frame.to_string(index=False)
        for sheet_name, frame in sheets.items()
    )