File size: 972 Bytes
6363d82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas as pd
import io
from pypdf import PdfReader

def process_uploaded_file(uploaded_file):
    """Return the text content of an uploaded PDF or Excel (.xlsx) file.

    The file kind is decided solely from ``uploaded_file.type``, which is
    compared against the PDF and .xlsx MIME type strings.

    Args:
        uploaded_file: Object exposing a ``type`` attribute holding the MIME
            type; it is handed unchanged to the matching extractor.

    Returns:
        The extracted text, or the literal string
        ``"Unsupported file format."`` when the MIME type is neither of the
        two recognised ones.
    """
    mime_type = uploaded_file.type

    if mime_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)

    if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        return extract_text_from_excel(uploaded_file)

    # Anything else is reported with a message rather than an exception.
    return "Unsupported file format."

def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF, joined by newlines.

    Args:
        pdf_file: PDF source passed directly to ``PdfReader``.

    Returns:
        The per-page texts joined with ``"\\n"``. Pages whose extracted text
        is falsy (for example an empty string) are skipped, so the result is
        an empty string when no page yields any text.
    """
    reader = PdfReader(pdf_file)
    # Extract each page exactly once: text extraction is the expensive step,
    # so the result is reused for both the emptiness check and the output.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text)

def extract_text_from_excel(excel_file):
    """Render every sheet of an Excel workbook as plain text.

    Args:
        excel_file: Workbook source passed directly to ``pd.read_excel``.

    Returns:
        One section per sheet, concatenated in the order ``read_excel``
        returns them. Each section is ``"\\nSheet: <name>\\n"`` followed by
        the sheet rendered with ``DataFrame.to_string(index=False)``, so a
        non-empty result starts with a newline. Returns an empty string when
        there are no sheets.
    """
    # sheet_name=None makes read_excel return a mapping of sheet name ->
    # DataFrame instead of a single DataFrame.
    sheets = pd.read_excel(excel_file, sheet_name=None)
    sections = [
        f"\nSheet: {name}\n" + frame.to_string(index=False)
        for name, frame in sheets.items()
    ]
    return "".join(sections)