Spaces:

akashshahade
/

multidoc_chat

Sleeping

multidoc_chat / utils.py

Upload 7 files

6363d82 verified 2 months ago

972 Bytes

	import pandas as pd
	import io
	from pypdf import PdfReader

	def process_uploaded_file(uploaded_file):
	    """Extract text from an uploaded PDF or Excel (.xlsx) file.

	    The extractor is chosen from the upload's MIME type, read from
	    ``uploaded_file.type``.

	    Args:
	        uploaded_file: Upload object exposing a ``type`` attribute that
	            holds its MIME type. The object is passed unchanged to the
	            matching extractor.

	    Returns:
	        The text produced by the matching extractor, or the literal string
	        ``"Unsupported file format."`` when the MIME type is neither PDF
	        nor .xlsx. The unsupported case is reported through the return
	        value, not by raising.
	    """
	    mime_type = uploaded_file.type

	    if mime_type == "application/pdf":
	        return extract_text_from_pdf(uploaded_file)

	    if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
	        return extract_text_from_excel(uploaded_file)

	    # Callers receive this sentinel string in place of extracted text.
	    return "Unsupported file format."

	def extract_text_from_pdf(pdf_file):
	    """Extract the text of every page of a PDF, joined by newlines.

	    Args:
	        pdf_file: Whatever ``PdfReader`` accepts as its source; it is
	            passed straight through to the reader.

	    Returns:
	        The per-page text joined with ``"\\n"``. Pages whose extraction
	        result is falsy (for example an empty string) are skipped, so the
	        result is ``""`` when no page yields any text.
	    """
	    reader = PdfReader(pdf_file)
	    # Extract each page exactly once; the lazy generator feeds the filter
	    # below so the extraction is not repeated for the test and the value.
	    page_texts = (page.extract_text() for page in reader.pages)
	    return "\n".join(page_text for page_text in page_texts if page_text)

	def extract_text_from_excel(excel_file):
	    """Render every sheet of an Excel workbook as plain text.

	    Args:
	        excel_file: Source passed straight to ``pd.read_excel``.

	    Returns:
	        One section per sheet, concatenated in the order the workbook
	        mapping yields them. Each section is a ``"\\nSheet: <name>\\n"``
	        header followed by the sheet rendered with
	        ``DataFrame.to_string(index=False)``. The result is ``""`` when
	        the mapping is empty.
	    """
	    # sheet_name=None loads all sheets as a {sheet name: DataFrame} mapping.
	    workbook = pd.read_excel(excel_file, sheet_name=None)
	    sections = [
	        f"\nSheet: {sheet_name}\n" + frame.to_string(index=False)
	        for sheet_name, frame in workbook.items()
	    ]
	    # Join once instead of growing a string with += on every sheet.
	    return "".join(sections)