Spaces:
Runtime error
Runtime error
import fitz # PyMuPDF | |
import pandas as pd | |
from pptx import Presentation | |
def extract_text_from_file(v_file_path: str) -> str:
    """Extract plain text from a PDF, PPTX, or CSV file.

    The file type is chosen from the path's extension, compared
    case-insensitively.

    Args:
        v_file_path: Path to the file to read.

    Returns:
        For ``.pdf``: the text of every page, concatenated in page order.
        For ``.pptx``: the text of every shape that has a text frame, each
        followed by a newline, in slide/shape order.
        For ``.csv``: the ``DataFrame.to_string()`` rendering of the parsed
        table.
        For any other extension: an empty string (no error is raised).
    """
    # Lower-case once; only used for extension matching. The original path is
    # what gets opened, so case-sensitive file systems still work.
    v_lower_path = v_file_path.lower()

    if v_lower_path.endswith('.pdf'):
        # The context manager closes the document even if a page fails to
        # render its text, which the manual close() call did not guarantee.
        with fitz.open(v_file_path) as obj_pdf:
            return "".join(obj_page.get_text() for obj_page in obj_pdf)

    if v_lower_path.endswith('.pptx'):
        obj_ppt = Presentation(v_file_path)
        # Shapes without a text frame (pictures, charts, ...) are skipped.
        return "".join(
            obj_shape.text_frame.text + "\n"
            for obj_slide in obj_ppt.slides
            for obj_shape in obj_slide.shapes
            if obj_shape.has_text_frame
        )

    if v_lower_path.endswith('.csv'):
        return pd.read_csv(v_file_path).to_string()

    # Unsupported extension: keep the historical "empty text" result so
    # existing callers that test for falsiness keep working.
    return ""