"""Plain-text extraction from PDF, PPTX, and CSV files."""

# Each backend is only needed for its own file format, so a missing package
# must not stop the module from importing. The names stay defined at module
# level (None when the package is absent) and are checked at call time.
# Only ModuleNotFoundError is deferred: an installed-but-broken backend still
# fails at import time, exactly as it did before.
_IMPORT_ERRORS = {}

try:
    import fitz  # PyMuPDF
except ModuleNotFoundError as _obj_error:
    fitz = None
    _IMPORT_ERRORS["PyMuPDF"] = _obj_error

try:
    import pandas as pd
except ModuleNotFoundError as _obj_error:
    pd = None
    _IMPORT_ERRORS["pandas"] = _obj_error

try:
    from pptx import Presentation
except ModuleNotFoundError as _obj_error:
    Presentation = None
    _IMPORT_ERRORS["python-pptx"] = _obj_error


def _require(v_dependency, v_package):
    """Raise ImportError naming ``v_package`` if its import failed."""
    if v_dependency is None:
        # Chain the original failure so the missing module name is not lost.
        raise ImportError(
            f"{v_package} is required to read this file type"
        ) from _IMPORT_ERRORS.get(v_package)


def extract_text_from_file(v_file_path):
    """Extract text from a PDF, PPTX, or CSV file.

    The format is chosen from the file extension, compared
    case-insensitively.

    Args:
        v_file_path: Path of the file to read, as a string.

    Returns:
        For ``.pdf``: the text of every page, concatenated in page order.
        For ``.pptx``: the text of every shape that has a text frame, each
        followed by a newline, in slide/shape order.
        For ``.csv``: the ``DataFrame.to_string()`` rendering of the file.
        For any other extension: an empty string.

    Raises:
        ImportError: If the package needed for the requested format is not
            installed.
    """
    v_lowered_path = v_file_path.lower()

    if v_lowered_path.endswith('.pdf'):
        _require(fitz, "PyMuPDF")
        # The context manager closes the document even if a page fails.
        with fitz.open(v_file_path) as obj_pdf:
            return "".join(obj_page.get_text() for obj_page in obj_pdf)

    if v_lowered_path.endswith('.pptx'):
        _require(Presentation, "python-pptx")
        obj_ppt = Presentation(v_file_path)
        return "".join(
            obj_shape.text_frame.text + "\n"
            for obj_slide in obj_ppt.slides
            for obj_shape in obj_slide.shapes
            if obj_shape.has_text_frame
        )

    if v_lowered_path.endswith('.csv'):
        _require(pd, "pandas")
        return pd.read_csv(v_file_path).to_string()

    # Unsupported extension: callers receive an empty string, not an error.
    return ""