import asyncio
from typing import Optional

from src.file_handler.handlers import (
    convert_docx_to_markdown,
    convert_excel_bytes_to_llm_format,
    convert_file_to_string,
    convert_image_to_pillow,
    convert_pdf_to_markdown,
)

# Extensions (lowercase, without the dot) that are routed to the image handler.
_IMAGE_EXTENSIONS = frozenset({"jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"})


async def aparse_file(task_id: str, file_name: str, api_base_url: str) -> Optional[str]:
    """
    Parses a file and returns its content in a format suitable for LLMs.

    The handler is chosen from the text after the last "." in ``file_name``,
    compared case-insensitively. A name containing no "." is compared as a
    whole, exactly as if it were the extension. ``file_name`` is only used to
    pick the handler; the handlers themselves receive ``task_id`` and
    ``api_base_url``.

    Args:
        task_id (str): The ID of the task.
        file_name (str): The name of the file.
        api_base_url (str): The base URL of the API.

    Returns:
        Optional[str]: Whatever the selected handler returns, or ``None`` for
        "mp3" files, for which no handler is called.
        NOTE(review): the image branch returns the result of
        ``convert_image_to_pillow``; its return type is not visible here and
        is presumably a Pillow image rather than a string -- confirm.
    """
    # Lowercase so that e.g. "REPORT.PDF" is routed like "report.pdf" instead
    # of silently falling through to the generic string handler.
    file_extension = file_name.split(".")[-1].lower()

    if file_extension == "xlsx":
        return await convert_excel_bytes_to_llm_format(task_id, api_base_url)
    if file_extension == "docx":
        return await convert_docx_to_markdown(task_id, api_base_url)
    if file_extension in _IMAGE_EXTENSIONS:
        return await convert_image_to_pillow(task_id, api_base_url)
    if file_extension == "pdf":
        return await convert_pdf_to_markdown(task_id, api_base_url)
    if file_extension == "mp3":
        # No conversion is performed for audio; callers must handle None.
        return None
    # Every other extension goes through the generic file-to-string handler.
    return await convert_file_to_string(task_id, api_base_url)


def parse_file(task_id: str, file_name: str, api_base_url: str) -> Optional[str]:
    """
    Synchronous wrapper around :func:`aparse_file`.

    Runs the coroutine to completion with ``asyncio.run``. Because of that it
    must not be called from code that is already running inside an event loop
    (``asyncio.run`` raises ``RuntimeError`` in that case); use
    ``await aparse_file(...)`` there instead.

    Args:
        task_id (str): The ID of the task.
        file_name (str): The name of the file.
        api_base_url (str): The base URL of the API.

    Returns:
        Optional[str]: The value returned by :func:`aparse_file`, which is
        ``None`` for "mp3" files.
    """
    return asyncio.run(aparse_file(task_id, file_name, api_base_url))