File size: 1,732 Bytes
912f746 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import asyncio
from src.file_handler.handlers import (
convert_docx_to_markdown,
convert_excel_bytes_to_llm_format,
convert_file_to_string,
convert_image_to_pillow,
convert_pdf_to_markdown,
)
async def aparse_file(task_id: str, file_name: str, api_base_url: str) -> str:
    """
    Parse a task's attached file into a representation suitable for LLMs.

    The converter is selected from the extension of ``file_name``. Matching is
    case-insensitive, so ``REPORT.PDF`` is routed exactly like ``report.pdf``.
    A name that contains no dot is treated as having no extension and is
    handed to the generic converter.

    Args:
        task_id (str): The ID of the task; forwarded to the selected converter.
        file_name (str): The name of the file; only its extension is inspected.
        api_base_url (str): The base URL of the API; forwarded to the selected
            converter.

    Returns:
        str: The value produced by the selected converter, or ``None`` for
        ``.mp3`` files, for which no converter is invoked.
        NOTE(review): the image branch calls ``convert_image_to_pillow``, whose
        name suggests a non-string result -- confirm against the handler.
    """
    image_extensions = {"jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"}

    # rpartition leaves `dot` empty when the name has no "." at all; in that
    # case the whole name must not be mistaken for an extension.
    _, dot, suffix = file_name.rpartition(".")
    file_extension = suffix.lower() if dot else ""

    if file_extension == "mp3":
        # No converter is invoked for audio; callers receive None.
        return None
    if file_extension == "xlsx":
        return await convert_excel_bytes_to_llm_format(task_id, api_base_url)
    if file_extension == "docx":
        return await convert_docx_to_markdown(task_id, api_base_url)
    if file_extension in image_extensions:
        return await convert_image_to_pillow(task_id, api_base_url)
    if file_extension == "pdf":
        return await convert_pdf_to_markdown(task_id, api_base_url)

    # Anything else (including extension-less names) uses the generic reader.
    return await convert_file_to_string(task_id, api_base_url)
def parse_file(task_id: str, file_name: str, api_base_url: str) -> str:
    """
    Synchronous entry point that runs ``aparse_file`` to completion.

    The coroutine is executed with ``asyncio.run``, so this function is meant
    for callers that are not already inside a running event loop.

    Args:
        task_id (str): The ID of the task.
        file_name (str): The name of the file.
        api_base_url (str): The base URL of the API.

    Returns:
        str: Whatever ``aparse_file`` returns for the same arguments.
    """
    pending = aparse_file(
        task_id=task_id,
        file_name=file_name,
        api_base_url=api_base_url,
    )
    return asyncio.run(pending)
|