mhattingpete's picture
add first version of agent
912f746
raw
history blame
1.73 kB
import asyncio
from src.file_handler.handlers import (
convert_docx_to_markdown,
convert_excel_bytes_to_llm_format,
convert_file_to_string,
convert_image_to_pillow,
convert_pdf_to_markdown,
)
async def aparse_file(task_id: str, file_name: str, api_base_url: str) -> str:
    """
    Parses a file and returns its content in a format suitable for LLMs.

    The converter is selected from the extension of ``file_name`` (the text
    after the last dot). The extension is compared case-insensitively, so
    ``report.PDF`` and ``report.pdf`` are routed to the same converter.
    Names with an unrecognised extension (or none) go to the generic
    string converter.

    Args:
        task_id (str): The ID of the task; forwarded to the converter.
        file_name (str): The name of the file; only its extension is used.
        api_base_url (str): The base URL of the API; forwarded to the converter.

    Returns:
        str: The content of the file in a format suitable for LLMs.
        For ``mp3`` files no converter is called and ``None`` is returned.
        NOTE(review): the image branch returns whatever
        ``convert_image_to_pillow`` produces, which may not be a ``str`` --
        confirm against the handler.
    """
    # Lower-case the extension so "PNG", "Pdf", "XLSX", ... are not misrouted
    # to the generic fallback converter.
    file_extension = file_name.rsplit(".", 1)[-1].lower()

    if file_extension == "xlsx":
        return await convert_excel_bytes_to_llm_format(task_id, api_base_url)
    elif file_extension == "docx":
        return await convert_docx_to_markdown(task_id, api_base_url)
    elif file_extension in ("jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp"):
        return await convert_image_to_pillow(task_id, api_base_url)
    elif file_extension == "pdf":
        return await convert_pdf_to_markdown(task_id, api_base_url)
    elif file_extension == "mp3":
        # Audio is deliberately not converted here; callers receive None.
        return None
    else:
        return await convert_file_to_string(task_id, api_base_url)
def parse_file(task_id: str, file_name: str, api_base_url: str) -> str:
    """
    Synchronous entry point that runs ``aparse_file`` to completion.

    Args:
        task_id (str): The ID of the task.
        file_name (str): The name of the file.
        api_base_url (str): The base URL of the API.

    Returns:
        str: Whatever ``aparse_file`` returns for the same arguments.
    """
    # Build the coroutine first, then drive it with asyncio.run.
    pending = aparse_file(task_id, file_name, api_base_url)
    return asyncio.run(pending)