import os import PIL.Image from dotenv import load_dotenv from loguru import logger from smolagents import ( AzureOpenAIServerModel, CodeAgent, GoogleSearchTool, PythonInterpreterTool, SpeechToTextTool, VisitWebpageTool, ) from src.file_handler.parse import parse_file from src.tools.reasoning import ReasoningToolkit from src.tools.reverse_question import reverse_question from src.tracing import add_tracing load_dotenv() add_tracing() class Agent: def __init__(self): model = AzureOpenAIServerModel( model_id=os.getenv("AZURE_OPENAI_MODEL_ID"), azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), api_key=os.getenv("AZURE_OPENAI_API_KEY"), api_version=os.getenv("OPENAI_API_VERSION"), ) reasoning_toolkit = ReasoningToolkit() tools = [ GoogleSearchTool(provider="serper"), VisitWebpageTool(), PythonInterpreterTool(), SpeechToTextTool(), *reasoning_toolkit.tools, reverse_question, ] self.agent = CodeAgent( tools=tools, model=model, ) self.user_prompt = """ I will ask you a question. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. You MUST use the following tools: - think, used before all other tool call and before the final answer - analyze, used after all other tool call and before the final answer Question: {question} Attached content: {content} """ logger.info("BasicAgent initialized.") def __call__( self, question: str, task_id: str, file_name: str, api_url: str ) -> str: logger.info( f"Agent received question (first 50 chars): {question[:50]}..." ) images = None content = "" if file_name: content = parse_file(task_id, file_name, api_url) if content: if isinstance( content, PIL.Image.Image ): # Parse content as image images = [content] else: # Append content to question logger.info(f"Question with content: {question}") prompt = self.user_prompt.format(question=question, content=content) answer = self.agent.run(prompt, images=images) answer = str(answer).replace("FINAL ANSWER:", "").strip() logger.info(f"Agent returning answer: {answer}") return answer if __name__ == "__main__": import requests api_url = "https://agents-course-unit4-scoring.hf.space" question_url = f"{api_url}/random-question" data = requests.get(question_url).json() agent = Agent() task_id = data["task_id"] question = data["question"] file_name = data["file_name"] logger.info( f"Task ID: {task_id}\nQuestion: {question}\nFile Name: {file_name}\n\n" ) answer = agent(question, task_id, file_name, api_url)