# Spaces: Sleeping
import os

from dotenv import load_dotenv
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    GoogleSearchTool,
    PythonInterpreterTool,
    Tool,
    VisitWebpageTool,
    WikipediaSearchTool,
)
from tavily import TavilyClient
# Load environment variables from .env before any of the tools below are built.
load_dotenv()

# Initialize the tools directly in agent.py (once, at import time).
duck_search = DuckDuckGoSearchTool()
google_search = GoogleSearchTool()
visit_page = VisitWebpageTool()
wiki_search = WikipediaSearchTool()
do_python = PythonInterpreterTool()
final_answer = FinalAnswerTool()
# NOTE(review): this is a TavilyClient (from `tavily`), not a smolagents Tool,
# and it is not handed to any agent in this file — confirm it is still needed.
tavily_search = TavilyClient()
# Audio-to-text transcription tool, backed by the
# "hf-audio/whisper-large-v3-turbo" Space.
# The description is a runtime string shown to the agent; it is kept in its
# original language and its second line intentionally starts at column 0.
speech_to_text_tool = Tool.from_space(
    "hf-audio/whisper-large-v3-turbo",
    name="speech_to_text_tool",
    description="""Convierte un archivo de audio a texto. Usa este comando:
'speech_to_text_tool(filename)'""",
    api_name="/predict",
)
# Visual question-answering tool, backed by the "sitammeur/PicQ" Space.
# The description is a runtime string shown to the agent; it is kept in its
# original language and its second line intentionally starts at column 0.
visual_qa_tool = Tool.from_space(
    "sitammeur/PicQ",
    name="visual_qa_tool",
    description="""Responde preguntas sobre una imagen proporcionada.
Usa el comando: visual_qa_tool(question=<pregunta>, image=<nombre_de_imagen>)""",
    api_name="/predict",
)
# Initialize the Azure OpenAI model.
# These imports are kept here (not hoisted to the top of the file) so that
# they still execute after load_dotenv() has run.
from smolagents import AzureOpenAIServerModel
import app_tokens

# All connection settings are read from the project-local app_tokens module.
model = AzureOpenAIServerModel(
    model_id=app_tokens.AZURE_OPENAI_MODEL,
    azure_endpoint=app_tokens.AZURE_OPENAI_ENDPOINT,
    api_key=app_tokens.AZURE_OPENAI_API_KEY,
    api_version=app_tokens.OPENAI_API_VERSION,
)
class BasicAgent:
    """Coordinator that answers a question by delegating to specialised agents.

    Construction builds four ``CodeAgent`` workers (web search, audio
    transcription, Python execution, image QA) and one manager ``CodeAgent``
    that receives them as ``managed_agents``. All five use the module-level
    ``model`` and the module-level tool instances.

    The ``name``/``description`` strings passed to the agents are runtime
    text and are deliberately left in their original language.
    """

    def __init__(self):
        """Build the worker agents and the manager agent that coordinates them."""
        # Web search agent.
        self.web_agent = CodeAgent(
            model=model,
            tools=[visit_page, wiki_search, google_search, final_answer],
            max_steps=8,
            name="web_agent",
            description="Este agente realiza búsquedas en la web.",
            add_base_tools=True,
        )

        # Audio-to-text agent.
        self.audio_agent = CodeAgent(
            model=model,
            tools=[speech_to_text_tool, final_answer],
            max_steps=4,
            name="audio_agent",
            description="Este agente convierte audio a texto.",
            add_base_tools=True,
        )

        # Python code execution agent; the listed modules are the extra
        # imports its generated code is allowed to use.
        self.py_agent = CodeAgent(
            model=model,
            tools=[do_python, final_answer],
            additional_authorized_imports=["json", "pandas", "numpy", "regex"],
            max_steps=8,
            name="python_code_agent",
            description="Este agente ejecuta y valida código Python.",
            add_base_tools=True,
        )

        # Image analysis agent.
        self.visual_agent = CodeAgent(
            model=model,
            tools=[visual_qa_tool, final_answer],
            max_steps=4,
            name="visual_qa_agent",
            description="Este agente responde preguntas sobre imágenes.",
            add_base_tools=True,
        )

        # Main agent: has no tools of its own and coordinates the four
        # agents above through ``managed_agents``.
        self.manager_agent = CodeAgent(
            model=model,
            tools=[],
            managed_agents=[
                self.web_agent,
                self.audio_agent,
                self.py_agent,
                self.visual_agent,
            ],
            planning_interval=8,
            verbosity_level=2,
            max_steps=12,
            add_base_tools=True,
        )

    def forward(self, question: str, attachment: str = None) -> str:
        """Run the manager agent on ``question`` and return its result.

        Args:
            question: The task text handed to the manager agent.
            attachment: Optional value forwarded to the run as
                ``additional_args={"attachment": attachment}``. Any falsy
                value (``None``, ``""``) means "no attachment" and the
                argument is omitted entirely.

        Returns:
            Whatever ``self.manager_agent.run(...)`` returns.
        """
        if not attachment:
            return self.manager_agent.run(question)
        return self.manager_agent.run(
            question, additional_args={"attachment": attachment}
        )