Spaces:
Sleeping
Sleeping
File size: 3,888 Bytes
b8ac549 73152bb 667e88a b8ac549 c02d868 b8ac549 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import os

from dotenv import load_dotenv
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    GoogleSearchTool,
    PythonInterpreterTool,
    Tool,
    VisitWebpageTool,
    WikipediaSearchTool,
)
from tavily import TavilyClient
# Load environment variables from .env before any tool is constructed.
load_dotenv()

# Tools are instantiated directly in agent.py, at module level.
# -- search back-ends --
duck_search = DuckDuckGoSearchTool()
google_search = GoogleSearchTool()
wiki_search = WikipediaSearchTool()
tavily_search = TavilyClient()
# -- page fetching, Python execution and final-answer reporting --
visit_page = VisitWebpageTool()
do_python = PythonInterpreterTool()
final_answer = FinalAnswerTool()
# Audio-to-text transcription tool, created from a Hugging Face Space.
speech_to_text_tool = Tool.from_space(
    "hf-audio/whisper-large-v3-turbo",
    api_name="/predict",
    name="speech_to_text_tool",
    # The description is runtime text handed to the tool; kept verbatim.
    description=(
        "Convierte un archivo de audio a texto. Usa este comando:\n"
        "'speech_to_text_tool(filename)'"
    ),
)
# Visual question-answering tool, created from a Hugging Face Space.
visual_qa_tool = Tool.from_space(
    "sitammeur/PicQ",
    api_name="/predict",
    name="visual_qa_tool",
    # The description is runtime text handed to the tool; kept verbatim.
    description=(
        "Responde preguntas sobre una imagen proporcionada.\n"
        "Usa el comando: visual_qa_tool(question=<pregunta>, image=<nombre_de_imagen>)"
    ),
)
# Set up the Azure OpenAI model; every setting is read from app_tokens.
from smolagents import AzureOpenAIServerModel
import app_tokens

model = AzureOpenAIServerModel(
    api_version=app_tokens.OPENAI_API_VERSION,
    api_key=app_tokens.AZURE_OPENAI_API_KEY,
    azure_endpoint=app_tokens.AZURE_OPENAI_ENDPOINT,
    model_id=app_tokens.AZURE_OPENAI_MODEL,
)
class BasicAgent:
def __init__(self):
# Agente de búsqueda en la web
self.web_agent = CodeAgent(
model=model,
tools=[visit_page, wiki_search, google_search, final_answer],
max_steps=8,
name="web_agent",
description="Este agente realiza búsquedas en la web.",
add_base_tools=True
)
# Agente de conversión de audio a texto
self.audio_agent = CodeAgent(
model=model,
tools=[speech_to_text_tool, final_answer],
max_steps=4,
name="audio_agent",
description="Este agente convierte audio a texto.",
add_base_tools=True
)
# Agente para ejecución de código Python
self.py_agent = CodeAgent(
model=model,
tools=[do_python, final_answer],
additional_authorized_imports=["json", "pandas", "numpy", "regex"],
max_steps=8,
name="python_code_agent",
description="Este agente ejecuta y valida código Python.",
add_base_tools=True
)
# Agente para análisis de imágenes
self.visual_agent = CodeAgent(
model=model,
tools=[visual_qa_tool, final_answer],
max_steps=4,
name="visual_qa_agent",
description="Este agente responde preguntas sobre imágenes.",
add_base_tools=True
)
# Agente principal que coordina otros agentes
self.manager_agent = CodeAgent(
model=model,
tools=[],
managed_agents=[self.web_agent, self.audio_agent, self.py_agent, self.visual_agent],
planning_interval=8,
verbosity_level=2,
max_steps=12,
add_base_tools=True
)
def forward(self, question: str, attachment: str = None) -> str:
if attachment:
result = self.manager_agent.run(question, additional_args={"attachment": attachment})
else:
result = self.manager_agent.run(question)
return result
|