AIxGAIA / agent.py
AC-Angelo93's picture
Update agent.py
4f8cd09 verified
raw
history blame
3.35 kB
# agent.py
import os
from typing import Optional, List, Mapping, Any
from google import genai
from langchain.tools import tool
from langchain.agents import initialize_agent, AgentType
from langchain_community.document_loaders import WikipediaLoader
from langchain.llms.base import LLM
# ——— 1) Gemini LLM wrapper ———
# Shared Gemini client, built once when this module is imported.
# NOTE: os.environ[...] raises KeyError at import time if GEMINI_API_KEY
# is not set, so the key must be present before this module is loaded.
GENAI_CLIENT = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
# Model name that BasicAgent hands to GeminiLLM together with the client.
GEMINI_MODEL = "gemini-1.5-pro" # or "gemini-1.0-turbo" per your trial
class GeminiLLM(LLM):
    """
    A LangChain-compatible wrapper around Google Gemini via the google-genai SDK.

    Construct it positionally as ``GeminiLLM(client, model)``; LangChain then
    drives it through ``_call``.
    """

    # LangChain's LLM base class is a pydantic model, so instance state has to
    # be declared as fields; assigning undeclared attributes in __init__ fails.
    # ``client`` is typed Any so pydantic does not try to validate the SDK
    # object.
    client: Any
    model: str

    def __init__(self, client: genai.Client, model: str, **kwargs: Any) -> None:
        """
        Args:
            client: An initialised google-genai client.
            model: Name of the Gemini model to query.
            **kwargs: Extra options forwarded to the LangChain base class.
        """
        # Route through the pydantic constructor so the fields are registered
        # and the base class is fully initialised.
        super().__init__(client=client, model=model, **kwargs)

    @property
    def _llm_type(self) -> str:
        """Short identifier LangChain uses for this LLM implementation."""
        return "gemini"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Parameters that distinguish this LLM configuration from others."""
        return {"model": self.model}

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """
        Send ``prompt`` to Gemini and return the generated text.

        Args:
            prompt: The full prompt text.
            stop: Optional stop sequences; the output is cut just before the
                earliest one that occurs.
            run_manager: Accepted for LangChain compatibility; not used.
            **kwargs: Accepted for LangChain compatibility; not used.

        Returns:
            The generated text, or an empty string when the response carries
            no text.
        """
        # In the google-genai SDK, generation lives under ``client.models``.
        response = self.client.models.generate_content(
            model=self.model,
            contents=[prompt],
        )
        # ``response.text`` may be None (e.g. no candidate returned); LangChain
        # expects a str.
        text = response.text or ""

        # Enforce stop sequences client-side. ReAct-style agents rely on this
        # to stop the model from inventing its own tool observations.
        if stop:
            hits = [pos for pos in (text.find(seq) for seq in stop if seq) if pos != -1]
            if hits:
                text = text[: min(hits)]
        return text
# ——— 2) Tools definitions ———
@tool
def calculator(expr: str) -> str:
    """
    Safely evaluates a math expression and returns the result.

    Supports numbers, parentheses, unary +/- and the operators
    + - * / // % **. Anything else is rejected. Problems are reported
    as an "Error: ..." string instead of raising.
    """
    import ast
    import operator

    # Deliberately not eval(): emptying __builtins__ is not a sandbox
    # (attribute access on literals can still reach arbitrary classes), and
    # the expression text is produced by the model. Only the node types
    # whitelisted below are ever evaluated.
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; exclude it so only real numbers pass.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError(f"unsupported constant: {value!r}")
            return value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # strip() because leading whitespace is an indentation error for
        # ast.parse in "eval" mode.
        return str(evaluate(ast.parse(expr.strip(), mode="eval")))
    except Exception as e:
        # Tool boundary: hand every failure back to the agent as text so one
        # bad expression does not abort the whole run.
        return f"Error: {e}"
@tool
def wiki_search(query: str) -> str:
    """
    Fetches up to 2 Wikipedia pages matching the query and concatenates their text.
    """
    # Ask the loader for at most two documents for this query.
    loader = WikipediaLoader(query=query, load_max_docs=2)
    pages = loader.load()
    # Collect each page's text, then separate the pages with a blank line.
    page_texts = [page.page_content for page in pages]
    return "\n\n".join(page_texts)
# ——— 3) Agent construction ———
class BasicAgent:
    """
    A Zero-Shot React agent powered by Google Gemini,
    with access to calculator and wiki_search tools.

    Instances are callable: ``agent(question)`` returns the answer text.
    """

    def __init__(self):
        """
        Build the LangChain agent around the module-level Gemini client.

        Raises:
            AssertionError: If GEMINI_API_KEY is missing from the environment.
        """
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is kept so existing
        # handlers still match.
        if "GEMINI_API_KEY" not in os.environ:
            raise AssertionError("❌ GEMINI_API_KEY not found in Secrets")

        # Wrap Gemini as a LangChain LLM.
        gemini_llm = GeminiLLM(GENAI_CLIENT, GEMINI_MODEL)

        # Initialize the agent with our tools.
        self.agent = initialize_agent(
            tools=[calculator, wiki_search],
            llm=gemini_llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,  # turn on logs so you can debug
            max_iterations=5,  # cap the reasoning/tool loop at 5 steps
            early_stopping_method="generate",
            # The prompt asks for a bare final answer, which the ReAct output
            # parser may reject; let the executor recover instead of raising.
            handle_parsing_errors=True,
        )

    def __call__(self, question: str) -> str:
        """
        Runs the agent on the question and returns only the final answer line.

        Args:
            question: The question to answer.

        Returns:
            The last non-empty line of the agent's output, stripped of
            surrounding whitespace; an empty string if the output is blank.
        """
        # Instructions + question
        prompt = (
            "You have access to two tools:\n"
            " • calculator(expr)\n"
            " • wiki_search(query)\n"
            "Think internally; output ONLY the final answer (no chain-of-thought).\n\n"
            f"Question: {question}"
        )

        # Run the agent
        raw_output = self.agent.run(prompt)

        # Extract the last non-empty line as the answer
        lines = [line.strip() for line in raw_output.splitlines() if line.strip()]
        return lines[-1] if lines else raw_output.strip()