Work In Progress
Files changed:
- .gitignore (+1, -0)
- app.py (+56, -52)
- test_agents,py (+0, -42)
- test_agents.py (+193, -0)
.gitignore CHANGED
@@ -1,2 +1,3 @@
 .env
 __pycache__/
+logs/
app.py CHANGED
@@ -1,3 +1,4 @@
+import textwrap
 import re
 import os
 import gradio as gr
@@ -9,15 +10,10 @@ import pandas as pd
 from dotenv import load_dotenv
 
 # Import smolagents components
-from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool
-
-
+from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, FinalAnswerTool
 
 # Load environment variables from .env file
 load_dotenv()
-print(os.getenv("HF_TOKEN"))
-
-
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -33,26 +29,33 @@ search_tool = DuckDuckGoSearchTool()
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
+
+        self.store_questions_to_log_file = False
+
         # Create a filename with current date and time
         current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M")
         self.filename = f"questions_{current_time}.txt"
-        print(f"Questions will be written to {self.filename}")
 
-
-
-
+        if self.store_questions_to_log_file:
+            print(f"Questions will be written to {self.filename}")
+            # Clear the file if it exists or create a new one
+            with open(self.filename, 'w', encoding='utf-8') as f:
+                f.write('')  # Create empty file
 
 
         # Initialize the Large Language Model
         # The model is used by both agents in this simple setup
         self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+        # mistralai/Mixtral-8x7B-Instruct-v0.1
+        #self.model = HfApiModel(model_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 
         # Define the Web Search Agent
         # This agent is specialised for searching the web using a specific tool
         self.web_search_agent = CodeAgent(
             model=self.model,  # Assign the model to the agent [
-            tools=[DuckDuckGoSearchTool()],
+            tools=[DuckDuckGoSearchTool(),
+                   FinalAnswerTool()],  # Provide the web search tool
             name="web_search_agent",  # Give the agent a name
             # Describe its capability [
             description="Searches the web for information.",
@@ -64,7 +67,7 @@ class BasicAgent:
         # This agent manages tasks and delegates to other agents
         self.manager_agent = CodeAgent(
             model=self.model,  # Assign the model to the manager
-            tools=[],
+            tools=[FinalAnswerTool()],
             managed_agents=[self.web_search_agent],  # Specify the agents this manager oversees
             name="manager_agent",  # Give the manager agent a name
             description="Manages tasks by delegating to other agents.",  # Describe its role
@@ -82,63 +85,64 @@ class BasicAgent:
 
 
         # For all other questions, use the manager agent with web search
-        manager_prompt = dedent(f"""
-        I need to answer the following question accurately:
+        # manager_prompt = dedent(f"""
+        # I need to answer the following question accurately:
+
+        # {question}
+
+        # Please analyze this question and determine the best approach to answer it.
+        # If needed, use web search to find relevant information.
+        # Provide a concise, accurate answer to the question.
+        # """)
+
+        manager_prompt = textwrap.dedent(f"""
+        I need to answer the following question accurately:
 
-        {question}
+        {question}
 
-        Please analyze this question and determine the best approach to answer it.
-        If needed, use web search to find relevant information.
-        Provide a concise, accurate answer to the question.
+        Please analyze this question and determine the best approach to answer it.
+        If needed, use web search to find relevant information.
+        Provide a concise, accurate answer to the question.
+
+        IMPORTANT: If you identify that specialized tools are needed that you don't have access to, respond with:
+        "Missing Tool Warning: Can't process the question. Missing tool for [specify the missing capability]."
+
+        Examples of missing capabilities to check for:
+        - YouTube video analysis (if question mentions YouTube videos)
+        - Image analysis (if question refers to analyzing images)
+        - Audio file processing (if question refers to audio files)
+        - Excel/spreadsheet analysis (if question refers to Excel files)
+        - Chess position analysis (if question refers to chess positions)
+        - Code execution (if question requires running Python code)
+
+        Only use the "Missing Tool Warning" format if you CANNOT answer the question with your available tools.
+        If you can answer the question with web search or your existing knowledge, provide the answer.
         """)
 
         manager_agent_response = "I apologize, but I couldn't find an answer to this question."
-        answer = ""
         source = ""
         try:
             manager_agent_response = self.manager_agent.run(manager_prompt)
             source = "manager_agent"
-
-            #
-            # if
-            #
-
-            # answer_match = re.search(answer_pattern, manager_agent_response, re.IGNORECASE)
-
-            # if answer_match:
-            #     answer = answer_match.group(1).strip()
-            # else:
-            #     # Get the last paragraph
-            #     paragraphs = [p for p in answer.split('\n') if p.strip()]
-            #     if paragraphs:
-            #         answer = paragraphs[-1].strip()
-            #         source = "long_answer"
-
-            #return answer
-
+
+            # Check if the answer contains a missing tool warning
+            # if "Missing Tool Warning:" in manager_agent_response:
+            #     return manager_agent_response
+
         except Exception as e:
             print(f"Error in manager agent: {e}")
             source = f"Exception {e} "
 
-        # # Fall back to direct web search
-        # try:
-        #     answer = self.web_search_agent.run(f"Please find accurate information to answer: {question}")
-        #     source = "web_search_agent"
-
-        # except Exception as e2:
-        #     print(f"Error in web agent: {e2}")
-        #     answer="I apologize, but I couldn't find an answer to this question."
-
-
        # Append the question to the file
-
-
-
-
+        if self.store_questions_to_log_file:
+            with open(self.filename, 'a', encoding='utf-8') as f:
+                f.write(f"{question}\n")
+                f.write(f"ANSWER by {source}: {manager_agent_response}\n")
+                f.write(f"{'*'*50}\n")
 
         print(f"Final answer: {manager_agent_response}")
         return manager_agent_response
-
+
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
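For orientation, the agent hierarchy that app.py now builds can be condensed into the short, self-contained sketch below. It mirrors the wiring added in this commit (model ID, tool lists, agent names and the sample question are taken from the diff), but it is only an illustrative sketch assuming smolagents is installed and HF_TOKEN is set; the real BasicAgent also handles logging and the full "Missing Tool Warning" prompt shown above.

import textwrap
from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, FinalAnswerTool

# Shared model used by both agents
model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")

# Worker agent: performs web searches and reports back via FinalAnswerTool
web_search_agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool(), FinalAnswerTool()],
    name="web_search_agent",
    description="Searches the web for information.",
)

# Manager agent: holds only FinalAnswerTool itself and delegates research
# to the managed web_search_agent
manager_agent = CodeAgent(
    model=model,
    tools=[FinalAnswerTool()],
    managed_agents=[web_search_agent],
    name="manager_agent",
    description="Manages tasks by delegating to other agents.",
)

# Illustrative question; the real prompt adds the "Missing Tool Warning" instructions
question = "What is the capital of France?"
prompt = textwrap.dedent(f"""
    I need to answer the following question accurately:

    {question}
""")
print(manager_agent.run(prompt))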
test_agents,py DELETED
@@ -1,42 +0,0 @@
-# test_agents,py
-import os
-from dotenv import load_dotenv
-
-# Import the BasicAgent from your app module
-try:
-    from app import BasicAgent
-except ImportError as e:
-    print(f"Error importing BasicAgent from app.py: {e}")
-    print("Please ensure app.py is in the same directory or accessible in the Python path.")
-    exit(1)
-
-# --- Define Question-Answer Pairs ---
-# Note: The 'A' part is just for reference here; the agent will generate its own answer.
-QA_PAIRS = {
-    "What is the capital of France?": "Paris",
-    "Who wrote 'Hamlet'?": "William Shakespeare",
-    "What is the formula for water?": "H2O",
-    "How does photosynthesis work?": "Plants use sunlight, water, and carbon dioxide to create their own food.",
-    # Agent should find current data
-    "What is the current population of Earth?": "Approximately 8 billion",
-}
-
-
-def run_test_questions():
-    """Instantiates the agent and runs it on the predefined questions."""
-    print("--- Starting Agent Test ---")
-    # Load environment variables (needed for BasicAgent initialization)
-    load_dotenv()
-    print(f"HF_TOKEN found: {'Yes' if os.getenv('HF_TOKEN') else 'No'}")
-
-    agent = BasicAgent()
-
-    for question in QA_PAIRS.keys():
-        print(f"\n--- Testing Question ---")
-        print(f"Q: {question}")
-        answer = agent(question)  # Call the agent instance
-        print(f"Agent A: {answer}")
-
-
-if __name__ == "__main__":
-    run_test_questions()
test_agents.py ADDED
@@ -0,0 +1,193 @@
+# test_agents,py
+import json
+import time
+import datetime
+import os
+from dotenv import load_dotenv
+from app import BasicAgent  # Assuming app.py is accessible
+
+# Import the BasicAgent from your app module
+try:
+    from app import BasicAgent
+except ImportError as e:
+    print(f"Error importing BasicAgent from app.py: {e}")
+    print("Please ensure app.py is in the same directory or accessible in the Python path.")
+    exit(1)
+
+# --- Define Question-Answer Pairs ---
+# Note: The 'A' part is just for reference here; the agent will generate its own answer.
+QA_PAIRS = {
+    "What is the capital of France?": "Paris",
+    "Who wrote 'Hamlet'?": "William Shakespeare",
+    "What is the formula for water?": "H2O",
+    "How does photosynthesis work?": "Plants use sunlight, water, and carbon dioxide to create their own food.",
+    # Agent should find current data
+    "What is the current population of Earth?": "Approximately 8 billion",
+}
+
+
+def eval_GAIA(json_file_path="GAIA_level1_status.json"):
+    """
+    Loads GAIA level 1 questions from a JSON file, evaluates unanswered
+    questions using the BasicAgent, logs incorrect answers, updates the
+    status in the JSON data, and saves the updated data.
+
+    Args:
+        json_file_path (str): The path to the GAIA status JSON file.
+            Defaults to "GAIA_level1_status.json".
+    """
+    print(f"--- Starting GAIA Evaluation from {json_file_path} ---")
+
+    tmp_json_file_path = json_file_path.replace(".json", "_tmp.json")
+
+    # 2. Load GAIA data
+    try:
+        with open(json_file_path, 'r', encoding='utf-8') as f:
+            gaia_data = json.load(f)
+        print(
+            f"Successfully loaded {len(gaia_data)} questions from {json_file_path}.")
+    except FileNotFoundError:
+        print(f"Error: JSON file not found at {json_file_path}")
+        return
+    except json.JSONDecodeError:
+        print(f"Error: Could not decode JSON from {json_file_path}")
+        return
+    except Exception as e:
+        print(f"An unexpected error occurred loading the JSON file: {e}")
+        return
+
+    # 3. Initialize Agent, Log file, and Tracking variables
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"Error initializing BasicAgent: {e}")
+        print("Evaluation cannot proceed.")
+        return
+
+    log_filename = f"Response_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.log"
+    print(f"Incorrect answers will be logged to: {log_filename}")
+
+    total_questions = len(gaia_data)
+    processed_count = 0
+    correct_count = 0
+    initially_correct = sum(1 for item in gaia_data.values()
+                            if item.get("status") is True)
+    questions_to_process = total_questions - initially_correct
+
+    print(f"Found {initially_correct} questions already marked as correct.")
+    if questions_to_process == 0:
+        print("No questions with status=false found to process.")
+        # Still save the file in case formatting needs update, or just return
+        # For consistency, let's save it.
+    else:
+        print(f"Attempting to answer {questions_to_process} questions...")
+
+    start_time = time.time()
+
+    # 4. Process questions
+    for q_num, data in gaia_data.items():
+        if data.get("status") is False:
+            processed_count += 1
+            question = data.get("Q")
+            correct_answer = data.get("A")
+            status = data.get("status")  # Should be False here
+
+            if question is None or correct_answer is None:
+                print(f"Skipping question {q_num}: Missing 'Q' or 'A'.")
+                continue
+
+            elapsed_time = time.time() - start_time
+            print(
+                f"\nProcessing question {processed_count}/{questions_to_process} (ID: {q_num}) | Elapsed: {elapsed_time:.2f}s")
+            print(f"Q: {question[:100]}...")  # Print first 100 chars
+
+            try:
+                agent_response = agent(question)
+                print(f"Agent A: {agent_response}")
+                print(f"Correct A: {correct_answer}")
+
+                # Simple comparison (case-sensitive, exact match)
+                # Consider adding .strip() or lower() for more robust comparison if needed
+                if str(agent_response).strip() == str(correct_answer).strip():
+                    print(f"Result for Q {q_num}: CORRECT")
+                    gaia_data[q_num]["status"] = True
+                    correct_count += 1
+                else:
+                    print(f"Result for Q {q_num}: INCORRECT")
+                    # Append to log file
+                    with open(log_filename, 'a', encoding='utf-8') as log_f:
+                        log_f.write(f"*question number {q_num} *\n")
+                        log_f.write(f"Q: {question}\n")
+                        log_f.write(f"A: {correct_answer}\n")
+                        log_f.write(f"Agent: {agent_response}\n")
+                        log_f.write("<END>\n\n")
+
+            except Exception as e:
+                print(f"Error processing question {q_num} with agent: {e}")
+                # Optionally log agent errors too
+                with open(log_filename, 'a', encoding='utf-8') as log_f:
+                    log_f.write(f"*question number {q_num} *\n")
+                    log_f.write(f"Q: {question}\n")
+                    log_f.write(f"A: {correct_answer}\n")
+                    log_f.write(f"Agent: ERROR - {e}\n")
+                    log_f.write("<END>\n\n")
+
+            with open(tmp_json_file_path, 'w', encoding='utf-8') as f:
+                json.dump(gaia_data, f, indent=4, ensure_ascii=False)
+
+        else:
+            correct_count += 1
+            print(f"Skipping question {q_num}: Status is already True.")
+
+        # Exit
+        #break
+
+    end_time = time.time()
+    total_time = end_time - start_time
+
+    # 5. Summary
+    print("\n--- Evaluation Summary ---")
+    print(f"Processed {processed_count} questions with status=false.")
+    print(f"Correct answers provided by agent: {correct_count}")
+    final_correct_count = initially_correct + correct_count
+    print(
+        f"Total correct answers (initial + agent): {final_correct_count}/{total_questions}")
+    print(f"Total evaluation time: {total_time:.2f} seconds")
+
+    # 6. Save updated data
+    try:
+        with open(json_file_path, 'w', encoding='utf-8') as f:
+            json.dump(gaia_data, f, indent=4, ensure_ascii=False)
+        print(f"Successfully saved updated data to {json_file_path}")
+    except Exception as e:
+        print(f"Error saving updated data to {json_file_path}: {e}")
+
+    print("--- GAIA Evaluation Finished ---")
+
+
+def run_test_questions():
+    """Instantiates the agent and runs it on the predefined questions."""
+    print("--- Starting Agent Test ---")
+    # Load environment variables (needed for BasicAgent initialization)
+    #load_dotenv()
+    #print(f"HF_TOKEN found: {'Yes' if os.getenv('HF_TOKEN') else 'No'}")
+
+    agent = BasicAgent()
+
+    for question in QA_PAIRS.keys():
+        print(f"\n--- Testing Question ---")
+        print(f"Q: {question}")
+        answer = agent(question)  # Call the agent instance
+        print(f"Agent A: {answer}")
+
+
+if __name__ == "__main__":
+
+    load_dotenv()
+    if not os.getenv('HF_TOKEN'):
+        print("Warning: HF_TOKEN environment variable not found. Agent might fail.")
+
+    # run_test_questions()
+
+    #
+    eval_GAIA()
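The eval_GAIA() loop above reads and rewrites a GAIA_level1_status.json file keyed by question ID, where each entry carries the question text ("Q"), the reference answer ("A"), and a boolean "status" flag marking whether the agent has already answered it correctly. A minimal sketch of a compatible file, using placeholder questions borrowed from QA_PAIRS rather than real GAIA level 1 items, could be generated like this:

import json

# Hypothetical placeholder entries; real GAIA level 1 questions/answers would go here.
# Keys are question IDs; "status" marks whether the agent has already answered correctly.
example_status = {
    "1": {"Q": "What is the capital of France?", "A": "Paris", "status": False},
    "2": {"Q": "Who wrote 'Hamlet'?", "A": "William Shakespeare", "status": False},
}

with open("GAIA_level1_status.json", "w", encoding="utf-8") as f:
    json.dump(example_status, f, indent=4, ensure_ascii=False)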