TzurVaich committed on
Commit
e62e166
·
1 Parent(s): e6036f2

Work In Progress

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +56 -52
  3. test_agents,py +0 -42
  4. test_agents.py +193 -0
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  .env
2
  __pycache__/
 
 
1
  .env
2
  __pycache__/
3
+ logs/
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import re
2
  import os
3
  import gradio as gr
@@ -9,15 +10,10 @@ import pandas as pd
9
  from dotenv import load_dotenv
10
 
11
  # Import smolagents components
12
- from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool
13
-
14
-
15
 
16
  # Load environment variables from .env file
17
  load_dotenv()
18
- print(os.getenv("HF_TOKEN"))
19
-
20
-
21
 
22
  # (Keep Constants as is)
23
  # --- Constants ---
@@ -33,26 +29,33 @@ search_tool = DuckDuckGoSearchTool()
33
  class BasicAgent:
34
  def __init__(self):
35
  print("BasicAgent initialized.")
 
 
 
36
  # Create a filename with current date and time
37
  current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M")
38
  self.filename = f"questions_{current_time}.txt"
39
- print(f"Questions will be written to {self.filename}")
40
 
41
- # Clear the file if it exists or create a new one
42
- with open(self.filename, 'w', encoding='utf-8') as f:
43
- f.write('') # Create empty file
 
 
44
 
45
 
46
  # Initialize the Large Language Model
47
  # The model is used by both agents in this simple setup
48
  self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 
 
49
 
50
 
51
  # Define the Web Search Agent
52
  # This agent is specialised for searching the web using a specific tool
53
  self.web_search_agent = CodeAgent(
54
  model=self.model, # Assign the model to the agent [
55
- tools=[DuckDuckGoSearchTool()], # Provide the web search tool
 
56
  name="web_search_agent", # Give the agent a name
57
  # Describe its capability [
58
  description="Searches the web for information.",
@@ -64,7 +67,7 @@ class BasicAgent:
64
  # This agent manages tasks and delegates to other agents
65
  self.manager_agent = CodeAgent(
66
  model=self.model, # Assign the model to the manager
67
- tools=[],
68
  managed_agents=[self.web_search_agent], # Specify the agents this manager oversees
69
  name="manager_agent", # Give the manager agent a name
70
  description="Manages tasks by delegating to other agents.", # Describe its role
@@ -82,63 +85,64 @@ class BasicAgent:
82
 
83
 
84
  # For all other questions, use the manager agent with web search
85
- manager_prompt = dedent(f"""
86
- I need to answer the following question accurately:
 
 
 
 
 
 
 
 
 
 
87
 
88
- {question}
89
 
90
- Please analyze this question and determine the best approach to answer it.
91
- If needed, use web search to find relevant information.
92
- Provide a concise, accurate answer to the question.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  """)
94
 
95
  manager_agent_response = "I apologize, but I couldn't find an answer to this question."
96
- answer = ""
97
  source = ""
98
  try:
99
  manager_agent_response = self.manager_agent.run(manager_prompt)
100
  source = "manager_agent"
101
-
102
- # # Clean up the answer - remove explanations, etc.
103
- # if len(manager_agent_response) > 300:
104
- # # Try to extract just the answer
105
- # answer_pattern = r'(?:answer|result)(?:\s+is)?(?:\s*:)?\s*(.+?)(?:\.|$)'
106
- # answer_match = re.search(answer_pattern, manager_agent_response, re.IGNORECASE)
107
-
108
- # if answer_match:
109
- # answer = answer_match.group(1).strip()
110
- # else:
111
- # # Get the last paragraph
112
- # paragraphs = [p for p in answer.split('\n') if p.strip()]
113
- # if paragraphs:
114
- # answer = paragraphs[-1].strip()
115
- # source = "long_answer"
116
-
117
- #return answer
118
-
119
  except Exception as e:
120
  print(f"Error in manager agent: {e}")
121
  source = f"Exception {e} "
122
 
123
- # # Fall back to direct web search
124
- # try:
125
- # answer = self.web_search_agent.run(f"Please find accurate information to answer: {question}")
126
- # source = "web_search_agent"
127
-
128
- # except Exception as e2:
129
- # print(f"Error in web agent: {e2}")
130
- # answer="I apologize, but I couldn't find an answer to this question."
131
-
132
-
133
  # Append the question to the file
134
- with open(self.filename, 'a', encoding='utf-8') as f:
135
- f.write(f"{question}\n")
136
- f.write(f"ANSWER by {source}: {manager_agent_response}\n")
137
- f.write(f"{'*'*50}\n")
 
138
 
139
  print(f"Final answer: {manager_agent_response}")
140
  return manager_agent_response
141
-
142
 
143
  def run_and_submit_all( profile: gr.OAuthProfile | None):
144
  """
 
1
+ import textwrap
2
  import re
3
  import os
4
  import gradio as gr
 
10
  from dotenv import load_dotenv
11
 
12
  # Import smolagents components
13
+ from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, FinalAnswerTool
 
 
14
 
15
  # Load environment variables from .env file
16
  load_dotenv()
 
 
 
17
 
18
  # (Keep Constants as is)
19
  # --- Constants ---
 
29
  class BasicAgent:
30
  def __init__(self):
31
  print("BasicAgent initialized.")
32
+
33
+ self.store_questions_to_log_file = False
34
+
35
  # Create a filename with current date and time
36
  current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M")
37
  self.filename = f"questions_{current_time}.txt"
 
38
 
39
+ if self.store_questions_to_log_file:
40
+ print(f"Questions will be written to {self.filename}")
41
+ # Clear the file if it exists or create a new one
42
+ with open(self.filename, 'w', encoding='utf-8') as f:
43
+ f.write('') # Create empty file
44
 
45
 
46
  # Initialize the Large Language Model
47
  # The model is used by both agents in this simple setup
48
  self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
49
+ # mistralai/Mixtral-8x7B-Instruct-v0.1
50
+ #self.model = HfApiModel(model_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
51
 
52
 
53
  # Define the Web Search Agent
54
  # This agent is specialised for searching the web using a specific tool
55
  self.web_search_agent = CodeAgent(
56
  model=self.model, # Assign the model to the agent [
57
+ tools=[DuckDuckGoSearchTool(),
58
+ FinalAnswerTool()], # Provide the web search tool
59
  name="web_search_agent", # Give the agent a name
60
  # Describe its capability [
61
  description="Searches the web for information.",
 
67
  # This agent manages tasks and delegates to other agents
68
  self.manager_agent = CodeAgent(
69
  model=self.model, # Assign the model to the manager
70
+ tools=[FinalAnswerTool()],
71
  managed_agents=[self.web_search_agent], # Specify the agents this manager oversees
72
  name="manager_agent", # Give the manager agent a name
73
  description="Manages tasks by delegating to other agents.", # Describe its role
 
85
 
86
 
87
  # For all other questions, use the manager agent with web search
88
+ # manager_prompt = dedent(f"""
89
+ # I need to answer the following question accurately:
90
+
91
+ # {question}
92
+
93
+ # Please analyze this question and determine the best approach to answer it.
94
+ # If needed, use web search to find relevant information.
95
+ # Provide a concise, accurate answer to the question.
96
+ # """)
97
+
98
+ manager_prompt = textwrap.dedent(f"""
99
+ I need to answer the following question accurately:
100
 
101
+ {question}
102
 
103
+ Please analyze this question and determine the best approach to answer it.
104
+ If needed, use web search to find relevant information.
105
+ Provide a concise, accurate answer to the question.
106
+
107
+ IMPORTANT: If you identify that specialized tools are needed that you don't have access to, respond with:
108
+ "Missing Tool Warning: Can't process the question. Missing tool for [specify the missing capability]."
109
+
110
+ Examples of missing capabilities to check for:
111
+ - YouTube video analysis (if question mentions YouTube videos)
112
+ - Image analysis (if question refers to analyzing images)
113
+ - Audio file processing (if question refers to audio files)
114
+ - Excel/spreadsheet analysis (if question refers to Excel files)
115
+ - Chess position analysis (if question refers to chess positions)
116
+ - Code execution (if question requires running Python code)
117
+
118
+ Only use the "Missing Tool Warning" format if you CANNOT answer the question with your available tools.
119
+ If you can answer the question with web search or your existing knowledge, provide the answer.
120
  """)
121
 
122
  manager_agent_response = "I apologize, but I couldn't find an answer to this question."
 
123
  source = ""
124
  try:
125
  manager_agent_response = self.manager_agent.run(manager_prompt)
126
  source = "manager_agent"
127
+
128
+ # Check if the answer contains a missing tool warning
129
+ # if "Missing Tool Warning:" in manager_agent_response:
130
+ # return manager_agent_response
131
+
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  except Exception as e:
133
  print(f"Error in manager agent: {e}")
134
  source = f"Exception {e} "
135
 
 
 
 
 
 
 
 
 
 
 
136
  # Append the question to the file
137
+ if self.store_questions_to_log_file:
138
+ with open(self.filename, 'a', encoding='utf-8') as f:
139
+ f.write(f"{question}\n")
140
+ f.write(f"ANSWER by {source}: {manager_agent_response}\n")
141
+ f.write(f"{'*'*50}\n")
142
 
143
  print(f"Final answer: {manager_agent_response}")
144
  return manager_agent_response
145
+
146
 
147
  def run_and_submit_all( profile: gr.OAuthProfile | None):
148
  """
test_agents,py DELETED
@@ -1,42 +0,0 @@
1
- # test_agents,py
2
- import os
3
- from dotenv import load_dotenv
4
-
5
- # Import the BasicAgent from your app module
6
- try:
7
- from app import BasicAgent
8
- except ImportError as e:
9
- print(f"Error importing BasicAgent from app.py: {e}")
10
- print("Please ensure app.py is in the same directory or accessible in the Python path.")
11
- exit(1)
12
-
13
- # --- Define Question-Answer Pairs ---
14
- # Note: The 'A' part is just for reference here; the agent will generate its own answer.
15
- QA_PAIRS = {
16
- "What is the capital of France?": "Paris",
17
- "Who wrote 'Hamlet'?": "William Shakespeare",
18
- "What is the formula for water?": "H2O",
19
- "How does photosynthesis work?": "Plants use sunlight, water, and carbon dioxide to create their own food.",
20
- # Agent should find current data
21
- "What is the current population of Earth?": "Approximately 8 billion",
22
- }
23
-
24
-
25
- def run_test_questions():
26
- """Instantiates the agent and runs it on the predefined questions."""
27
- print("--- Starting Agent Test ---")
28
- # Load environment variables (needed for BasicAgent initialization)
29
- load_dotenv()
30
- print(f"HF_TOKEN found: {'Yes' if os.getenv('HF_TOKEN') else 'No'}")
31
-
32
- agent = BasicAgent()
33
-
34
- for question in QA_PAIRS.keys():
35
- print(f"\n--- Testing Question ---")
36
- print(f"Q: {question}")
37
- answer = agent(question) # Call the agent instance
38
- print(f"Agent A: {answer}")
39
-
40
-
41
- if __name__ == "__main__":
42
- run_test_questions()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_agents.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # test_agents.py
2
+ import json
3
+ import time
4
+ import datetime
5
+ import os
6
+ from dotenv import load_dotenv
7
+ from app import BasicAgent # Assuming app.py is accessible
8
+
9
+ # Import the BasicAgent from your app module
10
+ try:
11
+ from app import BasicAgent
12
+ except ImportError as e:
13
+ print(f"Error importing BasicAgent from app.py: {e}")
14
+ print("Please ensure app.py is in the same directory or accessible in the Python path.")
15
+ exit(1)
16
+
17
+ # --- Define Question-Answer Pairs ---
18
+ # Note: The 'A' part is just for reference here; the agent will generate its own answer.
19
+ QA_PAIRS = {
20
+ "What is the capital of France?": "Paris",
21
+ "Who wrote 'Hamlet'?": "William Shakespeare",
22
+ "What is the formula for water?": "H2O",
23
+ "How does photosynthesis work?": "Plants use sunlight, water, and carbon dioxide to create their own food.",
24
+ # Agent should find current data
25
+ "What is the current population of Earth?": "Approximately 8 billion",
26
+ }
27
+
28
+
29
def eval_GAIA(json_file_path="GAIA_level1_status.json", agent=None):
    """
    Evaluate the pending GAIA questions stored in a JSON status file.

    The file is expected to hold a JSON object mapping a question id to a
    record with the keys "Q" (question), "A" (reference answer) and
    "status". Every record whose status is exactly ``False`` is sent to the
    agent; the stripped string form of the response is compared with the
    stripped reference answer. Matching records get ``status = True``;
    mismatches and agent errors are appended to a timestamped
    ``Response_<YYYYmmdd_HHMM>.log`` file in the current directory. After
    each attempted question the data is checkpointed to a ``*_tmp`` sibling
    of the input file, and at the end it is written back to
    ``json_file_path``.

    Args:
        json_file_path (str): The path to the GAIA status JSON file.
            Defaults to "GAIA_level1_status.json".
        agent: Optional callable taking the question text and returning an
            answer. When omitted, a new ``BasicAgent`` is created.

    Returns:
        None. Progress, errors and the final summary are printed.
    """
    print(f"--- Starting GAIA Evaluation from {json_file_path} ---")

    # splitext only touches the real extension; str.replace would rewrite
    # every ".json" in the path and return the input path unchanged when
    # there is none.
    path_root, path_ext = os.path.splitext(json_file_path)
    tmp_json_file_path = f"{path_root}_tmp{path_ext}"

    # Load GAIA data
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            gaia_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: JSON file not found at {json_file_path}")
        return
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from {json_file_path}")
        return
    except Exception as e:
        print(f"An unexpected error occurred loading the JSON file: {e}")
        return

    # Everything below iterates .items()/.values(), so reject other roots.
    if not isinstance(gaia_data, dict):
        print(f"Error: Expected a JSON object of questions in {json_file_path}")
        return
    print(
        f"Successfully loaded {len(gaia_data)} questions from {json_file_path}.")

    # Initialize agent (unless the caller supplied one), log file, counters
    if agent is None:
        try:
            agent = BasicAgent()
        except Exception as e:
            print(f"Error initializing BasicAgent: {e}")
            print("Evaluation cannot proceed.")
            return

    log_filename = f"Response_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.log"
    print(f"Incorrect answers will be logged to: {log_filename}")

    total_questions = len(gaia_data)
    processed_count = 0
    correct_count = 0  # answers the agent got right during THIS run only
    initially_correct = sum(1 for item in gaia_data.values()
                            if item.get("status") is True)
    # Use the same predicate as the loop below so the progress counter and
    # the "nothing to do" message agree with what is actually processed.
    questions_to_process = sum(1 for item in gaia_data.values()
                               if item.get("status") is False)

    print(f"Found {initially_correct} questions already marked as correct.")
    if questions_to_process == 0:
        # The file is still re-saved at the end for consistent formatting.
        print("No questions with status=false found to process.")
    else:
        print(f"Attempting to answer {questions_to_process} questions...")

    start_time = time.time()

    # Process questions
    for q_num, data in gaia_data.items():
        status = data.get("status")
        if status is True:
            # Already counted in initially_correct; do not count it again.
            print(f"Skipping question {q_num}: Status is already True.")
            continue
        if status is not False:
            print(f"Skipping question {q_num}: Unexpected status {status!r}.")
            continue

        processed_count += 1
        question = data.get("Q")
        correct_answer = data.get("A")

        if question is None or correct_answer is None:
            print(f"Skipping question {q_num}: Missing 'Q' or 'A'.")
            continue

        elapsed_time = time.time() - start_time
        print(
            f"\nProcessing question {processed_count}/{questions_to_process} (ID: {q_num}) | Elapsed: {elapsed_time:.2f}s")
        print(f"Q: {question[:100]}...")  # Print first 100 chars

        try:
            agent_response = agent(question)
            print(f"Agent A: {agent_response}")
            print(f"Correct A: {correct_answer}")

            # Exact, case-sensitive match after trimming surrounding whitespace.
            if str(agent_response).strip() == str(correct_answer).strip():
                print(f"Result for Q {q_num}: CORRECT")
                gaia_data[q_num]["status"] = True
                correct_count += 1
            else:
                print(f"Result for Q {q_num}: INCORRECT")
                _append_gaia_log(log_filename, q_num, question,
                                 correct_answer, agent_response)
        except Exception as e:
            # Best effort: one failing question must not abort the whole run.
            print(f"Error processing question {q_num} with agent: {e}")
            _append_gaia_log(log_filename, q_num, question,
                             correct_answer, f"ERROR - {e}")

        # Checkpoint after every attempted question so an interrupted run
        # keeps the progress made so far.
        with open(tmp_json_file_path, 'w', encoding='utf-8') as f:
            json.dump(gaia_data, f, indent=4, ensure_ascii=False)

    end_time = time.time()
    total_time = end_time - start_time

    # Summary
    print("\n--- Evaluation Summary ---")
    print(f"Processed {processed_count} questions with status=false.")
    print(f"Correct answers provided by agent: {correct_count}")
    final_correct_count = initially_correct + correct_count
    print(
        f"Total correct answers (initial + agent): {final_correct_count}/{total_questions}")
    print(f"Total evaluation time: {total_time:.2f} seconds")

    # Save updated data
    try:
        with open(json_file_path, 'w', encoding='utf-8') as f:
            json.dump(gaia_data, f, indent=4, ensure_ascii=False)
        print(f"Successfully saved updated data to {json_file_path}")
    except Exception as e:
        print(f"Error saving updated data to {json_file_path}: {e}")

    print("--- GAIA Evaluation Finished ---")


def _append_gaia_log(log_filename, q_num, question, correct_answer, agent_text):
    """Append one question record (reference answer and agent output) to the log file."""
    with open(log_filename, 'a', encoding='utf-8') as log_f:
        log_f.write(f"*question number {q_num} *\n")
        log_f.write(f"Q: {question}\n")
        log_f.write(f"A: {correct_answer}\n")
        log_f.write(f"Agent: {agent_text}\n")
        log_f.write("<END>\n\n")
166
+
167
+
168
def run_test_questions():
    """Build a BasicAgent and print its answer to each question in QA_PAIRS.

    The reference answers stored in QA_PAIRS are not compared against the
    agent output; only the questions (the dict keys) are used.
    """
    print("--- Starting Agent Test ---")

    # No .env loading happens here; the __main__ guard calls load_dotenv().
    responder = BasicAgent()

    # Iterating a dict yields its keys, i.e. the question texts.
    for prompt in QA_PAIRS:
        print("\n--- Testing Question ---")
        print(f"Q: {prompt}")
        reply = responder(prompt)  # BasicAgent instances are callable
        print(f"Agent A: {reply}")
182
+
183
+
184
+ if __name__ == "__main__":
185
+
186
+ load_dotenv()
187
+ if not os.getenv('HF_TOKEN'):
188
+ print("Warning: HF_TOKEN environment variable not found. Agent might fail.")
189
+
190
+ # run_test_questions()
191
+
192
+ #
193
+ eval_GAIA()