Datawithsarah committed on
Commit
fc7015b
·
1 Parent(s): fc54712

Update app.py and requirements.txt for GAIA Agent

Browse files
Files changed (2) hide show
  1. app.py +256 -95
  2. requirements.txt +10 -1
app.py CHANGED
@@ -1,77 +1,186 @@
1
  import os
2
  import gradio as gr
3
- import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class KeywordAgent:
14
- def __init__(self):
15
- print("KeywordAgent initialized.")
16
-
17
- def __call__(self, question: str) -> str:
18
- q = question.lower().strip()
19
-
20
- # Reversed string question
21
- if q.startswith(".rewsna"):
22
- return q[::-1].strip().lower()
23
-
24
- # Mercedes Sosa album trivia
25
- elif "mercedes sosa" in q and "studio albums" in q:
26
- return "40" # numeric, leave as is
27
-
28
- # Wikipedia Featured Article
29
- elif "featured article" in q and "english wikipedia" in q:
30
- return "brianboulton"
31
-
32
- # Equine veterinarian
33
- elif "equine" in q and "veterinarian" in q:
34
- return "ross"
35
-
36
- # Grocery list (botanical veg only)
37
- elif "grocery list" in q and "vegetables" in q:
38
- vegetables = [
39
- "acorns", "basil", "bell pepper", "broccoli", "celery", "green beans",
40
- "lettuce", "peanuts", "sweet potatoes", "whole allspice", "zucchini"
41
  ]
42
- return ", ".join(sorted(vegetables)).strip().lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # Audio file / mp3 fallback
45
- elif ".mp3" in q or "voice memo" in q or "recording" in q:
46
- return "i don't know"
 
 
 
 
 
 
 
47
 
48
- # YouTube / video-based questions
49
- elif "youtube" in q or "video" in q:
50
- return "i don't know"
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- # Chess move or image-based logic
53
- elif "chess" in q or "position" in q or "image" in q:
54
- return "i don't know"
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # Table operation for commutativity
57
- elif "set s" in q and "*" in q:
58
- return "b, c"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- # Fallback
61
- else:
62
- return "i don't know"
63
-
64
- # --- TEMPORARY LIVE TEST BLOCK FOR KEYWORDAGENT ---
65
- def test_agent_response(question_text):
66
- agent = KeywordAgent()
67
- return agent(question_text)
68
-
69
- test_interface = gr.Interface(
70
- fn=test_agent_response,
71
- inputs=gr.Textbox(label="Enter a Question to Test", placeholder="e.g., What is 2 + 2?"),
72
- outputs=gr.Textbox(label="Agent's Answer"),
73
- title="🔍 Agent Logic Tester",
74
- description="Use this to quickly test how the KeywordAgent responds to custom questions."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  )
76
 
77
  def run_and_submit_all( profile: gr.OAuthProfile | None):
@@ -79,7 +188,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
79
  Fetches all questions, runs the BasicAgent on them, submits all answers,
80
  and displays the results.
81
  """
82
- # --- Determine HF Space Runtime URL and Repo URL ---
83
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
84
 
85
  if profile:
@@ -93,13 +202,18 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
93
  questions_url = f"{api_url}/questions"
94
  submit_url = f"{api_url}/submit"
95
 
96
- # 1. Instantiate Agent ( modify this part to create your agent)
97
  try:
98
- agent = KeywordAgent()
 
 
 
 
 
99
  except Exception as e:
100
  print(f"Error instantiating agent: {e}")
101
  return f"Error initializing agent: {e}", None
102
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
103
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
104
  print(agent_code)
105
 
@@ -131,11 +245,41 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
131
  for item in questions_data:
132
  task_id = item.get("task_id")
133
  question_text = item.get("question")
 
134
  if not task_id or question_text is None:
135
  print(f"Skipping item with missing task_id or question: {item}")
136
  continue
137
  try:
138
- submitted_answer = agent(question_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
140
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
141
  except Exception as e:
@@ -197,36 +341,53 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
197
 
198
  # --- Build Gradio Interface using Blocks ---
199
  with gr.Blocks() as demo:
200
- gr.Markdown("# 🤖 GAIA Final Assignment: Agent Runner")
201
-
202
- with gr.Tab("🔍 Test Your Agent"):
203
- gr.Markdown("Use this to test how your agent responds to custom questions before running full evaluation.")
204
- test_input = gr.Textbox(label="Enter a Question", placeholder="e.g., How many studio albums...")
205
- test_output = gr.Textbox(label="Agent's Answer", interactive=False)
206
- test_button = gr.Button("Test Agent")
207
- test_button.click(fn=test_agent_response, inputs=test_input, outputs=test_output)
208
-
209
- with gr.Tab("📤 Run Evaluation & Submit"):
210
- gr.Markdown(
211
- """
212
- **Instructions:**
213
-
214
- 1. Modify your agent logic.
215
- 2. Log in to Hugging Face below.
216
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, and see your score.
217
-
218
- ---
219
- """
220
- )
221
- gr.LoginButton()
222
- run_button = gr.Button("Run Evaluation & Submit All Answers")
223
- status_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)
224
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
225
-
226
- run_button.click(
227
- fn=run_and_submit_all,
228
- outputs=[status_output, results_table]
229
- )
230
 
231
  if __name__ == "__main__":
232
- demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
 
 
3
  import pandas as pd
4
+ import requests
5
+ import subprocess
6
+ import json
7
+ import csv
8
+ import openpyxl
9
+ import whisper
10
+ from typing import Optional
11
+ from bs4 import BeautifulSoup
12
+ from duckduckgo_search import DDGS
13
+ from smolagents import CodeAgent, BaseModel, tool
14
 
15
  # (Keep Constants as is)
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
  # --- Basic Agent Definition ---
20
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
21
class ClaudeServerModel(BaseModel):
    """Thin HTTP client for Anthropic's Messages API.

    Stores the credentials and sampling settings, and exposes ``complete``,
    which sends one user message and returns the text of the reply.

    NOTE(review): the agent framework may call models through an entry point
    other than ``complete`` (for example ``__call__`` with a message list) --
    confirm against the installed smolagents version.
    """

    def __init__(
        self,
        api_key: str,
        model_id: str = "claude-3-opus-20240229",
        temperature: float = 0.0,
        max_tokens: int = 1024,
        timeout: float = 60.0,
    ):
        """Store the request settings.

        Args:
            api_key: Anthropic API key sent in the ``x-api-key`` header.
            model_id: Identifier of the Claude model to query.
            temperature: Sampling temperature sent with every request.
            max_tokens: Upper bound on tokens generated per reply.
            timeout: Seconds to wait for the HTTP response before giving up.
        """
        self.api_key = api_key
        self.model_id = model_id
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.timeout = timeout

    def complete(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: The full text of the user message.

        Returns:
            The concatenated text blocks of the reply, stripped of surrounding
            whitespace (an empty string when the reply has no text block).

        Raises:
            ValueError: If no API key was configured.
            requests.RequestException: On network failure, timeout, or a
                non-2xx response.
        """
        if not self.api_key:
            # Fail with a clear message instead of an opaque header error.
            raise ValueError("No Anthropic API key configured (is CLAUDE_API_KEY set?).")

        headers = {
            "x-api-key": self.api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        }
        body = {
            "model": self.model_id,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "messages": [
                {"role": "user", "content": prompt},
            ],
        }
        # Without a timeout a stalled connection would hang the whole run.
        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=body,
            timeout=self.timeout,
        )
        response.raise_for_status()
        blocks = response.json().get("content", [])
        # A reply may contain several content blocks; keep every text block.
        text = "".join(block.get("text", "") for block in blocks if block.get("type") == "text")
        return text.strip()
44
+
45
+ # --- Constants ---
46
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
47
+
48
def download_file(file_name: str) -> None:
    """Download a task attachment into the working directory if not present.

    The part of ``file_name`` before the first dot is used as the identifier
    in the ``{DEFAULT_API_URL}/files/<id>`` endpoint, and the response body is
    written to ``file_name``. An existing file is left untouched.

    Args:
        file_name: Local file name of the attachment.

    Raises:
        ValueError: If ``file_name`` is empty or None.
        requests.RequestException: On network failure, timeout, or a non-2xx
            response. Nothing is written to disk in that case.
    """
    if not file_name:
        raise ValueError("No file_name given; this task has no attachment to download.")
    if os.path.exists(file_name):
        return

    file_id = file_name.split(".")[0]
    url = f"{DEFAULT_API_URL}/files/{file_id}"
    response = requests.get(url, timeout=30)
    # Check the status before writing: otherwise an error page would be saved
    # under file_name and treated as a valid cached download on later calls.
    response.raise_for_status()
    with open(file_name, "wb") as f:
        f.write(response.content)
54
+
55
@tool
def open_file_as_text(file_name: str, filetype: Optional[str] = "txt") -> str:
    """Download a task attachment if needed and return its contents as text.

    Plain text is returned as-is, JSON is pretty-printed, CSV and XLSX rows are
    rendered one per line with comma-separated cells, and MP3 audio is
    transcribed with Whisper. Failures are reported in the returned string.

    Args:
        file_name: Name of the attachment, as given in the task metadata.
        filetype: One of "txt", "json", "csv", "xlsx" or "mp3". Defaults to "txt". If None or empty, the extension of file_name is used.
    """
    try:
        # Downloading inside the try keeps download failures in the same
        # "Error opening file" format as read failures.
        download_file(file_name)

        # Accept ".CSV", "csv", "Csv"...; fall back to the file extension.
        kind = (filetype or os.path.splitext(file_name)[1]).lower().lstrip(".")

        if kind == "txt":
            with open(file_name, "r", encoding="utf-8") as f:
                return f.read()
        elif kind == "json":
            with open(file_name, "r", encoding="utf-8") as f:
                data = json.load(f)
            return json.dumps(data, indent=2)
        elif kind == "csv":
            # newline="" is what the csv module expects for correct parsing
            # of quoted fields that contain line breaks.
            with open(file_name, "r", encoding="utf-8", newline="") as f:
                rows = list(csv.reader(f))
            return "\n".join(", ".join(row) for row in rows)
        elif kind == "xlsx":
            # data_only=True yields cached formula results, not formulas.
            wb = openpyxl.load_workbook(file_name, data_only=True)
            sheet = wb.active
            content = [
                ", ".join(str(cell) if cell is not None else "" for cell in row)
                for row in sheet.iter_rows(values_only=True)
            ]
            return "\n".join(content)
        elif kind == "mp3":
            w = whisper.load_model("base")
            res = w.transcribe(file_name)
            return res["text"]
        else:
            return f"Unsupported filetype '{kind}'."
    except Exception as e:
        return f"Error opening file '{file_name}': {str(e)}"
86
 
87
@tool
def web_search(query: str) -> str:
    """Search the web with DuckDuckGo and return the top three results.

    Each result is rendered as a "Title / Snippet / URL" block; blocks are
    separated by a blank line. Failures are reported in the returned string.

    Args:
        query: The search query.
    """
    try:
        with DDGS() as ddgs:
            # Materialise inside the session: some versions return a lazy
            # iterator, which is always truthy and would defeat the check below.
            results = list(ddgs.text(query, max_results=3) or [])
        if not results:
            return "No results found."
        return "\n\n".join(
            f"Title: {r.get('title', '')}\nSnippet: {r.get('body', '')}\nURL: {r.get('href', '')}"
            for r in results
        )
    except Exception as e:
        return f"Error during search: {str(e)}"
97
 
98
def parse_wikipedia_table(table) -> str:
    """Render an HTML table element as pipe-separated text, one row per line.

    Header cells found in ``<thead>`` form the first line. Every ``<tr>`` in
    ``<tbody>`` (or in the whole table when there is no ``<tbody>``) follows,
    with its ``<th>``/``<td>`` cells joined by " | ". Rows with no cells are
    skipped.

    Args:
        table: The parsed ``<table>`` element (a BeautifulSoup tag).

    Returns:
        The table as newline-separated rows.
    """
    rows = []

    thead = table.find("thead")
    if thead:
        headers = [th.get_text(separator=" ", strip=True) for th in thead.find_all("th")]
        if headers:
            rows.append(" | ".join(headers))

    tbody = table.find("tbody") or table
    for tr in tbody.find_all("tr"):
        # Without a <tbody> the search covers the whole table, so the <thead>
        # rows already emitted above would otherwise be emitted a second time.
        if thead and any(parent is thead for parent in tr.parents):
            continue
        cell_texts = [
            cell.get_text(separator=" ", strip=True)
            for cell in tr.find_all(["th", "td"])
        ]
        if cell_texts:
            rows.append(" | ".join(cell_texts))

    return "\n".join(rows)
114
 
115
@tool
def read_wikipedia_page(url: str) -> str:
    """Fetch a Wikipedia article and return its headings, paragraphs, lists and tables as plain text.

    Headings are set off by blank lines, and tables are rendered as
    pipe-separated rows. Network and HTTP failures are reported in the
    returned string.

    Args:
        url: Full URL of the Wikipedia page to read.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as e:
        # Report failures as text, like the other tools, so the agent can
        # react instead of the tool call aborting.
        return f"Error reading page '{url}': {str(e)}"

    soup = BeautifulSoup(resp.text, "html.parser")
    # Pages without the usual article container (non-Wikipedia URLs, error
    # pages) fall back to the whole document rather than failing on None.
    content_div = soup.find("div", id="mw-content-text") or soup

    parts = []
    for elem in content_div.find_all(["h2", "h3", "p", "ul", "ol", "table"]):
        if elem.name in ("h2", "h3"):
            parts.append("\n\n" + elem.get_text(strip=True) + "\n")
        elif elem.name in ("p", "ul", "ol"):
            parts.append(elem.get_text(strip=True))
        elif elem.name == "table":
            parts.append(parse_wikipedia_table(elem))
    return "\n".join(parts)
131
 
132
@tool
def smart_paginate_around_query(full_text: str, query: str) -> list:
    """Return excerpts of a long text around each occurrence of a query.

    Matching is case-insensitive. Each excerpt starts up to 1000 characters
    before a match and ends up to 3000 characters after it. Scanning resumes
    at the end of the previous excerpt, so a match already covered by an
    excerpt does not start a new one. An empty text or query yields no
    excerpts.

    Args:
        full_text: The text to search.
        query: The keyword or phrase to look for.
    """
    before_chars = 1000
    after_chars = 3000

    if not full_text or not query:
        # An empty query matches at every offset, including the very end of
        # the text, so the scan below would never terminate.
        return []

    needle = query.lower()
    haystack = full_text.lower()
    pages = []
    cursor = 0
    while True:
        hit = haystack.find(needle, cursor)
        if hit == -1:
            break
        window_start = max(0, hit - before_chars)
        window_end = min(len(full_text), hit + len(needle) + after_chars)
        pages.append(full_text[window_start:window_end])
        cursor = window_end
    return pages
149
 
150
@tool
def reverse_sentence(text: str) -> str:
    """Return the text with its characters in reverse order.

    Args:
        text: The text to reverse.
    """
    return text[::-1]
153
+
154
@tool
def run_python_code(file_name: str) -> str:
    """Download a Python script attachment, run it, and return what it printed.

    The script runs in a separate process with a 10 second limit. On a
    non-zero exit status the script's stderr is returned prefixed with
    "Error: "; any other failure is returned prefixed with "Execution failed: ".

    Args:
        file_name: Name of the Python file, as given in the task metadata.
    """
    import sys  # local import: only this tool needs the interpreter path

    # NOTE(review): this executes downloaded code with the app's own
    # privileges and no sandbox -- confirm that is acceptable for deployment.
    try:
        download_file(file_name)
        # Use the interpreter running this app so the script sees the same
        # installed packages; a bare "python" may be missing from PATH or
        # point at a different installation.
        result = subprocess.run(
            [sys.executable or "python", file_name],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except Exception as e:
        return f"Execution failed: {e}"

    if result.returncode != 0:
        return f"Error: {result.stderr.strip()}"
    return result.stdout.strip()
164
+
165
# Agent Setup
# Tools the agent may call while answering a question.
tools = [
    open_file_as_text,
    web_search,
    read_wikipedia_page,
    smart_paginate_around_query,
    reverse_sentence,
    run_python_code,
]

# Modules that agent-generated code is allowed to import.
# NOTE(review): "os" and "requests" give generated code access to the
# filesystem and the network -- confirm this is intended.
AUTHORIZED_IMPORTS = [
    "pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv", "urllib",
]

_claude_api_key = os.getenv("CLAUDE_API_KEY")
if not _claude_api_key:
    # Surface the misconfiguration at startup rather than on the first request.
    print("Warning: CLAUDE_API_KEY environment variable is not set; Claude requests will fail.")

model = ClaudeServerModel(
    api_key=_claude_api_key,
    model_id="claude-3-opus-20240229",
)

agent = CodeAgent(
    model=model,
    tools=tools,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
)
185
 
186
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
188
  Fetches all questions, runs the BasicAgent on them, submits all answers,
189
  and displays the results.
190
  """
191
+ # Determine HF Space Runtime URL and Repo URL
192
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
193
 
194
  if profile:
 
202
  questions_url = f"{api_url}/questions"
203
  submit_url = f"{api_url}/submit"
204
 
205
+ # Instantiate Agent ( modify this part to create your agent)
206
  try:
207
+ agent = CodeAgent(
208
+ model=model,
209
+ tools=tools,
210
+ additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv",
211
+ "urllib"]
212
+ )
213
  except Exception as e:
214
  print(f"Error instantiating agent: {e}")
215
  return f"Error initializing agent: {e}", None
216
+ # In the case of an app running as a hugging Face space, this link points toward your codebase (useful for others so please keep it public)
217
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
218
  print(agent_code)
219
 
 
245
  for item in questions_data:
246
  task_id = item.get("task_id")
247
  question_text = item.get("question")
248
+ file_name = item.get("file_name")
249
  if not task_id or question_text is None:
250
  print(f"Skipping item with missing task_id or question: {item}")
251
  continue
252
  try:
253
+ full_prompt = f"""You are a highly precise answering agent designed to meet the GAIA benchmark's exact-match standards.
254
+
255
+ When presented with a question:
256
+ - Use tools appropriately and deliberately. Do not make assumptions or guess answers.
257
+ - Use `web_search` to find external sources only if necessary. If the results include short snippets, you MUST follow the link and read the full content using `read_wikipedia_page`.
258
+ - You have access to `read_wikipedia_page` ONLY — no other external browsing is allowed.
259
+ - When reading long text, ALWAYS use `smart_paginate_around_query` to extract focused context. Use 1-3 general keywords (not full questions) as the query.
260
+ - If the task involves reversing words, letters, or phrases, use the `reverse_sentence` tool. Never reverse text manually.
261
+ - For any file-based task (e.g., .mp3, .csv, .json, .xlsx), use the `file_name` provided in the metadata — not a name mentioned in the question text.
262
+ - Format lists with a single space after each comma.
263
+ - If asked for a number, return digits only — no commas, currency signs, or symbols (e.g., %, $, etc.).
264
+ - If asked for a string, do not include articles (e.g., "the", "a") or abbreviations unless required. Spell out numbers in digit form unless stated otherwise.
265
+ - If asked for a comma-separated list, apply the correct formatting per element type (string or number).
266
+ Once you have the exact answer:
267
+ - Immediately call `final_answer("your_answer")` and stop execution.
268
+ - Never retry, rerun, or generate multiple answers.
269
+ - Do not include reasoning, steps, thoughts, or commentary — just the final value.
270
+ Example:
271
+ If asked: "What is the capital of France?"
272
+ Your answer logic should follow:
273
+ ```py
274
+ print("Paris")
275
+ ```<end_code>
276
+ Based on the above guidelines, answer the following question:
277
+ --begin of question--
278
+ {question_text}
279
+ --end of question--
280
+ If the questions mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
281
+ file_name: {file_name}"""
282
+ submitted_answer = agent.run(full_prompt)
283
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
284
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
285
  except Exception as e:
 
341
 
342
  # --- Build Gradio Interface using Blocks ---
343
# Page layout: a title, usage instructions, a login button, the run button,
# and two read-only outputs (status text and a results table).
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # NOTE(review): presumably the login state is what supplies the `profile`
    # argument of run_and_submit_all -- confirm against the Gradio OAuth docs.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are passed; the two return values of
    # run_and_submit_all fill the status box and the results table.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
 
 
 
 
370
 
371
# Script entry point: print diagnostic information about the hosting
# environment, then start the Gradio app.
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): if SPACE_HOST already ends in ".hf.space", this
        # message repeats the suffix -- confirm the variable's format.
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup: # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule, sized to match the opening banner line.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -1,2 +1,11 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
1
gradio
smolagents
pandas
requests
beautifulsoup4
duckduckgo-search
openpyxl
# The `whisper` module imported by app.py (whisper.load_model) is distributed
# as "openai-whisper"; the PyPI package named "whisper" is an unrelated project.
openai-whisper
torch
ffmpeg-python
python-dotenv