SergeyO7 committed on
Commit
36d03df
·
verified ·
1 Parent(s): 0ec45cf

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +107 -26
agent.py CHANGED
@@ -4,13 +4,20 @@ import os
4
  import re
5
  import pandas as pd
6
  from typing import Optional
7
- from token_bucket import Limiter
8
  import yaml
9
  from PIL import Image
10
  import requests
11
  from io import BytesIO
12
  from markdownify import markdownify
13
  import whisper
 
 
 
 
 
 
 
14
 
15
  # Simulated additional tools (implementation depends on external APIs or setup)
16
  #@tool
@@ -23,7 +30,6 @@ import whisper
23
  # """
24
  # cse_id = os.environ.get("GOOGLE_CSE_ID")
25
  # if not api_key or not cse_id:
26
-
27
  # raise ValueError("GOOGLE_API_KEY and GOOGLE_CSE_ID must be set in environment variables.")
28
  # url = "https://www.googleapis.com/customsearch/v1"
29
  # params = {
@@ -50,14 +56,13 @@ import whisper
50
  # """
51
  # # Extract URL from question using regex
52
  # url_pattern = r'https?://\S+'
53
-
54
  # match = re.search(url_pattern, question)
55
  # if not match:
56
  # return "No image URL found in the question."
57
  # image_url = match.group(0)
58
  #
59
  # headers = {
60
-
61
  # "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
62
  # }
63
  # try:
@@ -71,7 +76,6 @@ import whisper
71
  # tools=[],
72
  # model=model,
73
  # max_steps=10,
74
-
75
  # verbosity_level=2
76
  # )
77
  #
@@ -82,6 +86,64 @@ import whisper
82
  #
83
  # return f"The image description: '{response}'"
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  class VisitWebpageTool(Tool):
86
  name = "visit_webpage"
87
  description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
@@ -124,11 +186,11 @@ class VisitWebpageTool(Tool):
124
 
125
  class DownloadTaskAttachmentTool(Tool):
126
  name = "download_file"
127
- description = "Downloads the file attached to the task ID and returns the local file path. Supports Excel (.xlsx), image (.png, .jpg), and audio (.mp3) files."
128
  inputs = {'task_id': {'type': 'string', 'description': 'The task id to download attachment from.'}}
129
  output_type = "string"
130
 
131
- def __init__(self, rate_limiter: Optional[Limiter] = None, default_api_url: str = "https://agents-course-unit4-scoring.hf.space", *args, **kwargs):
132
  self.is_initialized = False
133
  self.rate_limiter = rate_limiter
134
  self.default_api_url = default_api_url
@@ -154,8 +216,12 @@ class DownloadTaskAttachmentTool(Tool):
154
  extension = '.xlsx'
155
  elif 'audio/mpeg' in content_type:
156
  extension = '.mp3'
 
 
 
 
157
  else:
158
- return f"Error: Unsupported file type {content_type} for task {task_id}"
159
 
160
  local_file_path = f"downloads/{task_id}{extension}"
161
  os.makedirs("downloads", exist_ok=True)
@@ -164,6 +230,10 @@ class DownloadTaskAttachmentTool(Tool):
164
  file.write(chunk)
165
  print(f"File downloaded successfully: {local_file_path}")
166
  return local_file_path
 
 
 
 
167
  except requests.exceptions.RequestException as e:
168
  return f"Error downloading file for task {task_id}: {str(e)}"
169
 
@@ -242,20 +312,24 @@ class ExcelReaderTool(Tool):
242
  except Exception as e:
243
  return f"Error reading Excel file: {str(e)}"
244
 
245
- #@tool
246
- #class LocalFileAudioTool:
247
- # """Tool for transcribing audio files"""
248
- #
249
- # @tool
250
- # def transcribe(self, file_path: str) -> str:
251
- # """Transcribe audio from file
252
- # Args:
253
- # file_path (str): Path to audio file
254
- # Returns:
255
- # str: Transcription text
256
- # """
257
- # return f"Transcribed audio from '{file_path}' (simulated)."
258
 
 
 
 
 
 
 
 
 
 
 
259
  class MagAgent:
260
  def __init__(self, rate_limiter: Optional[Limiter] = None):
261
  """Initialize the MagAgent with search tools."""
@@ -270,17 +344,24 @@ class MagAgent:
270
  # Load prompt templates
271
  with open("prompts.yaml", 'r') as stream:
272
  prompt_templates = yaml.safe_load(stream)
 
 
 
273
 
274
  self.agent = CodeAgent(
275
  model= model,
276
  tools=[
277
- # GoogleSearchTool,
278
- DownloadTaskAttachmentTool(),
279
- DuckDuckGoSearchTool(),
280
  WikipediaSearchTool(),
281
- # ImageAnalysisTool,
282
  SpeechToTextTool,
283
- ExcelReaderTool()
 
 
 
 
 
 
284
  # LocalFileAudioTool()
285
  ],
286
  verbosity_level=3,
 
4
  import re
5
  import pandas as pd
6
  from typing import Optional
7
+ from token_bucket import Limiter, MemoryStorage
8
  import yaml
9
  from PIL import Image
10
  import requests
11
  from io import BytesIO
12
  from markdownify import markdownify
13
  import whisper
14
+ import time
15
+ from youtube_transcript_api import YouTubeTranscriptApi
16
+ from SPARQLWrapper import SPARQLWrapper, JSON
17
+ import chess
18
+ import chess.engine
19
+ import shutil
20
+ import traceback
21
 
22
  # Simulated additional tools (implementation depends on external APIs or setup)
23
  #@tool
 
30
  # """
31
  # cse_id = os.environ.get("GOOGLE_CSE_ID")
32
  # if not api_key or not cse_id:
 
33
  # raise ValueError("GOOGLE_API_KEY and GOOGLE_CSE_ID must be set in environment variables.")
34
  # url = "https://www.googleapis.com/customsearch/v1"
35
  # params = {
 
56
  # """
57
  # # Extract URL from question using regex
58
  # url_pattern = r'https?://\S+'
59
+ #
60
  # match = re.search(url_pattern, question)
61
  # if not match:
62
  # return "No image URL found in the question."
63
  # image_url = match.group(0)
64
  #
65
  # headers = {
 
66
  # "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
67
  # }
68
  # try:
 
76
  # tools=[],
77
  # model=model,
78
  # max_steps=10,
 
79
  # verbosity_level=2
80
  # )
81
  #
 
86
  #
87
  # return f"The image description: '{response}'"
88
 
89
class ChessEngineTool(Tool):
    """Tool that asks a local Stockfish binary for the best move in a position."""

    name = "chess_engine"
    description = "Analyzes a chess position (FEN) with Stockfish and returns the best move."
    inputs = {
        "fen": {"type": "string", "description": "FEN string of the position."},
        "time_limit": {"type": "number", "description": "Time in seconds for engine analysis.", "nullable": True}
    }
    output_type = "string"

    # Location probed when stockfish is not on PATH.
    _FALLBACK_BINARY = "/usr/games/stockfish"
    # Analysis time used when the caller passes no (or a null) time limit.
    _DEFAULT_TIME_LIMIT = 0.1

    def forward(self, fen: str, time_limit: float = 0.1) -> str:
        """Return the engine's preferred move for ``fen`` in SAN notation.

        Args:
            fen: FEN string describing the position to analyse.
            time_limit: Seconds the engine may think. ``None`` (the input is
                declared nullable) falls back to the default of 0.1 seconds.

        Returns:
            The best move in standard algebraic notation.

        Raises:
            RuntimeError: If no Stockfish binary can be located, or the engine
                reports no move for the position.
        """
        if time_limit is None:
            # An unset limit would be handed to the engine as "no time
            # constraint"; use the documented default instead.
            time_limit = self._DEFAULT_TIME_LIMIT

        # Figure out where the binary actually is. The fallback path is only
        # accepted if it exists, otherwise the error below could never fire.
        sf_bin = shutil.which("stockfish")
        if sf_bin is None and os.path.isfile(self._FALLBACK_BINARY):
            sf_bin = self._FALLBACK_BINARY
        if not sf_bin:
            raise RuntimeError(
                "Cannot find stockfish on PATH or at /usr/games/stockfish. "
                "Did you install it in apt.txt or via apt-get?"
            )

        board = chess.Board(fen)
        engine = chess.engine.SimpleEngine.popen_uci(sf_bin)
        try:
            result = engine.play(board, chess.engine.Limit(time=time_limit))
        finally:
            # Always shut the engine process down, even if analysis failed.
            engine.quit()

        if result.move is None:
            # NOTE(review): presumably happens when the game is already over
            # (mate/stalemate) — confirm against python-chess PlayResult docs.
            raise RuntimeError(f"Stockfish returned no move for position: {fen}")
        return board.san(result.move)
112
+
113
class DuckDuckGoSearchTool(Tool):
    """Web-search tool backed by the DuckDuckGo "lite" HTML endpoint."""

    name = "web_search"
    description = "Searches the web using DuckDuckGo and returns results as a string."
    inputs = {'query': {'type': 'string', 'description': 'The search query.'}}
    output_type = "string"

    def __init__(self, rate_limiter: Optional[Limiter] = None, *args, **kwargs):
        """Store the optional rate limiter used to throttle outgoing searches."""
        self.is_initialized = False
        self.rate_limiter = rate_limiter

    def forward(self, query: str) -> str:
        """Run a search and return the raw response body.

        Args:
            query: The search query.

        Returns:
            The response text on success, otherwise a string starting with
            "Error" describing the failure (errors are reported as text rather
            than raised).
        """
        max_retries = 3
        retry_delay = 2  # Seconds
        for attempt in range(max_retries):
            try:
                if self.rate_limiter:
                    # NOTE(review): confirm `consume(1)` matches the limiter's
                    # signature; some token-bucket APIs expect a bucket key as
                    # the first argument.
                    while not self.rate_limiter.consume(1):
                        print("Rate limit reached for web search. Waiting...")
                        time.sleep(retry_delay)
                # Simplified DuckDuckGo search logic (replace with actual implementation).
                # `params` lets requests URL-encode the query instead of
                # splicing raw user text into the URL.
                response = requests.get(
                    "https://lite.duckduckgo.com/lite/",
                    params={"q": query},
                    timeout=10,
                )
                # 202 is a success status, so raise_for_status() never raises
                # for it; throttling has to be detected before that call.
                if response.status_code in (202, 429):
                    print(f"Rate limit hit for web search (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay}s...")
                    if attempt < max_retries - 1:
                        time.sleep(retry_delay)
                        retry_delay *= 2  # Exponential backoff
                    continue
                response.raise_for_status()
                return response.text  # Process results as needed
            except Exception as e:
                # Best-effort tool: report any failure as text for the agent.
                return f"Error searching web: {str(e)}"
        return "Error: Web search failed due to rate limiting."
146
+
147
  class VisitWebpageTool(Tool):
148
  name = "visit_webpage"
149
  description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
 
186
 
187
  class DownloadTaskAttachmentTool(Tool):
188
  name = "download_file"
189
+ description = "Downloads the file attached to the task ID and returns the local file path. Supports Excel (.xlsx), image (.png, .jpg), audio (.mp3), PDF (.pdf), and Python (.py) files."
190
  inputs = {'task_id': {'type': 'string', 'description': 'The task id to download attachment from.'}}
191
  output_type = "string"
192
 
193
+ def __init__(self, rate_limiter: Optional[Limiter] = None, default_api_url: str = DEFAULT_API_URL, *args, **kwargs):
194
  self.is_initialized = False
195
  self.rate_limiter = rate_limiter
196
  self.default_api_url = default_api_url
 
216
  extension = '.xlsx'
217
  elif 'audio/mpeg' in content_type:
218
  extension = '.mp3'
219
+ elif 'application/pdf' in content_type:
220
+ extension = '.pdf'
221
+ elif 'text/x-python' in content_type:
222
+ extension = '.py'
223
  else:
224
+ return f"Error: Unsupported file type {content_type} for task {task_id}. Try using visit_webpage or web_search if the content is online."
225
 
226
  local_file_path = f"downloads/{task_id}{extension}"
227
  os.makedirs("downloads", exist_ok=True)
 
230
  file.write(chunk)
231
  print(f"File downloaded successfully: {local_file_path}")
232
  return local_file_path
233
+ except requests.exceptions.HTTPError as e:
234
+ if e.response.status_code == 429:
235
+ return f"Error: Rate limit exceeded for task {task_id}. Try again later."
236
+ return f"Error downloading file for task {task_id}: {str(e)}"
237
  except requests.exceptions.RequestException as e:
238
  return f"Error downloading file for task {task_id}: {str(e)}"
239
 
 
312
  except Exception as e:
313
  return f"Error reading Excel file: {str(e)}"
314
 
315
class PythonCodeReaderTool(Tool):
    """Tool that returns the text of a Python source file."""

    name = "read_python_code"
    description = "Reads a Python (.py) file and returns its content as a string."
    inputs = {
        "file_path": {"type": "string", "description": "The path to the Python file to read"}
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        """Read ``file_path`` as UTF-8 text.

        Args:
            file_path: The path to the Python file to read.

        Returns:
            The file's content, or an "Error ..." string when the path does
            not exist or reading fails (failures are not raised).
        """
        try:
            if os.path.exists(file_path):
                with open(file_path, "r", encoding="utf-8") as source:
                    return source.read()
            return f"Error: Python file not found at {file_path}"
        except Exception as exc:
            return f"Error reading Python file: {str(exc)}"
332
+
333
  class MagAgent:
334
  def __init__(self, rate_limiter: Optional[Limiter] = None):
335
  """Initialize the MagAgent with search tools."""
 
344
  # Load prompt templates
345
  with open("prompts.yaml", 'r') as stream:
346
  prompt_templates = yaml.safe_load(stream)
347
+
348
+ # Initialize rate limiter for DuckDuckGoSearchTool
349
+ search_rate_limiter = Limiter(rate=30/60, capacity=30, storage=MemoryStorage()) if not rate_limiter else rate_limiter
350
 
351
  self.agent = CodeAgent(
352
  model= model,
353
  tools=[
354
+ DownloadTaskAttachmentTool(rate_limiter=rate_limiter),
355
+ DuckDuckGoSearchTool(rate_limiter=search_rate_limiter),
 
356
  WikipediaSearchTool(),
 
357
  SpeechToTextTool,
358
+ ExcelReaderTool(),
359
+ VisitWebpageTool(),
360
+ PythonCodeReaderTool()
361
+ # Uncomment to add ChessEngineTool (requires python-chess and Stockfish)
362
+ ChessEngineTool()
363
+ # GoogleSearchTool,
364
+ # ImageAnalysisTool,
365
  # LocalFileAudioTool()
366
  ],
367
  verbosity_level=3,