strickvl committed on
Commit
bc7ee1f
·
unverified ·
1 Parent(s): 4e257c7

Enhance error handling and add retry mechanism for API calls

Browse files

- Implemented RetryingHfApiModel with exponential backoff for rate-limited API requests
- Updated Gradio UI stream_to_gradio function with improved error handling and token tracking
- Added logging support for better error tracking and debugging

Files changed (2) hide show
  1. Gradio_UI.py +21 -49
  2. app.py +32 -2
Gradio_UI.py CHANGED
@@ -123,56 +123,28 @@ def pull_messages_from_step(
123
  yield gr.ChatMessage(role="assistant", content="-----")
124
 
125
 
126
- def stream_to_gradio(
127
- agent,
128
- task: str,
129
- reset_agent_memory: bool = False,
130
- additional_args: Optional[dict] = None,
131
- ):
132
- """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
133
- if not _is_package_available("gradio"):
134
- raise ModuleNotFoundError(
135
- "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
136
- )
137
- import gradio as gr
138
-
139
  total_input_tokens = 0
140
  total_output_tokens = 0
141
-
142
- for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
143
- # Track tokens if model provides them
144
- if hasattr(agent.model, "last_input_token_count"):
145
- total_input_tokens += agent.model.last_input_token_count
146
- total_output_tokens += agent.model.last_output_token_count
147
- if isinstance(step_log, ActionStep):
148
- step_log.input_token_count = agent.model.last_input_token_count
149
- step_log.output_token_count = agent.model.last_output_token_count
150
-
151
- for message in pull_messages_from_step(
152
- step_log,
153
- ):
154
- yield message
155
-
156
- final_answer = step_log # Last log is the run's final_answer
157
- final_answer = handle_agent_output_types(final_answer)
158
-
159
- if isinstance(final_answer, AgentText):
160
- yield gr.ChatMessage(
161
- role="assistant",
162
- content=f"**Final answer:**\n{final_answer.to_string()}\n",
163
- )
164
- elif isinstance(final_answer, AgentImage):
165
- yield gr.ChatMessage(
166
- role="assistant",
167
- content={"path": final_answer.to_string(), "mime_type": "image/png"},
168
- )
169
- elif isinstance(final_answer, AgentAudio):
170
- yield gr.ChatMessage(
171
- role="assistant",
172
- content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
173
- )
174
- else:
175
- yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
176
 
177
 
178
  class GradioUI:
@@ -293,4 +265,4 @@ class GradioUI:
293
  demo.launch(debug=True, share=True, **kwargs)
294
 
295
 
296
- __all__ = ["stream_to_gradio", "GradioUI"]
 
123
  yield gr.ChatMessage(role="assistant", content="-----")
124
 
125
 
126
+ def stream_to_gradio(agent, task: str, reset_agent_memory: bool = True):
127
+ """Stream agent responses to Gradio interface with better error handling"""
 
 
 
 
 
 
 
 
 
 
 
128
  total_input_tokens = 0
129
  total_output_tokens = 0
130
+
131
+ try:
132
+ for msg in agent.chat(task, reset_memory=reset_agent_memory):
133
+ # Safely handle token counting
134
+ if hasattr(agent.model, 'last_input_token_count'):
135
+ input_tokens = agent.model.last_input_token_count or 0
136
+ total_input_tokens += input_tokens
137
+
138
+ if hasattr(agent.model, 'last_output_token_count'):
139
+ output_tokens = agent.model.last_output_token_count or 0
140
+ total_output_tokens += output_tokens
141
+
142
+ yield msg
143
+
144
+ except Exception as e:
145
+ error_msg = f"Error during chat: {str(e)}"
146
+ logger.error(error_msg)
147
+ yield f"⚠️ {error_msg}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
 
150
  class GradioUI:
 
265
  demo.launch(debug=True, share=True, **kwargs)
266
 
267
 
268
+ __all__ = ["stream_to_gradio", "GradioUI"]
app.py CHANGED
@@ -3,11 +3,16 @@ import datetime
3
  import requests
4
  import pytz
5
  import yaml
 
 
6
  from tools.final_answer import FinalAnswerTool
7
  from tools.text_analysis import analyze_text
8
 
9
  from Gradio_UI import GradioUI
10
 
 
 
 
11
 
12
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
13
  @tool
@@ -40,14 +45,39 @@ def get_current_time_in_timezone(timezone: str) -> str:
40
 
41
 
42
  final_answer = FinalAnswerTool()
43
- model = HfApiModel(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  max_tokens=2096,
45
  temperature=0.5,
46
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
47
  custom_role_conversions=None,
 
 
48
  )
49
 
50
-
51
  # Import tool from Hub
52
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
53
 
 
3
  import requests
4
  import pytz
5
  import yaml
6
+ import time
7
+ import logging
8
  from tools.final_answer import FinalAnswerTool
9
  from tools.text_analysis import analyze_text
10
 
11
  from Gradio_UI import GradioUI
12
 
13
+ # Set up logging
14
+ logger = logging.getLogger(__name__)
15
+
16
 
17
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
18
  @tool
 
45
 
46
 
47
  final_answer = FinalAnswerTool()
48
+
49
+
50
+ class RetryingHfApiModel(HfApiModel):
51
+ """HfApiModel with retry logic for rate limiting"""
52
+
53
+ def __init__(self, max_retries=3, retry_delay=2, **kwargs):
54
+ super().__init__(**kwargs)
55
+ self.max_retries = max_retries
56
+ self.retry_delay = retry_delay
57
+
58
+ async def async_chat(self, *args, **kwargs):
59
+ for attempt in range(self.max_retries):
60
+ try:
61
+ return await super().async_chat(*args, **kwargs)
62
+ except Exception as e:
63
+ if "429" in str(e) and attempt < self.max_retries - 1:
64
+ wait_time = self.retry_delay * (attempt + 1) # Exponential backoff
65
+ logger.warning(f"Rate limited. Retrying in {wait_time} seconds...")
66
+ time.sleep(wait_time)
67
+ continue
68
+ raise # Re-raise the exception if it's not a 429 or we're out of retries
69
+
70
+
71
+ # Replace the model initialization with our retrying version
72
+ model = RetryingHfApiModel(
73
  max_tokens=2096,
74
  temperature=0.5,
75
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
76
  custom_role_conversions=None,
77
+ max_retries=3, # Will try up to 3 times
78
+ retry_delay=2, # Start with 2 second delay, will increase with each retry
79
  )
80
 
 
81
  # Import tool from Hub
82
  image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
83