Spaces:
Sleeping
Sleeping
Enhance error handling and add retry mechanism for API calls
Browse files
- Implemented RetryingHfApiModel with exponential backoff for rate-limited API requests
- Updated Gradio UI stream_to_gradio function with improved error handling and token tracking
- Added logging support for better error tracking and debugging
- Gradio_UI.py +21 -49
- app.py +32 -2
Gradio_UI.py
CHANGED
@@ -123,56 +123,28 @@ def pull_messages_from_step(
|
|
123 |
yield gr.ChatMessage(role="assistant", content="-----")
|
124 |
|
125 |
|
126 |
-
def stream_to_gradio(
|
127 |
-
agent
|
128 |
-
task: str,
|
129 |
-
reset_agent_memory: bool = False,
|
130 |
-
additional_args: Optional[dict] = None,
|
131 |
-
):
|
132 |
-
"""Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
|
133 |
-
if not _is_package_available("gradio"):
|
134 |
-
raise ModuleNotFoundError(
|
135 |
-
"Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
|
136 |
-
)
|
137 |
-
import gradio as gr
|
138 |
-
|
139 |
total_input_tokens = 0
|
140 |
total_output_tokens = 0
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
if isinstance(final_answer, AgentText):
|
160 |
-
yield gr.ChatMessage(
|
161 |
-
role="assistant",
|
162 |
-
content=f"**Final answer:**\n{final_answer.to_string()}\n",
|
163 |
-
)
|
164 |
-
elif isinstance(final_answer, AgentImage):
|
165 |
-
yield gr.ChatMessage(
|
166 |
-
role="assistant",
|
167 |
-
content={"path": final_answer.to_string(), "mime_type": "image/png"},
|
168 |
-
)
|
169 |
-
elif isinstance(final_answer, AgentAudio):
|
170 |
-
yield gr.ChatMessage(
|
171 |
-
role="assistant",
|
172 |
-
content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
|
173 |
-
)
|
174 |
-
else:
|
175 |
-
yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
|
176 |
|
177 |
|
178 |
class GradioUI:
|
@@ -293,4 +265,4 @@ class GradioUI:
|
|
293 |
demo.launch(debug=True, share=True, **kwargs)
|
294 |
|
295 |
|
296 |
-
__all__ = ["stream_to_gradio", "GradioUI"]
|
|
|
123 |
yield gr.ChatMessage(role="assistant", content="-----")
|
124 |
|
125 |
|
126 |
+
def stream_to_gradio(agent, task: str, reset_agent_memory: bool = True):
    """Run *agent* on *task* and stream its messages to a Gradio chat.

    Args:
        agent: Agent exposing ``chat(task, reset_memory=...)`` (a generator
            of messages) and a ``model`` that may report token counts via
            ``last_input_token_count`` / ``last_output_token_count``.
        task: The user task handed to the agent.
        reset_agent_memory: Whether to clear the agent's memory before running.

    Yields:
        The agent's messages unchanged; on failure, a single "⚠️ ..." string
        describing the error instead of raising into the Gradio callback.
    """
    total_input_tokens = 0
    total_output_tokens = 0

    try:
        for msg in agent.chat(task, reset_memory=reset_agent_memory):
            # Token counts are optional on the model; treat a missing
            # attribute or a None value as 0 so accounting never breaks
            # the stream.
            if hasattr(agent.model, 'last_input_token_count'):
                total_input_tokens += agent.model.last_input_token_count or 0

            if hasattr(agent.model, 'last_output_token_count'):
                total_output_tokens += agent.model.last_output_token_count or 0

            yield msg

    except Exception as e:
        error_msg = f"Error during chat: {str(e)}"
        # Look the logger up here instead of relying on a module-level
        # `logger` global: none is defined in Gradio_UI.py, so the previous
        # code raised NameError on the error path, hiding the real exception.
        logging.getLogger(__name__).error(error_msg)
        yield f"⚠️ {error_msg}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
|
150 |
class GradioUI:
|
|
|
265 |
demo.launch(debug=True, share=True, **kwargs)
|
266 |
|
267 |
|
268 |
+
__all__ = ["stream_to_gradio", "GradioUI"]
|
app.py
CHANGED
@@ -3,11 +3,16 @@ import datetime
|
|
3 |
import requests
|
4 |
import pytz
|
5 |
import yaml
|
|
|
|
|
6 |
from tools.final_answer import FinalAnswerTool
|
7 |
from tools.text_analysis import analyze_text
|
8 |
|
9 |
from Gradio_UI import GradioUI
|
10 |
|
|
|
|
|
|
|
11 |
|
12 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
13 |
@tool
|
@@ -40,14 +45,39 @@ def get_current_time_in_timezone(timezone: str) -> str:
|
|
40 |
|
41 |
|
42 |
final_answer = FinalAnswerTool()
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
max_tokens=2096,
|
45 |
temperature=0.5,
|
46 |
model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
|
47 |
custom_role_conversions=None,
|
|
|
|
|
48 |
)
|
49 |
|
50 |
-
|
51 |
# Import tool from Hub
|
52 |
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
|
53 |
|
|
|
3 |
import requests
|
4 |
import pytz
|
5 |
import yaml
|
6 |
+
import time
|
7 |
+
import logging
|
8 |
from tools.final_answer import FinalAnswerTool
|
9 |
from tools.text_analysis import analyze_text
|
10 |
|
11 |
from Gradio_UI import GradioUI
|
12 |
|
13 |
+
# Set up logging
|
14 |
+
logger = logging.getLogger(__name__)
|
15 |
+
|
16 |
|
17 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
18 |
@tool
|
|
|
45 |
|
46 |
|
47 |
final_answer = FinalAnswerTool()
|
48 |
+
|
49 |
+
|
50 |
+
class RetryingHfApiModel(HfApiModel):
    """HfApiModel that retries rate-limited (HTTP 429) requests.

    Waits ``retry_delay * 2 ** attempt`` seconds between attempts (true
    exponential backoff: 2s, 4s, 8s, ...) and re-raises any non-429 error
    immediately.
    """

    def __init__(self, max_retries=3, retry_delay=2, **kwargs):
        super().__init__(**kwargs)
        self.max_retries = max_retries  # total attempts before giving up
        self.retry_delay = retry_delay  # base delay in seconds

    async def async_chat(self, *args, **kwargs):
        import asyncio  # local import: avoids touching the module's import block

        for attempt in range(self.max_retries):
            try:
                return await super().async_chat(*args, **kwargs)
            except Exception as e:
                # NOTE(review): detecting 429 by substring match on the error
                # text is fragile — check a status-code attribute if the HF
                # client exposes one.
                if "429" in str(e) and attempt < self.max_retries - 1:
                    # True exponential backoff; the previous
                    # `retry_delay * (attempt + 1)` was linear despite the
                    # "exponential backoff" comment.
                    wait_time = self.retry_delay * (2 ** attempt)
                    logger.warning(f"Rate limited. Retrying in {wait_time} seconds...")
                    # Never block the event loop inside an async method:
                    # time.sleep() here would stall every other task.
                    await asyncio.sleep(wait_time)
                    continue
                raise  # not a 429, or retries exhausted
|
69 |
+
|
70 |
+
|
71 |
+
# Replace the model initialization with our retrying version
model = RetryingHfApiModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,  # NOTE(review): 2096 looks like a typo for 2048 — confirm
    temperature=0.5,
    custom_role_conversions=None,
    max_retries=3,  # up to 3 attempts total
    retry_delay=2,  # base delay in seconds; grows with each retry
)
|
80 |
|
|
|
81 |
# Import tool from Hub
|
82 |
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
|
83 |
|