Spaces:
Sleeping
Sleeping
Enhance error handling and add retry mechanism for API calls
Browse files
- Implemented RetryingHfApiModel with exponential backoff for rate-limited API requests
- Updated Gradio UI stream_to_gradio function with improved error handling and token tracking
- Added logging support for better error tracking and debugging
- Gradio_UI.py +21 -49
- app.py +32 -2
Gradio_UI.py
CHANGED
@@ -123,56 +123,28 @@ def pull_messages_from_step(
|
|
123 |
yield gr.ChatMessage(role="assistant", content="-----")
|
124 |
|
125 |
|
126 |
-
def stream_to_gradio(
|
127 |
-
agent
|
128 |
-
task: str,
|
129 |
-
reset_agent_memory: bool = False,
|
130 |
-
additional_args: Optional[dict] = None,
|
131 |
-
):
|
132 |
-
"""Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
|
133 |
-
if not _is_package_available("gradio"):
|
134 |
-
raise ModuleNotFoundError(
|
135 |
-
"Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
|
136 |
-
)
|
137 |
-
import gradio as gr
|
138 |
-
|
139 |
total_input_tokens = 0
|
140 |
total_output_tokens = 0
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
if isinstance(final_answer, AgentText):
|
160 |
-
yield gr.ChatMessage(
|
161 |
-
role="assistant",
|
162 |
-
content=f"**Final answer:**\n{final_answer.to_string()}\n",
|
163 |
-
)
|
164 |
-
elif isinstance(final_answer, AgentImage):
|
165 |
-
yield gr.ChatMessage(
|
166 |
-
role="assistant",
|
167 |
-
content={"path": final_answer.to_string(), "mime_type": "image/png"},
|
168 |
-
)
|
169 |
-
elif isinstance(final_answer, AgentAudio):
|
170 |
-
yield gr.ChatMessage(
|
171 |
-
role="assistant",
|
172 |
-
content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
|
173 |
-
)
|
174 |
-
else:
|
175 |
-
yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
|
176 |
|
177 |
|
178 |
class GradioUI:
|
@@ -293,4 +265,4 @@ class GradioUI:
|
|
293 |
demo.launch(debug=True, share=True, **kwargs)
|
294 |
|
295 |
|
296 |
-
__all__ = ["stream_to_gradio", "GradioUI"]
|
|
|
123 |
yield gr.ChatMessage(role="assistant", content="-----")
|
124 |
|
125 |
|
126 |
+
def stream_to_gradio(agent, task: str, reset_agent_memory: bool = True):
    """Run *agent* on *task* and stream its messages to a Gradio chat.

    Args:
        agent: Agent exposing ``chat(task, reset_memory=...)`` (a generator
            of messages) and a ``model`` that may report token counts via
            ``last_input_token_count`` / ``last_output_token_count``.
        task: The user task handed to the agent.
        reset_agent_memory: Whether to clear the agent's memory before running.

    Yields:
        The agent's messages unchanged; on failure, a single "⚠️ ..." string
        describing the error instead of raising into the Gradio callback.
    """
    total_input_tokens = 0
    total_output_tokens = 0

    try:
        for msg in agent.chat(task, reset_memory=reset_agent_memory):
            # Token counts are optional on the model; treat a missing
            # attribute or a None value as 0 so accounting never breaks
            # the stream.
            if hasattr(agent.model, 'last_input_token_count'):
                total_input_tokens += agent.model.last_input_token_count or 0

            if hasattr(agent.model, 'last_output_token_count'):
                total_output_tokens += agent.model.last_output_token_count or 0

            yield msg

    except Exception as e:
        error_msg = f"Error during chat: {str(e)}"
        # Look the logger up here instead of relying on a module-level
        # `logger` global: none is defined in Gradio_UI.py, so the previous
        # code raised NameError on the error path, hiding the real exception.
        logging.getLogger(__name__).error(error_msg)
        yield f"⚠️ {error_msg}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
|
150 |
class GradioUI:
|
|
|
265 |
demo.launch(debug=True, share=True, **kwargs)
|
266 |
|
267 |
|
268 |
+
__all__ = ["stream_to_gradio", "GradioUI"]
|
app.py
CHANGED
@@ -3,11 +3,16 @@ import datetime
|
|
3 |
import requests
|
4 |
import pytz
|
5 |
import yaml
|
|
|
|
|
6 |
from tools.final_answer import FinalAnswerTool
|
7 |
from tools.text_analysis import analyze_text
|
8 |
|
9 |
from Gradio_UI import GradioUI
|
10 |
|
|
|
|
|
|
|
11 |
|
12 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
13 |
@tool
|
@@ -40,14 +45,39 @@ def get_current_time_in_timezone(timezone: str) -> str:
|
|
40 |
|
41 |
|
42 |
final_answer = FinalAnswerTool()
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
max_tokens=2096,
|
45 |
temperature=0.5,
|
46 |
model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
|
47 |
custom_role_conversions=None,
|
|
|
|
|
48 |
)
|
49 |
|
50 |
-
|
51 |
# Import tool from Hub
|
52 |
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
|
53 |
|
|
|
3 |
import requests
|
4 |
import pytz
|
5 |
import yaml
|
6 |
+
import time
|
7 |
+
import logging
|
8 |
from tools.final_answer import FinalAnswerTool
|
9 |
from tools.text_analysis import analyze_text
|
10 |
|
11 |
from Gradio_UI import GradioUI
|
12 |
|
13 |
+
# Set up logging
|
14 |
+
logger = logging.getLogger(__name__)
|
15 |
+
|
16 |
|
17 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
18 |
@tool
|
|
|
45 |
|
46 |
|
47 |
final_answer = FinalAnswerTool()
|
48 |
+
|
49 |
+
|
50 |
+
class RetryingHfApiModel(HfApiModel):
    """HfApiModel that retries rate-limited (HTTP 429) requests.

    Waits ``retry_delay * 2 ** attempt`` seconds between attempts (true
    exponential backoff: 2s, 4s, 8s, ...) and re-raises any non-429 error
    immediately.
    """

    def __init__(self, max_retries=3, retry_delay=2, **kwargs):
        super().__init__(**kwargs)
        self.max_retries = max_retries  # total attempts before giving up
        self.retry_delay = retry_delay  # base delay in seconds

    async def async_chat(self, *args, **kwargs):
        import asyncio  # local import: avoids touching the module's import block

        for attempt in range(self.max_retries):
            try:
                return await super().async_chat(*args, **kwargs)
            except Exception as e:
                # NOTE(review): detecting 429 by substring match on the error
                # text is fragile — check a status-code attribute if the HF
                # client exposes one.
                if "429" in str(e) and attempt < self.max_retries - 1:
                    # True exponential backoff; the previous
                    # `retry_delay * (attempt + 1)` was linear despite the
                    # "exponential backoff" comment.
                    wait_time = self.retry_delay * (2 ** attempt)
                    logger.warning(f"Rate limited. Retrying in {wait_time} seconds...")
                    # Never block the event loop inside an async method:
                    # time.sleep() here would stall every other task.
                    await asyncio.sleep(wait_time)
                    continue
                raise  # not a 429, or retries exhausted
|
69 |
+
|
70 |
+
|
71 |
+
# Replace the model initialization with our retrying version
model = RetryingHfApiModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,  # NOTE(review): 2096 looks like a typo for 2048 — confirm
    temperature=0.5,
    custom_role_conversions=None,
    max_retries=3,  # up to 3 attempts total
    retry_delay=2,  # base delay in seconds; grows with each retry
)
|
80 |
|
|
|
81 |
# Import tool from Hub
|
82 |
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
|
83 |
|