from typing import Any

from smolagents.tools import Tool
from smolagents.agent_types import AgentImage


class FinalAnswerTool(Tool):
    """Tool that hands the agent's final answer back to the user.

    String answers that look like image paths are wrapped in ``AgentImage``;
    every other value is passed through untouched.
    """

    name = "final_answer"
    description = "Provides the final answer to the user."
    inputs = {'answer': {'type': 'any', 'description': 'The final answer to return to the user.'}}
    output_type = "any"

    def __init__(self):
        super().__init__()

    def forward(self, answer: Any) -> Any:
        """Return the final answer, converting image paths where recognised.

        Args:
            answer: The value produced by the agent. May be any type.

        Returns:
            * ``answer`` itself when it is already an ``AgentImage``.
            * A string made of ``AgentImage(path).to_string()``, a blank
              line, and the remaining text, when a ``/tmp/gradio/...`` image
              path is embedded in a longer string.
            * ``AgentImage(...)`` when the string as a whole looks like an
              image path (contains ``/tmp/gradio/`` or ends with a known
              image extension).
            * ``answer`` unchanged in every other case.
        """
        # If it's already an AgentImage, return it directly.
        if isinstance(answer, AgentImage):
            return answer

        if isinstance(answer, str):
            # Kept function-local, as in the original, so the method does not
            # depend on any extra module-level import.
            import re

            # The embedded-path check must run BEFORE the generic
            # "'/tmp/gradio/' in answer" heuristic below: any string matched
            # by this regex also satisfies that heuristic, so the old order
            # made this branch unreachable and wrapped whole sentences in
            # AgentImage as if they were a file path.
            image_path_match = re.search(
                r'(/tmp/gradio/[^\s\n]+\.(?:png|jpg|jpeg|webp))', answer
            )
            if image_path_match:
                image_path = image_path_match.group(1)
                if image_path == answer.strip():
                    # The answer is just the path (possibly padded with
                    # whitespace): hand the clean path to AgentImage.
                    return AgentImage(image_path)
                # Path surrounded by other text: return both the image
                # reference and the text with the path removed.
                return f"{AgentImage(image_path).to_string()}\n\n{answer.replace(image_path, '')}"

            # Fallback heuristic for strings that look like an image path as
            # a whole (e.g. paths outside /tmp/gradio/, or gradio paths the
            # regex cannot match, such as ones containing spaces).
            if '/tmp/gradio/' in answer or answer.endswith(('.png', '.jpg', '.jpeg', '.webp')):
                return AgentImage(answer)

        # Return as is for all other cases.
        return answer