from typing import Any
from smolagents.tools import Tool
from smolagents.agent_types import AgentImage

class FinalAnswerTool(Tool):
    """Tool that hands the agent's final answer back to the user.

    String answers that look like image file paths are wrapped in
    ``AgentImage``; every other answer is returned unchanged.
    """

    name = "final_answer"
    description = "Provides the final answer to the user."
    inputs = {'answer': {'type': 'any', 'description': 'The final answer to return to the user.'}}
    output_type = "any"

    def __init__(self):
        super().__init__()

    def forward(self, answer: Any) -> Any:
        """Return the final answer, converting image paths to ``AgentImage``.

        Resolution order:

        1. An ``AgentImage`` or any non-string value is returned as is.
        2. A string containing a ``/tmp/gradio/...`` path that ends in an
           image extension: if that path is the whole answer (ignoring
           surrounding whitespace) an ``AgentImage`` for it is returned;
           otherwise a string made of ``AgentImage(path).to_string()``, a
           blank line, and the remaining text is returned.
        3. Any other string that contains ``/tmp/gradio/`` or ends with an
           image extension is treated as a path and wrapped in
           ``AgentImage``.
        4. All other strings are returned unchanged.

        Args:
            answer: The value the agent wants to return to the user.

        Returns:
            An ``AgentImage``, a string combining the image's string form
            with the accompanying text, or ``answer`` itself.
        """
        # Already an image, or not a string at all: nothing to convert.
        if isinstance(answer, AgentImage) or not isinstance(answer, str):
            return answer

        # Kept function-local, as in the original, so the method carries its
        # own dependency and the file's import block stays untouched.
        import re

        image_suffixes = ('.png', '.jpg', '.jpeg', '.webp')
        # Surrounding whitespace/newlines are never part of a real path.
        candidate = answer.strip()

        # Look for an embedded gradio image path FIRST. Previously the plain
        # "'/tmp/gradio/' in answer" test ran before this search and swallowed
        # every string the pattern could match, so text that merely mentioned
        # a path was passed whole to AgentImage as if it were a file path.
        # The trailing (?!\w) keeps "x.pngfoo" from being cut at ".png".
        embedded = re.search(
            r'(/tmp/gradio/[^\s\n]+\.(?:png|jpg|jpeg|webp))(?!\w)', candidate
        )
        if embedded:
            image_path = embedded.group(1)
            remaining_text = candidate.replace(image_path, '').strip()
            if not remaining_text:
                # The answer was nothing but the path: return the image itself.
                return AgentImage(image_path)
            # Return both the image (in string form) and the text around it.
            return f"{AgentImage(image_path).to_string()}\n\n{remaining_text}"

        # Whole-string heuristics: a gradio temp file (any extension) or
        # anything ending in a known image extension is treated as a path.
        if '/tmp/gradio/' in candidate or candidate.endswith(image_suffixes):
            return AgentImage(candidate)

        # Return as is for all other cases.
        return answer