Spaces:
Running
Running
Update agent.py
Browse files
agent.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from smolagents import CodeAgent, LiteLLMModel, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool #, HfApiModel, OpenAIServerModel
|
2 |
import asyncio
|
3 |
import os
|
|
|
4 |
import yaml
|
5 |
from PIL import Image
|
6 |
import requests
|
@@ -35,23 +36,33 @@ import whisper
|
|
35 |
# return f"Error performing Google search: {str(e)}"
|
36 |
|
37 |
@tool
|
38 |
-
def ImageAnalysisTool(
|
39 |
-
"""Tool for analyzing images
|
40 |
Args:
|
41 |
-
|
42 |
Returns:
|
43 |
-
str: Image description
|
44 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
headers = {
|
47 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
|
48 |
}
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
51 |
|
52 |
model = LiteLLMModel(
|
53 |
model_id="gemini/gemini-2.0-flash",
|
54 |
-
api_key=
|
55 |
max_tokens=8192
|
56 |
)
|
57 |
|
@@ -63,10 +74,8 @@ def ImageAnalysisTool(image_path: str) -> str:
|
|
63 |
)
|
64 |
|
65 |
response = agent.run(
|
66 |
-
""
|
67 |
-
|
68 |
-
""",
|
69 |
-
images=image
|
70 |
)
|
71 |
|
72 |
return f"The image description: '{response}'"
|
|
|
1 |
from smolagents import CodeAgent, LiteLLMModel, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool #, HfApiModel, OpenAIServerModel
|
2 |
import asyncio
|
3 |
import os
|
4 |
+
import re
|
5 |
import yaml
|
6 |
from PIL import Image
|
7 |
import requests
|
|
|
36 |
# return f"Error performing Google search: {str(e)}"
|
37 |
|
38 |
@tool
|
39 |
+
def ImageAnalysisTool(question: str) -> str:
|
40 |
+
"""Tool for analyzing images mentioned in the question.
|
41 |
Args:
|
42 |
+
question (str): The question text which may contain an image URL.
|
43 |
Returns:
|
44 |
+
str: Image description or error message.
|
45 |
"""
|
46 |
+
# Extract URL from question using regex
|
47 |
+
url_pattern = r'https?://\S+'
|
48 |
+
match = re.search(url_pattern, question)
|
49 |
+
if not match:
|
50 |
+
return "No image URL found in the question."
|
51 |
+
image_url = match.group(0)
|
52 |
|
53 |
headers = {
|
54 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
|
55 |
}
|
56 |
+
try:
|
57 |
+
response = requests.get(image_url, headers=headers)
|
58 |
+
response.raise_for_status()
|
59 |
+
image = Image.open(BytesIO(response.content)).convert("RGB")
|
60 |
+
except Exception as e:
|
61 |
+
return f"Error fetching image: {e}"
|
62 |
|
63 |
model = LiteLLMModel(
|
64 |
model_id="gemini/gemini-2.0-flash",
|
65 |
+
api_key=os.environ.get("GEMINI_KEY"),
|
66 |
max_tokens=8192
|
67 |
)
|
68 |
|
|
|
74 |
)
|
75 |
|
76 |
response = agent.run(
|
77 |
+
"Describe in details the chess position you see in the image.",
|
78 |
+
images=[image]
|
|
|
|
|
79 |
)
|
80 |
|
81 |
return f"The image description: '{response}'"
|