SergeyO7 committed on
Commit
5b72b9c
·
verified ·
1 Parent(s): ec01a22

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +21 -12
agent.py CHANGED
@@ -1,6 +1,7 @@
1
  from smolagents import CodeAgent, LiteLLMModel, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool #, HfApiModel, OpenAIServerModel
2
  import asyncio
3
  import os
 
4
  import yaml
5
  from PIL import Image
6
  import requests
@@ -35,23 +36,33 @@ import whisper
35
  # return f"Error performing Google search: {str(e)}"
36
 
37
  @tool
38
- def ImageAnalysisTool(image_path: str) -> str:
39
- """Tool for analyzing images using computer vision
40
  Args:
41
- image_path (str): Path to image file
42
  Returns:
43
- str: Image description
44
  """
 
 
 
 
 
 
45
 
46
  headers = {
47
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
48
  }
49
- response = requests.get(image_path,headers=headers)
50
- image = Image.open(BytesIO(response.content)).convert("RGB")
 
 
 
 
51
 
52
  model = LiteLLMModel(
53
  model_id="gemini/gemini-2.0-flash",
54
- api_key= os.environ.get("GEMINI_KEY"),
55
  max_tokens=8192
56
  )
57
 
@@ -63,10 +74,8 @@ def ImageAnalysisTool(image_path: str) -> str:
63
  )
64
 
65
  response = agent.run(
66
- """
67
- Describe in details the chess position you see in the image.
68
- """,
69
- images=image
70
  )
71
 
72
  return f"The image description: '{response}'"
 
1
  from smolagents import CodeAgent, LiteLLMModel, tool, load_tool, DuckDuckGoSearchTool, WikipediaSearchTool #, HfApiModel, OpenAIServerModel
2
  import asyncio
3
  import os
4
+ import re
5
  import yaml
6
  from PIL import Image
7
  import requests
 
36
  # return f"Error performing Google search: {str(e)}"
37
 
38
  @tool
39
+ def ImageAnalysisTool(question: str) -> str:
40
+ """Tool for analyzing images mentioned in the question.
41
  Args:
42
+ question (str): The question text which may contain an image URL.
43
  Returns:
44
+ str: Image description or error message.
45
  """
46
+ # Extract URL from question using regex
47
+ url_pattern = r'https?://\S+'
48
+ match = re.search(url_pattern, question)
49
+ if not match:
50
+ return "No image URL found in the question."
51
+ image_url = match.group(0)
52
 
53
  headers = {
54
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
55
  }
56
+ try:
57
+ response = requests.get(image_url, headers=headers)
58
+ response.raise_for_status()
59
+ image = Image.open(BytesIO(response.content)).convert("RGB")
60
+ except Exception as e:
61
+ return f"Error fetching image: {e}"
62
 
63
  model = LiteLLMModel(
64
  model_id="gemini/gemini-2.0-flash",
65
+ api_key=os.environ.get("GEMINI_KEY"),
66
  max_tokens=8192
67
  )
68
 
 
74
  )
75
 
76
  response = agent.run(
77
+ "Describe in details the chess position you see in the image.",
78
+ images=[image]
 
 
79
  )
80
 
81
  return f"The image description: '{response}'"