Spaces:
Running
Running
Update agent.py
Browse files
agent.py
CHANGED
@@ -35,44 +35,44 @@ import whisper
|
|
35 |
# except Exception as e:
|
36 |
# return f"Error performing Google search: {str(e)}"
|
37 |
|
38 |
-
|
39 |
-
def ImageAnalysisTool(question: str, model: LiteLLMModel) -> str:
|
40 |
-
"""Tool for analyzing images mentioned in the question.
|
41 |
-
Args:
|
42 |
-
question (str): The question text which may contain an image URL.
|
43 |
-
Returns:
|
44 |
-
str: Image description or error message.
|
45 |
-
"""
|
46 |
-
# Extract URL from question using regex
|
47 |
-
url_pattern = r'https?://\S+'
|
48 |
-
match = re.search(url_pattern, question)
|
49 |
-
if not match:
|
50 |
-
return "No image URL found in the question."
|
51 |
-
image_url = match.group(0)
|
52 |
-
|
53 |
-
headers = {
|
54 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
|
55 |
-
}
|
56 |
-
try:
|
57 |
-
response = requests.get(image_url, headers=headers)
|
58 |
-
response.raise_for_status()
|
59 |
-
image = Image.open(BytesIO(response.content)).convert("RGB")
|
60 |
-
except Exception as e:
|
61 |
-
return f"Error fetching image: {e}"
|
62 |
-
|
63 |
-
agent = CodeAgent(
|
64 |
-
tools=[],
|
65 |
-
model=model,
|
66 |
-
max_steps=10,
|
67 |
-
verbosity_level=2
|
68 |
-
)
|
69 |
-
|
70 |
-
response = agent.run(
|
71 |
-
"Describe in details the chess position you see in the image.",
|
72 |
-
images=[image]
|
73 |
-
)
|
74 |
-
|
75 |
-
return f"The image description: '{response}'"
|
76 |
|
77 |
@tool
|
78 |
def SpeechToTextTool(audio_path: str) -> str:
|
@@ -98,6 +98,7 @@ def youtube_transcript(url: str) -> str:
|
|
98 |
url: YouTube video url in ""
|
99 |
"""
|
100 |
video_id = url.partition("https://www.youtube.com/watch?v=")[2]
|
|
|
101 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
102 |
transcript_text = " ".join([item["text"] for item in transcript])
|
103 |
return {"youtube_transcript": transcript_text}
|
@@ -138,7 +139,7 @@ class MagAgent:
|
|
138 |
# GoogleSearchTool,
|
139 |
DuckDuckGoSearchTool(),
|
140 |
WikipediaSearchTool(),
|
141 |
-
ImageAnalysisTool,
|
142 |
SpeechToTextTool
|
143 |
# LocalFileAudioTool()
|
144 |
]
|
|
|
35 |
# except Exception as e:
|
36 |
# return f"Error performing Google search: {str(e)}"
|
37 |
|
38 |
+
#@tool
|
39 |
+
#def ImageAnalysisTool(question: str, model: LiteLLMModel) -> str:
|
40 |
+
# """Tool for analyzing images mentioned in the question.
|
41 |
+
# Args:
|
42 |
+
# question (str): The question text which may contain an image URL.
|
43 |
+
# Returns:
|
44 |
+
# str: Image description or error message.
|
45 |
+
# """
|
46 |
+
# # Extract URL from question using regex
|
47 |
+
# url_pattern = r'https?://\S+'
|
48 |
+
# match = re.search(url_pattern, question)
|
49 |
+
# if not match:
|
50 |
+
# return "No image URL found in the question."
|
51 |
+
# image_url = match.group(0)
|
52 |
+
#
|
53 |
+
# headers = {
|
54 |
+
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
|
55 |
+
# }
|
56 |
+
# try:
|
57 |
+
# response = requests.get(image_url, headers=headers)
|
58 |
+
# response.raise_for_status()
|
59 |
+
# image = Image.open(BytesIO(response.content)).convert("RGB")
|
60 |
+
# except Exception as e:
|
61 |
+
# return f"Error fetching image: {e}"
|
62 |
+
#
|
63 |
+
# agent = CodeAgent(
|
64 |
+
# tools=[],
|
65 |
+
# model=model,
|
66 |
+
# max_steps=10,
|
67 |
+
# verbosity_level=2
|
68 |
+
# )
|
69 |
+
#
|
70 |
+
# response = agent.run(
|
71 |
+
# "Describe in details the chess position you see in the image.",
|
72 |
+
# images=[image]
|
73 |
+
# )
|
74 |
+
#
|
75 |
+
# return f"The image description: '{response}'"
|
76 |
|
77 |
@tool
|
78 |
def SpeechToTextTool(audio_path: str) -> str:
|
|
|
98 |
url: YouTube video url in ""
|
99 |
"""
|
100 |
video_id = url.partition("https://www.youtube.com/watch?v=")[2]
|
101 |
+
|
102 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
103 |
transcript_text = " ".join([item["text"] for item in transcript])
|
104 |
return {"youtube_transcript": transcript_text}
|
|
|
139 |
# GoogleSearchTool,
|
140 |
DuckDuckGoSearchTool(),
|
141 |
WikipediaSearchTool(),
|
142 |
+
# ImageAnalysisTool,
|
143 |
SpeechToTextTool
|
144 |
# LocalFileAudioTool()
|
145 |
]
|