Spaces:
Sleeping
Sleeping
Louis Delmas
committed on
Commit
·
d206ca1
1
Parent(s):
ca393b2
feat: 🎸 generate random sound agent tool
Browse files- .gitignore +3 -1
- Gradio_UI.py +3 -1
- app.py +44 -34
- requirements.txt +3 -0
.gitignore
CHANGED
@@ -1 +1,3 @@
|
|
1 |
-
venv
|
|
|
|
|
|
1 |
+
venv
|
2 |
+
__pycache__
|
3 |
+
.gradio
|
Gradio_UI.py
CHANGED
@@ -154,8 +154,10 @@ def stream_to_gradio(
|
|
154 |
yield message
|
155 |
|
156 |
final_answer = step_log # Last log is the run's final_answer
|
157 |
-
final_answer = handle_agent_output_types(final_answer)
|
158 |
|
|
|
|
|
|
|
159 |
if isinstance(final_answer, AgentText):
|
160 |
yield gr.ChatMessage(
|
161 |
role="assistant",
|
|
|
154 |
yield message
|
155 |
|
156 |
final_answer = step_log # Last log is the run's final_answer
|
|
|
157 |
|
158 |
+
if not isinstance(final_answer, (AgentText, AgentImage, AgentAudio)):
|
159 |
+
final_answer = handle_agent_output_types(final_answer)
|
160 |
+
|
161 |
if isinstance(final_answer, AgentText):
|
162 |
yield gr.ChatMessage(
|
163 |
role="assistant",
|
app.py
CHANGED
@@ -1,62 +1,72 @@
|
|
1 |
-
from smolagents import CodeAgent,
|
2 |
-
import
|
3 |
-
import requests
|
4 |
-
import pytz
|
5 |
import yaml
|
6 |
from tools.final_answer import FinalAnswerTool
|
|
|
|
|
|
|
7 |
|
8 |
from Gradio_UI import GradioUI
|
9 |
|
10 |
-
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
11 |
@tool
|
12 |
-
def
|
13 |
-
|
14 |
-
"""A tool that does nothing yet
|
15 |
-
Args:
|
16 |
-
arg1: the first argument
|
17 |
-
arg2: the second argument
|
18 |
-
"""
|
19 |
-
return "What magic will you build ?"
|
20 |
|
21 |
-
@tool
|
22 |
-
def get_current_time_in_timezone(timezone: str) -> str:
|
23 |
-
"""A tool that fetches the current local time in a specified timezone.
|
24 |
Args:
|
25 |
-
|
26 |
"""
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
final_answer = FinalAnswerTool()
|
38 |
-
web_search = DuckDuckGoSearchTool()
|
39 |
|
40 |
-
|
41 |
-
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
|
42 |
|
43 |
model = HfApiModel(
|
44 |
max_tokens=2096,
|
45 |
temperature=0.5,
|
46 |
-
model_id=
|
47 |
custom_role_conversions=None,
|
48 |
)
|
49 |
|
50 |
-
|
51 |
-
# Import tool from Hub
|
52 |
-
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
|
53 |
-
|
54 |
with open("prompts.yaml", 'r') as stream:
|
55 |
prompt_templates = yaml.safe_load(stream)
|
56 |
|
57 |
agent = CodeAgent(
|
58 |
model=model,
|
59 |
-
tools=[final_answer,
|
60 |
max_steps=6,
|
61 |
verbosity_level=1,
|
62 |
grammar=None,
|
|
|
1 |
+
from smolagents import CodeAgent,HfApiModel,tool
|
2 |
+
import torch
|
|
|
|
|
3 |
import yaml
|
4 |
from tools.final_answer import FinalAnswerTool
|
5 |
+
import numpy as np
|
6 |
+
import random
|
7 |
+
from scipy.signal import square, sawtooth
|
8 |
|
9 |
from Gradio_UI import GradioUI
|
10 |
|
|
|
11 |
@tool
|
12 |
+
def generate_random_sound(duration: float = 1.0) -> torch.Tensor:
    """Generates a random sound with varying frequency and waveform and returns a torch tensor.

    A waveform (sine, square, sawtooth or triangle) and a frequency between
    100 and 10000 Hz are picked at random, an exponential fade-out is applied
    and the result is peak-normalized.

    Args:
        duration: Length of the sound in seconds (default: 1.0)

    Returns:
        A 1-D float32 tensor of samples generated at 44100 Hz.

    Raises:
        ValueError: If duration is not positive or too short to produce at least one sample.
    """
    # It seems the playback is about 3x slower, so the duration is shortened
    # to compensate.
    # NOTE(review): presumably the consumer plays the tensor back at a lower
    # sample rate than the 44100 Hz used here -- confirm, and align the rates
    # instead of scaling the duration.
    adjusted_duration = duration / 3

    sample_rate = 44100
    num_samples = int(sample_rate * adjusted_duration)
    if num_samples <= 0:
        # Without this guard the normalization below fails with an opaque
        # "zero-size array" error from NumPy.
        raise ValueError(
            f"duration must be long enough to produce at least one sample, got {duration!r}"
        )
    t = np.linspace(0, adjusted_duration, num_samples, endpoint=False)

    # Random frequency between 100 and 10000 Hz
    frequency = random.uniform(100, 10000)

    # Random waveform selection
    waveform = random.choice(['sine', 'square', 'sawtooth', 'triangle'])

    phase = 2 * np.pi * frequency * t
    if waveform == 'sine':
        signal = np.sin(phase)
    elif waveform == 'square':
        signal = square(phase)
    elif waveform == 'sawtooth':
        signal = sawtooth(phase)
    else:  # triangle
        signal = sawtooth(phase, width=0.5)  # Triangle is a symmetric sawtooth

    # Apply an exponential fade out, normalized over the (adjusted) duration
    fade = np.exp(-3 * t / adjusted_duration)
    signal = signal * fade

    # Normalize to prevent clipping. An all-zero signal (e.g. a single sine
    # sample taken at t=0) is left untouched to avoid a 0/0 division -> NaN.
    peak = np.max(np.abs(signal))
    if peak > 0:
        signal = signal / peak

    # Convert to torch tensor
    return torch.from_numpy(signal.astype(np.float32))
|
52 |
|
53 |
final_answer = FinalAnswerTool()
|
|
|
54 |
|
55 |
+
model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
|
|
|
56 |
|
57 |
model = HfApiModel(
|
58 |
max_tokens=2096,
|
59 |
temperature=0.5,
|
60 |
+
model_id=model_id,
|
61 |
custom_role_conversions=None,
|
62 |
)
|
63 |
|
|
|
|
|
|
|
|
|
64 |
with open("prompts.yaml", 'r') as stream:
|
65 |
prompt_templates = yaml.safe_load(stream)
|
66 |
|
67 |
agent = CodeAgent(
|
68 |
model=model,
|
69 |
+
tools=[final_answer, generate_random_sound],
|
70 |
max_steps=6,
|
71 |
verbosity_level=1,
|
72 |
grammar=None,
|
requirements.txt
CHANGED
@@ -3,3 +3,6 @@ smolagents
|
|
3 |
requests
|
4 |
duckduckgo_search
|
5 |
pandas
|
|
|
|
|
|
|
|
3 |
requests
|
4 |
duckduckgo_search
|
5 |
pandas
|
6 |
+
torch
|
7 |
+
scipy
|
8 |
+
smolagents[audio]
|