Louis Delmas committed on
Commit
d206ca1
·
1 Parent(s): ca393b2

feat: 🎸 generate random sound agent tool

Browse files
Files changed (4) hide show
  1. .gitignore +3 -1
  2. Gradio_UI.py +3 -1
  3. app.py +44 -34
  4. requirements.txt +3 -0
.gitignore CHANGED
@@ -1 +1,3 @@
1
- venv
 
 
 
1
+ venv
2
+ __pycache__
3
+ .gradio
Gradio_UI.py CHANGED
@@ -154,8 +154,10 @@ def stream_to_gradio(
154
  yield message
155
 
156
  final_answer = step_log # Last log is the run's final_answer
157
- final_answer = handle_agent_output_types(final_answer)
158
 
 
 
 
159
  if isinstance(final_answer, AgentText):
160
  yield gr.ChatMessage(
161
  role="assistant",
 
154
  yield message
155
 
156
  final_answer = step_log # Last log is the run's final_answer
 
157
 
158
+ if not isinstance(final_answer, (AgentText, AgentImage, AgentAudio)):
159
+ final_answer = handle_agent_output_types(final_answer)
160
+
161
  if isinstance(final_answer, AgentText):
162
  yield gr.ChatMessage(
163
  role="assistant",
app.py CHANGED
@@ -1,62 +1,72 @@
1
- from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
2
- import datetime
3
- import requests
4
- import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
 
 
 
7
 
8
  from Gradio_UI import GradioUI
9
 
10
- # Below is an example of a tool that does nothing. Amaze us with your creativity !
11
  @tool
12
- def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
13
- #Keep this format for the description / args / args description but feel free to modify the tool
14
- """A tool that does nothing yet
15
- Args:
16
- arg1: the first argument
17
- arg2: the second argument
18
- """
19
- return "What magic will you build ?"
20
 
21
- @tool
22
- def get_current_time_in_timezone(timezone: str) -> str:
23
- """A tool that fetches the current local time in a specified timezone.
24
  Args:
25
- timezone: A string representing a valid timezone (e.g., 'America/New_York').
26
  """
27
- try:
28
- # Create timezone object
29
- tz = pytz.timezone(timezone)
30
- # Get current time in that timezone
31
- local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
32
- return f"The current local time in {timezone} is: {local_time}"
33
- except Exception as e:
34
- return f"Error fetching time for timezone '{timezone}': {str(e)}"
 
 
 
 
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  final_answer = FinalAnswerTool()
38
- web_search = DuckDuckGoSearchTool()
39
 
40
- # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
41
- # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
42
 
43
  model = HfApiModel(
44
  max_tokens=2096,
45
  temperature=0.5,
46
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
47
  custom_role_conversions=None,
48
  )
49
 
50
-
51
- # Import tool from Hub
52
- image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
53
-
54
  with open("prompts.yaml", 'r') as stream:
55
  prompt_templates = yaml.safe_load(stream)
56
 
57
  agent = CodeAgent(
58
  model=model,
59
- tools=[final_answer, web_search], ## add your tools here (don't remove final answer)
60
  max_steps=6,
61
  verbosity_level=1,
62
  grammar=None,
 
1
+ from smolagents import CodeAgent,HfApiModel,tool
2
+ import torch
 
 
3
  import yaml
4
  from tools.final_answer import FinalAnswerTool
5
+ import numpy as np
6
+ import random
7
+ from scipy.signal import square, sawtooth
8
 
9
  from Gradio_UI import GradioUI
10
 
 
11
  @tool
12
+ def generate_random_sound(duration: float = 1.0) -> torch.Tensor:
13
+ """Generates a random sound with varying frequency and waveform and returns a torch tensor.
 
 
 
 
 
 
14
 
 
 
 
15
  Args:
16
+ duration: Length of the sound in seconds (default: 1.0)
17
  """
18
+ # It seems the playback is about 3x slower, so we'll adjust the duration
19
+ adjusted_duration = duration / 3
20
+
21
+ # Sample rate
22
+ sample_rate = 44100
23
+ num_samples = int(sample_rate * adjusted_duration)
24
+ t = np.linspace(0, adjusted_duration, num_samples, endpoint=False)
25
+
26
+ # Random frequency between 100 and 10000 Hz
27
+ frequency = random.uniform(100, 10000)
28
+
29
+ # Random waveform selection
30
+ waveform = random.choice(['sine', 'square', 'sawtooth', 'triangle'])
31
 
32
+ if waveform == 'sine':
33
+ signal = np.sin(2 * np.pi * frequency * t)
34
+ elif waveform == 'square':
35
+ signal = square(2 * np.pi * frequency * t)
36
+ elif waveform == 'sawtooth':
37
+ signal = sawtooth(2 * np.pi * frequency * t)
38
+ else: # triangle
39
+ signal = sawtooth(2 * np.pi * frequency * t, width=0.5) # Triangle is a symmetric sawtooth
40
+
41
+ # Apply fade out
42
+ fade = np.exp(-3 * t / adjusted_duration) # Normalize fade over duration
43
+ signal = signal * fade
44
+
45
+ # Normalize to prevent clipping
46
+ signal = signal / np.max(np.abs(signal))
47
+
48
+ # Convert to torch tensor
49
+ tensor_signal = torch.from_numpy(signal.astype(np.float32))
50
+
51
+ return tensor_signal
52
 
53
  final_answer = FinalAnswerTool()
 
54
 
55
+ model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
 
56
 
57
  model = HfApiModel(
58
  max_tokens=2096,
59
  temperature=0.5,
60
+ model_id=model_id,
61
  custom_role_conversions=None,
62
  )
63
 
 
 
 
 
64
  with open("prompts.yaml", 'r') as stream:
65
  prompt_templates = yaml.safe_load(stream)
66
 
67
  agent = CodeAgent(
68
  model=model,
69
+ tools=[final_answer, generate_random_sound],
70
  max_steps=6,
71
  verbosity_level=1,
72
  grammar=None,
requirements.txt CHANGED
@@ -3,3 +3,6 @@ smolagents
3
  requests
4
  duckduckgo_search
5
  pandas
 
 
 
 
3
  requests
4
  duckduckgo_search
5
  pandas
6
+ torch
7
+ scipy
8
+ smolagents[audio]