File size: 1,333 Bytes
a54a14b
a471a3b
a54a14b
 
a471a3b
a54a14b
 
a471a3b
 
c996bea
a471a3b
a54a14b
 
 
 
 
 
 
 
a471a3b
a54a14b
 
 
 
 
 
 
a471a3b
a54a14b
 
 
 
a471a3b
a54a14b
a471a3b
 
 
 
 
a54a14b
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import io
import os

import soundfile as sf
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, GradioUI, HfApiModel

load_dotenv()


def convert_data_to_audio_filelike(your_input_tuple):
    """Encode a ``(sample_rate, np.ndarray)`` pair as an in-memory WAV file.

    Args:
        your_input_tuple: Two-item sequence unpacked as
            ``(sample_rate, audio_data)``.

    Returns:
        An ``io.BytesIO`` holding the WAV-encoded audio, rewound to offset 0
        so it can be read immediately.
    """
    rate, samples = your_input_tuple
    wav_stream = io.BytesIO()
    sf.write(wav_stream, data=samples, samplerate=rate, format="WAV")
    # Rewind: writing leaves the cursor at the end of the stream.
    wav_stream.seek(0)
    return wav_stream


def speech2text_func(data, model: str = "openai/whisper-small.en") -> str:
    """Transcribe audio to text with a Hugging Face inference client.

    Args:
        data: Audio input. A tuple is treated as ``(sample_rate, samples)``
            and converted to WAV bytes first; any other value is handed to
            the client unchanged.
        model: Identifier of the speech-recognition model to query.

    Returns:
        The ``text`` attribute of the client's recognition result.
    """
    payload = data
    if isinstance(payload, tuple):
        # Raw samples must be serialized to WAV bytes before upload.
        payload = convert_data_to_audio_filelike(payload).read()

    # The token is read from the environment on every call.
    asr_client = InferenceClient(
        provider="hf-inference",
        api_key=os.getenv("HF_TOKEN"),
    )
    transcription = asr_client.automatic_speech_recognition(payload, model=model)
    return transcription.text


def get_tools():
    """Return the agent tool configuration as ``(tools_list, add_base_tools)``.

    Returns:
        A tuple of a new, empty list of custom tools and ``True``, the flag
        passed on as ``add_base_tools`` when the agent is built.
    """
    # A fresh list is built per call so callers can extend it safely.
    return [], True


if __name__ == "__main__":
    # Script entry point: build the model and agent, then serve the Gradio UI.
    llm = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", provider=None)
    custom_tools, with_base_tools = get_tools()

    # NOTE(review): "web_search" looks like a tool name rather than an
    # importable module -- confirm this authorized-import entry is intended.
    code_agent = CodeAgent(
        model=llm,
        tools=custom_tools,
        add_base_tools=with_base_tools,
        additional_authorized_imports=["web_search"],
    )

    # Pass the transcription callback through to the UI launcher.
    ui = GradioUI(code_agent)
    ui.launch(speech2text_func=speech2text_func)