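"""Voice-enabled smolagents demo.

Transcribes audio input with the Hugging Face Inference API (Whisper) and
feeds it to a CodeAgent served through a Gradio UI. Expects HF_TOKEN in the
environment or a .env file; assumes a GradioUI whose launch() accepts a
speech2text_func keyword.
"""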
import io
import os

import soundfile as sf
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, GradioUI, HfApiModel

load_dotenv()


def convert_data_to_audio_filelike(your_input_tuple):
    """Convert a (sample_rate, np.ndarray) tuple to a BytesIO WAV file."""
    sample_rate, audio_data = your_input_tuple
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sample_rate, format="WAV")
    buffer.seek(0)
    return buffer


def speech2text_func(data, model: str = "openai/whisper-small.en") -> str:
    """Transcribe audio (raw bytes or a Gradio (sample_rate, array) tuple) to text."""
    if isinstance(data, tuple):
        buffer = convert_data_to_audio_filelike(data)
        data = buffer.read()
    client = InferenceClient(
        provider="hf-inference",
        api_key=os.getenv("HF_TOKEN"),
    )
    return client.automatic_speech_recognition(data, model=model).text


def get_tools():
    """Return the agent's tool list and whether to add smolagents' base tools."""
    add_base_tools = True
    tools_list = []  # no custom tools yet; append Tool instances here to extend the agent
    return tools_list, add_base_tools


if __name__ == "__main__":
    tools_list, add_base_tools = get_tools()
    model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", provider=None)
    agent = CodeAgent(
        tools=tools_list,
        model=model,
        add_base_tools=add_base_tools,
        additional_authorized_imports=["web_search"],
    )
    # Launch the Gradio UI, wiring in the speech-to-text function for voice input.
    GradioUI(agent).launch(speech2text_func=speech2text_func)