File size: 1,325 Bytes
2be798f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
# Build the speech-recognition pipeline once at import time so every request
# reuses the already-loaded model.
# Half precision is only requested when a CUDA device is actually used: on CPU
# many kernels are missing or very slow for float16, so float32 is used there.
_cuda_available = torch.cuda.is_available()
pipe = pipeline(
    "automatic-speech-recognition",
    model="antony66/whisper-large-v3-russian",
    torch_dtype=torch.float16 if _cuda_available else torch.float32,
    device=0 if _cuda_available else -1,
)
def transcribe(audio_data):
    """Transcribe an audio clip with the module-level ASR pipeline.

    Args:
        audio_data: Either a path to an audio file (``str``) or a two-element
            tuple holding a sample rate and a sample array. Gradio's numpy
            audio format is ``(sample_rate, data)``; the reversed
            ``(data, sample_rate)`` order is accepted as well.

    Returns:
        The recognized text, or a Russian error message when no audio was
        received or the input type is not supported.
    """
    print(f"Received audio data: {audio_data}")
    if audio_data is None:
        return "Ошибка: не получены аудиоданные"

    if isinstance(audio_data, tuple):
        first, second = audio_data
        # The scalar element is the sample rate, whichever position it is in.
        if np.ndim(first) == 0:
            sample_rate, audio_array = first, second
        else:
            audio_array, sample_rate = first, second
    elif isinstance(audio_data, str):
        # Decode and resample the file to 16 kHz before handing it over.
        audio_array, sample_rate = librosa.load(audio_data, sr=16000)
    else:
        return "Ошибка: неизвестный формат аудиоданных"

    # A unique scratch file per call keeps concurrent requests from
    # overwriting each other's audio; it is always removed afterwards.
    fd, wav_file = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        sf.write(wav_file, audio_array, int(sample_rate))
        result = pipe(wav_file)
    finally:
        os.remove(wav_file)
    return result["text"]
# Assemble the web UI: an audio input, a text box for the result and a button
# that runs the transcription.
with gr.Blocks() as app:
    gr.Markdown(value="## Распознавание речи с Whisper")
    # type="filepath" makes Gradio hand the callback a path to the audio file.
    audio_data = gr.Audio(type="filepath")
    text_output = gr.Textbox(label="Распознанный текст")
    btn = gr.Button(value="Распознать")
    # Clicking the button sends the audio to transcribe() and shows its result.
    btn.click(
        fn=transcribe,
        inputs=audio_data,
        outputs=text_output,
    )

# Start the server with debug output enabled.
app.launch(debug=True)
|