test1 / app.py
import gradio as gr
import torch
import numpy as np
import soundfile as sf
import librosa
from transformers import pipeline

# Use the GPU with half precision when available; fall back to CPU with float32,
# since float16 inference on CPU is not reliably supported.
device = 0 if torch.cuda.is_available() else -1
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

pipe = pipeline(
    "automatic-speech-recognition",
    model="antony66/whisper-large-v3-russian",
    torch_dtype=torch_dtype,
    device=device,
)
def transcribe(audio_data):
    """Transcribe the audio received from the Gradio component."""
    print(f"Received audio data: {audio_data}")
    if audio_data is None:
        return "Ошибка: не получены аудиоданные"  # "Error: no audio data received"

    wav_file = "temp_audio.wav"
    if isinstance(audio_data, tuple):
        # gr.Audio(type="numpy") delivers a (sample_rate, array) tuple.
        sample_rate, audio_array = audio_data
        sf.write(wav_file, audio_array, sample_rate)
    elif isinstance(audio_data, str):
        # gr.Audio(type="filepath") delivers a path; resample to 16 kHz for Whisper.
        audio_array, sample_rate = librosa.load(audio_data, sr=16000)
        sf.write(wav_file, audio_array, sample_rate)
    else:
        return "Ошибка: неизвестный формат аудиоданных"  # "Error: unknown audio data format"

    result = pipe(wav_file)
    return result["text"]
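
# Note: the transformers ASR pipeline can also consume in-memory audio directly,
# e.g. pipe({"raw": audio_array, "sampling_rate": sample_rate}), which would avoid
# writing temp_audio.wav. This is a sketch of the standard pipeline input format,
# not something the original app does.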

with gr.Blocks() as app:
    gr.Markdown("## Распознавание речи с Whisper")  # "Speech recognition with Whisper"
    audio_data = gr.Audio(type="filepath")
    text_output = gr.Textbox(label="Распознанный текст")  # "Recognized text"
    btn = gr.Button("Распознать")  # "Recognize"
    btn.click(transcribe, inputs=audio_data, outputs=text_output)

app.launch(debug=True)