Spaces:
Running
Running
File size: 1,238 Bytes
ab2ffb7 f971019 ab2ffb7 1abf72c ab2ffb7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import gradio as gr
import torch
from transformers import HubertForCTC, Wav2Vec2Processor
import librosa
# Load the model and processor from Hugging Face Hub.
# These run once at import time; `transcribe` below reuses the resulting
# module-level `processor` and `model` objects on every call.
model_name = "Ansu/mHubert-basque-ASR"  # Change this to your model
# The processor turns a raw waveform into model inputs and decodes predicted
# token ids back into text (see its two uses in `transcribe`).
processor = Wav2Vec2Processor.from_pretrained(model_name)
# CTC model whose forward pass yields the logits that `transcribe` decodes.
model = HubertForCTC.from_pretrained(model_name)
# Function to transcribe audio
def transcribe(audio) -> str:
    """Transcribe an audio file to text with the module-level CTC model.

    Args:
        audio: Path to an audio file, as delivered by the ``gr.Audio``
            component configured with ``type="filepath"``. ``None`` is
            accepted and means "no audio available".

    Returns:
        The greedy (per-frame argmax) CTC decoding of the recording, or an
        empty string when there is nothing to transcribe.
    """
    # The interface runs with ``live=True``, so this callback also fires when
    # the user clears the audio widget; Gradio then passes ``None``. Return an
    # empty transcription instead of crashing inside ``librosa.load``.
    if audio is None:
        return ""

    # One sampling rate shared by the loader and the processor so the two
    # can never drift apart.
    sample_rate = 16000

    # Load audio file, resampled to ``sample_rate``.
    waveform, _ = librosa.load(audio, sr=sample_rate)
    if waveform.size == 0:
        # A zero-length recording has nothing to feed the model.
        return ""

    # Process input
    inputs = processor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )

    # Get model predictions; inference only, so skip gradient tracking.
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode predictions; a single clip was submitted, so take the first
    # (only) entry of the decoded batch.
    return processor.batch_decode(predicted_ids)[0]
# Create Gradio interface
# Audio input: accepts an uploaded file or a microphone recording and hands
# the callback a file path.
_audio_input = gr.Audio(
    sources=["upload", "microphone"],
    type="filepath",
    label="🎤 Upload or Record Audio",
)

iface = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs="text",
    title="HuBERT ASR Demo",
    description="🎙️ Speak into the microphone or upload an audio file to get a transcription.",
    # live=True re-runs `transcribe` automatically whenever the input changes
    # rather than waiting for an explicit submit.
    live=True,
)

# Start the web server for the demo.
iface.launch()
|