Spaces:

hriteshMaikap
/

marathi-asr-wav2vec2bert

Sleeping

File size: 1,910 Bytes

670f5ce
 
 
 
 
 
 
 
 
 
 
 
ee75be0
670f5ce
ee75be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
670f5ce
ee75be0
 
670f5ce
ee75be0
670f5ce
 
ee75be0
670f5ce
 
e424f01

import gradio as gr
import torch
import torchaudio
import numpy as np
from transformers import Wav2Vec2BertProcessor, Wav2Vec2BertForCTC

# Hub checkpoint that supplies both the processor and the CTC model.
repo_id = "hriteshMaikap/marathi-asr-model"

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = Wav2Vec2BertProcessor.from_pretrained(repo_id)
model = Wav2Vec2BertForCTC.from_pretrained(repo_id).to(device)
model.eval()  # inference only: put the module in evaluation mode

def transcribe(audio_file):
    """Transcribe an audio clip to text with the module-level model.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty path (nothing recorded or uploaded) is reported as an
            error message instead of being handed to the audio loader.

    Returns:
        The decoded transcription on success. On any failure, a string that
        starts with ``"Error processing audio: "`` and describes the problem;
        errors are returned rather than raised so they appear in the text
        output.
    """
    target_sample_rate = 16000

    # Nothing to transcribe: answer with a clear message rather than whatever
    # low-level error the loader would raise for a missing path.
    if not audio_file:
        return "Error processing audio: no audio file was provided"

    try:
        waveform, sample_rate = torchaudio.load(audio_file)

        if waveform.numel() == 0:
            return "Error processing audio: the audio file contains no samples"

        # Down-mix to mono first so only a single channel has to be resampled.
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        if sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(sample_rate, target_sample_rate)
            waveform = resampler(waveform)

        # Drop only the leading channel axis; an unqualified squeeze() would
        # also collapse a one-sample clip into a 0-d array.
        speech_array = waveform.squeeze(0).numpy()

        with torch.no_grad():
            inputs = processor(
                speech_array,
                sampling_rate=target_sample_rate,
                return_tensors="pt",
            ).to(device)
            # Forward everything the processor produced, so the model also
            # receives the attention mask and not just the input features.
            logits = model(**inputs).logits
            predicted_ids = torch.argmax(logits, dim=-1)

        # Single clip in the batch, so decode the first (only) sequence.
        return processor.decode(predicted_ids[0])
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing audio: {e}"

# Web UI: one audio input, handed to `transcribe` as a file path, and a
# plain-text output showing whatever `transcribe` returns.
demo = gr.Interface(
    title="Marathi Speech Recognition",
    description=(
        "Record your voice in Marathi and get a transcription. "
        "Click the microphone icon to start recording, then submit to transcribe."
    ),
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start the app as soon as the module runs, with show_error enabled.
demo.launch(show_error=True)