"""Gradio demo that transcribes Marathi speech with a Wav2Vec2-BERT CTC model."""

import logging
from typing import Optional

import gradio as gr
import numpy as np
import torch
import torchaudio
from transformers import Wav2Vec2BertProcessor, Wav2Vec2BertForCTC

logger = logging.getLogger(__name__)

# Load model and processor
repo_id = "hriteshMaikap/marathi-asr-model"
processor = Wav2Vec2BertProcessor.from_pretrained(repo_id)
model = Wav2Vec2BertForCTC.from_pretrained(repo_id)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()  # Set to evaluation mode

# Sample rate the audio is resampled to and that is declared to the processor.
TARGET_SAMPLE_RATE = 16000


def transcribe(audio_file: Optional[str]) -> str:
    """Transcribe the audio file at *audio_file* and return the decoded text.

    Args:
        audio_file: Path to the audio file to transcribe (the interface below
            is configured with ``type="filepath"``), or ``None`` when no
            audio was supplied.

    Returns:
        The decoded transcription, or a string starting with
        ``"Error processing audio: "`` when no audio was supplied or any step
        of loading / inference failed. Exceptions are never propagated to the
        caller.
    """
    # Fail early with a readable message instead of surfacing whatever error
    # the audio loader would raise for a missing path.
    if audio_file is None:
        return "Error processing audio: no audio was provided"

    try:
        # Process audio
        waveform, sample_rate = torchaudio.load(audio_file)

        # Resample if needed
        if sample_rate != TARGET_SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLE_RATE)
            waveform = resampler(waveform)

        # Convert to mono if needed
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        # Convert to numpy. Only the leading (channel) dimension is dropped so
        # that a one-sample clip stays 1-D instead of collapsing to a scalar.
        speech_array = waveform.squeeze(0).numpy()

        # Process and run inference
        with torch.no_grad():
            inputs = processor(
                speech_array,
                sampling_rate=TARGET_SAMPLE_RATE,
                return_tensors="pt",
            ).to(device)
            logits = model(inputs.input_features).logits
            predicted_ids = torch.argmax(logits, dim=-1)

        # Decode the predicted IDs
        return processor.decode(predicted_ids[0])
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the server log and
        # report the failure to the user as text.
        logger.exception("Transcription failed for %s", audio_file)
        return f"Error processing audio: {str(e)}"


# Create Gradio interface with updated syntax
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),  # Removed 'source' parameter
    outputs="text",
    title="Marathi Speech Recognition",
    description="Record your voice in Marathi and get a transcription. Click the microphone icon to start recording, then submit to transcribe.",
)

demo.launch(show_error=True)