hriteshMaikap committed on
Commit
ee75be0
·
verified ·
1 Parent(s): a36ff2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -31
app.py CHANGED
@@ -10,41 +10,46 @@ processor = Wav2Vec2BertProcessor.from_pretrained(repo_id)
10
  model = Wav2Vec2BertForCTC.from_pretrained(repo_id)
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  model = model.to(device)
 
13
 
14
- def transcribe(audio):
15
- # Process audio
16
- waveform, sample_rate = torchaudio.load(audio)
17
-
18
- # Resample if needed
19
- if sample_rate != 16000:
20
- resampler = torchaudio.transforms.Resample(sample_rate, 16000)
21
- waveform = resampler(waveform)
22
-
23
- # Convert to mono if needed
24
- if waveform.shape[0] > 1:
25
- waveform = torch.mean(waveform, dim=0, keepdim=True)
26
-
27
- # Convert to numpy
28
- speech_array = waveform.squeeze().numpy()
29
-
30
- # Process and run inference
31
- with torch.no_grad():
32
- inputs = processor(speech_array, sampling_rate=16000, return_tensors="pt").to(device)
33
- logits = model(inputs.input_features).logits
34
- predicted_ids = torch.argmax(logits, dim=-1)
35
-
36
- # Decode the predicted IDs
37
- transcription = processor.decode(predicted_ids[0])
38
-
39
- return transcription
 
 
 
 
40
 
41
- # Create Gradio interface
42
- iface = gr.Interface(
43
  fn=transcribe,
44
- inputs=gr.Audio(source="microphone", type="filepath"),
45
  outputs="text",
46
  title="Marathi Speech Recognition",
47
- description="Record your voice in Marathi and get a transcription."
48
  )
49
 
50
- iface.launch()
 
10
  model = Wav2Vec2BertForCTC.from_pretrained(repo_id)
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  model = model.to(device)
13
+ model.eval() # Set to evaluation mode
14
 
15
# Sampling rate (Hz) the audio is converted to and that the processor is told
# to expect; kept in one place so the resampler and processor cannot disagree.
TARGET_SAMPLE_RATE = 16000


def transcribe(audio_file):
    """Transcribe an audio file to text with the module-level CTC model.

    Args:
        audio_file: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) is treated as "nothing was recorded".

    Returns:
        The decoded transcription on success. On any failure, a string that
        starts with ``"Error processing audio: "`` followed by the reason;
        errors are returned instead of raised so they appear in the text
        output of the UI.
    """
    try:
        # Submitting without a recording hands us no path; fail with a clear
        # message instead of whatever the audio loader raises for it.
        if not audio_file:
            raise ValueError("no audio was provided")

        waveform, sample_rate = torchaudio.load(audio_file)
        if waveform.numel() == 0:
            raise ValueError("the recording is empty")

        # Downmix to mono before resampling: both steps are linear, so the
        # result is the same, but only one channel has to be resampled.
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        if sample_rate != TARGET_SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLE_RATE)
            waveform = resampler(waveform)

        # squeeze(0) removes only the channel axis; a bare squeeze() would
        # also collapse a one-sample clip into a 0-d array.
        speech_array = waveform.squeeze(0).numpy()

        # Inference only: no gradients are needed.
        with torch.no_grad():
            inputs = processor(
                speech_array,
                sampling_rate=TARGET_SAMPLE_RATE,
                return_tensors="pt",
            ).to(device)
            logits = model(inputs.input_features).logits
            # Greedy CTC decoding: most likely token at every frame.
            predicted_ids = torch.argmax(logits, dim=-1)

        return processor.decode(predicted_ids[0])
    except Exception as e:
        # UI callback boundary: report the failure as text rather than
        # letting the exception propagate out of the handler.
        return f"Error processing audio: {e}"
45
 
46
# Gradio UI: one audio input passed to `transcribe` as a file path, with the
# returned string shown as plain text.
demo = gr.Interface(
    transcribe,
    gr.Audio(type="filepath"),
    "text",
    title="Marathi Speech Recognition",
    description=(
        "Record your voice in Marathi and get a transcription. "
        "Click the microphone icon to start recording, then submit to transcribe."
    ),
)

# Start serving the interface.
demo.launch()