camanalo1 committed on
Commit
3fdc3cc
·
verified ·
1 Parent(s): 5d7b200

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -5
app.py CHANGED
@@ -19,12 +19,9 @@ model_tts = VitsModel.from_pretrained("facebook/mms-tts-eng")
19
  print("TTS Tokenizer:", tokenizer_tts) # Print the tokenizer for the TTS model
20
 
21
  def transcribe_and_generate_audio(audio):
22
- sr, y = audio
23
- y = y.astype(np.float32)
24
- y /= np.max(np.abs(y))
25
 
26
  # Transcribe audio
27
- asr_output = transcriber({"sampling_rate": sr, "raw": y})["text"]
28
 
29
  # Generate text based on ASR output
30
  generated_text = generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
@@ -44,7 +41,7 @@ def transcribe_and_generate_audio(audio):
44
  audio_input = gr.Interface(
45
  transcribe_and_generate_audio,
46
  gr.Audio(sources=["microphone"], label="Speak Here"),
47
- ["audio", "text", "text"],
48
  title="ASR -> LLM -> TTS",
49
  description="Speak into the microphone and hear the generated audio."
50
  )
 
19
  print("TTS Tokenizer:", tokenizer_tts) # Print the tokenizer for the TTS model
20
 
21
  def transcribe_and_generate_audio(audio):
 
 
 
22
 
23
  # Transcribe audio
24
+ asr_output = transcriber(audio)["text"]
25
 
26
  # Generate text based on ASR output
27
  generated_text = generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
 
41
  audio_input = gr.Interface(
42
  transcribe_and_generate_audio,
43
  gr.Audio(sources=["microphone"], label="Speak Here"),
44
+ "audio",
45
  title="ASR -> LLM -> TTS",
46
  description="Speak into the microphone and hear the generated audio."
47
  )