Spaces:

emirhanbilgic
/

Text-to-speech-Turkish

Running

emirhanbilgic committed on Aug 31, 2024

Commit

5f01cca

verified ·

1 Parent(s): 7ca15a8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -116,22 +116,21 @@ def text_to_speech(text, audio_file=None):
     # Normalize the input text
     normalized_text = normalize_text(text)
     inputs = processor(text=normalized_text, return_tensors="pt").to(device)
     speaker_embeddings = default_embedding
     # Generate speech
-    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
     # Convert the generated speech to numpy array format
     speech_np = speech.cpu().numpy()
-    # Write the output to a temporary file
-    output_file = "output.wav"
-    sf.write(output_file, speech_np, samplerate=16000)
     # Return the numpy array and the sample rate
-    return speech_np, 16000
 iface = gr.Interface(
     fn=text_to_speech,
@@ -145,4 +144,4 @@ iface = gr.Interface(
     description="Enter Turkish text, optionally upload a short audio sample of the target speaker, and listen to the generated speech using the fine-tuned SpeechT5 model."
 )
-iface.launch(share=True)

     # Normalize the input text
     normalized_text = normalize_text(text)
+    # Prepare the input for the model
     inputs = processor(text=normalized_text, return_tensors="pt").to(device)
+    # Use the default speaker embedding
     speaker_embeddings = default_embedding
     # Generate speech
+    with torch.no_grad():
+        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
     # Convert the generated speech to numpy array format
     speech_np = speech.cpu().numpy()
     # Return the numpy array and the sample rate
+    return (speech_np, 16000)
 iface = gr.Interface(
     fn=text_to_speech,
     description="Enter Turkish text, optionally upload a short audio sample of the target speaker, and listen to the generated speech using the fine-tuned SpeechT5 model."
 )
+iface.launch(share=True)