Spaces:

Bils
/

AIPromoStudio

Running on Zero

App Files Files Community

Bils committed on Feb 3

Commit

a765116

verified ·

1 Parent(s): d3df06a

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -4

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import torch
 import tempfile
 from scipy.io.wavfile import write
@@ -31,6 +32,16 @@ LLAMA_PIPELINES = {}
 MUSICGEN_MODELS = {}
 TTS_MODELS = {}
 # ---------------------------------------------------------------------
 # Helper Functions
 # ---------------------------------------------------------------------
@@ -100,7 +111,7 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
             f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
             "1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
             "2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
-            "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
@@ -163,11 +174,14 @@ def generate_voice(script: str, tts_model_name: str = "tts_models/en/ljspeech/ta
         if not script.strip():
             return "Error: No script provided."
         tts_model = get_tts_model(tts_model_name)
         # Generate and save voice
         output_path = os.path.join(tempfile.gettempdir(), "voice_over.wav")
-        tts_model.tts_to_file(text=script, file_path=output_path)
         return output_path
     except Exception as e:
@@ -230,14 +244,14 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
         voice_len = len(voice)  # in milliseconds
         music_len = len(music)  # in milliseconds
-        # Loop music if it's shorter than voice
         if music_len < voice_len:
             looped_music = AudioSegment.empty()
             while len(looped_music) < voice_len:
                 looped_music += music
             music = looped_music
-        # Trim music if it's longer than voice
         if len(music) > voice_len:
             music = music[:voice_len]

 import os
+import re
 import torch
 import tempfile
 from scipy.io.wavfile import write
 MUSICGEN_MODELS = {}
 TTS_MODELS = {}
+# ---------------------------------------------------------------------
+# Utility Function: Clean Text
+# ---------------------------------------------------------------------
+def clean_text(text: str) -> str:
+    """
+    Removes characters (currently only asterisks) that the TTS model's vocabulary may not recognize.
+    """
+    # Remove all asterisks. You can add more cleaning steps here as needed.
+    return re.sub(r'\*', '', text)
 # ---------------------------------------------------------------------
 # Helper Functions
 # ---------------------------------------------------------------------
             f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
             "1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
             "2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
+            "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
         if not script.strip():
             return "Error: No script provided."
+        # Clean the script to remove special characters (e.g., asterisks) that may produce warnings
+        cleaned_script = clean_text(script)
         tts_model = get_tts_model(tts_model_name)
         # Generate and save voice
         output_path = os.path.join(tempfile.gettempdir(), "voice_over.wav")
+        tts_model.tts_to_file(text=cleaned_script, file_path=output_path)
         return output_path
     except Exception as e:
         voice_len = len(voice)  # in milliseconds
         music_len = len(music)  # in milliseconds
+        # Loop music if it's shorter than the voice
         if music_len < voice_len:
             looped_music = AudioSegment.empty()
             while len(looped_music) < voice_len:
                 looped_music += music
             music = looped_music
+        # Trim music if it's longer than the voice
         if len(music) > voice_len:
             music = music[:voice_len]