Bils committed on
Commit
e564c8e
·
verified ·
1 Parent(s): 9fc7b58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -74
app.py CHANGED
@@ -35,25 +35,19 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
35
  llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
36
 
37
  system_prompt = (
38
- "You are an expert radio imaging producer specializing in sound design and music.\n"
39
- "---\n"
40
- f"Based on the user's concept and the selected duration of {duration} seconds, craft a concise, engaging promo script.\n"
41
- "---\n"
42
- "Ensure the script fits within the time limit and suggest a matching music style that complements the theme."
43
  )
44
 
45
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script and music suggestion:"
46
- result = llama_pipeline(combined_prompt, max_new_tokens=500, do_sample=True, temperature=0.9)
47
-
48
- generated_text = result[0]["generated_text"]
49
- if "Refined script and music suggestion:" in generated_text:
50
- parts = generated_text.split("Refined script and music suggestion:", 1)[-1].strip()
51
- if "Music Style:" in parts:
52
- script, music_suggestion = parts.split("Music Style:", 1)
53
- return script.strip(), music_suggestion.strip()
54
- else:
55
- return parts.strip(), "No specific music suggestion found."
56
- return "Error: Could not parse the script.", None
57
  except Exception as e:
58
  return f"Error generating script: {e}", None
59
 
@@ -61,9 +55,10 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
61
  # Voice-Over Generation Function
62
  # ---------------------------------------------------------------------
63
  @spaces.GPU(duration=300)
64
- def generate_voice(script: str):
65
  try:
66
- tts_model = "coqui/xtts-en-ljspeech-v2"
 
67
  processor = AutoProcessor.from_pretrained(tts_model)
68
  model = AutoModelForCausalLM.from_pretrained(tts_model)
69
 
@@ -125,64 +120,66 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool):
125
  # ---------------------------------------------------------------------
126
  with gr.Blocks() as demo:
127
  gr.Markdown("""
128
- # 🎧 AI Promo Studio with Pages 🚀
129
- Follow a step-by-step process to create amazing promos with AI.
130
  """)
131
 
132
- with gr.Tabs():
133
- # Step 1: Script Generation
134
- with gr.Tab("Step 1: Generate Script"):
135
- user_prompt = gr.Textbox(label="Promo Idea", placeholder="E.g., A 30-second promo for a morning show.")
136
- llama_model_id = gr.Textbox(label="Llama Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
137
- duration = gr.Slider(label="Duration (seconds)", minimum=15, maximum=60, step=15, value=30)
138
- generate_script_button = gr.Button("Generate Script")
139
- script_output = gr.Textbox(label="Generated Script")
140
- music_suggestion_output = gr.Textbox(label="Music Suggestion")
141
-
142
- generate_script_button.click(
143
- fn=generate_script,
144
- inputs=[user_prompt, llama_model_id, hf_token, duration],
145
- outputs=[script_output, music_suggestion_output],
146
- )
147
-
148
- # Step 2: Voice Generation
149
- with gr.Tab("Step 2: Generate Voice"):
150
- script_input = gr.Textbox(label="Script for Voice", interactive=False)
151
- generate_voice_button = gr.Button("Generate Voice")
152
- voice_output = gr.Audio(label="Generated Voice", type="filepath")
153
-
154
- generate_voice_button.click(
155
- fn=generate_voice,
156
- inputs=[script_input],
157
- outputs=[voice_output],
158
- )
159
-
160
- # Step 3: Music Generation
161
- with gr.Tab("Step 3: Generate Music"):
162
- music_prompt_input = gr.Textbox(label="Music Suggestion Prompt", interactive=False)
163
- audio_length = gr.Slider(label="Music Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
164
- generate_music_button = gr.Button("Generate Music")
165
- music_output = gr.Audio(label="Generated Music", type="filepath")
166
-
167
- generate_music_button.click(
168
- fn=generate_music,
169
- inputs=[music_prompt_input, audio_length],
170
- outputs=[music_output],
171
- )
172
-
173
- # Step 4: Blend Audio
174
- with gr.Tab("Step 4: Blend Audio"):
175
- voice_path = gr.Audio(label="Voice File", type="filepath")
176
- music_path = gr.Audio(label="Music File", type="filepath")
177
- ducking = gr.Checkbox(label="Enable Ducking", value=True)
178
- blend_button = gr.Button("Blend Audio")
179
- final_output = gr.Audio(label="Final Promo Audio", type="filepath")
180
-
181
- blend_button.click(
182
- fn=blend_audio,
183
- inputs=[voice_path, music_path, ducking],
184
- outputs=[final_output],
185
- )
 
 
186
 
187
  gr.Markdown("""
188
  <hr>
 
35
  llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
36
 
37
  system_prompt = (
38
+ f"You are an expert radio imaging producer specializing in sound design and music. "
39
+ f"Based on the user's concept and the selected duration of {duration} seconds, craft a concise, engaging promo script. "
40
+ f"Ensure the script fits within the time limit and suggest a matching music style that complements the theme."
 
 
41
  )
42
 
43
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script and music suggestion:"
44
+ result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
45
+
46
+ generated_text = result[0]["generated_text"].split("Refined script and music suggestion:")[-1].strip()
47
+ if "Music Suggestion:" in generated_text:
48
+ script, music_suggestion = generated_text.split("Music Suggestion:")
49
+ return script.strip(), music_suggestion.strip()
50
+ return generated_text, "No specific music suggestion found."
 
 
 
 
51
  except Exception as e:
52
  return f"Error generating script: {e}", None
53
 
 
55
  # Voice-Over Generation Function
56
  # ---------------------------------------------------------------------
57
  @spaces.GPU(duration=300)
58
+ def generate_voice(script: str, speaker: str):
59
  try:
60
+ # Replace with your chosen TTS model
61
+ tts_model = "coqui/XTTS-v2"
62
  processor = AutoProcessor.from_pretrained(tts_model)
63
  model = AutoModelForCausalLM.from_pretrained(tts_model)
64
 
 
120
  # ---------------------------------------------------------------------
121
  with gr.Blocks() as demo:
122
  gr.Markdown("""
123
+ # 🎧 AI Promo Studio with Step-by-Step Script, Voice, Music, and Mixing 🚀
124
+ Generate and mix radio promos effortlessly with AI tools!
125
  """)
126
 
127
+ with gr.Row():
128
+ user_prompt = gr.Textbox(label="Promo Idea", placeholder="E.g., A 30-second promo for a morning show.")
129
+ llama_model_id = gr.Textbox(label="Llama Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
130
+ duration = gr.Slider(label="Duration (seconds)", minimum=15, maximum=60, step=15, value=30)
131
+ audio_length = gr.Slider(label="Music Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
132
+ speaker = gr.Textbox(label="Voice Style (optional)", placeholder="E.g., male, female, or neutral.")
133
+ ducking = gr.Checkbox(label="Enable Ducking", value=True)
134
+
135
+ generate_script_button = gr.Button("Generate Script")
136
+ script_output = gr.Textbox(label="Generated Script")
137
+ music_suggestion_output = gr.Textbox(label="Music Suggestion")
138
+
139
+ generate_voice_button = gr.Button("Generate Voice")
140
+ voice_output = gr.Audio(label="Generated Voice", type="filepath")
141
+
142
+ generate_music_button = gr.Button("Generate Music")
143
+ music_output = gr.Audio(label="Generated Music", type="filepath")
144
+
145
+ blend_button = gr.Button("Blend Audio")
146
+ final_output = gr.Audio(label="Final Promo Audio", type="filepath")
147
+
148
+ def step_generate_script(user_prompt, model_id, duration):
149
+ return generate_script(user_prompt, model_id, hf_token, duration)
150
+
151
+ def step_generate_voice(script, speaker):
152
+ return generate_voice(script, speaker)
153
+
154
+ def step_generate_music(music_suggestion, audio_length):
155
+ return generate_music(music_suggestion, audio_length)
156
+
157
+ def step_blend_audio(voice_path, music_path, ducking):
158
+ return blend_audio(voice_path, music_path, ducking)
159
+
160
+ generate_script_button.click(
161
+ fn=lambda user_prompt, model_id, duration: generate_script(user_prompt, model_id, hf_token, duration),
162
+ inputs=[user_prompt, llama_model_id, duration],
163
+ outputs=[script_output, music_suggestion_output],
164
+ )
165
+
166
+ generate_voice_button.click(
167
+ fn=step_generate_voice,
168
+ inputs=[script_output, speaker],
169
+ outputs=[voice_output],
170
+ )
171
+
172
+ generate_music_button.click(
173
+ fn=step_generate_music,
174
+ inputs=[music_suggestion_output, audio_length],
175
+ outputs=[music_output],
176
+ )
177
+
178
+ blend_button.click(
179
+ fn=step_blend_audio,
180
+ inputs=[voice_output, music_output, ducking],
181
+ outputs=[final_output],
182
+ )
183
 
184
  gr.Markdown("""
185
  <hr>