Bils committed on
Commit
75b7da7
·
verified ·
1 Parent(s): 6aba99a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -27
app.py CHANGED
@@ -43,9 +43,14 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
43
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script and music suggestion:"
44
  result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
45
 
46
- generated_text = result[0]["generated_text"].split("Refined script and music suggestion:")[-1].strip()
47
- script, music_suggestion = generated_text.split("Music Suggestion:")
48
- return script.strip(), music_suggestion.strip()
 
 
 
 
 
49
  except Exception as e:
50
  return f"Error generating script: {e}", None
51
 
@@ -55,16 +60,12 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
55
  @spaces.GPU(duration=300)
56
  def generate_voice(script: str, speaker: str):
57
  try:
58
- # Replace with your chosen TTS model
59
- tts_model = "coqui/XTTS-v2"
60
- processor = AutoProcessor.from_pretrained(tts_model)
61
- model = AutoModelForCausalLM.from_pretrained(tts_model)
62
 
63
- inputs = processor(script, return_tensors="pt")
64
- speech = model.generate(**inputs)
65
 
66
  output_path = f"{tempfile.gettempdir()}/generated_voice.wav"
67
- write(output_path, 22050, speech.cpu().numpy())
68
  return output_path
69
  except Exception as e:
70
  return f"Error generating voice-over: {e}"
@@ -139,38 +140,26 @@ with gr.Blocks() as demo:
139
  blend_button = gr.Button("Blend Audio")
140
  final_output = gr.Audio(label="Final Promo Audio", type="filepath")
141
 
142
- def step_generate_script(user_prompt, llama_model_id, duration):
143
- return generate_script(user_prompt, llama_model_id, hf_token, duration)
144
-
145
- def step_generate_voice(script, speaker):
146
- return generate_voice(script, speaker)
147
-
148
- def step_generate_music(music_suggestion, audio_length):
149
- return generate_music(music_suggestion, audio_length)
150
-
151
- def step_blend_audio(voice_path, music_path, ducking):
152
- return blend_audio(voice_path, music_path, ducking)
153
-
154
  generate_script_button.click(
155
- fn=step_generate_script,
156
- inputs=[user_prompt, llama_model_id, duration],
157
  outputs=[script_output],
158
  )
159
 
160
  generate_voice_button.click(
161
- fn=step_generate_voice,
162
  inputs=[script_output, speaker],
163
  outputs=[voice_output],
164
  )
165
 
166
  generate_music_button.click(
167
- fn=step_generate_music,
168
  inputs=[script_output, audio_length],
169
  outputs=[music_output],
170
  )
171
 
172
  blend_button.click(
173
- fn=step_blend_audio,
174
  inputs=[voice_output, music_output, ducking],
175
  outputs=[final_output],
176
  )
 
43
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script and music suggestion:"
44
  result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
45
 
46
+ generated_text = result[0]["generated_text"]
47
+
48
+ # Check if the delimiter exists
49
+ if "Music Suggestion:" in generated_text:
50
+ script, music_suggestion = generated_text.split("Music Suggestion:", 1)
51
+ return script.strip(), music_suggestion.strip()
52
+ else:
53
+ return "Error: 'Music Suggestion:' delimiter not found in the generated text.", None
54
  except Exception as e:
55
  return f"Error generating script: {e}", None
56
 
 
60
  @spaces.GPU(duration=300)
61
  def generate_voice(script: str, speaker: str):
62
  try:
63
+ from TTS.api import TTS
 
 
 
64
 
65
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
66
 
67
  output_path = f"{tempfile.gettempdir()}/generated_voice.wav"
68
+ tts.tts_to_file(text=script, file_path=output_path)
69
  return output_path
70
  except Exception as e:
71
  return f"Error generating voice-over: {e}"
 
140
  blend_button = gr.Button("Blend Audio")
141
  final_output = gr.Audio(label="Final Promo Audio", type="filepath")
142
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  generate_script_button.click(
144
+ fn=generate_script,
145
+ inputs=[user_prompt, llama_model_id, hf_token, duration],
146
  outputs=[script_output],
147
  )
148
 
149
  generate_voice_button.click(
150
+ fn=generate_voice,
151
  inputs=[script_output, speaker],
152
  outputs=[voice_output],
153
  )
154
 
155
  generate_music_button.click(
156
+ fn=generate_music,
157
  inputs=[script_output, audio_length],
158
  outputs=[music_output],
159
  )
160
 
161
  blend_button.click(
162
+ fn=blend_audio,
163
  inputs=[voice_output, music_output, ducking],
164
  outputs=[final_output],
165
  )