Spaces:
Sleeping
Sleeping
File size: 9,376 Bytes
17d10a7 a15d204 d448add db46bfb 1c1b50f db46bfb 1c1b50f db8ba25 db46bfb cf3593c d9bf0f0 b950350 6aba99a 3168a3e 019c404 8e5f278 3168a3e ecc69bf cf3593c 1c1b50f b950350 1c1b50f ecc69bf b950350 dfa5d3e db8ba25 fd8d42a dfa5d3e e564c8e fd8d42a 3168a3e 60b6e41 74b6128 e564c8e fd8d42a 74b6128 fd8d42a b950350 74b6128 b950350 66b1260 b950350 019c404 b950350 66b1260 dfa5d3e 66b1260 dfa5d3e 66b1260 dfa5d3e 3b58485 217c4b5 17d10a7 16184b2 a3b5047 16184b2 217c4b5 cf3593c 16184b2 217c4b5 16184b2 217c4b5 b950350 16184b2 217c4b5 d9bf0f0 16184b2 217c4b5 1808e7a 217c4b5 16184b2 cf3593c b950350 d448add 16184b2 dfa5d3e 66b1260 dfa5d3e b950350 ecc69bf 66b1260 d9bf0f0 66b1260 d9bf0f0 b950350 ecc69bf 3172dc7 ede9fc5 ecc69bf 35e8eba a07ea84 35e8eba a07ea84 35e8eba 8c25665 b950350 d9bf0f0 b950350 1d543ba 3fe530b 35e8eba ede9fc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
import gradio as gr
import os
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
pipeline,
AutoProcessor,
MusicgenForConditionalGeneration,
)
from scipy.io.wavfile import write
from pydub import AudioSegment
from dotenv import load_dotenv
import tempfile
import spaces
from TTS.api import TTS
from TTS.utils.synthesizer import Synthesizer
# Populate the process environment (python-dotenv), then pick up the
# Hugging Face access token; hf_token is None when HF_TOKEN is not set.
load_dotenv()
hf_token = os.environ.get("HF_TOKEN")
# ---------------------------------------------------------------------
# Script Generation Function
# ---------------------------------------------------------------------
def _extract_section(text, start_tag, end_tag, fallback):
    """Return the text after *start_tag* (up to *end_tag*, if given), or *fallback* when the tag is absent."""
    if start_tag not in text:
        return fallback
    section = text.split(start_tag)[1]
    if end_tag is not None:
        section = section.split(end_tag)[0]
    return section.strip()


def _split_script_sections(generated_text):
    """Split the model completion into (voice script, sound design, music suggestions)."""
    voice_script = _extract_section(
        generated_text,
        "Voice-Over Script:",
        "Sound Design Suggestions:",
        "No voice-over script found.",
    )
    sound_design = _extract_section(
        generated_text,
        "Sound Design Suggestions:",
        "Music Suggestions:",
        "No sound design suggestions found.",
    )
    music_suggestions = _extract_section(
        generated_text,
        "Music Suggestions:",
        None,
        "No music suggestions found.",
    )
    return voice_script, sound_design, music_suggestions


@spaces.GPU(duration=300)
def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
    """Generate a promo script, sound-design ideas and music ideas with a causal LM.

    Args:
        user_prompt: The user's promo concept, appended to the system prompt.
        model_id: Hugging Face model identifier to load.
        token: Hugging Face access token used when downloading the model.
        duration: Target promo length in seconds; only interpolated into the prompt.

    Returns:
        A 3-tuple ``(voice_script, sound_design, music_suggestions)``. A section
        whose prefix is missing from the model output is replaced by a
        "No ... found." message. On any failure the tuple is
        ``("Error generating script: <exc>", "", "")``.
    """
    try:
        # `token` replaces the deprecated `use_auth_token` keyword.
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        # NOTE(security): model_id comes from a UI textbox and
        # trust_remote_code=True lets the selected repository run its own
        # Python code at load time. Kept for compatibility; consider
        # restricting model_id to an allow-list.
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=token,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

        # System prompt with clear structure instructions
        system_prompt = (
            f"You are an expert radio imaging producer specializing in sound design and music. "
            f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
            f"1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
            f"2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
            f"3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
        )
        combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"

        result = llama_pipeline(combined_prompt, max_new_tokens=300, do_sample=True, temperature=0.8)
        full_text = result[0]["generated_text"]

        # Drop the echoed prompt by its known prefix. Splitting on the last
        # "Output:" would discard most of the completion whenever the model
        # itself writes "Output:" somewhere in its answer.
        if full_text.startswith(combined_prompt):
            generated_text = full_text[len(combined_prompt):].strip()
        else:
            generated_text = full_text.split("Output:")[-1].strip()

        return _split_script_sections(generated_text)
    except Exception as e:
        # UI boundary: report the failure in the first output box instead of crashing.
        return f"Error generating script: {e}", "", ""
# ---------------------------------------------------------------------
# Voice-Over Generation Function (Inactive)
# ---------------------------------------------------------------------
@spaces.GPU(duration=300)
def generate_voice(script: str, speaker: str = "default"):
    """Placeholder for voice-over synthesis; the feature is switched off.

    Both arguments are accepted but ignored, and a fixed notice is returned.

    NOTE(review): the returned text is not an audio file path, yet the UI binds
    this function to an Audio output configured with type="filepath" — confirm
    how the notice is meant to reach the user.
    """
    inactive_notice = "Voice-over generation is currently inactive."
    return inactive_notice
# ---------------------------------------------------------------------
# Music Generation Function (facebook/musicgen-medium)
# ---------------------------------------------------------------------
@spaces.GPU(duration=300)
def generate_music(prompt: str, audio_length: int):
    """Generate a music clip with facebook/musicgen-medium and save it as a WAV file.

    Args:
        prompt: Text description of the music to generate.
        audio_length: Number of new tokens to generate (passed as ``max_new_tokens``).

    Returns:
        Path of the written 16-bit PCM WAV file, or the string
        ``"Error generating music: <exc>"`` if anything fails.
    """
    try:
        # Load facebook/musicgen-medium model
        musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-medium")
        musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-medium")

        # Move the model to the appropriate device (CUDA or CPU)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        musicgen_model.to(device)

        # Prepare inputs
        inputs = musicgen_processor(text=[prompt], padding=True, return_tensors="pt").to(device)

        # Generate music; the UI slider may deliver a float, so coerce to int.
        outputs = musicgen_model.generate(**inputs, max_new_tokens=int(audio_length))

        # First item of the batch, first (mono) channel.
        audio_data = outputs[0, 0].cpu().numpy()

        # Peak-normalise to the int16 range. Use the array's own vectorised
        # max (the builtin max() walks the samples one by one in Python) and
        # skip the division for empty or all-zero output to avoid NaNs.
        peak = float(abs(audio_data).max()) if audio_data.size else 0.0
        if peak > 0:
            audio_data = audio_data / peak
        normalized_audio = (audio_data * 32767).astype("int16")

        # Write with the model's native sampling rate; a hard-coded 44100 Hz
        # header makes the clip play back too fast and at the wrong pitch.
        sampling_rate = musicgen_model.config.audio_encoder.sampling_rate

        # Save generated music to a file
        output_path = f"{tempfile.gettempdir()}/musicgen_medium_generated_music.wav"
        write(output_path, sampling_rate, normalized_audio)
        return output_path
    except Exception as e:
        # UI boundary: surface the failure as text rather than raising.
        return f"Error generating music: {e}"
# ---------------------------------------------------------------------
# Audio Blending Function with Ducking (Inactive)
# ---------------------------------------------------------------------
def blend_audio(voice_path: str, music_path: str, ducking: bool):
    """Placeholder for mixing voice and music; the feature is switched off.

    All three arguments are accepted but ignored, and a fixed notice is returned.

    NOTE(review): the returned text is not an audio file path, yet the UI binds
    this function to an Audio output configured with type="filepath" — confirm
    how the notice is meant to reach the user.
    """
    inactive_notice = "Audio blending functionality is currently inactive."
    return inactive_notice
# ---------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------
# Build the Gradio UI: a four-tab workflow (script -> voice -> music -> blend)
# followed by a footer and a visitor badge, then launch the app.
# NOTE(review): the row/tab nesting below was reconstructed from a copy whose
# indentation was lost — confirm it matches the intended layout.
with gr.Blocks() as demo:
    # Page header / introduction.
    gr.Markdown("""
# 🎧 AI Promo Studio 🚀
Welcome to **AI Promo Studio**, your one-stop solution for creating stunning and professional radio promos with ease!
Whether you're a sound designer, radio producer, or content creator, our AI-driven tools, powered by advanced LLM Llama models, empower you to bring your vision to life in just a few steps.
""")
    with gr.Tabs():
        # Step 1: Generate Script
        with gr.Tab("Step 1: Generate Script"):
            with gr.Row():
                user_prompt = gr.Textbox(label="Promo Idea", placeholder="E.g., A 30-second promo for a morning show.")
                llama_model_id = gr.Textbox(label="Llama Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
                duration = gr.Slider(label="Duration (seconds)", minimum=15, maximum=60, step=15, value=30)
            generate_script_button = gr.Button("Generate Script")
            # The three outputs receive the tuple returned by generate_script, in order.
            script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5)
            sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3)
            music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3)
            generate_script_button.click(
                # The lambda injects the module-level hf_token so the token is never a UI input.
                fn=lambda user_prompt, model_id, duration: generate_script(user_prompt, model_id, hf_token, duration),
                inputs=[user_prompt, llama_model_id, duration],
                outputs=[script_output, sound_design_output, music_suggestion_output],
            )
        # Step 2: Generate Voice
        with gr.Tab("Step 2: Generate Voice"):
            gr.Markdown("""
**Note:** Voice-over generation is currently inactive.
This feature will be available in future updates!
""")
            with gr.Row():
                speaker = gr.Textbox(label="Voice Style (optional)", placeholder="E.g., male, female, or neutral.")
            generate_voice_button = gr.Button("Generate Voice")
            voice_output = gr.Audio(label="Generated Voice", type="filepath")
            # Feeds the script produced in Step 1 into generate_voice
            # (currently a placeholder that returns a fixed notice).
            generate_voice_button.click(
                fn=generate_voice,
                inputs=[script_output, speaker],
                outputs=[voice_output],
            )
        # Step 3: Generate Music
        with gr.Tab("Step 3: Generate Music"):
            with gr.Row():
                # The slider value is forwarded to generate_music as audio_length
                # (used there as max_new_tokens).
                audio_length = gr.Slider(label="Music Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
            generate_music_button = gr.Button("Generate Music")
            music_output = gr.Audio(label="Generated Music", type="filepath")
            # The music suggestions text from Step 1 is used as the generation prompt.
            generate_music_button.click(
                fn=lambda music_suggestion, audio_length: generate_music(music_suggestion, audio_length),
                inputs=[music_suggestion_output, audio_length],
                outputs=[music_output],
            )
        # Step 4: Blend Audio
        with gr.Tab("Step 4: Blend Audio"):
            gr.Markdown("""
**Note:** Audio blending functionality is currently inactive.
This feature will be available in future updates!
""")
            with gr.Row():
                ducking = gr.Checkbox(label="Enable Ducking", value=True)
            blend_button = gr.Button("Blend Audio")
            final_output = gr.Audio(label="Final Promo Audio", type="filepath")
            # Takes the audio from Steps 2 and 3 as inputs
            # (blend_audio is currently a placeholder that returns a fixed notice).
            blend_button.click(
                fn=blend_audio,
                inputs=[voice_output, music_output, ducking],
                outputs=[final_output],
            )
    # Footer with attribution link.
    gr.Markdown("""
<hr>
<p style="text-align: center; font-size: 0.9em;">
Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
</p>
""")
    # Add visitor badge HTML
    gr.HTML("""
<a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
<img src="https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold&countColor=%23263759" />
</a>
""")
# Start the Gradio app with debug mode enabled.
demo.launch(debug=True)
|