import asyncio
import json
import os
import tempfile

import edge_tts
import gradio as gr
from huggingface_hub import InferenceClient
from moviepy.editor import AudioFileClip, concatenate_audioclips

# Initialize the Hugging Face Inference client for the Mistral instruct model
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
async def text_to_speech(text, voice, filename):
    # Synthesize one line of dialogue with edge-tts and save it to `filename`
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)
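
# For reference, a single line can be synthesized on its own; the voice name
# and output path here are illustrative:
#   asyncio.run(text_to_speech("Hello!", "en-US-JennyNeural", "hello.mp3"))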

async def generate_conversation(script):
    title = script['title']
    content = script['content']
    temp_files = []
    tasks = []
    for key, text in content.items():
        speaker = key.split('_')[0]  # Extract the speaker name (e.g. "Alice")
        index = key.split('_')[1]    # Extract the dialogue index
        voice = "en-US-JennyNeural" if speaker == "Alice" else "en-US-GuyNeural"
        # Create a temporary file for each speaker's dialogue; close the handle
        # so edge-tts can write to the path itself
        temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
        temp_file.close()
        temp_files.append(temp_file.name)
        tasks.append(text_to_speech(text, voice, temp_file.name))
        print(f"Queued audio for {speaker}_{index}: {temp_file.name}")
    await asyncio.gather(*tasks)

    # Combine the audio files using moviepy
    audio_clips = [AudioFileClip(temp_file) for temp_file in temp_files]
    combined = concatenate_audioclips(audio_clips)

    # Create a temporary file for the combined output
    temp_output_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    temp_output_file.close()
    output_filename = temp_output_file.name

    # Save the combined file
    combined.write_audiofile(output_filename)
    print(f"Combined audio saved as: {output_filename}")

    # Release the clip readers, then clean up the per-dialogue temporary files
    for clip in audio_clips:
        clip.close()
    for temp_file in temp_files:
        os.remove(temp_file)
        print(f"Deleted temporary file: {temp_file}")
    return output_filename
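
# generate_conversation expects a dict shaped like the model's JSON output,
# for example (illustrative values only):
#   {"title": "Black Holes", "content": {"Alice_0": "Hi Bob!", "Bob_0": "Hi Alice!"}}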

# Generate a podcast script for the user's topic, then render it to audio
def generate_podcast(topic, seed):
    system_instructions = '''[SYSTEM] You are an educational podcast generator. You have to create a podcast between Alice and Bob that gives an overview of the topic given by the user.
Please provide the script in the following JSON format:
{
    "title": "[string]",
    "content": {
        "Alice_0": "[string]",
        "Bob_0": "[string]",
        ...
    }
}
Be concise.
'''
text = f" Topic: {topic}"
formatted_prompt = system_instructions + text
stream = Client.text_generation(formatted_prompt, max_new_tokens=1024, seed=seed, stream=True, details=True, return_full_text=False)
generated_script = ""
for response in stream:
if not response.token.text == "</s>":
generated_script += response.token.text
# Generate the podcast
script_json = json.loads(generated_script) # Use the generated script as input
output_filename = asyncio.run(generate_conversation(script_json))
print("Output File:"+output_filename)
# Read the generated audio file
with open(output_filename, "rb") as f:
audio_bytes = f.read()
# Clean up the final output temporary file
os.remove(output_filename)
print(f"Deleted temporary file: {output_filename}")
return audio_bytes
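
# A quick local smoke test (assumes a Hugging Face token with Inference API
# access is configured in the environment):
#   print(generate_podcast("The history of radio", seed=0))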
DESCRIPTION = """ # <center><b>PODGEN 📻</b></center>
### <center>Generate a podcast on any topic</center>
### <center>Use the Power of llms to understand any topic better</center>
"""

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=999999,
            step=1,
            value=0,
            visible=False,
        )
        topic_input = gr.Textbox(label="Topic", placeholder="Enter a topic")
        audio_output = gr.Audio(
            label="Podgen",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )
    # generate_podcast handles one request at a time, so no batching is used
    gr.Interface(
        fn=generate_podcast,
        inputs=[topic_input, seed],
        outputs=[audio_output],
        live=True,
    )

if __name__ == "__main__":
    demo.queue(max_size=200).launch()