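"""Gradio app that generates a short podcast on a user-supplied topic.

Mistral-7B-Instruct (via the Hugging Face Inference API) writes a two-speaker
script, edge-tts voices each line, and moviepy concatenates the clips into a
single MP3.
"""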
import json
import os
import asyncio
from moviepy.editor import AudioFileClip, concatenate_audioclips
from huggingface_hub import InferenceClient
import torch
import edge_tts
import tempfile
import gradio as gr

# Initialize Hugging Face Inference Client
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
generator = torch.Generator().manual_seed(42)

async def text_to_speech(text, voice, filename):
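    """Synthesize `text` with the given edge-tts voice and save it to `filename`."""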
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)

async def generate_conversation(script):
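    """Voice each line of the script with edge-tts, concatenate the clips, and return the path to the combined MP3."""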
    title = script['title']
    content = script['content']
    
    temp_files = []

    tasks = []
    for key, text in content.items():
        speaker = key.split('_')[0]  # Extract the speaker name
        index = key.split('_')[1]    # Extract the dialogue index
        voice = "en-US-JennyNeural" if speaker == "Alice" else "en-US-GuyNeural"
        
        # Create a temporary file for each speaker's dialogue and close the
        # handle so edge-tts can write to the path on every platform
        temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
        temp_file.close()
        temp_files.append(temp_file.name)

        filename = temp_file.name
        tasks.append(text_to_speech(text, voice, filename))
        print(f"Queued audio generation for {speaker}_{index}: {filename}")

    await asyncio.gather(*tasks)

    # Combine the audio files using moviepy
    audio_clips = [AudioFileClip(temp_file) for temp_file in temp_files]
    combined = concatenate_audioclips(audio_clips)

    # Create a temporary file for the combined output
    temp_output_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    temp_output_file.close()
    output_filename = temp_output_file.name

    # Save the combined file and release the clip handles so the
    # per-speaker files can be deleted below
    combined.write_audiofile(output_filename)
    combined.close()
    for clip in audio_clips:
        clip.close()
    print(f"Combined audio saved as: {output_filename}")

    # Clean up temporary files
    for temp_file in temp_files:
        os.remove(temp_file)
        print(f"Deleted temporary file: {temp_file}")

    return output_filename

# Function to generate podcast based on user input
def generate_podcast(topic, seed):
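    """Ask the model for a two-speaker script on `topic`, render it to audio, and return the path to the MP3."""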
    system_instructions = '''[SYSTEM] You are an educational podcast generator. Create a podcast between Alice and Bob that gives an overview of the topic given by the user.
    Provide the script in the following JSON format and respond with the JSON only:
    {
      "title": "[string]",
      "content": {
        "Alice_0": "[string]",
        "Bob_0": "[string]",
        ...
      }
    }
    Be concise.
    '''

    text = f" Topic: {topic}"
    formatted_prompt = system_instructions + text
    stream = client.text_generation(formatted_prompt, max_new_tokens=1024, seed=seed, stream=True, details=True, return_full_text=False)
    
    generated_script = ""
    for response in stream:
        if response.token.text != "</s>":
            generated_script += response.token.text

    # Generate the podcast audio from the generated script
    script_json = json.loads(generated_script)
    output_filename = asyncio.run(generate_conversation(script_json))
    print("Output File: " + output_filename)

    # gr.Audio(type="filepath") expects a path, so return the combined file's
    # path rather than raw bytes; Gradio serves the file from disk
    return output_filename

DESCRIPTION = """ # <center><b>PODGEN 📻</b></center>
        ### <center>Generate a podcast on any topic</center>
        ### <center>Use the Power of llms to understand any topic better</center>
        """

with gr.Blocks(css="style.css") as demo:    
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        
        seed = gr.Slider(
        label="Seed",
        minimum=0,
        maximum=999999,
        step=1,
        value=0,
        visible=False
        )
        input = gr.Textbox(label="Topic", placeholder="Enter a topic")
        output = gr.Audio(label="Podgen", type="filepath",
                        interactive=False,
                        autoplay=True,
                        elem_classes="audio")
        gr.Interface(
            batch=True,
            max_batch_size=10, 
            fn=generate_podcast, 
            inputs=[input, seed],
            outputs=[output], live=True)  
        
    

if __name__ == "__main__":
    demo.queue(max_size=200).launch()