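"""Gradio app that generates a short podcast on a user-supplied topic.

Mistral-7B-Instruct (via the Hugging Face Inference API) writes a two-speaker
script, edge-tts voices each line, and moviepy concatenates the clips into a
single MP3.
"""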
import json
import os
import asyncio
from moviepy.editor import AudioFileClip, concatenate_audioclips
from huggingface_hub import InferenceClient
import torch
import edge_tts
import tempfile
import gradio as gr

# Initialize Hugging Face Inference Client
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
generator = torch.Generator().manual_seed(42)

async def text_to_speech(text, voice, filename):
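    """Synthesize `text` with the given edge-tts voice and save it to `filename`."""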
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)

async def generate_conversation(script):
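    """Voice each line of the script with edge-tts, concatenate the clips, and return the path to the combined MP3."""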
    title = script['title']
    content = script['content']
    
    temp_files = []

    tasks = []
    for key, text in content.items():
        speaker = key.split('_')[0]  # Extract the speaker name
        index = key.split('_')[1]    # Extract the dialogue index
        voice = "en-US-JennyNeural" if speaker == "Alice" else "en-US-GuyNeural"
        
        # Create a temporary file for each speaker's dialogue and close the
        # handle so edge-tts can write to the path on every platform
        temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
        temp_file.close()
        temp_files.append(temp_file.name)

        filename = temp_file.name
        tasks.append(text_to_speech(text, voice, filename))
        print(f"Queued audio generation for {speaker}_{index}: {filename}")

    await asyncio.gather(*tasks)

    # Combine the audio files using moviepy
    audio_clips = [AudioFileClip(temp_file) for temp_file in temp_files]
    combined = concatenate_audioclips(audio_clips)

    # Create a temporary file for the combined output
    temp_output_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    temp_output_file.close()
    output_filename = temp_output_file.name

    # Save the combined file and release the clip handles so the
    # per-speaker files can be deleted below
    combined.write_audiofile(output_filename)
    combined.close()
    for clip in audio_clips:
        clip.close()
    print(f"Combined audio saved as: {output_filename}")

    # Clean up temporary files
    for temp_file in temp_files:
        os.remove(temp_file)
        print(f"Deleted temporary file: {temp_file}")

    return output_filename

# Function to generate podcast based on user input
def generate_podcast(topic, seed):
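    """Ask the model for a two-speaker script on `topic`, render it to audio, and return the path to the MP3."""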
    system_instructions = '''[SYSTEM] You are an educational podcast generator. Create a podcast between Alice and Bob that gives an overview of the topic given by the user.
    Provide the script in the following JSON format and respond with the JSON only:
    {
      "title": "[string]",
      "content": {
        "Alice_0": "[string]",
        "Bob_0": "[string]",
        ...
      }
    }
    Be concise.
    '''

    text = f" Topic: {topic}"
    formatted_prompt = system_instructions + text
    stream = client.text_generation(formatted_prompt, max_new_tokens=1024, seed=seed, stream=True, details=True, return_full_text=False)
    
    generated_script = ""
    for response in stream:
        if response.token.text != "</s>":
            generated_script += response.token.text

    # Generate the podcast audio from the generated script
    script_json = json.loads(generated_script)
    output_filename = asyncio.run(generate_conversation(script_json))
    print("Output File: " + output_filename)

    # gr.Audio(type="filepath") expects a path, so return the combined file's
    # path rather than raw bytes; Gradio serves the file from disk
    return output_filename

DESCRIPTION = """ # <center><b>PODGEN 📻</b></center>
        ### <center>Generate a podcast on any topic</center>
        ### <center>Use the Power of llms to understand any topic better</center>
        """

with gr.Blocks(css="style.css") as demo:    
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        
        seed = gr.Slider(
        label="Seed",
        minimum=0,
        maximum=999999,
        step=1,
        value=0,
        visible=False
        )
        input = gr.Textbox(label="Topic", placeholder="Enter a topic")
        output = gr.Audio(label="Podgen", type="filepath",
                        interactive=False,
                        autoplay=True,
                        elem_classes="audio")
        gr.Interface(
            batch=True,
            max_batch_size=10, 
            fn=generate_podcast, 
            inputs=[input, seed],
            outputs=[output], live=True)  
        
    

if __name__ == "__main__":
    demo.queue(max_size=200).launch()