File size: 1,132 Bytes
384a9fc
d54eecc
fb9850e
 
 
0b36a0d
fb9850e
 
0b36a0d
384a9fc
fb9850e
 
 
 
 
44f1dfa
 
fb9850e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44f1dfa
d54eecc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import tempfile

# NOTE: `spaces` is kept first among the third-party imports, as in the original file.
import spaces
import gradio as gr
import soundfile as sf
import numpy as np
from dia.model import Dia

# Checkpoint identifier handed to Dia.from_pretrained.
_MODEL_REPO = "nari-labs/Dia-1.6B"

# Created at import time; generate_dialogue reuses this single instance on every call.
model = Dia.from_pretrained(_MODEL_REPO)

@spaces.GPU
def generate_dialogue(script):
    """Generate audio for a dialogue script and return the path of a WAV file.

    Args:
        script: Dialogue text passed to ``model.generate``. The UI asks for
            speaker tags such as ``[S1]`` and ``[S2]``.

    Returns:
        Path of a newly created WAV file containing the generated audio.

    Raises:
        gr.Error: If ``script`` is empty or contains only whitespace.
    """
    # Fail fast with a user-visible message instead of running the model on nothing.
    if not script or not script.strip():
        raise gr.Error("Please enter a script before generating audio.")

    output = model.generate(script)

    # Use a unique file per call: a fixed file name would let overlapping
    # requests overwrite each other's audio. delete=False keeps the file on
    # disk after the handle is closed so it can be written and served by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        filename = tmp.name

    # 44100 Hz is the rate this app has always written at; presumably it is
    # the model's output sample rate -- TODO confirm against the Dia docs.
    sf.write(filename, output, 44100)
    return filename

# Build the UI: a script text box, a trigger button, and an audio output.
with gr.Blocks() as demo:
    # The heading previously contained a mis-decoded (mojibake) emoji; restored here.
    gr.Markdown("## 🎙️ Dia - Text to Dialogue Demo")
    gr.Markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

    with gr.Row():
        # Pre-filled with an example script.
        script_input = gr.Textbox(
            label="Script",
            lines=6,
            value="[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face.",
        )

    with gr.Row():
        generate_btn = gr.Button("Generate Audio")

    with gr.Row():
        # type="filepath" matches generate_dialogue, which returns a file path.
        audio_output = gr.Audio(label="Generated Dialogue", type="filepath")

    # Clicking the button sends the script text to generate_dialogue and
    # routes the returned file path to the audio component.
    generate_btn.click(
        fn=generate_dialogue,
        inputs=script_input,
        outputs=audio_output,
    )

demo.launch()