Dia-1.6B

Running on T4

App Files Community

NariLabs committed 4 days ago

Commit

37f726c

verified ·

1 Parent(s): f18af23

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -55

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ from typing import Optional, Tuple
 import spaces
 import gradio as gr
-from gradio_dialogue import Dialogue
 import numpy as np
 import soundfile as sf
 import torch
@@ -219,11 +218,7 @@ css = """
 #col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
 """
 # Attempt to load default text from example.txt
-default_text = [{"speaker": "Speaker 1", "text": "Dia is an open weights text to dialogue model."},
-                {"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
-                {"speaker": "Speaker 1", "text": "Wow. Amazing. (laughs)"},
-                {"speaker": "Speaker 2", "text": "Try it now on Git hub or Hugging Face."},
-                ]
 example_txt_path = Path("./example.txt")
 if example_txt_path.exists():
     try:
@@ -234,47 +229,18 @@ if example_txt_path.exists():
         print(f"Warning: Could not read example.txt: {e}")
-def formatter(speaker, text):
-    speaker = speaker.split(" ")[1]
-    return f"[S{speaker}] {text}"
-emotions = [
-    "(laughs)",
-    "(clears throat)",
-    "(sighs)",
-    "(gasps)",
-    "(coughs)",
-    "(singing)",
-    "(sings)",
-    "(mumbles)",
-    "(beep)",
-    "(groans)",
-    "(sniffs)",
-    "(claps)",
-    "(screams)",
-    "(inhales)",
-    "(exhales)",
-    "(applause)",
-    "(burps)",
-    "(humming)",
-    "(sneezes)",
-    "(chuckle)",
-    "(whistles)",
-]
 # Build Gradio UI
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# Nari Text-to-Speech Synthesis")
     with gr.Row(equal_height=False):
         with gr.Column(scale=1):
-            text_input = Dialogue(
-                speakers=["Speaker 1", "Speaker 2"],
-                emotions=emotions,
-                formatter=formatter,
                 value=default_text,
             )
             audio_prompt_input = gr.Audio(
                 label="Audio Prompt (Optional)",
                 show_label=True,
@@ -339,7 +305,6 @@ with gr.Blocks(css=css) as demo:
                 type="numpy",
                 autoplay=False,
             )
-            gr.Deeplink()
     # Link button click to function
     run_button.click(
@@ -362,11 +327,7 @@ with gr.Blocks(css=css) as demo:
     example_prompt_path = "./example_prompt.mp3"  # Adjust if needed
     examples_list = [
         [
-            [{"speaker": "Speaker 1", "text": "Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct!"},
-            {"speaker": "Speaker 2", "text": "Oh my god! Okay.. it's happening. Everybody stay calm!"},
-            {"speaker": "Speaker 1", "text": "What's the procedure..."},
-            {"speaker": "Speaker 2", "text": "Everybody stay fucking calm!!!... Everybody fucking calm down!!!!! \n[S1] No! No! If you touch the handle, if its hot there might be a fire down the hallway!"},
-            ],
             None,
             3072,
             3.0,
@@ -376,15 +337,7 @@ with gr.Blocks(css=css) as demo:
             0.94,
         ],
         [
-            [{"speaker": "Speaker 1", "text": "Open weights text to dialogue model."},
-            {"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
-            {"speaker": "Speaker 1", "text": "I'm biased, but I think we clearly won."},
-            {"speaker": "Speaker 2", "text": "Hard to disagree. (laughs)"},
-            {"speaker": "Speaker 1", "text": "Thanks for listening to this demo."},
-            {"speaker": "Speaker 2", "text": "Try it now on Git hub and Hugging Face."},
-            {"speaker": "Speaker 1", "text": "If you liked our model, please give us a star and share to your friends."},
-            {"speaker": "Speaker 2", "text": "This was Nari Labs."},
-            ],
             example_prompt_path if Path(example_prompt_path).exists() else None,
             3072,
             3.0,
@@ -422,4 +375,4 @@ if __name__ == "__main__":
     # set `GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT` env vars to override default values
     # use `GRADIO_SERVER_NAME=0.0.0.0` for Docker
-    demo.launch(ssr_mode=False)

 import spaces
 import gradio as gr
 import numpy as np
 import soundfile as sf
 import torch
 #col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
 """
 # Attempt to load default text from example.txt
+default_text = "[S1] Dia is an open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] Wow. Amazing. (laughs) \n[S2] Try it now on Git hub or Hugging Face."
 example_txt_path = Path("./example.txt")
 if example_txt_path.exists():
     try:
         print(f"Warning: Could not read example.txt: {e}")
 # Build Gradio UI
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# Nari Text-to-Speech Synthesis")
     with gr.Row(equal_height=False):
         with gr.Column(scale=1):
+            text_input = gr.Textbox(
+                label="Input Text",
+                placeholder="Enter text here...",
                 value=default_text,
+                lines=5,  # Increased lines
             )
             audio_prompt_input = gr.Audio(
                 label="Audio Prompt (Optional)",
                 show_label=True,
                 type="numpy",
                 autoplay=False,
             )
     # Link button click to function
     run_button.click(
     example_prompt_path = "./example_prompt.mp3"  # Adjust if needed
     examples_list = [
         [
+            "[S1] Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct! \n[S2] Oh my god! Okay.. it's happening. Everybody stay calm! \n[S1] What's the procedure... \n[S2] Everybody stay fucking calm!!!... Everybody fucking calm down!!!!! \n[S1] No! No! If you touch the handle, if its hot there might be a fire down the hallway! ",
             None,
             3072,
             3.0,
             0.94,
         ],
         [
+            "[S1] Open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] I'm biased, but I think we clearly won. \n[S2] Hard to disagree. (laughs) \n[S1] Thanks for listening to this demo. \n[S2] Try it now on Git hub and Hugging Face. \n[S1] If you liked our model, please give us a star and share to your friends. \n[S2] This was Nari Labs.",
             example_prompt_path if Path(example_prompt_path).exists() else None,
             3072,
             3.0,
     # set `GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT` env vars to override default values
     # use `GRADIO_SERVER_NAME=0.0.0.0` for Docker
+    demo.launch()