NariLabs committed on
Commit
f18af23
·
verified ·
1 Parent(s): 596ef1a
Files changed (2) hide show
  1. app.py +55 -8
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,6 +5,7 @@ from typing import Optional, Tuple
5
  import spaces
6
 
7
  import gradio as gr
 
8
  import numpy as np
9
  import soundfile as sf
10
  import torch
@@ -218,7 +219,11 @@ css = """
218
  #col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
219
  """
220
  # Attempt to load default text from example.txt
221
- default_text = "[S1] Dia is an open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] Wow. Amazing. (laughs) \n[S2] Try it now on Git hub or Hugging Face."
 
 
 
 
222
  example_txt_path = Path("./example.txt")
223
  if example_txt_path.exists():
224
  try:
@@ -229,18 +234,47 @@ if example_txt_path.exists():
229
  print(f"Warning: Could not read example.txt: {e}")
230
 
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  # Build Gradio UI
233
  with gr.Blocks(css=css) as demo:
234
  gr.Markdown("# Nari Text-to-Speech Synthesis")
235
 
236
  with gr.Row(equal_height=False):
237
  with gr.Column(scale=1):
238
- text_input = gr.Textbox(
239
- label="Input Text",
240
- placeholder="Enter text here...",
 
241
  value=default_text,
242
- lines=5, # Increased lines
243
  )
 
244
  audio_prompt_input = gr.Audio(
245
  label="Audio Prompt (Optional)",
246
  show_label=True,
@@ -305,6 +339,7 @@ with gr.Blocks(css=css) as demo:
305
  type="numpy",
306
  autoplay=False,
307
  )
 
308
 
309
  # Link button click to function
310
  run_button.click(
@@ -327,7 +362,11 @@ with gr.Blocks(css=css) as demo:
327
  example_prompt_path = "./example_prompt.mp3" # Adjust if needed
328
  examples_list = [
329
  [
330
- "[S1] Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct! \n[S2] Oh my god! Okay.. it's happening. Everybody stay calm! \n[S1] What's the procedure... \n[S2] Everybody stay fucking calm!!!... Everybody fucking calm down!!!!! \n[S1] No! No! If you touch the handle, if its hot there might be a fire down the hallway! ",
 
 
 
 
331
  None,
332
  3072,
333
  3.0,
@@ -337,7 +376,15 @@ with gr.Blocks(css=css) as demo:
337
  0.94,
338
  ],
339
  [
340
- "[S1] Open weights text to dialogue model. \n[S2] You get full control over scripts and voices. \n[S1] I'm biased, but I think we clearly won. \n[S2] Hard to disagree. (laughs) \n[S1] Thanks for listening to this demo. \n[S2] Try it now on Git hub and Hugging Face. \n[S1] If you liked our model, please give us a star and share to your friends. \n[S2] This was Nari Labs.",
 
 
 
 
 
 
 
 
341
  example_prompt_path if Path(example_prompt_path).exists() else None,
342
  3072,
343
  3.0,
@@ -375,4 +422,4 @@ if __name__ == "__main__":
375
 
376
  # set `GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT` env vars to override default values
377
  # use `GRADIO_SERVER_NAME=0.0.0.0` for Docker
378
- demo.launch()
 
5
  import spaces
6
 
7
  import gradio as gr
8
+ from gradio_dialogue import Dialogue
9
  import numpy as np
10
  import soundfile as sf
11
  import torch
 
219
  #col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
220
  """
221
  # Attempt to load default text from example.txt
222
+ default_text = [{"speaker": "Speaker 1", "text": "Dia is an open weights text to dialogue model."},
223
+ {"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
224
+ {"speaker": "Speaker 1", "text": "Wow. Amazing. (laughs)"},
225
+ {"speaker": "Speaker 2", "text": "Try it now on Git hub or Hugging Face."},
226
+ ]
227
  example_txt_path = Path("./example.txt")
228
  if example_txt_path.exists():
229
  try:
 
234
  print(f"Warning: Could not read example.txt: {e}")
235
 
236
 
237
+ def formatter(speaker, text):
238
+ speaker = speaker.split(" ")[1]
239
+ return f"[S{speaker}] {text}"
240
+
241
+ emotions = [
242
+ "(laughs)",
243
+ "(clears throat)",
244
+ "(sighs)",
245
+ "(gasps)",
246
+ "(coughs)",
247
+ "(singing)",
248
+ "(sings)",
249
+ "(mumbles)",
250
+ "(beep)",
251
+ "(groans)",
252
+ "(sniffs)",
253
+ "(claps)",
254
+ "(screams)",
255
+ "(inhales)",
256
+ "(exhales)",
257
+ "(applause)",
258
+ "(burps)",
259
+ "(humming)",
260
+ "(sneezes)",
261
+ "(chuckle)",
262
+ "(whistles)",
263
+ ]
264
+
265
  # Build Gradio UI
266
  with gr.Blocks(css=css) as demo:
267
  gr.Markdown("# Nari Text-to-Speech Synthesis")
268
 
269
  with gr.Row(equal_height=False):
270
  with gr.Column(scale=1):
271
+ text_input = Dialogue(
272
+ speakers=["Speaker 1", "Speaker 2"],
273
+ emotions=emotions,
274
+ formatter=formatter,
275
  value=default_text,
 
276
  )
277
+
278
  audio_prompt_input = gr.Audio(
279
  label="Audio Prompt (Optional)",
280
  show_label=True,
 
339
  type="numpy",
340
  autoplay=False,
341
  )
342
+ gr.Deeplink()
343
 
344
  # Link button click to function
345
  run_button.click(
 
362
  example_prompt_path = "./example_prompt.mp3" # Adjust if needed
363
  examples_list = [
364
  [
365
+ [{"speaker": "Speaker 1", "text": "Oh fire! Oh my goodness! What's the procedure? What to we do people? The smoke could be coming through an air duct!"},
366
+ {"speaker": "Speaker 2", "text": "Oh my god! Okay.. it's happening. Everybody stay calm!"},
367
+ {"speaker": "Speaker 1", "text": "What's the procedure..."},
368
+ {"speaker": "Speaker 2", "text": "Everybody stay fucking calm!!!... Everybody fucking calm down!!!!! \n[S1] No! No! If you touch the handle, if its hot there might be a fire down the hallway!"},
369
+ ],
370
  None,
371
  3072,
372
  3.0,
 
376
  0.94,
377
  ],
378
  [
379
+ [{"speaker": "Speaker 1", "text": "Open weights text to dialogue model."},
380
+ {"speaker": "Speaker 2", "text": "You get full control over scripts and voices."},
381
+ {"speaker": "Speaker 1", "text": "I'm biased, but I think we clearly won."},
382
+ {"speaker": "Speaker 2", "text": "Hard to disagree. (laughs)"},
383
+ {"speaker": "Speaker 1", "text": "Thanks for listening to this demo."},
384
+ {"speaker": "Speaker 2", "text": "Try it now on Git hub and Hugging Face."},
385
+ {"speaker": "Speaker 1", "text": "If you liked our model, please give us a star and share to your friends."},
386
+ {"speaker": "Speaker 2", "text": "This was Nari Labs."},
387
+ ],
388
  example_prompt_path if Path(example_prompt_path).exists() else None,
389
  3072,
390
  3.0,
 
422
 
423
  # set `GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT` env vars to override default values
424
  # use `GRADIO_SERVER_NAME=0.0.0.0` for Docker
425
+ demo.launch(ssr_mode=False)
requirements.txt CHANGED
@@ -6,3 +6,4 @@ pydantic>=2.11.3
6
  soundfile>=0.13.1
7
  torchaudio>=2.0.0
8
  torch>=2.0.0
 
 
6
  soundfile>=0.13.1
7
  torchaudio>=2.0.0
8
  torch>=2.0.0
9
+ gradio-dialogue>=0.0.4