dangtr0408 committed on
Commit
5b935e1
·
verified ·
1 Parent(s): 978babb

init space

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -35
  2. README.md +13 -13
  3. app.py +96 -0
  4. packages.txt +1 -0
  5. requirements.txt +10 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: StyleTTS2 Lite Vi Space
3
- emoji: 🚀
4
- colorFrom: yellow
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.25.0
8
- app_file: app.py
9
- pinned: false
10
- license: cc-by-nc-sa-4.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: StyleTTS2 Lite Vi
3
+ emoji: 🦀
4
+ colorFrom: gray
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 5.24.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-sa-4.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import os
4
+ import sys
5
+ import soundfile as sf
6
+ import numpy as np
7
+ import torch.cuda
8
+
9
# Remote model repository and the local directory it is cloned into.
repo_url = "https://huggingface.co/dangtr0408/StyleTTS2-lite-vi"
repo_dir = "StyleTTS2-lite-vi"

# Clone the repository only when it is not already present locally.
# check=True makes a failed clone raise CalledProcessError right here instead
# of surfacing later as a confusing ImportError for `inference`.
if not os.path.exists(repo_dir):
    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)

# The `inference` module lives inside the cloned repository, so the directory
# has to be on sys.path before it can be imported.
sys.path.append(os.path.abspath(repo_dir))
from inference import StyleTTS2

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the model from the config and checkpoint shipped in the cloned repo.
config_path = os.path.join(repo_dir, "Models", "config.yml")
models_path = os.path.join(repo_dir, "Models", "model_vi_en.pth")
model = StyleTTS2(config_path, models_path).to(device)
24
+
25
+ # Core inference function
26
def process_inputs(text_prompt, reference_audio_paths,
                   n_merge, randomness, smooth_dur,
                   denoise, t_denoise, split_dur):
    """Synthesize speech for ``text_prompt`` and return the path of a WAV file.

    Args:
        text_prompt: Text to synthesize.
        reference_audio_paths: Paths of the uploaded reference audio files.
            The i-th file (1-based) is registered as speaker ``id_<i>``.
        n_merge, randomness, smooth_dur, denoise, t_denoise, split_dur:
            Values from the "Advanced Settings" controls, forwarded unchanged
            to ``model.generate``.

    Returns:
        Path of a newly written 24 kHz WAV file with peak-normalized audio.

    Raises:
        gr.Error: If the text is empty, no reference audio was uploaded, or
            the model returned no samples. Gradio shows the message to the
            user instead of a generic failure.
    """
    # Local import: only this function needs tempfile.
    import tempfile

    if not text_prompt or not text_prompt.strip():
        raise gr.Error("Please enter some text to synthesize.")
    # The file input yields None (or an empty list) when nothing is uploaded;
    # the default speaker "[id_1]" below needs at least one entry.
    if not reference_audio_paths:
        raise gr.Error("Please upload at least one reference audio file.")

    speakers = {
        f"id_{i}": {"path": path, "lang": "vi", "speed": 1.1}
        for i, path in enumerate(reference_audio_paths, 1)
    }

    # Synthesize audio; "[id_1]" selects the first uploaded reference as the
    # default speaker.
    r = model.generate(
        text_prompt, speakers, denoise, t_denoise,
        split_dur, "[id_1]", n_merge, randomness, smooth_dur
    )

    # NOTE(review): assumes model.generate returns a NumPy-compatible array of
    # samples -- confirm against the inference module.
    audio = np.asarray(r)
    if audio.size == 0:
        raise gr.Error("The model produced no audio for this input.")

    # Peak-normalize, but leave pure silence untouched: dividing by a zero
    # peak would fill the output with NaNs.
    peak = np.abs(audio).max()
    if peak > 0:
        audio = audio / peak

    # Write to a unique file per request so concurrent users cannot overwrite
    # each other's result (a single shared "output.wav" would race).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, audio, samplerate=24000)
    return output_path
48
+
49
# Sizing rules for the two input components. Each component has its own
# element id (DOM ids must be unique), and both ids share the same rules.
custom_css = """
#text-box, #reference-box {
    min-height: 300px !important;
    display: flex;
    align-items: center;
}
#text-box textarea, #reference-box textarea {
    min-height: 250px !important;
    height: 100% !important;
}
"""

# Gradio UI
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## StyleTTS2-lite-vi Demo")
    gr.Markdown("Upload a reference audio and input your text to synthesize speech with style control.")

    # Main inputs: the text to speak and one or more reference recordings.
    with gr.Row():
        text_prompt = gr.Textbox(
            label="Text Prompt",
            placeholder="Enter your text here...",
            elem_id="text-box",
        )
        reference_audios = gr.File(
            label="Reference Audios",
            file_types=[".wav", ".mp3", ".flac"],
            file_count="multiple",
            elem_id="reference-box",
        )

    # Parameters
    with gr.Accordion("Advanced Settings", open=False):
        denoise = gr.Checkbox(label="Apply Denoising", value=True)
        t_denoise = gr.Slider(0.0, 1.0, value=0.3, label="Denoise Strength")
        # step=1 keeps the word count an integer; without an explicit step
        # Gradio derives a fractional one from the slider range.
        n_merge = gr.Slider(1, 30, step=1, value=16, label="Min Words to Merge")
        randomness = gr.Slider(0.0, 1.0, value=0.2, label="Randomness")
        smooth_dur = gr.Slider(0.0, 1.0, value=0.15, label="Smooth Duration")
        split_dur = gr.Slider(0, 10, step=1, value=3, label="Split Ref Audio Duration")

    submit_button = gr.Button("Synthesize")
    synthesized_audio = gr.Audio(label="Synthesized Audio", type="filepath")

    # The order of `inputs` must match the parameter order of process_inputs.
    submit_button.click(
        fn=process_inputs,
        inputs=[
            text_prompt,
            reference_audios,
            n_merge,
            randomness,
            smooth_dur,
            denoise,
            t_denoise,
            split_dur,
        ],
        outputs=synthesized_audio,
    )

demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ espeak-ng
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchaudio
3
+ numpy
4
+ PyYAML
5
+ munch
6
+ nltk
7
+ librosa
8
+ noisereduce
9
+ phonemizer
10
+ espeakng-loader