lalalic committed
Commit 18ccbe4 · verified · 1 Parent(s): d8d2581

Update app.py

Files changed (1):
  1. app.py +153 -9
app.py CHANGED
@@ -1,14 +1,158 @@
-import gradio as gr
 from TTS.api import TTS

-#print(TTS().list_models())
-tts = TTS("tts_models/zh-CN/baker/tacotron2-DDC-GST")

-def predict(text, sample):
-    if sample is not None:
-        return tts.tts_to_file(text=text, speaker_wav=sample)
-    else:
-        return tts.tts_to_file(text=text)

-gr.Interface(predict, ["text", gr.Audio(label="Sample Audio", type="filepath")], gr.Audio(label="Synthesised Audio", autoplay=True)).queue().launch()

+import re, io, os, stat
+import tempfile, subprocess
+import requests
+import torch
+import traceback
+import numpy as np
+import scipy.io.wavfile
+from flask import Flask, Blueprint, request, jsonify, send_file
+
+import torchaudio
 from TTS.api import TTS

+app = Flask(__name__)
+def upload_bytes(bytes, ext=".wav"):
+    # placeholder for qili's upload_bytes: hand back the raw bytes instead of uploading
+    return bytes
+
+# from qili import upload_bytes
+# if __name__ == "__main__":
+#     app = Flask(__name__)
+# else:
+#     app = Blueprint("xtts", __name__)
+
+
+sample_root = os.environ.get('XTTS_SAMPLE_DIR')
+if sample_root is None:
+    sample_root = f'{os.getcwd()}/samples'
+if not os.path.exists(sample_root):
+    os.makedirs(sample_root)
+
+default_sample = (f'{os.path.dirname(os.path.abspath(__file__))}/sample.wav', f'{sample_root}/sample.pt')
+
+ffmpeg = f'{os.path.dirname(os.path.abspath(__file__))}/ffmpeg'
+try:
+    # make the bundled ffmpeg binary executable
+    st = os.stat(ffmpeg)
+    os.chmod(ffmpeg, st.st_mode | stat.S_IEXEC)
+except Exception:
+    traceback.print_exc()
+
+tts = None
+model = None
+@app.route("/convert")
+def predict():
+    global tts
+    global model
+    text = request.args.get('text')
+    sample = request.args.get('sample')
+    language = request.args.get('language')
+
+    if text is None:
+        return jsonify({'error': 'text is missing'}), 400
+
+    # insert a space and double sentence-final punctuation after a word or non-ASCII character
+    text = re.sub(r"([^\x00-\x7F]|\w)(\.|。|\?)", r"\1 \2\2", text)
+
+    try:
+        if tts is None:
+            model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
+            tts = TTS(model_name=model_name)
+            model = tts.synthesizer.tts_model
+            # hack to cache conditioning latents per reference sample
+            model.__get_conditioning_latents = model.get_conditioning_latents
+            model.get_conditioning_latents = get_conditioning_latents
+
+        wav = tts.tts(
+            text,
+            language=language if language is not None else "zh",
+            speaker_wav=sample if sample is not None else default_sample[0],
+        )
+
+        with io.BytesIO() as wav_buffer:
+            if torch.is_tensor(wav):
+                wav = wav.cpu().numpy()
+            if isinstance(wav, list):
+                wav = np.array(wav)
+            wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
+            wav_norm = wav_norm.astype(np.int16)
+            scipy.io.wavfile.write(wav_buffer, tts.synthesizer.output_sample_rate, wav_norm)
+            wav_bytes = wav_buffer.getvalue()
+            url = upload_bytes(wav_bytes, ext=".wav")
+            print(f'wav is at {url}')
+            return url
+    except Exception as e:
+        traceback.print_exc()
+        return str(e)
+
+@app.route("/play")
+def play():
+    url = predict()
+    return f'''
+    <html>
+        <body>
+            <audio controls autoplay>
+                <source src="{url}" type="audio/wav">
+                Your browser does not support the audio element.
+            </audio>
+        </body>
+    </html>
+    '''
+
+def get_conditioning_latents(audio_path, **others):
+    global model
+    speaker_wav, pt_file = download(audio_path)
+    try:
+        if pt_file is not None:
+            (
+                gpt_cond_latent,
+                speaker_embedding,
+            ) = torch.load(pt_file)
+            print(f'sample wav info loaded from {pt_file}')
+    except Exception:
+        # cache miss: compute the latents once and persist them for next time
+        (
+            gpt_cond_latent,
+            speaker_embedding,
+        ) = model.__get_conditioning_latents(audio_path=speaker_wav, **others)
+        torch.save((gpt_cond_latent, speaker_embedding), pt_file)
+        print(f'sample wav info saved to {pt_file}')
+    return gpt_cond_latent, speaker_embedding
+
+def download(url):
+    try:
+        response = requests.get(url)
+        if response.status_code == 200:
+            # cache key derived from the ETag of the downloaded sample
+            id = f'{sample_root}/{response.headers["etag"]}.pt'.replace('"', '')
+            if os.path.exists(id):
+                return "", id
+            with tempfile.NamedTemporaryFile(mode="wb", delete=True) as temp_file:
+                temp_file.write(response.content)
+                return trim_sample_audio(os.path.abspath(temp_file.name)), id
+    except Exception:
+        traceback.print_exc()
+    # fall back to the bundled sample when the fetch fails
+    return default_sample
+
+def trim_sample_audio(speaker_wav):
+    global ffmpeg
+    try:
+        lowpass_highpass = "lowpass=8000,highpass=75,"
+        trim_silence = "areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,"
+        # strip the trailing comma so ffmpeg does not see an empty filter
+        filters = f"{lowpass_highpass}{trim_silence}".rstrip(",")
+        out_filename = speaker_wav.replace(".wav", "_trimmed.wav")
+        shell_command = f"{ffmpeg} -y -i {speaker_wav} -af {filters} {out_filename}".split(" ")
+        subprocess.run(
+            shell_command,
+            capture_output=False,
+            text=True,
+            check=True,
+        )
+        return out_filename
+    except Exception:
+        traceback.print_exc()
+        return speaker_wav
+
+
+@app.route("/")
+def hello():
+    return "hello xtts"

+if __name__ == '__main__':
+    app.run(debug=True)
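
Because the stubbed upload_bytes just returns the bytes it is given, /convert currently answers with the raw WAV payload rather than an uploaded URL. Below is a minimal sketch of exercising the new endpoint from Python, assuming the app is running locally on Flask's default port 5000; the base URL, the test sentence, and the out.wav filename are illustrative, not part of the commit:

import requests

base = "http://localhost:5000"  # assumed local Flask default; adjust for your deployment

resp = requests.get(
    f"{base}/convert",
    params={
        "text": "你好，世界。",  # required; server defaults language to "zh"
        "language": "zh",        # optional
        # "sample": "https://example.com/voice.wav",  # optional reference-voice URL (hypothetical)
    },
)
resp.raise_for_status()

# With the stub upload_bytes, the response body is the WAV bytes themselves.
with open("out.wav", "wb") as f:
    f.write(resp.content)

Repeat requests with the same sample URL should be cheaper: the monkey-patched get_conditioning_latents persists the speaker latents as a .pt file keyed by the sample's ETag, so the embedding step runs only once per reference voice.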