Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,43 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
from DPTNet_eval.DPTNet_quant_sep import load_dpt_model, dpt_sep_process
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
# 加載模型
|
6 |
model = load_dpt_model()
|
7 |
|
8 |
def separate_audio(input_wav):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
outfilename = "output.wav"
|
10 |
-
dpt_sep_process(
|
|
|
11 |
return (
|
12 |
outfilename.replace('.wav', '_sep1.wav'),
|
13 |
outfilename.replace('.wav', '_sep2.wav')
|
@@ -25,14 +55,14 @@ description_html = """
|
|
25 |
<a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a>
|
26 |
</div>
|
27 |
|
28 |
-
<br
|
29 |
|
30 |
### 🔍 使用方式:
|
31 |
- 上傳一段包含兩人對話的混音音檔(支援 `.mp3`, `.wav`)
|
32 |
- 點擊「Separate」按鈕
|
33 |
- 分離出兩個說話人的音軌
|
34 |
|
35 |
-
<br
|
36 |
|
37 |
### 📘 相關技術文章:
|
38 |
<ul>
|
@@ -69,4 +99,4 @@ if __name__ == "__main__":
|
|
69 |
allow_flagging="never"
|
70 |
)
|
71 |
|
72 |
-
interface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
from DPTNet_eval.DPTNet_quant_sep import load_dpt_model, dpt_sep_process
|
4 |
+
import os
|
5 |
+
import soundfile as sf
|
6 |
+
import numpy as np
|
7 |
+
import librosa
|
8 |
+
import warnings
|
9 |
|
10 |
# 加載模型
|
11 |
model = load_dpt_model()
|
12 |
|
13 |
def separate_audio(input_wav):
|
14 |
+
"""
|
15 |
+
Gradio Audio(filepath) → 處理 → 回傳兩個分離後的音檔路徑
|
16 |
+
"""
|
17 |
+
file_extension = os.path.splitext(input_wav)[1].lower()
|
18 |
+
|
19 |
+
# 如果是 MP3 或其他格式,先轉成 WAV
|
20 |
+
if file_extension != ".wav":
|
21 |
+
data, sr = sf.read(input_wav)
|
22 |
+
|
23 |
+
# 轉單聲道
|
24 |
+
if len(data.shape) > 1:
|
25 |
+
data = data.mean(axis=1)
|
26 |
+
|
27 |
+
# 重採樣到 16kHz
|
28 |
+
if sr != 16000:
|
29 |
+
data = librosa.resample(data, orig_sr=sr, target_sr=16000)
|
30 |
+
|
31 |
+
# 存成 WAV
|
32 |
+
sf.write("input.wav", data, 16000)
|
33 |
+
wav_path = "input.wav"
|
34 |
+
else:
|
35 |
+
wav_path = input_wav
|
36 |
+
|
37 |
+
# 分離語音
|
38 |
outfilename = "output.wav"
|
39 |
+
dpt_sep_process(wav_path, model=model, outfilename=outfilename)
|
40 |
+
|
41 |
return (
|
42 |
outfilename.replace('.wav', '_sep1.wav'),
|
43 |
outfilename.replace('.wav', '_sep2.wav')
|
|
|
55 |
<a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a>
|
56 |
</div>
|
57 |
|
58 |
+
<br>
|
59 |
|
60 |
### 🔍 使用方式:
|
61 |
- 上傳一段包含兩人對話的混音音檔(支援 `.mp3`, `.wav`)
|
62 |
- 點擊「Separate」按鈕
|
63 |
- 分離出兩個說話人的音軌
|
64 |
|
65 |
+
<br>
|
66 |
|
67 |
### 📘 相關技術文章:
|
68 |
<ul>
|
|
|
99 |
allow_flagging="never"
|
100 |
)
|
101 |
|
102 |
+
interface.launch(debug=True)
|