Spaces:

DeepLearning101
/

Speech-Separation

Running

App Files Files Community

DeepLearning101 committed about 23 hours ago

Commit

e56b358

verified ·

1 Parent(s): 9c54cd9

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -36

app.py CHANGED Viewed

@@ -1,47 +1,57 @@
 import gradio as gr
 import torch
-from DPTNet_eval.DPTNet_quant_sep import load_dpt_model, dpt_sep_process
 import os
 import soundfile as sf
 import numpy as np
 import librosa
 import warnings
-# 加載模型
 model = load_dpt_model()
 def separate_audio(input_wav):
-    """
-    Gradio Audio(filepath) → 處理 → 回傳兩個分離後的音檔路徑
-    """
-    file_extension = os.path.splitext(input_wav)[1].lower()
-    # 如果是 MP3 或其他格式，先轉成 WAV
-    if file_extension != ".wav":
-        data, sr = sf.read(input_wav)
-        # 轉單聲道
-        if len(data.shape) > 1:
-            data = data.mean(axis=1)
-        # 重採樣到 16kHz
         if sr != 16000:
             data = librosa.resample(data, orig_sr=sr, target_sr=16000)
-        # 存成 WAV
-        sf.write("input.wav", data, 16000)
-        wav_path = "input.wav"
-    else:
-        wav_path = input_wav
-    # 分離語音
-    outfilename = "output.wav"
-    dpt_sep_process(wav_path, model=model, outfilename=outfilename)
-    return (
-        outfilename.replace('.wav', '_sep1.wav'),
-        outfilename.replace('.wav', '_sep2.wav')
-    )
 # 🎯 你提供的 description 內容（已轉為 HTML）
 description_html = """
@@ -87,16 +97,31 @@ description_html = """
 """
 if __name__ == "__main__":
     interface = gr.Interface(
         fn=separate_audio,
-        inputs=gr.Audio(type="filepath", label="請上傳混音音檔 (.mp3/.wav)"),
         outputs=[
-            gr.Audio(label="語音 1"),
-            gr.Audio(label="語音 2")
         ],
         title="🎙️ 語音分離 Demo - Deep Learning 101",
         description=description_html,
-        allow_flagging="never"
     )
-    interface.launch(debug=True)

 import gradio as gr
 import torch
 import os
 import soundfile as sf
 import numpy as np
 import librosa
 import warnings
+import tempfile
+from DPTNet_eval.DPTNet_quant_sep import load_dpt_model, dpt_sep_process
+# Silence UserWarning and FutureWarning messages
+warnings.filterwarnings("ignore", category=UserWarning)
+warnings.filterwarnings("ignore", category=FutureWarning)
+# Load the model (module-level global variable)
 model = load_dpt_model()
 def separate_audio(input_wav):
     """Separate a mixed recording into two speaker tracks.

     The upload is decoded to mono, resampled to 16 kHz when its sample rate
     differs, written to a temporary 16-bit PCM WAV file and handed to
     ``dpt_sep_process`` together with the module-level ``model``.

     Args:
         input_wav: Path of the uploaded audio file, as delivered by a Gradio
             ``Audio`` input configured with ``type="filepath"``.

     Returns:
         A two-element list with the paths of the separated tracks
         (``output_sep1.wav`` and ``output_sep2.wav``).

     Raises:
         gr.Error: If no file was supplied, or if loading, separation or the
             output check fails.
     """
     # Submitting without an upload would otherwise surface as an opaque
     # loader error.
     if not input_wav:
         raise gr.Error("處理失敗：請先上傳音訊檔案")

     outfilename = "output.wav"
     output_files = [
         outfilename.replace('.wav', '_sep1.wav'),
         outfilename.replace('.wav', '_sep2.wav')
     ]
     temp_wav = None
     try:
         # Step 1: decode the audio and normalise it to mono.
         data, sr = librosa.load(input_wav, sr=None, mono=True)
         # Step 2: force a 16 kHz sample rate.
         if sr != 16000:
             data = librosa.resample(data, orig_sr=sr, target_sr=16000)
             sr = 16000
         # Step 3: reserve a unique temporary file name. The handle is closed
         # before soundfile writes to the path, because reopening a file that
         # is still open fails on platforms that lock open files.
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
             temp_wav = tmp_file.name
         sf.write(temp_wav, data, sr, subtype='PCM_16')
         # Drop results left behind by an earlier run so the existence check
         # below cannot be satisfied by stale files.
         for stale_path in output_files:
             if os.path.exists(stale_path):
                 os.remove(stale_path)
         # Step 4: run the separation.
         dpt_sep_process(temp_wav, model=model, outfilename=outfilename)
         # Step 5: make sure both output files were actually produced.
         if not all(os.path.exists(f) for f in output_files):
             raise gr.Error("分離過程中發生錯誤，請檢查輸入檔案格式！")
         return output_files
     except gr.Error:
         # Already a user-facing message; do not wrap it a second time.
         raise
     except Exception as e:
         error_msg = f"處理失敗：{str(e)}"
         raise gr.Error(error_msg) from e
     finally:
         # Always remove the temporary WAV, including when separation failed.
         if temp_wav is not None and os.path.exists(temp_wav):
             os.remove(temp_wav)
 # 🎯 The description content you provided (already converted to HTML)
 description_html = """
 """
 if __name__ == "__main__":
     # Only offer bundled samples that are actually present on disk.
     # NOTE(review): Gradio appears to resolve example files while the
     # interface is being built, so a missing sample would abort startup;
     # confirm against the installed Gradio version.
     example_paths = [
         os.path.join("examples", "sample1.wav"),
         os.path.join("examples", "sample2.mp3"),
     ]
     available_examples = [
         [path] for path in example_paths if os.path.exists(path)
     ]

     # Configure the Gradio interface: one audio upload in, two tracks out.
     interface = gr.Interface(
         fn=separate_audio,
         inputs=gr.Audio(
             type="filepath",
             label="請上傳混音音檔 (支援格式：mp3/wav/ogg)",
             max_length=300  # limit uploads to 5 minutes
         ),
         outputs=[
             gr.Audio(label="語音軌道 1"),
             gr.Audio(label="語音軌道 2")
         ],
         title="🎙️ 語音分離 Demo - Deep Learning 101",
         description=description_html,
         allow_flagging="never",
         # None hides the examples panel when no sample file is available.
         examples=available_examples or None
     )
     # Start the server, listening on all interfaces on port 7860.
     interface.launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=False,
         debug=False
     )