import logging
import os
import shutil
import tempfile
import traceback
from datetime import datetime

import gradio as gr
import librosa
import soundfile as sf
import torch

from DPTNet_eval.DPTNet_quant_sep import load_dpt_model, dpt_sep_process

# Upload size cap enforced by separate_audio (50 MB).
MAX_UPLOAD_BYTES = 50 * 1024 * 1024
# Every input is resampled to this rate before being handed to the separator.
TARGET_SAMPLE_RATE = 16000

# Configure logging: everything at INFO and above goes to app.log.
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load the model once at import time so requests do not reload it.
try:
    logger.info("開始加載語音分離模型...")
    model = load_dpt_model()
    logger.info("模型加載成功")
except Exception as e:
    logger.error(f"模型加載失敗: {str(e)}")
    raise RuntimeError("模型初始化失敗") from e


def separate_audio(input_wav):
    """Separate an uploaded mixture into two audio files.

    The input is loaded as mono, resampled to 16 kHz when needed, written to
    a temporary 16-bit PCM WAV file and passed to ``dpt_sep_process`` together
    with the module-level ``model``.

    Args:
        input_wav: Filesystem path of the uploaded audio. ``None`` or an empty
            value is reported the same way as a missing file.

    Returns:
        A list with the two expected output paths, ``output_sep1.wav`` and
        ``output_sep2.wav``, inside a fresh temporary directory.

    Raises:
        gr.Error: If the input is missing or larger than 50 MB, if an expected
            output file was not produced, or if any other step fails (the
            original exception is chained as the cause).
    """
    process_id = datetime.now().strftime("%Y%m%d%H%M%S%f")
    temp_wav = None
    out_dir = None
    succeeded = False

    try:
        logger.info(f"[{process_id}] 開始處理檔案: {input_wav}")

        # 1. Validate the input. The falsy check avoids a TypeError from
        #    os.path.exists(None) when the function is called without a file.
        if not input_wav or not os.path.exists(input_wav):
            raise gr.Error("檔案不存在,請重新上傳")
        if os.path.getsize(input_wav) > MAX_UPLOAD_BYTES:
            raise gr.Error("檔案大小超過50MB限制")

        # 2. Load the audio as mono at its native sample rate.
        logger.info(f"[{process_id}] 讀取音訊檔案...")
        data, sr = librosa.load(input_wav, sr=None, mono=True)

        # 3. Resample to the target rate when the source differs.
        if sr != TARGET_SAMPLE_RATE:
            logger.info(f"[{process_id}] 重採樣從 {sr}Hz 到 16000Hz...")
            data = librosa.resample(
                data, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE
            )
            sr = TARGET_SAMPLE_RATE

        # 4. Reserve a temporary file name, then write after the handle is
        #    closed so the path is not held open while soundfile writes to it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            temp_wav = tmp_file.name
        logger.info(f"[{process_id}] 寫入臨時檔案: {temp_wav}")
        sf.write(temp_wav, data, sr, subtype='PCM_16')

        # 5. Run the separation into a dedicated temporary directory.
        logger.info(f"[{process_id}] 開始語音分離...")
        out_dir = tempfile.mkdtemp()
        outfilename = os.path.join(out_dir, "output.wav")
        dpt_sep_process(temp_wav, model=model, outfilename=outfilename)

        # 6. Build the expected output names from the stem only, so a ".wav"
        #    elsewhere in the path can never be rewritten.
        stem, _ = os.path.splitext(outfilename)
        output_files = [f"{stem}_sep1.wav", f"{stem}_sep2.wav"]
        logger.info(f"[{process_id}] 預期輸出檔案: {output_files}")

        # 7. Verify that both outputs exist.
        missing = [f for f in output_files if not os.path.exists(f)]
        if missing:
            raise gr.Error(f"分離失敗,缺失檔案: {missing}")

        logger.info(f"[{process_id}] 處理完成")
        succeeded = True
        return output_files

    except gr.Error as e:
        # Already a user-facing error: log it and pass it through unchanged
        # instead of wrapping it in a second "處理失敗" message.
        logger.error(f"[{process_id}] 處理錯誤: {str(e)}")
        raise

    except Exception as e:
        error_msg = f"[{process_id}] 處理錯誤: {str(e)}\n{traceback.format_exc()}"
        logger.error(error_msg)
        raise gr.Error(f"處理失敗: {str(e)}") from e

    finally:
        # Remove the intermediate WAV file in every case.
        if temp_wav and os.path.exists(temp_wav):
            try:
                os.remove(temp_wav)
                logger.info(f"[{process_id}] 已清理臨時檔案")
            except OSError as clean_err:
                logger.warning(f"[{process_id}] 清理失敗: {str(clean_err)}")
        # The output directory is kept on success because the returned paths
        # must stay readable after this function returns; on failure nothing
        # references it, so remove it rather than leak it.
        if out_dir and not succeeded:
            shutil.rmtree(out_dir, ignore_errors=True)


# Description text passed to gr.Interface(description=...).
description_html = """

中文語者分離(分割)

上傳一段混音音檔 (支援 `.mp3`, `.wav`),自動分離出兩個人的聲音

TonTon Huang Ph.D. | AI | 手把手帶你一起踩AI坑 | GitHub | Deep Learning 101 | YouTube

### 📘 相關技術文章:
"""


def main():
    """Build the Gradio interface and serve it until the process is stopped."""
    interface = gr.Interface(
        fn=separate_audio,
        inputs=gr.Audio(
            type="filepath",
            label="請上傳混音音檔 (支援格式: mp3/wav/ogg)",
            sources=["upload", "microphone"],
            max_length=180
        ),
        outputs=[
            gr.Audio(label="語音軌道 1", format="wav"),
            gr.Audio(label="語音軌道 2", format="wav")
        ],
        title="🎙️ 語音分離 Demo - Deep Learning 101",
        description=description_html,
        # `flagging_mode` supersedes the deprecated `allow_flagging`.
        flagging_mode="never",
        live=True,
        examples=[
            ["examples/sample1.wav"],
            ["examples/sample2.mp3"]
        ],
        theme="default",
        # Replaces queue(concurrency_count=2), which Gradio 4+ rejects.
        concurrency_limit=2,
    )

    interface.queue()

    launch_kwargs = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
        "auth": None,
        "inbrowser": True,
        "quiet": False,
        # Must stay False in a script: with True, launch() returns at once,
        # the script ends and the server is torn down with it.
        "prevent_thread_lock": False,
    }
    interface.launch(**launch_kwargs)


if __name__ == "__main__":
    main()