Update app.py
app.py CHANGED
@@ -1,29 +1,3 @@
-# import os
-# import time
-# import json
-# import gradio as gr
-# import torch
-# import torchaudio
-# import numpy as np
-# from denoiser.demucs import Demucs
-# from pydub import AudioSegment
-
-# modelpath = './denoiser/master64.th'
-
-# def transcribe(file_upload, microphone):
-#     file = microphone if microphone is not None else file_upload
-#     model = Demucs(hidden=64)
-#     state_dict = torch.load(modelpath, map_location='cpu')
-#     model.load_state_dict(state_dict)
-#     demucs = model
-#     x, sr = torchaudio.load(file)
-#     out = demucs(x[None])[0]
-#     out = out / max(out.abs().max().item(), 1)
-#     torchaudio.save('enhanced.wav', out, sr)
-#     enhanced = AudioSegment.from_wav('enhanced.wav')  # only the denoised output needs its bitrate lowered before speech recognition
-#     enhanced.export('enhanced.wav', format="wav", bitrate="256k")
-#     return "enhanced.wav"
-
 import os
 import time
 import json
@@ -33,67 +7,93 @@ import torchaudio
 import numpy as np
 from denoiser.demucs import Demucs
 from pydub import AudioSegment
-import soundfile as sf
-import librosa
 
 modelpath = './denoiser/master64.th'
 
 def transcribe(file_upload, microphone):
     file = microphone if microphone is not None else file_upload
-
-    # New audio preprocessing: unify the input format
-    def preprocess_audio(path):
-        data, sr = sf.read(path)
-
-        # If stereo, downmix to mono
-        if len(data.shape) > 1:
-            data = data.mean(axis=1)
-
-        # If it is not 16 kHz, resample
-        if sr != 16000:
-            data = librosa.resample(data, orig_sr=sr, target_sr=16000)
-            sr = 16000
-
-        # Save as WAV for the model to use
-        sf.write("enhanced.wav", data, sr)
-        return "enhanced.wav"
-
-    # If the input is MP3, convert it to WAV before processing
-    if file.lower().endswith(".mp3"):
-        audio = AudioSegment.from_file(file)
-        audio = audio.set_frame_rate(16000).set_channels(1)  # mono + 16 kHz
-        audio.export("enhanced.wav", format="wav")
-        file = "enhanced.wav"
-    else:
-        file = preprocess_audio(file)
-
     model = Demucs(hidden=64)
     state_dict = torch.load(modelpath, map_location='cpu')
     model.load_state_dict(state_dict)
-    demucs = model
-
+    demucs = model
     x, sr = torchaudio.load(file)
-    x = x[0:1]  # force the first channel (ensure mono)
-
-    with torch.no_grad():
-        out = demucs(x[None])[0]
-
-    out = out / max(out.abs().max().item(), 1)
-    torchaudio.save('enhanced_final.wav', out, sr)
-
-    # Output WAV format for front-end playback
-    enhanced = AudioSegment.from_wav('enhanced_final.wav')
-    enhanced.export('enhanced_final.mp3', format="mp3", bitrate="256k")
-
-    return "enhanced_final.mp3"  # returning MP3 saves space
-
-
-# 👇 Add this to fix the Gradio schema-inference error
-transcribe.__annotations__ = {
-    "file_upload": str,
-    "microphone": str,
-    "return": str
-}
+    out = demucs(x[None])[0]
+    out = out / max(out.abs().max().item(), 1)
+    torchaudio.save('enhanced.wav', out, sr)
+    enhanced = AudioSegment.from_wav('enhanced.wav')  # only the denoised output needs its bitrate lowered before speech recognition
+    enhanced.export('enhanced.wav', format="wav", bitrate="256k")
+    return "enhanced.wav"
+
+# import os
+# import time
+# import json
+# import gradio as gr
+# import torch
+# import torchaudio
+# import numpy as np
+# from denoiser.demucs import Demucs
+# from pydub import AudioSegment
+# import soundfile as sf
+# import librosa
+
+# modelpath = './denoiser/master64.th'
+
+# def transcribe(file_upload, microphone):
+#     file = microphone if microphone is not None else file_upload
+
+#     # New audio preprocessing: unify the input format
+#     def preprocess_audio(path):
+#         data, sr = sf.read(path)
+
+#         # If stereo, downmix to mono
+#         if len(data.shape) > 1:
+#             data = data.mean(axis=1)
+
+#         # If it is not 16 kHz, resample
+#         if sr != 16000:
+#             data = librosa.resample(data, orig_sr=sr, target_sr=16000)
+#             sr = 16000
+
+#         # Save as WAV for the model to use
+#         sf.write("enhanced.wav", data, sr)
+#         return "enhanced.wav"
+
+#     # If the input is MP3, convert it to WAV before processing
+#     if file.lower().endswith(".mp3"):
+#         audio = AudioSegment.from_file(file)
+#         audio = audio.set_frame_rate(16000).set_channels(1)  # mono + 16 kHz
+#         audio.export("enhanced.wav", format="wav")
+#         file = "enhanced.wav"
+#     else:
+#         file = preprocess_audio(file)
+
+#     model = Demucs(hidden=64)
+#     state_dict = torch.load(modelpath, map_location='cpu')
+#     model.load_state_dict(state_dict)
+#     demucs = model.eval()
+
+#     x, sr = torchaudio.load(file)
+#     x = x[0:1]  # force the first channel (ensure mono)
+
+#     with torch.no_grad():
+#         out = demucs(x[None])[0]
+
+#     out = out / max(out.abs().max().item(), 1)
+#     torchaudio.save('enhanced_final.wav', out, sr)
+
+#     # Output WAV format for front-end playback
+#     enhanced = AudioSegment.from_wav('enhanced_final.wav')
+#     enhanced.export('enhanced_final.mp3', format="mp3", bitrate="256k")
+
+#     return "enhanced_final.mp3"  # returning MP3 saves space
+
+# # 👇 Add this to fix the Gradio schema-inference error
+# transcribe.__annotations__ = {
+#     "file_upload": str,
+#     "microphone": str,
+#     "return": str
+# }
 
 demo = gr.Interface(
     fn=transcribe,
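The diff ends inside the gr.Interface(...) call, so the remaining arguments are not part of this commit view. Purely as a hedged sketch of how this transcribe signature is commonly wired up in Gradio (two optional audio file paths in, one enhanced audio file out), and not the file's actual code, the call might be completed along these lines; the component choices and labels below are assumptions.

# Hypothetical completion of the truncated gr.Interface(...) call (not taken from this commit)
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="file_upload"),   # assumed upload input
        gr.Audio(type="filepath", label="microphone"),    # assumed microphone input
    ],
    outputs=gr.Audio(type="filepath", label="enhanced"),  # transcribe returns a file path
)

if __name__ == "__main__":
    demo.launch()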