Spaces:

vankienemk
/

Voice-regconizer

Running

App Files Community

vankienemk committed on 18 days ago

Commit

34f8403

verified ·

1 Parent(s): 228880a

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -29

app.py CHANGED Viewed

@@ -4,15 +4,16 @@ import torchaudio
 import numpy as np
 from ichigo_asr.demo.utils import load_model
-# Hàm tải mô hình Ichigo Whisper
 def init_model():
-    # Tải Ichigo Whisper
     try:
         ichigo_model = load_model(
             ref="homebrewltd/ichigo-whisper:merge-medium-vi-2d-2560c-dim64.pth",
             size="merge-medium-vi-2d-2560c-dim64",
         )
-        device = "cpu" if torch.cuda.is_available() else "cuda"
         ichigo_model.ensure_whisper(device)
         ichigo_model.to(device)
         return ichigo_model, device
@@ -20,7 +21,7 @@ def init_model():
         print(f"Lỗi khi tải mô hình: {e}")
         return None, "cpu"
-# Khởi tạo mô hình khi ứng dụng bắt đầu
 ichigo_model, device = init_model()
 def transcribe(audio_path):
@@ -38,8 +39,11 @@ def transcribe(audio_path):
         if wav.shape[0] > 1:
             wav = wav.mean(dim=0, keepdim=True)
         # Thực hiện dự đoán
-        transcribe_result = ichigo_model.inference(wav.to(device))
         # Trả về kết quả
         return transcribe_result[0].text
@@ -52,38 +56,16 @@ description = """
 # 🍓 Ichigo Whisper Speech Recognition
 Sử dụng mô hình Ichigo-whisper để nhận dạng giọng nói.
 Mô hình này có hiệu suất tốt cho cả tiếng Anh và tiếng Việt!
-## Cách sử dụng:
-1. Nhấn vào nút microphone và nói
-2. Hoặc tải lên file audio
-3. Mô hình sẽ chuyển đổi giọng nói thành văn bản
-Chi tiết về mô hình: [Menlo/Ichigo-whisper-v0.1](https://huggingface.co/Menlo/Ichigo-whisper-v0.1)
 """
-# Tạo giao diện với hai tab: Microphone và Upload
-mic_transcribe = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(sources="microphone", type="filepath"),
     outputs=gr.Textbox(label="Phiên âm"),
     title=title,
     description=description
 )
-file_transcribe = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(sources="upload", type="filepath"),
-    outputs=gr.Textbox(label="Phiên âm"),
-    title=title,
-    description=description
-)
-# Kết hợp các tab
-demo = gr.TabbedInterface(
-    [mic_transcribe, file_transcribe],
-    ["Microphone", "Upload Audio"]
-)
 # Khởi chạy ứng dụng
 if __name__ == "__main__":
     demo.launch()

 import numpy as np
 from ichigo_asr.demo.utils import load_model
+# Hàm tải mô hình Ichigo Whisper với map_location=cpu
 def init_model():
     try:
+        # Chỉ định rõ ràng map_location='cpu' để tải mô hình trên CPU
         ichigo_model = load_model(
             ref="homebrewltd/ichigo-whisper:merge-medium-vi-2d-2560c-dim64.pth",
             size="merge-medium-vi-2d-2560c-dim64",
+            map_location=torch.device('cpu')  # Thêm tham số này
         )
+        device = "cpu"  # Chỉ sử dụng CPU
         ichigo_model.ensure_whisper(device)
         ichigo_model.to(device)
         return ichigo_model, device
         print(f"Lỗi khi tải mô hình: {e}")
         return None, "cpu"
+# Khởi tạo mô hình
 ichigo_model, device = init_model()
 def transcribe(audio_path):
         if wav.shape[0] > 1:
             wav = wav.mean(dim=0, keepdim=True)
+        # Đảm bảo dữ liệu nằm trên CPU
+        wav = wav.to(device)
         # Thực hiện dự đoán
+        transcribe_result = ichigo_model.inference(wav)
         # Trả về kết quả
         return transcribe_result[0].text
 # 🍓 Ichigo Whisper Speech Recognition
 Sử dụng mô hình Ichigo-whisper để nhận dạng giọng nói.
 Mô hình này có hiệu suất tốt cho cả tiếng Anh và tiếng Việt!
 """
+demo = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
     outputs=gr.Textbox(label="Phiên âm"),
     title=title,
     description=description
 )
 # Khởi chạy ứng dụng
 if __name__ == "__main__":
     demo.launch()