Update inference.py
inference.py +110 -103
inference.py CHANGED
@@ -1,103 +1,110 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Minimal inference for SpeechBrain ECAPA-TDNN (ShEMO fine-tuned).
-"""
-
-import os
-import torch
-import speechbrain as sb
-from hyperpyyaml import load_hyperpyyaml
-from speechbrain.dataio.dataio import read_audio
-
-# ------------------------------------------------------------------
-# 1)
-# ------------------------------------------------------------------
-EXP_DIR = (
-    "/mnt/c/Users/NoteBook/Documents/fineTuningSpeechbrain/recipes/ShEMO/"
-    "emotion_recognition/results(2)/content/results/ECAPA-TDNN/1968"
-)
-HP_FILE = os.path.join(EXP_DIR, "hyperparams.yaml")
-CKPT_DIR = os.path.join(EXP_DIR, "save")
-
-# ------------------------------------------------------------------
-# 2)
-# ------------------------------------------------------------------
-with open(HP_FILE) as f:
-    hparams = load_hyperpyyaml(f)
-
-modules = {
-    "compute_features": hparams["compute_features"],
-    "mean_var_norm"   : hparams["mean_var_norm"],
-    "embedding_model" : hparams["embedding_model"],
-    "classifier"      : hparams["classifier"],
-}
-
-
-
-
-
-)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Minimal inference for SpeechBrain ECAPA-TDNN (ShEMO fine-tuned).
+"""
+
+import os
+import torch
+import speechbrain as sb
+from hyperpyyaml import load_hyperpyyaml
+from speechbrain.dataio.dataio import read_audio
+
+# ------------------------------------------------------------------
+# 1) Paths
+# ------------------------------------------------------------------
+EXP_DIR = (
+    "/mnt/c/Users/NoteBook/Documents/fineTuningSpeechbrain/recipes/ShEMO/"
+    "emotion_recognition/results(2)/content/results/ECAPA-TDNN/1968"
+)
+HP_FILE = os.path.join(EXP_DIR, "hyperparams.yaml")
+CKPT_DIR = os.path.join(EXP_DIR, "save")
+
+# ------------------------------------------------------------------
+# 2) Load hyperparams and modules
+# ------------------------------------------------------------------
+with open(HP_FILE) as f:
+    hparams = load_hyperpyyaml(f)
+
+modules = {
+    "compute_features": hparams["compute_features"],
+    "mean_var_norm"   : hparams["mean_var_norm"],
+    "embedding_model" : hparams["embedding_model"],
+    "classifier"      : hparams["classifier"],
+}
+
+# ------------------------------------------------------------------
+# 3) Device setup
+# ------------------------------------------------------------------
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+
+checkpointer = sb.utils.checkpoints.Checkpointer(
+    checkpoints_dir=CKPT_DIR,
+    recoverables=modules,
+    allow_partial_load=True,
+)
+checkpointer.recover_if_possible()
+
+# ------------------------------------------------------------------
+# 4) Simple batch container
+# ------------------------------------------------------------------
+class SimpleBatch:
+    def __init__(self, wav, lens):
+        self.sig = (wav, lens)
+
+    def to(self, device):
+        wav, lens = self.sig
+        self.sig = (wav.to(device), lens.to(device))
+        return self
+
+# ------------------------------------------------------------------
+# 5) Brain class
+# ------------------------------------------------------------------
+class EmoIdBrain(sb.Brain):
+    def compute_forward(self, batch, stage):
+        wavs, lens = batch.sig
+        feats = self.modules.compute_features(wavs)
+        feats = self.modules.mean_var_norm(feats, lens)
+        emb = self.modules.embedding_model(feats, lens)
+        out = self.modules.classifier(emb)
+        return out
+
+brain = EmoIdBrain(
+    modules=modules,
+    hparams=hparams,
+    run_opts={"device": device},
+    checkpointer=checkpointer
+)
+
+# ------------------------------------------------------------------
+# 6) Emotion labels
+# ------------------------------------------------------------------
+IDX2LAB = [
+    "anger", "sadness", "neutral",
+    "surprise", "happiness", "fear"
+]
+
+# ------------------------------------------------------------------
+# 7) Prediction function
+# ------------------------------------------------------------------
+def predict(wav_path: str) -> str:
+    wav_raw = read_audio(wav_path)
+    wav = wav_raw.clone().detach().float().unsqueeze(0) if isinstance(wav_raw, torch.Tensor) else torch.tensor(wav_raw, dtype=torch.float32).unsqueeze(0)
+    lens = torch.tensor([1.0])
+
+    batch = SimpleBatch(wav, lens).to(device)
+    brain.modules.eval()
+
+    with torch.no_grad():
+        logits = brain.compute_forward(batch, stage=sb.Stage.TEST)
+
+    idx = int(logits.argmax(dim=-1))
+    return IDX2LAB[idx]
+
+# ------------------------------------------------------------------
+# 8) Run
+# ------------------------------------------------------------------
+if __name__ == "__main__":
+    WAV_FILE = "shortvoice.wav"
+    print("Predicted emotion:", predict(WAV_FILE))