dangtr0408
/

StyleTTS2-lite-vi

dangtr0408 committed 18 days ago

Commit

6985472

1 Parent(s): 0d15013

minor changes

Files changed (2) hide show

Models/del_training.ipynb CHANGED Viewed

@@ -1,62 +1,62 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "2b6bb4be",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import torch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dc802b47",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "models_path = \"./current_model_120k_vi.pth\"\n",
-    "name = \"./model.pth\"\n",
-    "params_whole = torch.load(models_path, map_location='cpu')\n",
-    "\n",
-    "for key in list(params_whole.keys()):\n",
-    "    if key != 'net':\n",
-    "        params_whole.pop(key)\n",
-    "\n",
-    "keep = ['decoder', 'predictor', 'text_encoder', 'style_encoder']\n",
-    "for module_name in list(params_whole['net'].keys()):\n",
-    "    if module_name not in keep:\n",
-    "        params_whole['net'].pop(module_name)\n",
-    "\n",
-    "torch.save(params_whole, name)\n",
-    "\n",
-    "\n",
-    "os.remove(models_path)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "base",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "2b6bb4be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dc802b47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "models_path = \"./current_model_120k_vi.pth\"\n",
+    "name = \"./model.pth\"\n",
+    "params_whole = torch.load(models_path, map_location='cpu')\n",
+    "\n",
+    "for key in list(params_whole.keys()):\n",
+    "    if key != 'net':\n",
+    "        params_whole.pop(key)\n",
+    "\n",
+    "keep = ['decoder', 'predictor', 'text_encoder', 'style_encoder']\n",
+    "for module_name in list(params_whole['net'].keys()):\n",
+    "    if module_name not in keep:\n",
+    "        params_whole['net'].pop(module_name)\n",
+    "\n",
+    "torch.save(params_whole, name)\n",
+    "\n",
+    "\n",
+    "#os.remove(models_path)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

inference.py CHANGED Viewed

@@ -64,7 +64,7 @@ class TextCleaner:
 class Preprocess:
     def __text_normalize(self, text):
-        punctuation = ["，", "、", "،", ";", "(", "．", "。", "…", "!", "–", ":"]
         map_to = "."
         punctuation_pattern = re.compile(f"[{''.join(re.escape(p) for p in punctuation)}]")
         #ensure consistency.
@@ -72,8 +72,8 @@ class Preprocess:
         #replace punctuation that acts like a comma or period
         #text = re.sub(r'\.{2,}', '.', text)
         text = punctuation_pattern.sub(map_to, text)
-        #remove or replace special chars except . , { } ? ' -  \ % $ & /
-        text = re.sub(r'[^\w\s.,{}?\'\-\[\]\%\$\&\/]', ' ', text)
         #replace consecutive whitespace chars with a single space and strip leading/trailing spaces
         text = re.sub(r'\s+', ' ', text).strip()
         return text
@@ -211,7 +211,7 @@ class StyleTTS2(torch.nn.Module):
             audio = audio*(1-denoise) + audio_denoise*denoise
         with torch.no_grad():
-            if split_dur>0 and len(audio)/sr>split_dur:
                 #This option will split the ref audio to multiple parts, calculate styles and average them
                 count = 0
                 ref_s = None

 class Preprocess:
     def __text_normalize(self, text):
+        punctuation = ["，", "、", "،", ";", "(", "．", "。", "…", "!", "–", ":", "?"]
         map_to = "."
         punctuation_pattern = re.compile(f"[{''.join(re.escape(p) for p in punctuation)}]")
         #ensure consistency.
         #replace punctuation that acts like a comma or period
         #text = re.sub(r'\.{2,}', '.', text)
         text = punctuation_pattern.sub(map_to, text)
+        #replace special chars with a space, except . , { } % $ & ' - [ ] /
+        text = re.sub(r'[^\w\s.,{}%$&\'\-\[\]\/]', ' ', text)
         #replace consecutive whitespace chars with a single space and strip leading/trailing spaces
         text = re.sub(r'\s+', ' ', text).strip()
         return text
             audio = audio*(1-denoise) + audio_denoise*denoise
         with torch.no_grad():
+            if split_dur>0 and len(audio)/sr>=4: #Only effective if audio length is >= 4s
                 #This option will split the ref audio to multiple parts, calculate styles and average them
                 count = 0
                 ref_s = None