feat(colab): support diffusion

magic-akari 2023-05-22 21:20:25 +08:00
parent 746448033f
commit 8950172ff6
1 changed file with 50 additions and 6 deletions


@@ -80,8 +80,8 @@
"!git clone https://github.com/svc-develop-team/so-vits-svc -b 4.0-Vec768-Layer12\n",
"%pip uninstall -y torchdata torchtext\n",
"%pip install --upgrade pip setuptools numpy numba\n",
"%pip install tensorrt pyworld praat-parselmouth fairseq tensorboardX torchcrepe librosa==0.9.1\n",
"%pip install torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118\n",
"%pip install pyworld praat-parselmouth fairseq tensorboardX torchcrepe librosa==0.9.1 pyyaml pynvml pyloudnorm\n",
"%pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118\n",
"exit()"
]
},
@@ -162,7 +162,21 @@
"\n",
"#@markdown Although the pretrained model generally does not cause any copyright problems, please pay attention to it. For example, ask the author in advance, or the author has indicated the feasible use in the description clearly.\n",
"\n",
"!pwd"
"download_pretrained_model = True #@param {type:\"boolean\"}\n",
"D_0_URL = \"https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_D_320000.pth\" #@param {type:\"string\"}\n",
"G_0_URL = \"https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_G_320000.pth\" #@param {type:\"string\"}\n",
"\n",
"download_pretrained_diffusion_model = True #@param {type:\"boolean\"}\n",
"diff_model_URL = \"https://huggingface.co/datasets/ms903/Diff-SVC-refactor-pre-trained-model/resolve/main/fix_pitch_add_vctk_600k/model_0.pt\" #@param {type:\"string\"}\n",
"\n",
"%cd /content/so-vits-svc\n",
"\n",
"if download_pretrained_model:\n",
" !curl -L {D_0_URL} -o logs/44k/D_0.pth\n",
" !curl -L {G_0_URL} -o logs/44k/G_0.pth\n",
"\n",
"if download_pretrained_diffusion_model:\n",
" !curl -L {diffusion_URL} -o logs/44k/diffusion/model_0.pt"
]
},
{
@@ -294,11 +308,23 @@
"#@markdown # Generate hubert and f0\n",
"\n",
"#@markdown\n",
"%cd /content/so-vits-svc\n",
"\n",
"f0_predictor = \"crepe\" #@param [\"crepe\", \"pm\", \"dio\", \"harvest\"]\n",
"use_diff = True #@param {type:\"boolean\"}\n",
"\n",
"%cd /content/so-vits-svc\n",
"!python preprocess_hubert_f0.py --f0_predictor={f0_predictor}"
"diff_param = \"\"\n",
"if use_diff:\n",
" diff_param = \"--use_diff\"\n",
"\n",
" import os\n",
" if not os.path.exists(\"./pretrain/nsf_hifigan/model\"):\n",
" !curl -L https://github.com/openvpi/vocoders/releases/download/nsf-hifigan-v1/nsf_hifigan_20221211.zip -o nsf_hifigan_20221211.zip\n",
" !unzip nsf_hifigan_20221211.zip\n",
" !rm -rf pretrain/nsf_hifigan\n",
" !mv -v nsf_hifigan pretrain\n",
"\n",
"!python preprocess_hubert_f0.py --f0_predictor={f0_predictor} {diff_param}"
]
},
{
@@ -419,6 +445,24 @@
"!python cluster/train_cluster.py"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#@title Train diffusion model (Optional)\n",
"\n",
"#@markdown # Train diffusion model (Optional)\n",
"\n",
"#@markdown #### Details see [README.md#-about-shallow-diffusion](https://github.com/svc-develop-team/so-vits-svc#-about-shallow-diffusion)\n",
"\n",
"#@markdown\n",
"\n",
"%cd /content/so-vits-svc\n",
"!python train_diff.py -c configs/diffusion.yaml"
]
},
{
"attachments": {},
"cell_type": "markdown",
@@ -466,7 +510,7 @@
"metadata": {},
"outputs": [],
"source": [
"#title Download nsf_hifigan\n",
"#title Download nsf_hifigan if you need it\n",
"\n",
"%cd /content/so-vits-svc\n",
"!curl -L https://github.com/openvpi/vocoders/releases/download/nsf-hifigan-v1/nsf_hifigan_20221211.zip -o /content/so-vits-svc/nsf_hifigan_20221211.zip\n",