From f979532e818b61be8d055f0460135d5111f93ec7 Mon Sep 17 00:00:00 2001
From: Ftps <ftpsflandre@gmail.com>
Date: Sat, 24 Jun 2023 03:18:13 +0900
Subject: [PATCH 1/9] fix

---
 resample.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resample.py b/resample.py
index 954a676..275265e 100644
--- a/resample.py
+++ b/resample.py
@@ -37,7 +37,7 @@ def save_wav_to_path(wav, save_path, sr):
 
 
 def process(item):
-    spkdir, wav_name = item
+    spkdir, wav_name, args = item
     speaker = spkdir.replace("\\", "/").split("/")[-1]
 
     wav_path = os.path.join(args.in_dir, speaker, wav_name)
@@ -79,7 +79,7 @@ def process_all_speakers():
             spk_dir = os.path.join(args.in_dir, speaker)
             if os.path.isdir(spk_dir):
                 print(spk_dir)
-                futures = [executor.submit(process, (spk_dir, i)) for i in os.listdir(spk_dir) if i.endswith("wav")]
+                futures = [executor.submit(process, (spk_dir, i, args)) for i in os.listdir(spk_dir) if i.endswith("wav")]
                 for _ in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
                     pass
 

From 64be055fec53d5b281e58ed7f895be01dd00e5e1 Mon Sep 17 00:00:00 2001
From: asdfw13 <86564126+asdfw13@users.noreply.github.com>
Date: Fri, 23 Jun 2023 03:12:02 +0800
Subject: [PATCH 2/9] Update inference_main.py

---
 inference_main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/inference_main.py b/inference_main.py
index 58aa176..ce87f37 100644
--- a/inference_main.py
+++ b/inference_main.py
@@ -141,7 +141,8 @@ def main():
             if only_diffusion : isdiffusion = "diff"
             if use_spk_mix:
                 spk = "spk_mix"
-            res_path = f'results/{clean_name}_{key}_{spk}{cluster_name}_{isdiffusion}.{wav_format}'
+            f0_predictor = f0p
+            res_path = f'results/{clean_name}_{key}_{spk}{cluster_name}_{isdiffusion}_{f0_predictor}.{wav_format}'
             soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)
             svc_model.clear_empty()
             

From 7c0d113eae379312727ef04a38a356c547c0fd59 Mon Sep 17 00:00:00 2001
From: asdfw13 <86564126+asdfw13@users.noreply.github.com>
Date: Fri, 23 Jun 2023 03:17:40 +0800
Subject: [PATCH 3/9] Update inference_main.py

---
 inference_main.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/inference_main.py b/inference_main.py
index ce87f37..1780342 100644
--- a/inference_main.py
+++ b/inference_main.py
@@ -141,8 +141,7 @@ def main():
             if only_diffusion : isdiffusion = "diff"
             if use_spk_mix:
                 spk = "spk_mix"
-            f0_predictor = f0p
-            res_path = f'results/{clean_name}_{key}_{spk}{cluster_name}_{isdiffusion}_{f0_predictor}.{wav_format}'
+            res_path = f'results/{clean_name}_{key}_{spk}{cluster_name}_{isdiffusion}_{f0p}.{wav_format}'
             soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)
             svc_model.clear_empty()
             

From 3691bbf5f38c6071855530f2efc84bcdb3b7c700 Mon Sep 17 00:00:00 2001
From: YuriHead <ylzz1997@outlook.com>
Date: Mon, 26 Jun 2023 00:58:52 +0800
Subject: [PATCH 4/9] Update Depthwise Separable Conv1D to Speed Up Inference

---
 configs_template/config_template.json |  5 +-
 models.py                             | 52 +++++++++------
 modules/DSConv.py                     | 76 ++++++++++++++++++++++
 modules/commons.py                    |  6 +-
 modules/modules.py                    | 54 ++++++++--------
 train.py                              | 18 +++---
 vdecoder/hifigan/models.py            | 59 ++++++++++-------
 vdecoder/hifigan/utils.py             |  5 +-
 vdecoder/hifiganwithsnake/models.py   | 91 ++++++++++++++-------------
 vdecoder/hifiganwithsnake/utils.py    |  5 +-
 10 files changed, 249 insertions(+), 122 deletions(-)
 create mode 100644 modules/DSConv.py

diff --git a/configs_template/config_template.json b/configs_template/config_template.json
index 670329c..e70b5d8 100644
--- a/configs_template/config_template.json
+++ b/configs_template/config_template.json
@@ -60,7 +60,10 @@
     "vocoder_name":"nsf-hifigan",
     "speech_encoder":"vec768l12",
     "speaker_embedding":false,
-    "vol_embedding":false
+    "vol_embedding":false,
+    "use_depthwise_conv":false,
+    "use_depthwise_transposeconv":false,
+    "use_automatic_f0_prediction": true
   },
   "spk": {
     "nyaru": 0,
diff --git a/models.py b/models.py
index 1f67b29..2b2d5a1 100644
--- a/models.py
+++ b/models.py
@@ -321,6 +321,9 @@ class SynthesizerTrn(nn.Module):
                  sampling_rate=44100,
                  vol_embedding=False,
                  vocoder_name = "nsf-hifigan",
+                 use_depthwise_conv = False,
+                 use_depthwise_transposeconv = False,
+                 use_automatic_f0_prediction = True,
                  **kwargs):
 
         super().__init__()
@@ -343,6 +346,8 @@ class SynthesizerTrn(nn.Module):
         self.ssl_dim = ssl_dim
         self.vol_embedding = vol_embedding
         self.emb_g = nn.Embedding(n_speakers, gin_channels)
+        self.use_depthwise_conv = use_depthwise_conv
+        self.use_automatic_f0_prediction = use_automatic_f0_prediction
         if vol_embedding:
            self.emb_vol = nn.Linear(1, hidden_channels)
 
@@ -367,9 +372,12 @@ class SynthesizerTrn(nn.Module):
             "upsample_initial_channel": upsample_initial_channel,
             "upsample_kernel_sizes": upsample_kernel_sizes,
             "gin_channels": gin_channels,
+            "use_depthwise_conv":use_depthwise_conv,
+            "use_depthwise_transposeconv":use_depthwise_transposeconv
         }
         
-        
+        modules.set_Conv1dModel(self.use_depthwise_conv)
+
         if vocoder_name == "nsf-hifigan":
             from vdecoder.hifigan.models import Generator
             self.dec = Generator(h=hps)
@@ -383,16 +391,17 @@ class SynthesizerTrn(nn.Module):
 
         self.enc_q = Encoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels)
         self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels)
-        self.f0_decoder = F0Decoder(
-            1,
-            hidden_channels,
-            filter_channels,
-            n_heads,
-            n_layers,
-            kernel_size,
-            p_dropout,
-            spk_channels=gin_channels
-        )
+        if self.use_automatic_f0_prediction:
+            self.f0_decoder = F0Decoder(
+                1,
+                hidden_channels,
+                filter_channels,
+                n_heads,
+                n_layers,
+                kernel_size,
+                p_dropout,
+                spk_channels=gin_channels
+            )
         self.emb_uv = nn.Embedding(2, hidden_channels)
         self.character_mix = False
 
@@ -412,12 +421,16 @@ class SynthesizerTrn(nn.Module):
         # ssl prenet
         x_mask = torch.unsqueeze(commons.sequence_mask(c_lengths, c.size(2)), 1).to(c.dtype)
         x = self.pre(c) * x_mask + self.emb_uv(uv.long()).transpose(1,2) + vol
-
+        
         # f0 predict
-        lf0 = 2595. * torch.log10(1. + f0.unsqueeze(1) / 700.) / 500
-        norm_lf0 = utils.normalize_f0(lf0, x_mask, uv)
-        pred_lf0 = self.f0_decoder(x, norm_lf0, x_mask, spk_emb=g)
-
+        if self.use_automatic_f0_prediction:
+            lf0 = 2595. * torch.log10(1. + f0.unsqueeze(1) / 700.) / 500
+            norm_lf0 = utils.normalize_f0(lf0, x_mask, uv)
+            pred_lf0 = self.f0_decoder(x, norm_lf0, x_mask, spk_emb=g)
+        else:
+            lf0 = 0
+            norm_lf0 = 0
+            pred_lf0 = 0
         # encoder
         z_ptemp, m_p, logs_p, _ = self.enc_p(x, x_mask, f0=f0_to_coarse(f0))
         z, m_q, logs_q, spec_mask = self.enc_q(spec, spec_lengths, g=g)
@@ -431,6 +444,7 @@ class SynthesizerTrn(nn.Module):
 
         return o, ids_slice, spec_mask, (z, z_p, m_p, logs_p, m_q, logs_q), pred_lf0, norm_lf0, lf0
 
+    @torch.no_grad()
     def infer(self, c, f0, uv, g=None, noice_scale=0.35, seed=52468, predict_f0=False, vol = None):
 
         if c.device == torch.device("cuda"):
@@ -453,10 +467,10 @@ class SynthesizerTrn(nn.Module):
         x_mask = torch.unsqueeze(commons.sequence_mask(c_lengths, c.size(2)), 1).to(c.dtype)
         # vol proj
         vol = self.emb_vol(vol[:,:,None]).transpose(1,2) if vol!=None and self.vol_embedding else 0
-
-        x = self.pre(c) * x_mask + self.emb_uv(uv.long()).transpose(1, 2) + vol
+           
+        x = self.pre(c) * x_mask + self.emb_uv(uv.long()).transpose(1,2) + vol
         
-        if predict_f0:
+        if self.use_automatic_f0_prediction and predict_f0:
             lf0 = 2595. * torch.log10(1. + f0.unsqueeze(1) / 700.) / 500
             norm_lf0 = utils.normalize_f0(lf0, x_mask, uv, random_scale=False)
             pred_lf0 = self.f0_decoder(x, norm_lf0, x_mask, spk_emb=g)
diff --git a/modules/DSConv.py b/modules/DSConv.py
new file mode 100644
index 0000000..9909521
--- /dev/null
+++ b/modules/DSConv.py
@@ -0,0 +1,76 @@
+import torch
+import torch.nn as nn
+from torch.nn.utils import weight_norm, remove_weight_norm
+
+class Depthwise_Separable_Conv1D(nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride = 1,
+        padding = 0,
+        dilation = 1,
+        bias = True,
+        padding_mode = 'zeros',  # TODO: refine this type
+        device=None,
+        dtype=None
+    ):
+      super().__init__()
+      self.depth_conv = nn.Conv1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, groups=in_channels,stride = stride,padding=padding,dilation=dilation,bias=bias,padding_mode=padding_mode,device=device,dtype=dtype)
+      self.point_conv = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias, device=device,dtype=dtype)
+    
+    def forward(self, input):
+      return self.point_conv(self.depth_conv(input))
+
+    def weight_norm(self):
+      self.depth_conv = weight_norm(self.depth_conv, name = 'weight')
+      self.point_conv = weight_norm(self.point_conv, name = 'weight')
+
+    def remove_weight_norm(self):
+      self.depth_conv = remove_weight_norm(self.depth_conv, name = 'weight')
+      self.point_conv = remove_weight_norm(self.point_conv, name = 'weight')
+
+class Depthwise_Separable_TransposeConv1D(nn.Module):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride = 1,
+        padding = 0, 
+        output_padding = 0,
+        bias = True,
+        dilation = 1,
+        padding_mode = 'zeros',  # TODO: refine this type
+        device=None,
+        dtype=None
+    ):
+      super().__init__()
+      self.depth_conv = nn.ConvTranspose1d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, groups=in_channels,stride = stride,output_padding=output_padding,padding=padding,dilation=dilation,bias=bias,padding_mode=padding_mode,device=device,dtype=dtype)
+      self.point_conv = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias, device=device,dtype=dtype)
+    
+    def forward(self, input):
+      return self.point_conv(self.depth_conv(input))
+
+    def weight_norm(self):
+      self.depth_conv = weight_norm(self.depth_conv, name = 'weight')
+      self.point_conv = weight_norm(self.point_conv, name = 'weight')
+
+    def remove_weight_norm(self):
+      remove_weight_norm(self.depth_conv, name = 'weight')
+      remove_weight_norm(self.point_conv, name = 'weight')
+
+
+def weight_norm_modules(module, name = 'weight', dim = 0):
+    if isinstance(module,Depthwise_Separable_Conv1D) or isinstance(module,Depthwise_Separable_TransposeConv1D):
+      module.weight_norm()
+      return module
+    else:
+      return weight_norm(module,name,dim)
+
+def remove_weight_norm_modules(module, name = 'weight'):
+    if isinstance(module,Depthwise_Separable_Conv1D) or isinstance(module,Depthwise_Separable_TransposeConv1D):
+      module.remove_weight_norm()
+    else:
+      remove_weight_norm(module,name)
\ No newline at end of file
diff --git a/modules/commons.py b/modules/commons.py
index 0748880..c6e891d 100644
--- a/modules/commons.py
+++ b/modules/commons.py
@@ -24,10 +24,12 @@ def rand_slice_segments_with_pitch(x, pitch, x_lengths=None, segment_size=4):
 
 def init_weights(m, mean=0.0, std=0.01):
   classname = m.__class__.__name__
-  if classname.find("Conv") != -1:
+  if "Depthwise_Separable" in classname:
+    m.depth_conv.weight.data.normal_(mean, std)
+    m.point_conv.weight.data.normal_(mean, std) 
+  elif classname.find("Conv") != -1:
     m.weight.data.normal_(mean, std)
 
-
 def get_padding(kernel_size, dilation=1):
   return int((kernel_size*dilation - dilation)/2)
 
diff --git a/modules/modules.py b/modules/modules.py
index 54290fd..c1326a3 100644
--- a/modules/modules.py
+++ b/modules/modules.py
@@ -1,20 +1,20 @@
-import copy
-import math
-import numpy as np
-import scipy
 import torch
 from torch import nn
 from torch.nn import functional as F
 
-from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
-from torch.nn.utils import weight_norm, remove_weight_norm
+from modules.DSConv import weight_norm_modules, remove_weight_norm_modules, Depthwise_Separable_Conv1D
 
 import modules.commons as commons
 from modules.commons import init_weights, get_padding
 
-
 LRELU_SLOPE = 0.1
 
+Conv1dModel = nn.Conv1d
+
+def set_Conv1dModel(use_depthwise_conv):
+    global Conv1dModel
+    Conv1dModel = Depthwise_Separable_Conv1D if use_depthwise_conv else nn.Conv1d
+
 
 class LayerNorm(nn.Module):
   def __init__(self, channels, eps=1e-5):
@@ -44,13 +44,13 @@ class ConvReluNorm(nn.Module):
 
     self.conv_layers = nn.ModuleList()
     self.norm_layers = nn.ModuleList()
-    self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size//2))
+    self.conv_layers.append(Conv1dModel(in_channels, hidden_channels, kernel_size, padding=kernel_size//2))
     self.norm_layers.append(LayerNorm(hidden_channels))
     self.relu_drop = nn.Sequential(
         nn.ReLU(),
         nn.Dropout(p_dropout))
     for _ in range(n_layers-1):
-      self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size//2))
+      self.conv_layers.append(Conv1dModel(hidden_channels, hidden_channels, kernel_size, padding=kernel_size//2))
       self.norm_layers.append(LayerNorm(hidden_channels))
     self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
     self.proj.weight.data.zero_()
@@ -124,14 +124,14 @@ class WN(torch.nn.Module):
 
     if gin_channels != 0:
       cond_layer = torch.nn.Conv1d(gin_channels, 2*hidden_channels*n_layers, 1)
-      self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight')
+      self.cond_layer = weight_norm_modules(cond_layer, name='weight')
 
     for i in range(n_layers):
       dilation = dilation_rate ** i
       padding = int((kernel_size * dilation - dilation) / 2)
-      in_layer = torch.nn.Conv1d(hidden_channels, 2*hidden_channels, kernel_size,
+      in_layer = Conv1dModel(hidden_channels, 2*hidden_channels, kernel_size,
                                  dilation=dilation, padding=padding)
-      in_layer = torch.nn.utils.weight_norm(in_layer, name='weight')
+      in_layer = weight_norm_modules(in_layer, name='weight')
       self.in_layers.append(in_layer)
 
       # last one is not necessary
@@ -141,7 +141,7 @@ class WN(torch.nn.Module):
         res_skip_channels = hidden_channels
 
       res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
-      res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name='weight')
+      res_skip_layer = weight_norm_modules(res_skip_layer, name='weight')
       self.res_skip_layers.append(res_skip_layer)
 
   def forward(self, x, x_mask, g=None, **kwargs):
@@ -176,32 +176,32 @@ class WN(torch.nn.Module):
 
   def remove_weight_norm(self):
     if self.gin_channels != 0:
-      torch.nn.utils.remove_weight_norm(self.cond_layer)
+      remove_weight_norm_modules(self.cond_layer)
     for l in self.in_layers:
-      torch.nn.utils.remove_weight_norm(l)
+      remove_weight_norm_modules(l)
     for l in self.res_skip_layers:
-     torch.nn.utils.remove_weight_norm(l)
+      remove_weight_norm_modules(l)
 
 
 class ResBlock1(torch.nn.Module):
     def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
         super(ResBlock1, self).__init__()
         self.convs1 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[2],
                                padding=get_padding(kernel_size, dilation[2])))
         ])
         self.convs1.apply(init_weights)
 
         self.convs2 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1)))
         ])
         self.convs2.apply(init_weights)
@@ -223,18 +223,18 @@ class ResBlock1(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs1:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
         for l in self.convs2:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
 
 
 class ResBlock2(torch.nn.Module):
     def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
         super(ResBlock2, self).__init__()
         self.convs = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1])))
         ])
         self.convs.apply(init_weights)
@@ -252,7 +252,7 @@ class ResBlock2(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
 
 
 class Log(nn.Module):
diff --git a/train.py b/train.py
index dba77bb..6d901d3 100644
--- a/train.py
+++ b/train.py
@@ -209,7 +209,7 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade
                 loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
                 loss_fm = feature_loss(fmap_r, fmap_g)
                 loss_gen, losses_gen = generator_loss(y_d_hat_g)
-                loss_lf0 = F.mse_loss(pred_lf0, lf0)
+                loss_lf0 = F.mse_loss(pred_lf0, lf0) if net_g.module.use_automatic_f0_prediction else 0
                 loss_gen_all = loss_gen + loss_fm + loss_mel + loss_kl + loss_lf0
         optim_g.zero_grad()
         scaler.scale(loss_gen_all).backward()
@@ -241,13 +241,17 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade
                 image_dict = {
                     "slice/mel_org": utils.plot_spectrogram_to_numpy(y_mel[0].data.cpu().numpy()),
                     "slice/mel_gen": utils.plot_spectrogram_to_numpy(y_hat_mel[0].data.cpu().numpy()),
-                    "all/mel": utils.plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
-                    "all/lf0": utils.plot_data_to_numpy(lf0[0, 0, :].cpu().numpy(),
-                                                          pred_lf0[0, 0, :].detach().cpu().numpy()),
-                    "all/norm_lf0": utils.plot_data_to_numpy(lf0[0, 0, :].cpu().numpy(),
-                                                               norm_lf0[0, 0, :].detach().cpu().numpy())
+                    "all/mel": utils.plot_spectrogram_to_numpy(mel[0].data.cpu().numpy())
                 }
 
+                if net_g.module.use_automatic_f0_prediction:  # f0 plots need the f0 decoder's outputs
+                    image_dict.update({
+                        "all/lf0": utils.plot_data_to_numpy(lf0[0, 0, :].cpu().numpy(),
+                                                              pred_lf0[0, 0, :].detach().cpu().numpy()),
+                        "all/norm_lf0": utils.plot_data_to_numpy(lf0[0, 0, :].cpu().numpy(),
+                                                                   norm_lf0[0, 0, :].detach().cpu().numpy())
+                    })
+
                 utils.summarize(
                     writer=writer,
                     global_step=global_step,
@@ -328,4 +332,4 @@ def evaluate(hps, generator, eval_loader, writer_eval):
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/vdecoder/hifigan/models.py b/vdecoder/hifigan/models.py
index 2c868f3..e727c70 100644
--- a/vdecoder/hifigan/models.py
+++ b/vdecoder/hifigan/models.py
@@ -6,11 +6,23 @@ import torch
 import torch.nn.functional as F
 import torch.nn as nn
 from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
-from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
+from torch.nn.utils import weight_norm,spectral_norm
 from .utils import init_weights, get_padding
 
+from modules.DSConv import weight_norm_modules, remove_weight_norm_modules, Depthwise_Separable_Conv1D, Depthwise_Separable_TransposeConv1D
+
 LRELU_SLOPE = 0.1
 
+Conv1dModel = nn.Conv1d
+ConvTranspose1dModel = nn.ConvTranspose1d
+
+def set_Conv1dModel(use_depthwise_conv):
+    global Conv1dModel
+    Conv1dModel = Depthwise_Separable_Conv1D if use_depthwise_conv else nn.Conv1d
+
+def set_ConvTranspose1dModel(use_depthwise_transposeconv):
+    global ConvTranspose1dModel
+    ConvTranspose1dModel = Depthwise_Separable_TransposeConv1D if use_depthwise_transposeconv else nn.ConvTranspose1d
 
 def load_model(model_path, device='cuda'):
     config_file = os.path.join(os.path.split(model_path)[0], 'config.json')
@@ -36,21 +48,21 @@ class ResBlock1(torch.nn.Module):
         super(ResBlock1, self).__init__()
         self.h = h
         self.convs1 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[2],
                                padding=get_padding(kernel_size, dilation[2])))
         ])
         self.convs1.apply(init_weights)
 
         self.convs2 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1)))
         ])
         self.convs2.apply(init_weights)
@@ -66,9 +78,9 @@ class ResBlock1(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs1:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
         for l in self.convs2:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
 
 
 class ResBlock2(torch.nn.Module):
@@ -76,9 +88,9 @@ class ResBlock2(torch.nn.Module):
         super(ResBlock2, self).__init__()
         self.h = h
         self.convs = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1])))
         ])
         self.convs.apply(init_weights)
@@ -92,7 +104,7 @@ class ResBlock2(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
 
 
 def padDiff(x):
@@ -277,7 +289,10 @@ class Generator(torch.nn.Module):
     def __init__(self, h):
         super(Generator, self).__init__()
         self.h = h
-
+        
+        set_Conv1dModel(h["use_depthwise_conv"])
+        set_ConvTranspose1dModel(h["use_depthwise_transposeconv"])
+        
         self.num_kernels = len(h["resblock_kernel_sizes"])
         self.num_upsamples = len(h["upsample_rates"])
         self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h["upsample_rates"]))
@@ -285,17 +300,17 @@ class Generator(torch.nn.Module):
             sampling_rate=h["sampling_rate"],
             harmonic_num=8)
         self.noise_convs = nn.ModuleList()
-        self.conv_pre = weight_norm(Conv1d(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
+        self.conv_pre = weight_norm_modules(Conv1dModel(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
         resblock = ResBlock1 if h["resblock"] == '1' else ResBlock2
         self.ups = nn.ModuleList()
         for i, (u, k) in enumerate(zip(h["upsample_rates"], h["upsample_kernel_sizes"])):
             c_cur = h["upsample_initial_channel"] // (2 ** (i + 1))
-            self.ups.append(weight_norm(
-                ConvTranspose1d(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
-                                k, u, padding=(k - u +1 ) // 2)))
+            self.ups.append(weight_norm_modules(
+                ConvTranspose1dModel(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
+                                k, u, padding=(k - u + 1 ) // 2)))
             if i + 1 < len(h["upsample_rates"]):  #
                 stride_f0 = np.prod(h["upsample_rates"][i + 1:])
-                self.noise_convs.append(Conv1d(
+                self.noise_convs.append(Conv1dModel(
                     1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
             else:
                 self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
@@ -305,7 +320,7 @@ class Generator(torch.nn.Module):
             for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])):
                 self.resblocks.append(resblock(h, ch, k, d))
 
-        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
+        self.conv_post = weight_norm_modules(Conv1dModel(ch, 1, 7, 1, padding=3))
         self.ups.apply(init_weights)
         self.conv_post.apply(init_weights)
         self.cond = nn.Conv1d(h['gin_channels'], h['upsample_initial_channel'], 1)
@@ -342,11 +357,11 @@ class Generator(torch.nn.Module):
     def remove_weight_norm(self):
         print('Removing weight norm...')
         for l in self.ups:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
         for l in self.resblocks:
             l.remove_weight_norm()
-        remove_weight_norm(self.conv_pre)
-        remove_weight_norm(self.conv_post)
+        remove_weight_norm_modules(self.conv_pre)
+        remove_weight_norm_modules(self.conv_post)
 
 
 class DiscriminatorP(torch.nn.Module):
diff --git a/vdecoder/hifigan/utils.py b/vdecoder/hifigan/utils.py
index 9c93c99..4a4742f 100644
--- a/vdecoder/hifigan/utils.py
+++ b/vdecoder/hifigan/utils.py
@@ -21,7 +21,10 @@ def plot_spectrogram(spectrogram):
 
 def init_weights(m, mean=0.0, std=0.01):
     classname = m.__class__.__name__
-    if classname.find("Conv") != -1:
+    if "Depthwise_Separable" in classname:
+      m.depth_conv.weight.data.normal_(mean, std)
+      m.point_conv.weight.data.normal_(mean, std)
+    elif classname.find("Conv") != -1:
         m.weight.data.normal_(mean, std)
 
 
diff --git a/vdecoder/hifiganwithsnake/models.py b/vdecoder/hifiganwithsnake/models.py
index 4d9ae7a..709aead 100644
--- a/vdecoder/hifiganwithsnake/models.py
+++ b/vdecoder/hifiganwithsnake/models.py
@@ -6,12 +6,23 @@ import torch
 import torch.nn.functional as F
 import torch.nn as nn
 from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
-from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
+from torch.nn.utils import weight_norm, spectral_norm
 from .utils import init_weights, get_padding
 from vdecoder.hifiganwithsnake.alias.act import SnakeAlias
+from modules.DSConv import weight_norm_modules, remove_weight_norm_modules, Depthwise_Separable_Conv1D, Depthwise_Separable_TransposeConv1D
 
 LRELU_SLOPE = 0.1
 
+Conv1dModel = nn.Conv1d
+ConvTranspose1dModel = nn.ConvTranspose1d
+
+def set_Conv1dModel(use_depthwise_conv):
+    global Conv1dModel
+    Conv1dModel = Depthwise_Separable_Conv1D if use_depthwise_conv else nn.Conv1d
+
+def set_ConvTranspose1dModel(use_depthwise_transposeconv):
+    global ConvTranspose1dModel
+    ConvTranspose1dModel = Depthwise_Separable_TransposeConv1D if use_depthwise_transposeconv else nn.ConvTranspose1d
 
 def load_model(model_path, device='cuda'):
     config_file = os.path.join(os.path.split(model_path)[0], 'config.json')
@@ -33,79 +44,77 @@ def load_model(model_path, device='cuda'):
 
 
 class ResBlock1(torch.nn.Module):
-    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5), C=None):
+    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
         super(ResBlock1, self).__init__()
         self.h = h
         self.convs1 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[2],
                                padding=get_padding(kernel_size, dilation[2])))
         ])
         self.convs1.apply(init_weights)
 
         self.convs2 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1)))
         ])
         self.convs2.apply(init_weights)
 
         self.num_layers = len(self.convs1) + len(self.convs2)
         self.activations = nn.ModuleList([
-            SnakeAlias(channels, C=C) for _ in range(self.num_layers)
+            SnakeAlias(channels) for _ in range(self.num_layers)
         ])
 
-    def forward(self, x, DIM=None):
+    def forward(self, x):
         acts1, acts2 = self.activations[::2], self.activations[1::2]
         for c1, c2, a1, a2 in zip(self.convs1, self.convs2, acts1, acts2):
-            xt = a1(x, DIM)
+            xt = a1(x)
             xt = c1(xt)
-            xt = a2(xt, DIM)
+            xt = a2(xt)
             xt = c2(xt)
             x = xt + x
         return x
 
     def remove_weight_norm(self):
         for l in self.convs1:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
         for l in self.convs2:
-            remove_weight_norm(l)
-
+            remove_weight_norm_modules(l)
 
 class ResBlock2(torch.nn.Module):
-    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3), C=None):
+    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
         super(ResBlock2, self).__init__()
         self.h = h
         self.convs = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1])))
         ])
         self.convs.apply(init_weights)
         
         self.num_layers = len(self.convs)
         self.activations = nn.ModuleList([
-            SnakeAlias(channels, C=C) for _ in range(self.num_layers)
+            SnakeAlias(channels) for _ in range(self.num_layers)
         ])
 
-    def forward(self, x, DIM=None):
+    def forward(self, x):
         for c,a in zip(self.convs, self.activations):
-            xt = a(x, DIM)
+            xt = a(x)
             xt = c(xt)
             x = xt + x
         return x
 
     def remove_weight_norm(self):
         for l in self.convs:
-            remove_weight_norm(l)
-
+            remove_weight_norm_modules(l)
 
 def padDiff(x):
     return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0)
@@ -289,7 +298,10 @@ class Generator(torch.nn.Module):
     def __init__(self, h):
         super(Generator, self).__init__()
         self.h = h
-
+        
+        set_Conv1dModel(h["use_depthwise_conv"])
+        set_ConvTranspose1dModel(h["use_depthwise_transposeconv"])
+        
         self.num_kernels = len(h["resblock_kernel_sizes"])
         self.num_upsamples = len(h["upsample_rates"])
         self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h["upsample_rates"]))
@@ -297,32 +309,29 @@ class Generator(torch.nn.Module):
             sampling_rate=h["sampling_rate"],
             harmonic_num=8)
         self.noise_convs = nn.ModuleList()
-        self.conv_pre = weight_norm(Conv1d(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
+        self.conv_pre = weight_norm_modules(Conv1dModel(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
         resblock = ResBlock1 if h["resblock"] == '1' else ResBlock2
         self.ups = nn.ModuleList()
         for i, (u, k) in enumerate(zip(h["upsample_rates"], h["upsample_kernel_sizes"])):
             c_cur = h["upsample_initial_channel"] // (2 ** (i + 1))
-            self.ups.append(weight_norm(
-                ConvTranspose1d(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
-                                k, u, padding=(k - u + 1) // 2)))
+            self.ups.append(weight_norm_modules(
+                ConvTranspose1dModel(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
+                                k, u, padding=(k - u +1 ) // 2)))
             if i + 1 < len(h["upsample_rates"]):  #
                 stride_f0 = np.prod(h["upsample_rates"][i + 1:])
-                self.noise_convs.append(Conv1d(
-                    1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+ 1) // 2))
+                self.noise_convs.append(Conv1dModel(
+                    1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
             else:
                 self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
         self.resblocks = nn.ModuleList()
-        self.snakes = nn.ModuleList()
         for i in range(len(self.ups)):
             ch = h["upsample_initial_channel"] // (2 ** (i + 1))
-            self.snakes.append(SnakeAlias(h["upsample_initial_channel"] // (2 ** (i)), C = h["upsample_initial_channel"] >> i))
             for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])):
-                self.resblocks.append(resblock(h, ch, k, d, C = h["upsample_initial_channel"] >> (i + 1)))
+                self.resblocks.append(resblock(h, ch, k, d))
 
-        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
+        self.conv_post = weight_norm_modules(Conv1dModel(ch, 1, 7, 1, padding=3))
         self.ups.apply(init_weights)
         self.conv_post.apply(init_weights)
-        self.snake_post = SnakeAlias(ch, C = h["upsample_initial_channel"] >> len(self.ups))
         self.cond = nn.Conv1d(h['gin_channels'], h['upsample_initial_channel'], 1)
         
     def forward(self, x, f0, g=None):
@@ -335,9 +344,8 @@ class Generator(torch.nn.Module):
         x = x + self.cond(g)
         # print(124,x.shape,har_source.shape)
         for i in range(self.num_upsamples):
-            # print(f"self.snakes.{i}.pre:", x.shape)
             x = self.snakes[i](x)
-            # print(f"self.snakes.{i}.after:", x.shape)
+            # print(3,x.shape)
             x = self.ups[i](x)
             x_source = self.noise_convs[i](har_source)
             # print(4,x_source.shape,har_source.shape,x.shape)
@@ -348,7 +356,6 @@ class Generator(torch.nn.Module):
                     xs = self.resblocks[i * self.num_kernels + j](x)
                 else:
                     xs += self.resblocks[i * self.num_kernels + j](x)
-            # print(f"self.resblocks.{i}.after:", xs.shape)
             x = xs / self.num_kernels
         x = self.snake_post(x)
         x = self.conv_post(x)
@@ -359,11 +366,11 @@ class Generator(torch.nn.Module):
     def remove_weight_norm(self):
         print('Removing weight norm...')
         for l in self.ups:
-            remove_weight_norm(l)
+            remove_weight_norm_modules(l)
         for l in self.resblocks:
-            l.remove_weight_norm()
-        remove_weight_norm(self.conv_pre)
-        remove_weight_norm(self.conv_post)
+            l.remove_weight_norm_modules()
+        remove_weight_norm_modules(self.conv_pre)
+        remove_weight_norm_modules(self.conv_post)
 
 
 class DiscriminatorP(torch.nn.Module):
diff --git a/vdecoder/hifiganwithsnake/utils.py b/vdecoder/hifiganwithsnake/utils.py
index 9c93c99..4a4742f 100644
--- a/vdecoder/hifiganwithsnake/utils.py
+++ b/vdecoder/hifiganwithsnake/utils.py
@@ -21,7 +21,10 @@ def plot_spectrogram(spectrogram):
 
 def init_weights(m, mean=0.0, std=0.01):
     classname = m.__class__.__name__
-    if classname.find("Conv") != -1:
+    if "Depthwise_Separable" in classname:
+      m.depth_conv.weight.data.normal_(mean, std)
+      m.point_conv.weight.data.normal_(mean, std)
+    elif classname.find("Conv") != -1:
         m.weight.data.normal_(mean, std)
 
 

From 89fb7159044b660292aae32979b059f321b7d487 Mon Sep 17 00:00:00 2001
From: YuriHead <ylzz1997@outlook.com>
Date: Mon, 26 Jun 2023 01:05:59 +0800
Subject: [PATCH 5/9] Debug

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 6d901d3..9ddd5c4 100644
--- a/train.py
+++ b/train.py
@@ -245,7 +245,7 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade
                 }
 
                 if net_g.module.use_automatic_f0_prediction:
-                    image_dict.module.update({
+                    image_dict.update({
                         "all/lf0": utils.plot_data_to_numpy(lf0[0, 0, :].cpu().numpy(),
                                                               pred_lf0[0, 0, :].detach().cpu().numpy()),
                         "all/norm_lf0": utils.plot_data_to_numpy(lf0[0, 0, :].cpu().numpy(),

From fd03762d522430192b09ff2da3419a5f92153248 Mon Sep 17 00:00:00 2001
From: YuriHead <ylzz1997@outlook.com>
Date: Mon, 26 Jun 2023 04:02:17 +0800
Subject: [PATCH 6/9] Update BF16 AMP

---
 configs_template/config_template.json |  1 +
 modules/mel_processing.py             |  7 +++++--
 train.py                              | 17 ++++++++++-------
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/configs_template/config_template.json b/configs_template/config_template.json
index e70b5d8..c7ec01b 100644
--- a/configs_template/config_template.json
+++ b/configs_template/config_template.json
@@ -12,6 +12,7 @@
     "eps": 1e-09,
     "batch_size": 6,
     "fp16_run": false,
+    "half_type": "fp16",
     "lr_decay": 0.999875,
     "segment_size": 10240,
     "init_lr_ratio": 1,
diff --git a/modules/mel_processing.py b/modules/mel_processing.py
index a9936a2..a0ba17b 100644
--- a/modules/mel_processing.py
+++ b/modules/mel_processing.py
@@ -62,10 +62,13 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
 
     y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
     y = y.squeeze(1)
-
+    
+    y_dtype = y.dtype
+    if y.dtype == torch.bfloat16: y = y.to(torch.float32)
     spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
                       center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)
-    spec = torch.view_as_real(spec)
+    spec = torch.view_as_real(spec).to(y_dtype)
+
     spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
     return spec
 
diff --git a/train.py b/train.py
index 9ddd5c4..cbc55d2 100644
--- a/train.py
+++ b/train.py
@@ -61,7 +61,7 @@ def run(rank, n_gpus, hps):
         utils.check_git_hash(hps.model_dir)
         writer = SummaryWriter(log_dir=hps.model_dir)
         writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
-
+    
     # for pytorch on win, backend use gloo    
     dist.init_process_group(backend=  'gloo' if os.name == 'nt' else 'nccl', init_method='env://', world_size=n_gpus, rank=rank)
     torch.manual_seed(hps.train.seed)
@@ -148,6 +148,8 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade
     train_loader, eval_loader = loaders
     if writers is not None:
         writer, writer_eval = writers
+    
+    half_type = torch.float16 if hps.train.half_type=="fp16" else torch.bfloat16
 
     # train_loader.batch_sampler.set_epoch(epoch)
     global global_step
@@ -169,8 +171,8 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade
             hps.data.sampling_rate,
             hps.data.mel_fmin,
             hps.data.mel_fmax)
-
-        with autocast(enabled=hps.train.fp16_run):
+        
+        with autocast(enabled=hps.train.fp16_run, dtype=half_type):
             y_hat, ids_slice, z_mask, \
             (z, z_p, m_p, logs_p, m_q, logs_q), pred_lf0, norm_lf0, lf0 = net_g(c, f0, uv, spec, g=g, c_lengths=lengths,
                                                                                 spec_lengths=lengths,vol = volume)
@@ -191,20 +193,21 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade
             # Discriminator
             y_d_hat_r, y_d_hat_g, _, _ = net_d(y, y_hat.detach())
 
-            with autocast(enabled=False):
+            with autocast(enabled=False, dtype=half_type):
                 loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(y_d_hat_r, y_d_hat_g)
                 loss_disc_all = loss_disc
-
+        
         optim_d.zero_grad()
         scaler.scale(loss_disc_all).backward()
         scaler.unscale_(optim_d)
         grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None)
         scaler.step(optim_d)
+        
 
-        with autocast(enabled=hps.train.fp16_run):
+        with autocast(enabled=hps.train.fp16_run, dtype=half_type):
             # Generator
             y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(y, y_hat)
-            with autocast(enabled=False):
+            with autocast(enabled=False, dtype=half_type):
                 loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
                 loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
                 loss_fm = feature_loss(fmap_r, fmap_g)

From 531b765bfa0ffd56de453eee85d5f75089e07a1c Mon Sep 17 00:00:00 2001
From: YuriHead <ylzz1997@outlook.com>
Date: Mon, 26 Jun 2023 04:12:57 +0800
Subject: [PATCH 7/9] Debug Snake

---
 vdecoder/hifiganwithsnake/models.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/vdecoder/hifiganwithsnake/models.py b/vdecoder/hifiganwithsnake/models.py
index 709aead..ff4b32d 100644
--- a/vdecoder/hifiganwithsnake/models.py
+++ b/vdecoder/hifiganwithsnake/models.py
@@ -44,7 +44,7 @@ def load_model(model_path, device='cuda'):
 
 
 class ResBlock1(torch.nn.Module):
-    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
+    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5), C=None):
         super(ResBlock1, self).__init__()
         self.h = h
         self.convs1 = nn.ModuleList([
@@ -69,15 +69,15 @@ class ResBlock1(torch.nn.Module):
 
         self.num_layers = len(self.convs1) + len(self.convs2)
         self.activations = nn.ModuleList([
-            SnakeAlias(channels) for _ in range(self.num_layers)
+            SnakeAlias(channels, C=C) for _ in range(self.num_layers)
         ])
 
-    def forward(self, x):
+    def forward(self, x, DIM=None):
         acts1, acts2 = self.activations[::2], self.activations[1::2]
         for c1, c2, a1, a2 in zip(self.convs1, self.convs2, acts1, acts2):
-            xt = a1(x)
+            xt = a1(x, DIM)
             xt = c1(xt)
-            xt = a2(xt)
+            xt = a2(xt, DIM)
             xt = c2(xt)
             x = xt + x
         return x
@@ -89,7 +89,7 @@ class ResBlock1(torch.nn.Module):
             remove_weight_norm_modules(l)
 
 class ResBlock2(torch.nn.Module):
-    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
+    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3), C=None):
         super(ResBlock2, self).__init__()
         self.h = h
         self.convs = nn.ModuleList([
@@ -102,12 +102,12 @@ class ResBlock2(torch.nn.Module):
         
         self.num_layers = len(self.convs)
         self.activations = nn.ModuleList([
-            SnakeAlias(channels) for _ in range(self.num_layers)
+            SnakeAlias(channels, C=C) for _ in range(self.num_layers)
         ])
 
-    def forward(self, x):
+    def forward(self, x, DIM=None):
         for c,a in zip(self.convs, self.activations):
-            xt = a(x)
+            xt = a(x, DIM)
             xt = c(xt)
             x = xt + x
         return x
@@ -324,14 +324,17 @@ class Generator(torch.nn.Module):
             else:
                 self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
         self.resblocks = nn.ModuleList()
+        self.snakes = nn.ModuleList()
         for i in range(len(self.ups)):
             ch = h["upsample_initial_channel"] // (2 ** (i + 1))
+            self.snakes.append(SnakeAlias(h["upsample_initial_channel"] // (2 ** (i)), C = h["upsample_initial_channel"] >> i))
             for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])):
                 self.resblocks.append(resblock(h, ch, k, d))
 
         self.conv_post = weight_norm_modules(Conv1dModel(ch, 1, 7, 1, padding=3))
         self.ups.apply(init_weights)
         self.conv_post.apply(init_weights)
+        self.snake_post = SnakeAlias(ch, C = h["upsample_initial_channel"] >> len(self.ups))
         self.cond = nn.Conv1d(h['gin_channels'], h['upsample_initial_channel'], 1)
         
     def forward(self, x, f0, g=None):
@@ -344,8 +347,9 @@ class Generator(torch.nn.Module):
         x = x + self.cond(g)
         # print(124,x.shape,har_source.shape)
         for i in range(self.num_upsamples):
+            # print(f"self.snakes.{i}.pre:", x.shape)
             x = self.snakes[i](x)
-            # print(3,x.shape)
+            # print(f"self.snakes.{i}.after:", x.shape)
             x = self.ups[i](x)
             x_source = self.noise_convs[i](har_source)
             # print(4,x_source.shape,har_source.shape,x.shape)
@@ -356,6 +360,7 @@ class Generator(torch.nn.Module):
                     xs = self.resblocks[i * self.num_kernels + j](x)
                 else:
                     xs += self.resblocks[i * self.num_kernels + j](x)
+            # print(f"self.resblocks.{i}.after:", xs.shape)
             x = xs / self.num_kernels
         x = self.snake_post(x)
         x = self.conv_post(x)

From 98ce91c395787408bbfb5aa4db97d09e915edd35 Mon Sep 17 00:00:00 2001
From: YuriHead <ylzz1997@outlook.com>
Date: Mon, 26 Jun 2023 04:16:55 +0800
Subject: [PATCH 8/9] Debug

---
 vdecoder/hifiganwithsnake/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vdecoder/hifiganwithsnake/models.py b/vdecoder/hifiganwithsnake/models.py
index ff4b32d..ba8d8f1 100644
--- a/vdecoder/hifiganwithsnake/models.py
+++ b/vdecoder/hifiganwithsnake/models.py
@@ -316,7 +316,7 @@ class Generator(torch.nn.Module):
             c_cur = h["upsample_initial_channel"] // (2 ** (i + 1))
             self.ups.append(weight_norm_modules(
                 ConvTranspose1dModel(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
-                                k, u, padding=(k - u +1 ) // 2)))
+                                k, u, padding=(k - u + 1 ) // 2)))
             if i + 1 < len(h["upsample_rates"]):  #
                 stride_f0 = np.prod(h["upsample_rates"][i + 1:])
                 self.noise_convs.append(Conv1dModel(
@@ -329,7 +329,7 @@ class Generator(torch.nn.Module):
             ch = h["upsample_initial_channel"] // (2 ** (i + 1))
             self.snakes.append(SnakeAlias(h["upsample_initial_channel"] // (2 ** (i)), C = h["upsample_initial_channel"] >> i))
             for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])):
-                self.resblocks.append(resblock(h, ch, k, d))
+                self.resblocks.append(resblock(h, ch, k, d, C = h["upsample_initial_channel"] >> (i + 1)))
 
         self.conv_post = weight_norm_modules(Conv1dModel(ch, 1, 7, 1, padding=3))
         self.ups.apply(init_weights)

From 57c079fbfab28e8922fe8db2b7d15edfd787e01c Mon Sep 17 00:00:00 2001
From: YuriHead <ylzz1997@outlook.com>
Date: Fri, 30 Jun 2023 00:38:09 +0800
Subject: [PATCH 9/9] New Tiny

---
 configs_template/config_template.json |  2 +-
 models.py                             |  7 +--
 modules/modules.py                    | 41 -------------
 vdecoder/hifigan/models.py            | 75 ++++++++++--------------
 vdecoder/hifigan/utils.py             | 11 ++--
 vdecoder/hifiganwithsnake/models.py   | 83 ++++++++++++---------------
 vdecoder/hifiganwithsnake/utils.py    | 11 ++--
 7 files changed, 78 insertions(+), 152 deletions(-)

diff --git a/configs_template/config_template.json b/configs_template/config_template.json
index c7ec01b..377a5ec 100644
--- a/configs_template/config_template.json
+++ b/configs_template/config_template.json
@@ -54,6 +54,7 @@
     "upsample_initial_channel": 512,
     "upsample_kernel_sizes": [16,16, 4, 4, 4],
     "n_layers_q": 3,
+    "n_flow_layer": 4,
     "use_spectral_norm": false,
     "gin_channels": 768,
     "ssl_dim": 768,
@@ -63,7 +64,6 @@
     "speaker_embedding":false,
     "vol_embedding":false,
     "use_depthwise_conv":false,
-    "use_depthwise_transposeconv":false,
     "use_automatic_f0_prediction": true
   },
   "spk": {
diff --git a/models.py b/models.py
index 2b2d5a1..a529206 100644
--- a/models.py
+++ b/models.py
@@ -322,8 +322,8 @@ class SynthesizerTrn(nn.Module):
                  vol_embedding=False,
                  vocoder_name = "nsf-hifigan",
                  use_depthwise_conv = False,
-                 use_depthwise_transposeconv = False,
                  use_automatic_f0_prediction = True,
+                 n_flow_layer = 4,
                  **kwargs):
 
         super().__init__()
@@ -372,8 +372,7 @@ class SynthesizerTrn(nn.Module):
             "upsample_initial_channel": upsample_initial_channel,
             "upsample_kernel_sizes": upsample_kernel_sizes,
             "gin_channels": gin_channels,
-            "use_depthwise_conv":use_depthwise_conv,
-            "use_depthwise_transposeconv":use_depthwise_transposeconv
+            "use_depthwise_conv":use_depthwise_conv
         }
         
         modules.set_Conv1dModel(self.use_depthwise_conv)
@@ -390,7 +389,7 @@ class SynthesizerTrn(nn.Module):
             self.dec = Generator(h=hps)
 
         self.enc_q = Encoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels)
-        self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels)
+        self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, n_flow_layer, gin_channels=gin_channels)
         if self.use_automatic_f0_prediction:
             self.f0_decoder = F0Decoder(
                 1,
diff --git a/modules/modules.py b/modules/modules.py
index c1326a3..df63e29 100644
--- a/modules/modules.py
+++ b/modules/modules.py
@@ -66,47 +66,6 @@ class ConvReluNorm(nn.Module):
     return x * x_mask
 
 
-class DDSConv(nn.Module):
-  """
-  Dialted and Depth-Separable Convolution
-  """
-  def __init__(self, channels, kernel_size, n_layers, p_dropout=0.):
-    super().__init__()
-    self.channels = channels
-    self.kernel_size = kernel_size
-    self.n_layers = n_layers
-    self.p_dropout = p_dropout
-
-    self.drop = nn.Dropout(p_dropout)
-    self.convs_sep = nn.ModuleList()
-    self.convs_1x1 = nn.ModuleList()
-    self.norms_1 = nn.ModuleList()
-    self.norms_2 = nn.ModuleList()
-    for i in range(n_layers):
-      dilation = kernel_size ** i
-      padding = (kernel_size * dilation - dilation) // 2
-      self.convs_sep.append(nn.Conv1d(channels, channels, kernel_size, 
-          groups=channels, dilation=dilation, padding=padding
-      ))
-      self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
-      self.norms_1.append(LayerNorm(channels))
-      self.norms_2.append(LayerNorm(channels))
-
-  def forward(self, x, x_mask, g=None):
-    if g is not None:
-      x = x + g
-    for i in range(self.n_layers):
-      y = self.convs_sep[i](x * x_mask)
-      y = self.norms_1[i](y)
-      y = F.gelu(y)
-      y = self.convs_1x1[i](y)
-      y = self.norms_2[i](y)
-      y = F.gelu(y)
-      y = self.drop(y)
-      x = x + y
-    return x * x_mask
-
-
 class WN(torch.nn.Module):
   def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0):
     super(WN, self).__init__()
diff --git a/vdecoder/hifigan/models.py b/vdecoder/hifigan/models.py
index e727c70..10eca45 100644
--- a/vdecoder/hifigan/models.py
+++ b/vdecoder/hifigan/models.py
@@ -1,28 +1,18 @@
-import os
 import json
-from .env import AttrDict
+import os
+
 import numpy as np
 import torch
-import torch.nn.functional as F
 import torch.nn as nn
-from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
-from torch.nn.utils import weight_norm,spectral_norm
-from .utils import init_weights, get_padding
+import torch.nn.functional as F
+from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
+from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
 
-from modules.DSConv import weight_norm_modules, remove_weight_norm_modules, Depthwise_Separable_Conv1D, Depthwise_Separable_TransposeConv1D
+from .env import AttrDict
+from .utils import get_padding, init_weights
 
 LRELU_SLOPE = 0.1
 
-Conv1dModel = nn.Conv1d
-ConvTranspose1dModel = nn.ConvTranspose1d
-
-def set_Conv1dModel(use_depthwise_conv):
-    global Conv1dModel
-    Conv1dModel = Depthwise_Separable_Conv1D if use_depthwise_conv else nn.Conv1d
-
-def set_ConvTranspose1dModel(use_depthwise_transposeconv):
-    global ConvTranspose1dModel
-    ConvTranspose1dModel = Depthwise_Separable_TransposeConv1D if use_depthwise_transposeconv else nn.ConvTranspose1d
 
 def load_model(model_path, device='cuda'):
     config_file = os.path.join(os.path.split(model_path)[0], 'config.json')
@@ -48,21 +38,21 @@ class ResBlock1(torch.nn.Module):
         super(ResBlock1, self).__init__()
         self.h = h
         self.convs1 = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[2],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
                                padding=get_padding(kernel_size, dilation[2])))
         ])
         self.convs1.apply(init_weights)
 
         self.convs2 = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1)))
         ])
         self.convs2.apply(init_weights)
@@ -78,9 +68,9 @@ class ResBlock1(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs1:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
         for l in self.convs2:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
 
 
 class ResBlock2(torch.nn.Module):
@@ -88,9 +78,9 @@ class ResBlock2(torch.nn.Module):
         super(ResBlock2, self).__init__()
         self.h = h
         self.convs = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1])))
         ])
         self.convs.apply(init_weights)
@@ -104,7 +94,7 @@ class ResBlock2(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
 
 
 def padDiff(x):
@@ -211,8 +201,6 @@ class SineGen(torch.nn.Module):
         output uv: tensor(batchsize=1, length, 1)
         """
         with torch.no_grad():
-            f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim,
-                                 device=f0.device)
             # fundamental component
             fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device))
 
@@ -289,10 +277,7 @@ class Generator(torch.nn.Module):
     def __init__(self, h):
         super(Generator, self).__init__()
         self.h = h
-        
-        set_Conv1dModel(h["use_depthwise_conv"])
-        set_ConvTranspose1dModel(h["use_depthwise_transposeconv"])
-        
+
         self.num_kernels = len(h["resblock_kernel_sizes"])
         self.num_upsamples = len(h["upsample_rates"])
         self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h["upsample_rates"]))
@@ -300,17 +285,17 @@ class Generator(torch.nn.Module):
             sampling_rate=h["sampling_rate"],
             harmonic_num=8)
         self.noise_convs = nn.ModuleList()
-        self.conv_pre = weight_norm_modules(Conv1dModel(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
+        self.conv_pre = weight_norm(Conv1d(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
         resblock = ResBlock1 if h["resblock"] == '1' else ResBlock2
         self.ups = nn.ModuleList()
         for i, (u, k) in enumerate(zip(h["upsample_rates"], h["upsample_kernel_sizes"])):
             c_cur = h["upsample_initial_channel"] // (2 ** (i + 1))
-            self.ups.append(weight_norm_modules(
-                ConvTranspose1dModel(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
-                                k, u, padding=(k - u + 1 ) // 2)))
+            self.ups.append(weight_norm(
+                ConvTranspose1d(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
+                                k, u, padding=(k - u +1 ) // 2)))
             if i + 1 < len(h["upsample_rates"]):  #
                 stride_f0 = np.prod(h["upsample_rates"][i + 1:])
-                self.noise_convs.append(Conv1dModel(
+                self.noise_convs.append(Conv1d(
                     1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
             else:
                 self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
@@ -320,7 +305,7 @@ class Generator(torch.nn.Module):
             for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])):
                 self.resblocks.append(resblock(h, ch, k, d))
 
-        self.conv_post = weight_norm_modules(Conv1dModel(ch, 1, 7, 1, padding=3))
+        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
         self.ups.apply(init_weights)
         self.conv_post.apply(init_weights)
         self.cond = nn.Conv1d(h['gin_channels'], h['upsample_initial_channel'], 1)
@@ -357,18 +342,18 @@ class Generator(torch.nn.Module):
     def remove_weight_norm(self):
         print('Removing weight norm...')
         for l in self.ups:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
         for l in self.resblocks:
             l.remove_weight_norm()
-        remove_weight_norm_modules(self.conv_pre)
-        remove_weight_norm_modules(self.conv_post)
+        remove_weight_norm(self.conv_pre)
+        remove_weight_norm(self.conv_post)
 
 
 class DiscriminatorP(torch.nn.Module):
     def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
         super(DiscriminatorP, self).__init__()
         self.period = period
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList([
             norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
             norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
@@ -427,7 +412,7 @@ class MultiPeriodDiscriminator(torch.nn.Module):
 class DiscriminatorS(torch.nn.Module):
     def __init__(self, use_spectral_norm=False):
         super(DiscriminatorS, self).__init__()
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList([
             norm_f(Conv1d(1, 128, 15, 1, padding=7)),
             norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)),
diff --git a/vdecoder/hifigan/utils.py b/vdecoder/hifigan/utils.py
index 4a4742f..e519e2b 100644
--- a/vdecoder/hifigan/utils.py
+++ b/vdecoder/hifigan/utils.py
@@ -1,10 +1,10 @@
 import glob
 import os
-import matplotlib
-import torch
-from torch.nn.utils import weight_norm
+
 # matplotlib.use("Agg")
 import matplotlib.pylab as plt
+import torch
+from torch.nn.utils import weight_norm
 
 
 def plot_spectrogram(spectrogram):
@@ -21,10 +21,7 @@ def plot_spectrogram(spectrogram):
 
 def init_weights(m, mean=0.0, std=0.01):
     classname = m.__class__.__name__
-    if "Depthwise_Separable" in classname:
-      m.depth_conv.weight.data.normal_(mean, std)
-      m.point_conv.weight.data.normal_(mean, std)
-    elif classname.find("Conv") != -1:
+    if classname.find("Conv") != -1:
         m.weight.data.normal_(mean, std)
 
 
diff --git a/vdecoder/hifiganwithsnake/models.py b/vdecoder/hifiganwithsnake/models.py
index ba8d8f1..ab9bcd1 100644
--- a/vdecoder/hifiganwithsnake/models.py
+++ b/vdecoder/hifiganwithsnake/models.py
@@ -1,28 +1,20 @@
-import os
 import json
-from .env import AttrDict
+import os
+
 import numpy as np
 import torch
-import torch.nn.functional as F
 import torch.nn as nn
-from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
-from torch.nn.utils import weight_norm, spectral_norm
-from .utils import init_weights, get_padding
+import torch.nn.functional as F
+from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
+from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
+
 from vdecoder.hifiganwithsnake.alias.act import SnakeAlias
-from modules.DSConv import weight_norm_modules, remove_weight_norm_modules, Depthwise_Separable_Conv1D, Depthwise_Separable_TransposeConv1D
+
+from .env import AttrDict
+from .utils import get_padding, init_weights
 
 LRELU_SLOPE = 0.1
 
-Conv1dModel = nn.Conv1d
-ConvTranspose1dModel = nn.ConvTranspose1d
-
-def set_Conv1dModel(use_depthwise_conv):
-    global Conv1dModel
-    Conv1dModel = Depthwise_Separable_Conv1D if use_depthwise_conv else nn.Conv1d
-
-def set_ConvTranspose1dModel(use_depthwise_transposeconv):
-    global ConvTranspose1dModel
-    ConvTranspose1dModel = Depthwise_Separable_TransposeConv1D if use_depthwise_transposeconv else nn.ConvTranspose1d
 
 def load_model(model_path, device='cuda'):
     config_file = os.path.join(os.path.split(model_path)[0], 'config.json')
@@ -48,21 +40,21 @@ class ResBlock1(torch.nn.Module):
         super(ResBlock1, self).__init__()
         self.h = h
         self.convs1 = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[2],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
                                padding=get_padding(kernel_size, dilation[2])))
         ])
         self.convs1.apply(init_weights)
 
         self.convs2 = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=1,
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                                padding=get_padding(kernel_size, 1)))
         ])
         self.convs2.apply(init_weights)
@@ -84,18 +76,19 @@ class ResBlock1(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs1:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
         for l in self.convs2:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
+
 
 class ResBlock2(torch.nn.Module):
     def __init__(self, h, channels, kernel_size=3, dilation=(1, 3), C=None):
         super(ResBlock2, self).__init__()
         self.h = h
         self.convs = nn.ModuleList([
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[0],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm_modules(Conv1dModel(channels, channels, kernel_size, 1, dilation=dilation[1],
+            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                                padding=get_padding(kernel_size, dilation[1])))
         ])
         self.convs.apply(init_weights)
@@ -114,7 +107,8 @@ class ResBlock2(torch.nn.Module):
 
     def remove_weight_norm(self):
         for l in self.convs:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
+
 
 def padDiff(x):
     return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0)
@@ -220,8 +214,6 @@ class SineGen(torch.nn.Module):
         output uv: tensor(batchsize=1, length, 1)
         """
         with torch.no_grad():
-            f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim,
-                                 device=f0.device)
             # fundamental component
             fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device))
 
@@ -298,10 +290,7 @@ class Generator(torch.nn.Module):
     def __init__(self, h):
         super(Generator, self).__init__()
         self.h = h
-        
-        set_Conv1dModel(h["use_depthwise_conv"])
-        set_ConvTranspose1dModel(h["use_depthwise_transposeconv"])
-        
+
         self.num_kernels = len(h["resblock_kernel_sizes"])
         self.num_upsamples = len(h["upsample_rates"])
         self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h["upsample_rates"]))
@@ -309,18 +298,18 @@ class Generator(torch.nn.Module):
             sampling_rate=h["sampling_rate"],
             harmonic_num=8)
         self.noise_convs = nn.ModuleList()
-        self.conv_pre = weight_norm_modules(Conv1dModel(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
+        self.conv_pre = weight_norm(Conv1d(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3))
         resblock = ResBlock1 if h["resblock"] == '1' else ResBlock2
         self.ups = nn.ModuleList()
         for i, (u, k) in enumerate(zip(h["upsample_rates"], h["upsample_kernel_sizes"])):
             c_cur = h["upsample_initial_channel"] // (2 ** (i + 1))
-            self.ups.append(weight_norm_modules(
-                ConvTranspose1dModel(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
-                                k, u, padding=(k - u + 1 ) // 2)))
+            self.ups.append(weight_norm(
+                ConvTranspose1d(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)),
+                                k, u, padding=(k - u + 1) // 2)))
             if i + 1 < len(h["upsample_rates"]):  #
                 stride_f0 = np.prod(h["upsample_rates"][i + 1:])
-                self.noise_convs.append(Conv1dModel(
-                    1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
+                self.noise_convs.append(Conv1d(
+                    1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+ 1) // 2))
             else:
                 self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
         self.resblocks = nn.ModuleList()
@@ -331,7 +320,7 @@ class Generator(torch.nn.Module):
             for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])):
                 self.resblocks.append(resblock(h, ch, k, d, C = h["upsample_initial_channel"] >> (i + 1)))
 
-        self.conv_post = weight_norm_modules(Conv1dModel(ch, 1, 7, 1, padding=3))
+        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
         self.ups.apply(init_weights)
         self.conv_post.apply(init_weights)
         self.snake_post = SnakeAlias(ch, C = h["upsample_initial_channel"] >> len(self.ups))
@@ -371,18 +360,18 @@ class Generator(torch.nn.Module):
     def remove_weight_norm(self):
         print('Removing weight norm...')
         for l in self.ups:
-            remove_weight_norm_modules(l)
+            remove_weight_norm(l)
         for l in self.resblocks:
-            l.remove_weight_norm_modules()
-        remove_weight_norm_modules(self.conv_pre)
-        remove_weight_norm_modules(self.conv_post)
+            l.remove_weight_norm()
+        remove_weight_norm(self.conv_pre)
+        remove_weight_norm(self.conv_post)
 
 
 class DiscriminatorP(torch.nn.Module):
     def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
         super(DiscriminatorP, self).__init__()
         self.period = period
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList([
             norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
             norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
@@ -441,7 +430,7 @@ class MultiPeriodDiscriminator(torch.nn.Module):
 class DiscriminatorS(torch.nn.Module):
     def __init__(self, use_spectral_norm=False):
         super(DiscriminatorS, self).__init__()
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList([
             norm_f(Conv1d(1, 128, 15, 1, padding=7)),
             norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)),
diff --git a/vdecoder/hifiganwithsnake/utils.py b/vdecoder/hifiganwithsnake/utils.py
index 4a4742f..e519e2b 100644
--- a/vdecoder/hifiganwithsnake/utils.py
+++ b/vdecoder/hifiganwithsnake/utils.py
@@ -1,10 +1,10 @@
 import glob
 import os
-import matplotlib
-import torch
-from torch.nn.utils import weight_norm
+
 # matplotlib.use("Agg")
 import matplotlib.pylab as plt
+import torch
+from torch.nn.utils import weight_norm
 
 
 def plot_spectrogram(spectrogram):
@@ -21,10 +21,7 @@ def plot_spectrogram(spectrogram):
 
 def init_weights(m, mean=0.0, std=0.01):
     classname = m.__class__.__name__
-    if "Depthwise_Separable" in classname:
-      m.depth_conv.weight.data.normal_(mean, std)
-      m.point_conv.weight.data.normal_(mean, std)
-    elif classname.find("Conv") != -1:
+    if classname.find("Conv") != -1:
         m.weight.data.normal_(mean, std)