Merge branch '4.1-Latest' into 4.1-Latest

commit 6c18b0eb58
YuriHead, 2023-07-05 23:03:08 +08:00 (committed by GitHub)
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
12 changed files with 35 additions and 24 deletions

.github/workflows/ruff.yml (new file, +8 lines)

@@ -0,0 +1,8 @@
+name: Ruff
+on: [push, pull_request]
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: chartboost/ruff-action@v1
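This workflow lints every push and pull request with Ruff via the chartboost/ruff-action action. A minimal sketch of reproducing the same check locally, assuming Ruff has been pip-installed (the `run_lint.py` file name is hypothetical, not part of this commit):

```python
# run_lint.py -- hypothetical local equivalent of the CI job above.
# Assumes `pip install ruff`; Ruff discovers .ruff.toml at the repo root.
import subprocess
import sys

result = subprocess.run(["ruff", "check", "."])
sys.exit(result.returncode)  # non-zero exit fails, just like the CI step
```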

.gitignore (9 lines changed)

@@ -150,7 +150,6 @@ results
inference/chunks_temp.json
logs
hubert/checkpoint_best_legacy_500.pt
-pretrain/**/*.pt
configs/config.json
filelists/test.txt
filelists/train.txt
@@ -162,11 +161,5 @@ filelists/val.txt
.idea/vcs.xml
.idea/inspectionProfiles/profiles_settings.xml
.idea/inspectionProfiles/Project_Default.xml
-pretrain/vec-768-layer-12.onnx
-pretrain/hubert-soft.onnx
-pretrain/hubert4.0.onnx
-pretrain/vec-256-layer-9.onnx
-pretrain/vec-256-layer-12.onnx
-pretrain/vec-768-layer-9.onnx
+pretrain/
.vscode/launch.json
.ruff.toml

.ruff.toml (new file, +4 lines)

@@ -0,0 +1,4 @@
+select = ["E", "F", "I"]
+
+# Never enforce `E501` (line length violations).
+ignore = ["E501", "E741"]
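Here `select = ["E", "F", "I"]` turns on the pycodestyle errors (E), Pyflakes (F), and import-sorting (I) rule sets, while E501 (line too long) and E741 (ambiguous variable name) are excluded. A hedged illustration of what this configuration does and does not flag:

```python
# Hypothetical snippet checked against the config above:
import sys, os   # E401 (multiple imports on one line) -- reported via "E"
import json      # F401 (imported but unused) -- reported via "F"

l = 0  # E741 (ambiguous variable name) is in "E" but explicitly ignored

print(sys.platform, os.sep, l)
```

Out-of-order imports would additionally be reported as I001 by the "I" set, which is why several files in this merge have their imports re-sorted.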


@@ -27,7 +27,7 @@ This project serves as a framework only and does not possess speech synthesis fu
# Warning: Please ensure that you address any authorization issues related to the dataset on your own. You bear full responsibility for any problems arising from the usage of non-authorized datasets for training, as well as any resulting consequences. The repository and its maintainer, svc develop team, disclaim any association with or liability for the consequences.
1. This project is exclusively established for academic purposes, aiming to facilitate communication and learning. It is not intended for deployment in production environments.
-2. Any sovits-based video posted to a video platform must clearly specify in the introduction the input source vocals and audio used for the voice changer conversion, e.g., if you use someone else's video/audio and convert it by separating the vocals as the input source, you must give a clear link to the original video or music; if you use your own vocals or a voice synthesized by another voice synthesis engine as the input source, you must also specify this in the introduction.
+2. Any sovits-based video posted to a video platform must clearly specify in the introduction the input source vocals and audio used for the voice changer conversion, e.g., if you use someone else's video/audio and convert it by separating the vocals as the input source, you must give a clear link to the original video or music; if you use your own vocals or a voice synthesized by another voice synthesis engine as the input source, you must also state this in your introduction.
3. You are solely responsible for any infringement issues caused by the input source and all consequences. When using other commercial vocal synthesis software as an input source, please ensure that you comply with the regulations of that software, noting that the regulations of many vocal synthesis engines explicitly state that they cannot be used to convert input sources!
4. Engaging in illegal activities, as well as religious and political activities, is strictly prohibited when using this project. The project developers vehemently oppose the aforementioned activities. If you disagree with this provision, the usage of the project is prohibited.
5. If you continue to use the program, you will be deemed to have agreed to the terms and conditions set forth in README and README has discouraged you and is not responsible for any subsequent problems.
@@ -206,7 +206,7 @@ python resample.py
#### Cautions
-Although this project has resample.py scripts for resampling, mono and loudness matching, the default loudness matching is to match to 0db. This can cause damage to the sound quality. While python's loudness matching package pyloudnorm does not limit the level, this can lead to popping. Therefore, it is recommended to consider using professional sound processing software, such as `adobe audition` for loudness matching. If you are already using other software for loudness matching, add the parameter `-skip_loudnorm` to the run command:
+Although this project has resample.py scripts for resampling, mono and loudness matching, the default loudness matching is to match to 0db. This can cause damage to the sound quality. While python's loudness matching package pyloudnorm does not limit the level, this can lead to sonic boom. Therefore, it is recommended to consider using professional sound processing software, such as `adobe audition` for loudness matching. If you are already using other software for loudness matching, add the parameter `-skip_loudnorm` to the run command:
```shell
python resample.py --skip_loudnorm
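Since pyloudnorm applies pure gain with no limiter, matching to 0db can push samples past full scale and clip (the "popping" the README warns about). A minimal sketch of loudness matching with headroom plus a peak check, assuming `pyloudnorm` and `soundfile` are installed; the helper name and the -16 LUFS target are illustrative, not part of resample.py:

```python
import numpy as np
import pyloudnorm as pyln
import soundfile as sf

def match_loudness(path_in, path_out, target_lufs=-16.0):
    data, rate = sf.read(path_in)
    meter = pyln.Meter(rate)                    # ITU-R BS.1770 loudness meter
    loudness = meter.integrated_loudness(data)
    out = pyln.normalize.loudness(data, loudness, target_lufs)
    peak = np.max(np.abs(out))
    if peak > 1.0:                              # pyloudnorm will not limit the level
        out = out / peak                        # fall back to peak scaling to avoid clipping
    sf.write(path_out, out, rate)
```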
@@ -299,7 +299,7 @@ python preprocess_hubert_f0.py --f0_predictor dio --use_diff
After completing the above steps, the dataset directory will contain the preprocessed data, and the dataset_raw folder can be deleted.
-## 🏋️‍♀️ Training
+## 🏋️‍ Training
### Sovits Model


@@ -301,7 +301,7 @@ python preprocess_hubert_f0.py --f0_predictor dio --use_diff
After completing the above steps, the dataset directory contains the preprocessed data, and the dataset_raw folder can be deleted
-## 🏋️‍♀️ Training
+## 🏋️‍ Training
### Main Model Training


@@ -1,6 +1,6 @@
import torch
import torch.nn as nn
-from torch.nn.utils import weight_norm, remove_weight_norm
+from torch.nn.utils import remove_weight_norm, weight_norm

class Depthwise_Separable_Conv1D(nn.Module):
    def __init__(
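`Depthwise_Separable_Conv1D` follows the standard depthwise separable pattern: a per-channel convolution followed by a pointwise 1x1 mix. A minimal sketch of that pattern; the class and argument names here are illustrative, not the module's actual signature:

```python
import torch
import torch.nn as nn

class DepthwiseSeparableConv1dSketch(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size, padding=0):
        super().__init__()
        # groups=in_ch: each input channel is filtered independently
        self.depthwise = nn.Conv1d(in_ch, in_ch, kernel_size,
                                   padding=padding, groups=in_ch)
        # 1x1 conv mixes channels, completing the "separable" factorization
        self.pointwise = nn.Conv1d(in_ch, out_ch, 1)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))

x = torch.randn(1, 16, 100)                  # (batch, channels, time)
y = DepthwiseSeparableConv1dSketch(16, 32, 5, padding=2)(x)
print(y.shape)                               # torch.Size([1, 32, 100])
```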


@@ -53,7 +53,9 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
    y = y.squeeze(1)
    y_dtype = y.dtype
-    if y.dtype == torch.bfloat16: y = y.to(torch.float32)
+    if y.dtype == torch.bfloat16:
+        y = y.to(torch.float32)
    spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
                      center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)
    spec = torch.view_as_real(spec).to(y_dtype)
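The change splits a one-line guard into two lines to satisfy the newly selected "E" rules (compound statements on one line), with identical behavior: `torch.stft` does not accept bfloat16 input in the PyTorch versions this project targets, so the signal is computed in float32 and the spectrogram is cast back afterwards. A condensed, hypothetical standalone version of that cast-and-restore pattern:

```python
import torch

def stft_bf16_safe(y, n_fft=1024, hop=256):
    # Sketch of the guard above: compute the STFT in float32,
    # then restore the caller's dtype on the way out.
    y_dtype = y.dtype
    if y.dtype == torch.bfloat16:
        y = y.to(torch.float32)
    window = torch.hann_window(n_fft, device=y.device, dtype=y.dtype)
    spec = torch.stft(y, n_fft, hop_length=hop, win_length=n_fft, window=window,
                      center=True, pad_mode='reflect', return_complex=True)
    return torch.view_as_real(spec).to(y_dtype)

print(stft_bf16_safe(torch.randn(1, 8192, dtype=torch.bfloat16)).dtype)  # torch.bfloat16
```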


@@ -1,12 +1,14 @@
import torch
from torch import nn
from torch.nn import Conv1d
from torch.nn import functional as F
-from modules.DSConv import weight_norm_modules, remove_weight_norm_modules, Depthwise_Separable_Conv1D
import modules.commons as commons
-from modules.commons import init_weights, get_padding
+from modules.commons import get_padding, init_weights
+from modules.DSConv import (
+    Depthwise_Separable_Conv1D,
+    remove_weight_norm_modules,
+    weight_norm_modules,
+)

LRELU_SLOPE = 0.1


@@ -99,7 +99,7 @@ def run(rank, n_gpus, hps):
        name=utils.latest_checkpoint_path(hps.model_dir, "D_*.pth")
        global_step=int(name[name.rfind("_")+1:name.rfind(".")])+1
        #global_step = (epoch_str - 1) * len(train_loader)
-    except:
+    except: # noqa: E722 I have no idea about this CC: @ylzz1997
        print("load old checkpoint failed...")
        epoch_str = 1
        global_step = 0
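Ruff's E722 forbids bare `except:`; the merge keeps the bare handler and silences the rule with a `noqa` comment instead of narrowing it. For comparison, a hedged sketch of the narrower alternative applied to the same filename-parsing fallback (the helper name is hypothetical):

```python
import re

def resume_global_step(ckpt_name):
    # e.g. "D_12000.pth" -> 12001; restart at 0 if the name doesn't parse.
    try:
        return int(re.search(r"_(\d+)\.pth$", ckpt_name).group(1)) + 1
    except (AttributeError, ValueError):  # no match / non-numeric suffix
        return 0

print(resume_global_step("D_12000.pth"))  # 12001
print(resume_global_step("broken.pth"))   # 0
```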


@@ -161,7 +161,7 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False)
            # print("load", k)
            new_state_dict[k] = saved_state_dict[k]
            assert saved_state_dict[k].shape == v.shape, (saved_state_dict[k].shape, v.shape)
-        except:
+        except: # noqa: E722 I have no idea about this CC: @ylzz1997
            print("error, %s is not in the checkpoint" % k)
            logger.info("%s is not in the checkpoint" % k)
            new_state_dict[k] = v
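The loop above implements shape-tolerant checkpoint loading: parameters whose shapes match are copied from the checkpoint, and anything missing or mismatched keeps its freshly initialized value. A condensed sketch of the same idea without the bare `except:` (the function name is hypothetical):

```python
import torch

def load_compatible(model, saved_state_dict):
    new_state_dict = {}
    for k, v in model.state_dict().items():
        if k in saved_state_dict and saved_state_dict[k].shape == v.shape:
            new_state_dict[k] = saved_state_dict[k]  # shape matches: take checkpoint weight
        else:
            print("error, %s is not in the checkpoint" % k)
            new_state_dict[k] = v                    # keep the current (initialized) weight
    model.load_state_dict(new_state_dict)
```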


@@ -1,6 +1,6 @@
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
# LICENSE is in incl_licenses directory.

-from .act import *
-from .filter import *
-from .resample import *
+from .act import * # noqa: F403
+from .filter import * # noqa: F403
+from .resample import * # noqa: F403
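F403 flags `from module import *` because it hides which names a package re-exports; since this file is vendored from alias-free-torch, the merge suppresses the rule rather than rewriting the imports. For comparison, a hedged sketch of the explicit style that keeps F403 quiet without `noqa`; the listed names are assumptions based on the upstream alias-free-torch modules, not verified against this repo:

```python
# Hypothetical explicit re-export -- no star imports, so F403 never fires.
from .act import Activation1d
from .filter import LowPassFilter1d
from .resample import DownSample1d, UpSample1d

__all__ = ["Activation1d", "LowPassFilter1d", "DownSample1d", "UpSample1d"]
```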


@@ -377,7 +377,9 @@ with gr.Blocks(
""")
debug_button = gr.Checkbox(label="Debug mode: enable it if you need to report a bug to the community; once enabled, the console shows detailed error messages", value=debug)
vc_submit.click(vc_fn, [sid, vc_input3, output_format, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
vc_submit2.click(vc_fn2, [text2tts, tts_lang, tts_gender, tts_rate, tts_volume, sid, output_format, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
debug_button.change(debug_change,[],[])
model_load_button.click(modelAnalysis,[model_path,config_path,cluster_model_path,device,enhance,diff_model_path,diff_config_path,only_diffusion,use_spk_mix],[sid,sid_output])
model_unload_button.click(modelUnload,[],[sid,sid_output])
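These lines wire the web UI events: `component.click(fn, inputs, outputs)` calls `fn` with the current values of the input components and routes its return values to the output components, and `.change` does the same whenever a control's value changes. A minimal, self-contained sketch of the same Gradio pattern (all names hypothetical):

```python
import gradio as gr

def convert(text, gain):
    return "processed %r at %+d dB" % (text, gain)

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Input")
    gain = gr.Slider(-12, 12, value=0, step=1, label="Gain")
    run_btn = gr.Button("Run")
    result = gr.Textbox(label="Output")
    # Same wiring pattern as vc_submit.click above: fn, inputs list, outputs list.
    run_btn.click(convert, [text_in, gain], [result])

if __name__ == "__main__":
    demo.launch()
```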