From 8bade86a3d9ea6fb97a7ce0bf1d20a4414084f87 Mon Sep 17 00:00:00 2001 From: Felix Yan Date: Fri, 17 Mar 2023 01:10:47 +0200 Subject: [PATCH 01/61] Correct typos in preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index e815d82..66bac7e 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -24,9 +24,9 @@ def process_one(filename, hmodel): wav, sr = librosa.load(filename, sr=sampling_rate) soft_path = filename + ".soft.pt" if not os.path.exists(soft_path): - devive = torch.device("cuda" if torch.cuda.is_available() else "cpu") + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000) - wav16k = torch.from_numpy(wav16k).to(devive) + wav16k = torch.from_numpy(wav16k).to(device) c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k) torch.save(c.cpu(), soft_path) f0_path = filename + ".f0.npy" From 293762f8f9bdc1ab98137da0ca1055ca84c10157 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Sat, 18 Mar 2023 11:41:08 +0800 Subject: [PATCH 02/61] Added a good fork --- README.md | 2 ++ README_zh_CN.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 94063e2..cdb0d2e 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ 5. If you distribute this repository's code or publish any results produced by this project publicly (including but not limited to video sharing platforms), please indicate the original author and code source (this repository). 6. If you use this project for any other plan, please contact and inform the author of this repository in advance. Thank you very much. +### A fork with a greatly improved interface:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) + ## Update > Updated the 4.0-v2 model, the entire process is the same as 4.0. 
Compared to 4.0, there is some improvement in certain scenarios, but there are also some cases where it has regressed. Please refer to the [4.0-v2 branch](https://github.com/svc-develop-team/so-vits-svc/tree/4.0-v2) for more information. diff --git a/README_zh_CN.md b/README_zh_CN.md index 09f1f65..7d94ec6 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -11,6 +11,8 @@ 5. 如将本仓库代码二次分发,或将由此项目产出的任何结果公开发表 (包括但不限于视频网站投稿),请注明原作者及代码来源 (此仓库)。 6. 如果将此项目用于任何其他企划,请提前联系并告知本仓库作者,十分感谢。 +### 改善了交互的一个分支推荐:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) + ## update > 更新了4.0-v2模型,全部流程同4.0,相比4.0在部分场景下有一定提升,但也有些情况有退步,具体可移步[4.0-v2分支](https://github.com/svc-develop-team/so-vits-svc/tree/4.0-v2) From 7f73a6e560c6d1ccfe940069ca9739286ace37ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Sat, 18 Mar 2023 18:25:57 +0800 Subject: [PATCH 03/61] chore: better cn_readme --- README_zh_CN.md | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 7d94ec6..8e1af7c 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -2,7 +2,9 @@ [**English**](./README.md) | [**中文简体**](./README_zh_CN.md) -## 使用规约 +#### ✨ 改善了交互的一个分支推荐:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) + +## 📏 使用规约 1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备,请自行解决数据集的授权问题,任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和一切后果! 2. 任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 @@ -11,17 +13,16 @@ 5. 如将本仓库代码二次分发,或将由此项目产出的任何结果公开发表 (包括但不限于视频网站投稿),请注明原作者及代码来源 (此仓库)。 6. 如果将此项目用于任何其他企划,请提前联系并告知本仓库作者,十分感谢。 -### 改善了交互的一个分支推荐:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) -## update +## 🆕 Update! 
> 更新了4.0-v2模型,全部流程同4.0,相比4.0在部分场景下有一定提升,但也有些情况有退步,具体可移步[4.0-v2分支](https://github.com/svc-develop-team/so-vits-svc/tree/4.0-v2) -## 模型简介 +## 📝 模型简介 歌声音色转换模型,通过SoftVC内容编码器提取源音频语音特征,与F0同时输入VITS替换原本的文本输入达到歌声转换的效果。同时,更换声码器为 [NSF HiFiGAN](https://github.com/openvpi/DiffSinger/tree/refactor/modules/nsf_hifigan) 解决断音问题 -### 4.0版本更新内容 +### 🆕 4.0 版本更新内容 + 特征输入更换为 [Content Vec](https://github.com/auspicious3000/contentvec) + 采样率统一使用44100hz @@ -31,7 +32,7 @@ + 增加了可选项 1:vc模式自动预测音高f0,即转换语音时不需要手动输入变调key,男女声的调能自动转换,但仅限语音转换,该模式转换歌声会跑调 + 增加了可选项 2:通过kmeans聚类方案减小音色泄漏,即使得音色更加像目标音色 -## 预先下载的模型文件 +## 📥 预先下载的模型文件 #### **必须项** @@ -53,7 +54,7 @@ http://obs.cstcloud.cn/share/obs/sankagenkeshi/checkpoint_best_legacy_500.pt 虽然底模一般不会引起什么版权问题,但还是请注意一下,比如事先询问作者,又或者作者在模型描述中明确写明了可行的用途 -## 数据集准备 +## 📊 数据集准备 仅需要以以下文件结构将数据集放入dataset_raw目录即可 @@ -69,7 +70,7 @@ dataset_raw └───xxx7-xxx007.wav ``` -## 数据预处理 +## 🛠️ 数据预处理 1. 重采样至 44100hz @@ -91,14 +92,14 @@ python preprocess_hubert_f0.py 执行完以上步骤后 dataset 目录便是预处理完成的数据,可以删除dataset_raw文件夹了 -## 训练 +## 🏋️‍♀️ 训练 ```shell python train.py -c configs/config.json -m 44k ``` 注:训练时会自动清除老的模型,只保留最新3个模型,如果想防止过拟合需要自己手动备份模型记录点,或修改配置文件keep_ckpts 0为永不清除 -## 推理 +## 🤖 推理 使用 [inference_main.py](inference_main.py) @@ -121,7 +122,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " + -cm, --cluster_model_path:聚类模型路径,如果没有训练聚类则随便填。 + -cr, --cluster_infer_ratio:聚类方案占比,范围 0-1,若没有训练聚类模型则填 0 即可。 -## 可选项 +## 🤔 可选项 如果前面的效果已经满意,或者没看明白下面在讲啥,那后面的内容都可以忽略,不影响模型使用(这些可选项影响比较小,可能在某些特定数据上有点效果,但大部分情况似乎都感知不太明显) @@ -148,7 +149,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " #### [23/03/16] 不再需要手动下载hubert -## Onnx导出 +## 📤 Onnx导出 使用 [onnx_export.py](onnx_export.py) + 新建文件夹:`checkpoints` 并打开 @@ -165,7 +166,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " + 注意:Hubert Onnx模型请使用MoeSS提供的模型,目前无法自行导出(fairseq中Hubert有不少onnx不支持的算子和涉及到常量的东西,在导出时会报错或者导出的模型输入输出shape和结果都有问题) 
[Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) -## 一些法律条例参考 +## 📚 一些法律条例参考 #### 《民法典》 From 758109d47ea088a763bb36a0c826a681ceca2012 Mon Sep 17 00:00:00 2001 From: HenryMakerCH <89520883+HenryMakerCH@users.noreply.github.com> Date: Sun, 19 Mar 2023 00:55:20 +0800 Subject: [PATCH 04/61] Update train.py --- train.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/train.py b/train.py index e499528..c4ba1a1 100644 --- a/train.py +++ b/train.py @@ -3,6 +3,8 @@ import multiprocessing import time logging.getLogger('matplotlib').setLevel(logging.WARNING) +logging.getLogger('numba').setLevel(logging.WARNING) + import os import json import argparse From 4d5f717960be1fcf56904ce81e0d721f40be7d71 Mon Sep 17 00:00:00 2001 From: suanlixianren <101437864+suanlixianren@users.noreply.github.com> Date: Sun, 19 Mar 2023 17:28:09 +0800 Subject: [PATCH 05/61] Update README_zh_CN.md --- README_zh_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 8e1af7c..eb83375 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -6,7 +6,7 @@ ## 📏 使用规约 -1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备,请自行解决数据集的授权问题,任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和一切后果! +1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备,请自行解决数据集的授权问题,任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和一切后果!与仓库、仓库维护者、svc develop team 无关! 2. 任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 3. 由输入源造成的侵权问题需自行承担全部责任和一切后果。使用其他商用歌声合成软件作为输入源时,请确保遵守该软件的使用条例,注意,许多歌声合成引擎使用条例中明确指明不可用于输入源进行转换! 4. 
继续使用视为已同意本仓库 README 所述相关条例,本仓库 README 已进行劝导义务,不对后续可能存在问题负责。 From fb91290fde80d48dee73b86c192f966169a5263c Mon Sep 17 00:00:00 2001 From: suanlixianren <101437864+suanlixianren@users.noreply.github.com> Date: Sun, 19 Mar 2023 17:31:33 +0800 Subject: [PATCH 06/61] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cdb0d2e..b796e38 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ## Terms of Use -1. This project is established for academic exchange purposes only and is intended for communication and learning purposes. It is not intended for production environments. Please solve the authorization problem of the dataset on your own. You shall be solely responsible for any problems caused by the use of non-authorized datasets for training and all consequences thereof. +1. This project is established for academic exchange purposes only and is intended for communication and learning purposes. It is not intended for production environments. Please solve the authorization problem of the dataset on your own. You shall be solely responsible for any problems caused by the use of non-authorized datasets for training and all consequences thereof.The repository and its maintainer, svc develop team, have nothing to do with the consequences! 2. Any videos based on sovits that are published on video platforms must clearly indicate in the description that they are used for voice changing and specify the input source of the voice or audio, for example, using videos or audios published by others and separating the vocals as input source for conversion, which must provide clear original video or music links. If your own voice or other synthesized voices from other commercial vocal synthesis software are used as the input source for conversion, you must also explain it in the description. 3. You shall be solely responsible for any infringement problems caused by the input source. 
When using other commercial vocal synthesis software as input source, please ensure that you comply with the terms of use of the software. Note that many vocal synthesis engines clearly state in their terms of use that they cannot be used for input source conversion. 4. Continuing to use this project is deemed as agreeing to the relevant provisions stated in this repository README. This repository README has the obligation to persuade, and is not responsible for any subsequent problems that may arise. From 80672699563bf5bb0f93c47e25c567e8bace85a6 Mon Sep 17 00:00:00 2001 From: suanlixianren <101437864+suanlixianren@users.noreply.github.com> Date: Sun, 19 Mar 2023 17:36:49 +0800 Subject: [PATCH 07/61] Update README_zh_CN.md --- README_zh_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index eb83375..dbeb6b0 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -6,7 +6,7 @@ ## 📏 使用规约 -1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备,请自行解决数据集的授权问题,任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和一切后果!与仓库、仓库维护者、svc develop team 无关! +1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备,请自行解决数据集的授权问题,禁止使用非授权数据集进行训练!任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和一切后果!与仓库、仓库维护者、svc develop team 无关! 2. 任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 3. 由输入源造成的侵权问题需自行承担全部责任和一切后果。使用其他商用歌声合成软件作为输入源时,请确保遵守该软件的使用条例,注意,许多歌声合成引擎使用条例中明确指明不可用于输入源进行转换! 4. 
继续使用视为已同意本仓库 README 所述相关条例,本仓库 README 已进行劝导义务,不对后续可能存在问题负责。 From 82139e5044e9b13d5d464d8d1262f01372c244eb Mon Sep 17 00:00:00 2001 From: suanlixianren <101437864+suanlixianren@users.noreply.github.com> Date: Sun, 19 Mar 2023 17:41:38 +0800 Subject: [PATCH 08/61] Update README_zh_CN.md --- README_zh_CN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index dbeb6b0..9f9f4d5 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -5,8 +5,8 @@ #### ✨ 改善了交互的一个分支推荐:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) ## 📏 使用规约 - -1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备,请自行解决数据集的授权问题,禁止使用非授权数据集进行训练!任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和一切后果!与仓库、仓库维护者、svc develop team 无关! +#Warning:请自行解决数据集授权问题,禁止使用非授权数据集进行训练!任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和后果!与仓库、仓库维护者、svc develop team 无关! +1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备。 2. 任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 3. 由输入源造成的侵权问题需自行承担全部责任和一切后果。使用其他商用歌声合成软件作为输入源时,请确保遵守该软件的使用条例,注意,许多歌声合成引擎使用条例中明确指明不可用于输入源进行转换! 4. 继续使用视为已同意本仓库 README 所述相关条例,本仓库 README 已进行劝导义务,不对后续可能存在问题负责。 From 849c792bcfb2a5f3a122792a18bd57e41a5f1c25 Mon Sep 17 00:00:00 2001 From: suanlixianren <101437864+suanlixianren@users.noreply.github.com> Date: Sun, 19 Mar 2023 17:41:52 +0800 Subject: [PATCH 09/61] Update README_zh_CN.md --- README_zh_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 9f9f4d5..bb353da 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -5,7 +5,7 @@ #### ✨ 改善了交互的一个分支推荐:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) ## 📏 使用规约 -#Warning:请自行解决数据集授权问题,禁止使用非授权数据集进行训练!任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和后果!与仓库、仓库维护者、svc develop team 无关! +# Warning:请自行解决数据集授权问题,禁止使用非授权数据集进行训练!任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和后果!与仓库、仓库维护者、svc develop team 无关! 1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备。 2. 
任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 3. 由输入源造成的侵权问题需自行承担全部责任和一切后果。使用其他商用歌声合成软件作为输入源时,请确保遵守该软件的使用条例,注意,许多歌声合成引擎使用条例中明确指明不可用于输入源进行转换! From 0f0739ef14cfbdad3416ed530e40303d1882ff59 Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Mon, 20 Mar 2023 14:08:55 +0800 Subject: [PATCH 10/61] Update voice auto-split --- inference/infer_tool.py | 4 ++++ inference_main.py | 31 ++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/inference/infer_tool.py b/inference/infer_tool.py index 3a2635b..6440fb0 100644 --- a/inference/infer_tool.py +++ b/inference/infer_tool.py @@ -102,6 +102,10 @@ def pad_array(arr, target_length): pad_right = pad_width - pad_left padded_arr = np.pad(arr, (pad_left, pad_right), 'constant', constant_values=(0, 0)) return padded_arr + +def split_list_by_n(list_collection, n, pre=0): + for i in range(0, len(list_collection), n): + yield list_collection[i-pre if i-pre>=0 else i: i + n] class Svc(object): diff --git a/inference_main.py b/inference_main.py index f869369..bde6a2c 100644 --- a/inference_main.py +++ b/inference_main.py @@ -25,6 +25,7 @@ def main(): # 一定要设置的部分 parser.add_argument('-m', '--model_path', type=str, default="logs/44k/G_0.pth", help='模型路径') parser.add_argument('-c', '--config_path', type=str, default="configs/config.json", help='配置文件路径') + parser.add_argument('-cl', '--clip', type=float, default=0, help='音频自动切片,0为不切片,单位为秒/s') parser.add_argument('-n', '--clean_names', type=str, nargs='+', default=["君の知らない物語-src.wav"], help='wav文件名列表,放在raw文件夹下') parser.add_argument('-t', '--trans', type=int, nargs='+', default=[0], help='音高调整,支持正负(半音)') parser.add_argument('-s', '--spk_list', type=str, nargs='+', default=['nen'], help='合成目标说话人名称') @@ -34,6 +35,7 @@ def main(): help='语音转换自动预测音高,转换歌声时不要打开这个会严重跑调') parser.add_argument('-cm', '--cluster_model_path', type=str, 
default="logs/44k/kmeans_10000.pt", help='聚类模型路径,如果没有训练聚类则随便填') parser.add_argument('-cr', '--cluster_infer_ratio', type=float, default=0, help='聚类方案占比,范围0-1,若没有训练聚类模型则填0即可') + parser.add_argument('-lg', '--linear_gradient', type=float, default=0, help='两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,单位为秒/s') # 不用动的部分 parser.add_argument('-sd', '--slice_db', type=int, default=-40, help='默认-40,嘈杂的音频可以-30,干声保留呼吸可以-50') @@ -55,6 +57,8 @@ def main(): cluster_infer_ratio = args.cluster_infer_ratio noice_scale = args.noice_scale pad_seconds = args.pad_seconds + clip = args.clip + lg = args.linear_gradient infer_tool.fill_a_to_b(trans, clean_names) for clean_name, tran in zip(clean_names, trans): @@ -65,22 +69,32 @@ def main(): wav_path = Path(raw_audio_path).with_suffix('.wav') chunks = slicer.cut(wav_path, db_thresh=slice_db) audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks) + per_size = int(clip*audio_sr) + lg_size = int(lg*audio_sr) + lg = np.linspace(0,1,lg_size) if lg_size!=0 else 0 for spk in spk_list: audio = [] for (slice_tag, data) in audio_data: print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======') - length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample)) if slice_tag: print('jump empty segment') _audio = np.zeros(length) + audio.extend(list(infer_tool.pad_array(_audio, length))) + continue + if per_size != 0: + datas = infer_tool.split_list_by_n(data, per_size,lg_size) else: + datas = [data] + for k,dat in enumerate(datas): + per_length = int(np.ceil(len(dat) / audio_sr * svc_model.target_sample)) if clip!=0 else length + if clip!=0: print(f'###=====segment clip start, {round(len(dat) / audio_sr, 3)}s======') # padd pad_len = int(audio_sr * pad_seconds) - data = np.concatenate([np.zeros([pad_len]), data, np.zeros([pad_len])]) + dat = np.concatenate([np.zeros([pad_len]), dat, np.zeros([pad_len])]) raw_path = io.BytesIO() - soundfile.write(raw_path, data, audio_sr, format="wav") + soundfile.write(raw_path, dat, audio_sr, 
format="wav") raw_path.seek(0) out_audio, out_sr = svc_model.infer(spk, tran, raw_path, cluster_infer_ratio=cluster_infer_ratio, @@ -90,8 +104,15 @@ def main(): _audio = out_audio.cpu().numpy() pad_len = int(svc_model.target_sample * pad_seconds) _audio = _audio[pad_len:-pad_len] - - audio.extend(list(infer_tool.pad_array(_audio, length))) + _audio = infer_tool.pad_array(_audio, per_length) + if lg_size!=0 and k!=0: + lg1 = audio[-lg_size:] + lg2 = _audio[0:lg_size] + lg_pre = lg1*(1-lg)+lg2*lg + audio = audio[0:-lg_size] + audio.extend(lg_pre) + _audio = _audio[lg_size:] + audio.extend(list(_audio)) key = "auto" if auto_predict_f0 else f"{tran}key" cluster_name = "" if cluster_infer_ratio == 0 else f"_{cluster_infer_ratio}" res_path = f'./results/{clean_name}_{key}_{spk}{cluster_name}.{wav_format}' From 63ded35c2afe6f2ed7c6186512d73e881d089a93 Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Mon, 20 Mar 2023 14:09:41 +0800 Subject: [PATCH 11/61] Update voice auto-split --- inference_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference_main.py b/inference_main.py index bde6a2c..809ae31 100644 --- a/inference_main.py +++ b/inference_main.py @@ -35,7 +35,7 @@ def main(): help='语音转换自动预测音高,转换歌声时不要打开这个会严重跑调') parser.add_argument('-cm', '--cluster_model_path', type=str, default="logs/44k/kmeans_10000.pt", help='聚类模型路径,如果没有训练聚类则随便填') parser.add_argument('-cr', '--cluster_infer_ratio', type=float, default=0, help='聚类方案占比,范围0-1,若没有训练聚类模型则填0即可') - parser.add_argument('-lg', '--linear_gradient', type=float, default=0, help='两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,单位为秒/s') + parser.add_argument('-lg', '--linear_gradient', type=float, default=0, help='两段音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,单位为秒/s') # 不用动的部分 parser.add_argument('-sd', '--slice_db', type=int, default=-40, help='默认-40,嘈杂的音频可以-30,干声保留呼吸可以-50') From 80a9cbe7e299c68c5360add582c223b131ef824e Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Mon, 20 Mar 2023 16:26:40 +0800 
Subject: [PATCH 12/61] Update voice auto-split --- inference_main.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/inference_main.py b/inference_main.py index 809ae31..21e2ef8 100644 --- a/inference_main.py +++ b/inference_main.py @@ -43,6 +43,7 @@ def main(): parser.add_argument('-ns', '--noice_scale', type=float, default=0.4, help='噪音级别,会影响咬字和音质,较为玄学') parser.add_argument('-p', '--pad_seconds', type=float, default=0.5, help='推理音频pad秒数,由于未知原因开头结尾会有异响,pad一小段静音段后就不会出现') parser.add_argument('-wf', '--wav_format', type=str, default='flac', help='音频输出格式') + parser.add_argument('-lgr', '--linear_gradient_retain', type=float, default=0.75, help='自动音频切片后,需要舍弃每段切片的头尾。该参数设置交叉长度保留的比例,范围0-1,左开右闭') args = parser.parse_args() @@ -59,6 +60,7 @@ def main(): pad_seconds = args.pad_seconds clip = args.clip lg = args.linear_gradient + lgr = args.linear_gradient_retain infer_tool.fill_a_to_b(trans, clean_names) for clean_name, tran in zip(clean_names, trans): @@ -71,12 +73,16 @@ def main(): audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks) per_size = int(clip*audio_sr) lg_size = int(lg*audio_sr) - lg = np.linspace(0,1,lg_size) if lg_size!=0 else 0 + lg_size_r = int(lg_size*lgr) + lg_size_c_l = (lg_size-lg_size_r)//2 + lg_size_c_r = lg_size-lg_size_r-lg_size_c_l + lg = np.linspace(0,1,lg_size_r) if lg_size!=0 else 0 for spk in spk_list: audio = [] for (slice_tag, data) in audio_data: print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======') + length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample)) if slice_tag: print('jump empty segment') @@ -106,12 +112,12 @@ def main(): _audio = _audio[pad_len:-pad_len] _audio = infer_tool.pad_array(_audio, per_length) if lg_size!=0 and k!=0: - lg1 = audio[-lg_size:] - lg2 = _audio[0:lg_size] + lg1 = audio[-(lg_size_r+lg_size_c_r):-lg_size_c_r] if lgr != 1 else audio[-lg_size:] + lg2 = _audio[lg_size_c_l:lg_size_c_l+lg_size_r] if lgr != 1 else _audio[0:lg_size] lg_pre = 
lg1*(1-lg)+lg2*lg - audio = audio[0:-lg_size] + audio = audio[0:-(lg_size_r+lg_size_c_r)] if lgr != 1 else audio[0:-lg_size] audio.extend(lg_pre) - _audio = _audio[lg_size:] + _audio = _audio[lg_size_c_l+lg_size_r:] if lgr != 1 else _audio[lg_size:] audio.extend(list(_audio)) key = "auto" if auto_predict_f0 else f"{tran}key" cluster_name = "" if cluster_infer_ratio == 0 else f"_{cluster_infer_ratio}" From 31d6fb780735dc36ed357890e321c20d3976371d Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Mon, 20 Mar 2023 19:07:59 +0800 Subject: [PATCH 13/61] Update README.md. close #64 --- README.md | 37 +++++++++++++++++++------------------ README_zh_CN.md | 9 ++++++--- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index b796e38..54b9eaf 100644 --- a/README.md +++ b/README.md @@ -2,26 +2,30 @@ [**English**](./README.md) | [**中文简体**](./README_zh_CN.md) -## Terms of Use +#### ✨ A fork with a greatly improved interface: [34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) -1. This project is established for academic exchange purposes only and is intended for communication and learning purposes. It is not intended for production environments. Please solve the authorization problem of the dataset on your own. You shall be solely responsible for any problems caused by the use of non-authorized datasets for training and all consequences thereof.The repository and its maintainer, svc develop team, have nothing to do with the consequences! +#### ✨ A client supports real-time conversion: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) + +## 📏 Terms of Use + +# Warning: Please solve the authorization problem of the dataset on your own. You shall be solely responsible for any problems caused by the use of non-authorized datasets for training and all consequences thereof.The repository and its maintainer, svc develop team, have nothing to do with the consequences! + +1. 
This project is established for academic exchange purposes only and is intended for communication and learning purposes. It is not intended for production environments. 2. Any videos based on sovits that are published on video platforms must clearly indicate in the description that they are used for voice changing and specify the input source of the voice or audio, for example, using videos or audios published by others and separating the vocals as input source for conversion, which must provide clear original video or music links. If your own voice or other synthesized voices from other commercial vocal synthesis software are used as the input source for conversion, you must also explain it in the description. 3. You shall be solely responsible for any infringement problems caused by the input source. When using other commercial vocal synthesis software as input source, please ensure that you comply with the terms of use of the software. Note that many vocal synthesis engines clearly state in their terms of use that they cannot be used for input source conversion. 4. Continuing to use this project is deemed as agreeing to the relevant provisions stated in this repository README. This repository README has the obligation to persuade, and is not responsible for any subsequent problems that may arise. 5. If you distribute this repository's code or publish any results produced by this project publicly (including but not limited to video sharing platforms), please indicate the original author and code source (this repository). 6. If you use this project for any other plan, please contact and inform the author of this repository in advance. Thank you very much. -### A fork with a greatly improved interface:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) - -## Update +## 🆕 Update! > Updated the 4.0-v2 model, the entire process is the same as 4.0. 
Compared to 4.0, there is some improvement in certain scenarios, but there are also some cases where it has regressed. Please refer to the [4.0-v2 branch](https://github.com/svc-develop-team/so-vits-svc/tree/4.0-v2) for more information. -## Model Introduction +## 📝 Model Introduction The singing voice conversion model uses SoftVC content encoder to extract source audio speech features, then the vectors are directly fed into VITS instead of converting to a text based intermediate; thus the pitch and intonations are conserved. Additionally, the vocoder is changed to [NSF HiFiGAN](https://github.com/openvpi/DiffSinger/tree/refactor/modules/nsf_hifigan) to solve the problem of sound interruption. -### 4.0 Version Update Content +### 🆕 4.0 Version Update Content - Feature input is changed to [Content Vec](https://github.com/auspicious3000/contentvec) - The sampling rate is unified to use 44100Hz @@ -31,7 +35,7 @@ The singing voice conversion model uses SoftVC content encoder to extract source - Added an option 1: automatic pitch prediction for vc mode, which means that you don't need to manually enter the pitch key when converting speech, and the pitch of male and female voices can be automatically converted. However, this mode will cause pitch shift when converting songs. - Added option 2: reduce timbre leakage through k-means clustering scheme, making the timbre more similar to the target timbre. -## Pre-trained Model Files +## 📥 Pre-trained Model Files #### **Required** @@ -53,7 +57,7 @@ Get them from svc-develop-team(TBD) or anywhere else. Although the pretrained model generally does not cause any copyright problems, please pay attention to it. For example, ask the author in advance, or the author has indicated the feasible use in the description clearly. -## Dataset Preparation +## 📊 Dataset Preparation Simply place the dataset in the `dataset_raw` directory with the following file structure. 
@@ -69,7 +73,7 @@ dataset_raw └───xxx7-xxx007.wav ``` -## Preprocessing +## 🛠️ Preprocessing 1. Resample to 44100hz @@ -91,7 +95,7 @@ python preprocess_hubert_f0.py After completing the above steps, the dataset directory will contain the preprocessed data, and the dataset_raw folder can be deleted. -## Training +## 🏋️‍♀️ Training ```shell python train.py -c configs/config.json -m 44k @@ -99,7 +103,7 @@ python train.py -c configs/config.json -m 44k Note: During training, the old models will be automatically cleared and only the latest three models will be kept. If you want to prevent overfitting, you need to manually backup the model checkpoints, or modify the configuration file `keep_ckpts` to 0 to never clear them. -## Inference +## 🤖 Inference Use [inference_main.py](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/inference_main.py) @@ -111,7 +115,6 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " ``` Required parameters: - - -m, --model_path: path to the model. - -c, --config_path: path to the configuration file. - -n, --clean_names: a list of wav file names located in the raw folder. @@ -119,19 +122,17 @@ Required parameters: - -s, --spk_list: target speaker name for synthesis. Optional parameters: see the next section - - -a, --auto_predict_f0: automatic pitch prediction for voice conversion, do not enable this when converting songs as it can cause serious pitch issues. - -cm, --cluster_model_path: path to the clustering model, fill in any value if clustering is not trained. - -cr, --cluster_infer_ratio: proportion of the clustering solution, range 0-1, fill in 0 if the clustering model is not trained. -## Optional Settings +## 🤔 Optional Settings If the results from the previous section are satisfactory, or if you didn't understand what is being discussed in the following section, you can skip it, and it won't affect the model usage. 
(These optional settings have a relatively small impact, and they may have some effect on certain specific data, but in most cases, the difference may not be noticeable.) ### Automatic f0 prediction During the 4.0 model training, an f0 predictor is also trained, which can be used for automatic pitch prediction during voice conversion. However, if the effect is not good, manual pitch prediction can be used instead. But please do not enable this feature when converting singing voice as it may cause serious pitch shifting! - - Set "auto_predict_f0" to true in inference_main. ### Cluster-based timbre leakage control @@ -151,7 +152,7 @@ The existing steps before clustering do not need to be changed. All you need to #### [23/03/16] No longer need to download hubert manually -## Exporting to Onnx +## 📤 Exporting to Onnx Use [onnx_export.py](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/onnx_export.py) @@ -168,7 +169,7 @@ Use [onnx_export.py](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/on Note: For Hubert Onnx models, please use the models provided by MoeSS. Currently, they cannot be exported on their own (Hubert in fairseq has many unsupported operators and things involving constants that can cause errors or result in problems with the input/output shape and results when exported.) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) -## Some legal provisions for reference +## 📚 Some legal provisions for reference #### 《民法典》 diff --git a/README_zh_CN.md b/README_zh_CN.md index bb353da..5065635 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -4,8 +4,12 @@ #### ✨ 改善了交互的一个分支推荐:[34j/so-vits-svc-fork](https://github.com/34j/so-vits-svc-fork) +#### ✨ 支持实时转换的一个客户端:[w-okada/voice-changer](https://github.com/w-okada/voice-changer) + ## 📏 使用规约 + # Warning:请自行解决数据集授权问题,禁止使用非授权数据集进行训练!任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和后果!与仓库、仓库维护者、svc develop team 无关! + 1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备。 2. 
任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 3. 由输入源造成的侵权问题需自行承担全部责任和一切后果。使用其他商用歌声合成软件作为输入源时,请确保遵守该软件的使用条例,注意,许多歌声合成引擎使用条例中明确指明不可用于输入源进行转换! @@ -13,7 +17,6 @@ 5. 如将本仓库代码二次分发,或将由此项目产出的任何结果公开发表 (包括但不限于视频网站投稿),请注明原作者及代码来源 (此仓库)。 6. 如果将此项目用于任何其他企划,请提前联系并告知本仓库作者,十分感谢。 - ## 🆕 Update! > 更新了4.0-v2模型,全部流程同4.0,相比4.0在部分场景下有一定提升,但也有些情况有退步,具体可移步[4.0-v2分支](https://github.com/svc-develop-team/so-vits-svc/tree/4.0-v2) @@ -97,6 +100,7 @@ python preprocess_hubert_f0.py ```shell python train.py -c configs/config.json -m 44k ``` + 注:训练时会自动清除老的模型,只保留最新3个模型,如果想防止过拟合需要自己手动备份模型记录点,或修改配置文件keep_ckpts 0为永不清除 ## 🤖 推理 @@ -133,8 +137,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " ### 聚类音色泄漏控制 -介绍:聚类方案可以减小音色泄漏,使得模型训练出来更像目标的音色(但其实不是特别明显),但是单纯的聚类方案会降低模型的咬字(会口齿不清)(这个很明显),本模型采用了融合的方式, -可以线性控制聚类方案与非聚类方案的占比,也就是可以手动在"像目标音色" 和 "咬字清晰" 之间调整比例,找到合适的折中点。 +介绍:聚类方案可以减小音色泄漏,使得模型训练出来更像目标的音色(但其实不是特别明显),但是单纯的聚类方案会降低模型的咬字(会口齿不清)(这个很明显),本模型采用了融合的方式,可以线性控制聚类方案与非聚类方案的占比,也就是可以手动在"像目标音色" 和 "咬字清晰" 之间调整比例,找到合适的折中点。 使用聚类前面的已有步骤不用进行任何的变动,只需要额外训练一个聚类模型,虽然效果比较有限,但训练成本也比较低 From 306b70a749705e5f87e9d263ce6a424f4d6d4a10 Mon Sep 17 00:00:00 2001 From: RiceCake Date: Tue, 21 Mar 2023 19:31:54 +0800 Subject: [PATCH 14/61] Fix a bug that may cause size doesn't match error --- preprocess_flist_config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/preprocess_flist_config.py b/preprocess_flist_config.py index 9a7da50..6a29726 100644 --- a/preprocess_flist_config.py +++ b/preprocess_flist_config.py @@ -78,6 +78,8 @@ if __name__ == "__main__": f.write(wavpath + "\n") config_template["spk"] = spk_dict + config_template["model"]["n_speakers"] = spk_id + print("Writing configs/config.json") with open("configs/config.json", "w") as f: json.dump(config_template, f, indent=2) From 5671384ff238fba033ae301416f010926125a0c9 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E8=AC=AC=E7=B4=97=E7=89=B9?= <66856838+Miuzarte@users.noreply.github.com> Date: Wed, 22 Mar 2023 00:02:58 +0800 Subject: [PATCH 15/61] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=97=A7=E8=B4=A1?= =?UTF-8?q?=E7=8C=AE=E8=80=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README_zh_CN.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README_zh_CN.md b/README_zh_CN.md index 5065635..b23b749 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -169,6 +169,24 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " + 注意:Hubert Onnx模型请使用MoeSS提供的模型,目前无法自行导出(fairseq中Hubert有不少onnx不支持的算子和涉及到常量的东西,在导出时会报错或者导出的模型输入输出shape和结果都有问题) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) +## 旧贡献者 + +因为某些原因原作者进行了删库处理,本仓库重建之初由于组织成员疏忽直接重新上传了所有文件导致以前的contributors全部木大,现在在README里重新添加一个旧贡献者列表 + +*某些成员已根据其个人意愿不将其列出* + + + + + + + + + + + +

MistEO


XiaoMiku01


しぐれ


TomoGaSukunai


Plachtaa


zd小达


凍聲響世

+ ## 📚 一些法律条例参考 #### 《民法典》 From 1c43260dd836cad41962dcc18cd8932a734a5617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AC=AC=E7=B4=97=E7=89=B9?= <66856838+Miuzarte@users.noreply.github.com> Date: Wed, 22 Mar 2023 00:11:50 +0800 Subject: [PATCH 16/61] Add previous contributors list --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 54b9eaf..8f9e1af 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,24 @@ Use [onnx_export.py](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/on Note: For Hubert Onnx models, please use the models provided by MoeSS. Currently, they cannot be exported on their own (Hubert in fairseq has many unsupported operators and things involving constants that can cause errors or result in problems with the input/output shape and results when exported.) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) +## Previous contributors + +For some reason the author deleted the original repository. Because of the negligence of the organization members, the contributor list was cleared because all files were directly reuploaded to this repository at the beginning of the reconstruction of this repository. Now add a previous contributor list to README.md. + +*Some members have not listed according to their personal wishes.* + + + + + + + + + + + +

MistEO


XiaoMiku01


しぐれ


TomoGaSukunai


Plachtaa


zd小达


凍聲響世

+ ## 📚 Some legal provisions for reference #### 《民法典》 From d0b3ae7991ca77d2baca052ab31de2a2d372ed54 Mon Sep 17 00:00:00 2001 From: RiceCake Date: Wed, 22 Mar 2023 19:01:05 +0800 Subject: [PATCH 17/61] =?UTF-8?q?=E6=B7=BB=E5=8A=A0-cl=E5=8F=82=E6=95=B0?= =?UTF-8?q?=E6=9C=89=E5=85=B3=E6=8F=8F=E8=BF=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README_zh_CN.md | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index b23b749..163f276 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -120,6 +120,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " + -n, --clean_names:wav 文件名列表,放在 raw 文件夹下。 + -t, --trans:音高调整,支持正负(半音)。 + -s, --spk_list:合成目标说话人名称。 ++ -cl, --clip:音频自动切片,0为不切片,单位为秒/s。 可选项部分:见下一节 + -a, --auto_predict_f0:语音转换自动预测音高,转换歌声时不要打开这个会严重跑调。 @@ -169,24 +170,6 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " + 注意:Hubert Onnx模型请使用MoeSS提供的模型,目前无法自行导出(fairseq中Hubert有不少onnx不支持的算子和涉及到常量的东西,在导出时会报错或者导出的模型输入输出shape和结果都有问题) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) -## 旧贡献者 - -因为某些原因原作者进行了删库处理,本仓库重建之初由于组织成员疏忽直接重新上传了所有文件导致以前的contributors全部木大,现在在README里重新添加一个旧贡献者列表 - -*某些成员已根据其个人意愿不将其列出* - - - - - - - - - - - -

MistEO


XiaoMiku01


しぐれ


TomoGaSukunai


Plachtaa


zd小达


凍聲響世

- ## 📚 一些法律条例参考 #### 《民法典》 From 80720fe6bcfd6ffab1548047ece828e98bc05cee Mon Sep 17 00:00:00 2001 From: RiceCake Date: Wed, 22 Mar 2023 19:02:12 +0800 Subject: [PATCH 18/61] adding the -cl parameter's description --- README.md | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/README.md b/README.md index 8f9e1af..a49a7d0 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ Required parameters: - -n, --clean_names: a list of wav file names located in the raw folder. - -t, --trans: pitch adjustment, supports positive and negative (semitone) values. - -s, --spk_list: target speaker name for synthesis. +- -cl, --clip:voice auto-split,set to 0 to turn off,duration in seconds. Optional parameters: see the next section - -a, --auto_predict_f0: automatic pitch prediction for voice conversion, do not enable this when converting songs as it can cause serious pitch issues. @@ -169,24 +170,6 @@ Use [onnx_export.py](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/on Note: For Hubert Onnx models, please use the models provided by MoeSS. Currently, they cannot be exported on their own (Hubert in fairseq has many unsupported operators and things involving constants that can cause errors or result in problems with the input/output shape and results when exported.) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) -## Previous contributors - -For some reason the author deleted the original repository. Because of the negligence of the organization members, the contributor list was cleared because all files were directly reuploaded to this repository at the beginning of the reconstruction of this repository. Now add a previous contributor list to README.md. - -*Some members have not listed according to their personal wishes.* - - - - - - - - - - - -

MistEO


XiaoMiku01


しぐれ


TomoGaSukunai


Plachtaa


zd小达


凍聲響世

- ## 📚 Some legal provisions for reference #### 《民法典》 From c354b52fcc909c3e1040d2611ed5a5e56d70a383 Mon Sep 17 00:00:00 2001 From: RiceCake Date: Wed, 22 Mar 2023 19:03:09 +0800 Subject: [PATCH 19/61] adding the -cl parameter's description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a49a7d0..b88f7c0 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ Required parameters: - -n, --clean_names: a list of wav file names located in the raw folder. - -t, --trans: pitch adjustment, supports positive and negative (semitone) values. - -s, --spk_list: target speaker name for synthesis. -- -cl, --clip:voice auto-split,set to 0 to turn off,duration in seconds. +- -cl, --clip: voice auto-split, set to 0 to turn off, duration in seconds. Optional parameters: see the next section - -a, --auto_predict_f0: automatic pitch prediction for voice conversion, do not enable this when converting songs as it can cause serious pitch issues. From 029b29781a8b4e65fe7f2c870290e45a87aa05e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BA=A2=E8=A1=80=E7=90=83AE3803?= <2544390577@qq.com> Date: Wed, 22 Mar 2023 21:47:48 +0900 Subject: [PATCH 20/61] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index b88f7c0..392e7fd 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,8 @@ Note: For Hubert Onnx models, please use the models provided by MoeSS. Currently ## 📚 Some legal provisions for reference +#### Any country, region, organization, or individual using this project must comply with the following laws. + #### 《民法典》 ##### 第一千零一十九条 @@ -188,3 +190,9 @@ Note: For Hubert Onnx models, please use the models provided by MoeSS.
Currently 【作品侵害名誉权】行为人发表的文学、艺术作品以真人真事或者特定人为描述对象,含有侮辱、诽谤内容,侵害他人名誉权的,受害人有权依法请求该行为人承担民事责任。 行为人发表的文学、艺术作品不以特定人为描述对象,仅其中的情节与该特定人的情况相似的,不承担民事责任。 + +#### 《[中华人民共和国宪法](http://www.gov.cn/guoqing/2018-03/22/content_5276318.htm)》 + +#### 《[中华人民共和国刑法](http://gongbao.court.gov.cn/Details/f8e30d0689b23f57bfc782d21035c3.html?sw=%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD%E5%88%91%E6%B3%95)》 + +#### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 \ No newline at end of file From 44445ab8c3edcf12e39852cc7b728b29982dd125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BA=A2=E8=A1=80=E7=90=83AE3803?= <2544390577@qq.com> Date: Wed, 22 Mar 2023 21:50:33 +0900 Subject: [PATCH 21/61] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=B8=AD=E6=96=87READM?= =?UTF-8?q?E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README_zh_CN.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 163f276..f08c691 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -80,7 +80,7 @@ dataset_raw ```shell python resample.py ``` - + 2. 
自动划分训练集 验证集 测试集 以及自动生成配置文件 ```shell @@ -172,19 +172,24 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " ## 📚 一些法律条例参考 +#### 任何国家,地区,组织和个人使用此项目必须遵守以下法律 + #### 《民法典》 -##### 第一千零一十九条 +##### 第一千零一十九条 -任何组织或者个人不得以丑化、污损,或者利用信息技术手段伪造等方式侵害他人的肖像权。未经肖像权人同意,不得制作、使用、公开肖像权人的肖像,但是法律另有规定的除外。 -未经肖像权人同意,肖像作品权利人不得以发表、复制、发行、出租、展览等方式使用或者公开肖像权人的肖像。 -对自然人声音的保护,参照适用肖像权保护的有关规定。 +任何组织或者个人不得以丑化、污损,或者利用信息技术手段伪造等方式侵害他人的肖像权。未经肖像权人同意,不得制作、使用、公开肖像权人的肖像,但是法律另有规定的除外。 未经肖像权人同意,肖像作品权利人不得以发表、复制、发行、出租、展览等方式使用或者公开肖像权人的肖像。 对自然人声音的保护,参照适用肖像权保护的有关规定。 -##### 第一千零二十四条 +##### 第一千零二十四条 -【名誉权】民事主体享有名誉权。任何组织或者个人不得以侮辱、诽谤等方式侵害他人的名誉权。 +【名誉权】民事主体享有名誉权。任何组织或者个人不得以侮辱、诽谤等方式侵害他人的名誉权。 -##### 第一千零二十七条 +##### 第一千零二十七条 -【作品侵害名誉权】行为人发表的文学、艺术作品以真人真事或者特定人为描述对象,含有侮辱、诽谤内容,侵害他人名誉权的,受害人有权依法请求该行为人承担民事责任。 -行为人发表的文学、艺术作品不以特定人为描述对象,仅其中的情节与该特定人的情况相似的,不承担民事责任。 +【作品侵害名誉权】行为人发表的文学、艺术作品以真人真事或者特定人为描述对象,含有侮辱、诽谤内容,侵害他人名誉权的,受害人有权依法请求该行为人承担民事责任。 行为人发表的文学、艺术作品不以特定人为描述对象,仅其中的情节与该特定人的情况相似的,不承担民事责任。 + +#### 《[中华人民共和国宪法](http://www.gov.cn/guoqing/2018-03/22/content_5276318.htm)》 + +#### 《[中华人民共和国刑法](http://gongbao.court.gov.cn/Details/f8e30d0689b23f57bfc782d21035c3.html?sw=中华人民共和国刑法)》 + +#### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 \ No newline at end of file From ee18ffc7cb32d2dca1f2ee6a092234bd99f51f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BA=A2=E8=A1=80=E7=90=83AE3803?= <2544390577@qq.com> Date: Wed, 22 Mar 2023 22:02:08 +0900 Subject: [PATCH 22/61] Update README_zh_CN.md --- README_zh_CN.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index f08c691..ace8ef4 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -170,6 +170,24 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " + 注意:Hubert Onnx模型请使用MoeSS提供的模型,目前无法自行导出(fairseq中Hubert有不少onnx不支持的算子和涉及到常量的东西,在导出时会报错或者导出的模型输入输出shape和结果都有问题) 
[Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) +## 旧贡献者 + +因为某些原因原作者进行了删库处理,本仓库重建之初由于组织成员疏忽直接重新上传了所有文件导致以前的contributors全部木大,现在在README里重新添加一个旧贡献者列表 + +*某些成员已根据其个人意愿不将其列出* + + + + + + + + + + + +

MistEO


XiaoMiku01


しぐれ


TomoGaSukunai


Plachtaa


zd小达


凍聲響世

+ ## 📚 一些法律条例参考 #### 任何国家,地区,组织和个人使用此项目必须遵守以下法律 @@ -192,4 +210,4 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " #### 《[中华人民共和国刑法](http://gongbao.court.gov.cn/Details/f8e30d0689b23f57bfc782d21035c3.html?sw=中华人民共和国刑法)》 -#### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 \ No newline at end of file +#### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 From 13ce3fb56ad5d6fe8b424912ec5c922ac9322f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BA=A2=E8=A1=80=E7=90=83AE3803?= <2544390577@qq.com> Date: Wed, 22 Mar 2023 22:04:52 +0900 Subject: [PATCH 23/61] Update README.md --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 392e7fd..5bcb49c 100644 --- a/README.md +++ b/README.md @@ -170,6 +170,24 @@ Use [onnx_export.py](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/on Note: For Hubert Onnx models, please use the models provided by MoeSS. Currently, they cannot be exported on their own (Hubert in fairseq has many unsupported operators and things involving constants that can cause errors or result in problems with the input/output shape and results when exported.) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) +## Previous contributors + +For some reason the author deleted the original repository. Because of the negligence of the organization members, the contributor list was cleared because all files were directly reuploaded to this repository at the beginning of the reconstruction of this repository. Now add a previous contributor list to README.md. + +*Some members have not been listed according to their personal wishes.* + + + + + + + + + + + +

MistEO


XiaoMiku01


しぐれ


TomoGaSukunai


Plachtaa


zd小达


凍聲響世

+ ## 📚 Some legal provisions for reference #### Any country, region, organization, or individual using this project must comply with the following laws. @@ -195,4 +213,4 @@ Note: For Hubert Onnx models, please use the models provided by MoeSS. Currently #### 《[中华人民共和国刑法](http://gongbao.court.gov.cn/Details/f8e30d0689b23f57bfc782d21035c3.html?sw=%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD%E5%88%91%E6%B3%95)》 -#### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 \ No newline at end of file +#### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 From 0bd74c20288cb3c96bcb0ca522bff0c91fdf1d17 Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Thu, 23 Mar 2023 16:39:00 +0800 Subject: [PATCH 24/61] Update WebUI --- inference/infer_tool.py | 45 ++++++++++++++------ webUI.py | 94 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 12 deletions(-) create mode 100644 webUI.py diff --git a/inference/infer_tool.py b/inference/infer_tool.py index 6440fb0..dd1799a 100644 --- a/inference/infer_tool.py +++ b/inference/infer_tool.py @@ -190,38 +190,59 @@ class Svc(object): # 清理显存 torch.cuda.empty_cache() - def slice_inference(self,raw_audio_path, spk, tran, slice_db,cluster_infer_ratio, auto_predict_f0,noice_scale, pad_seconds=0.5): + def slice_inference(self,raw_audio_path, spk, tran, slice_db,cluster_infer_ratio, auto_predict_f0,noice_scale, pad_seconds=0.5, clip_seconds=0,lg_num=0,lgr_num =0.75): wav_path = raw_audio_path chunks = slicer.cut(wav_path, db_thresh=slice_db) audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks) - + per_size = int(clip_seconds*audio_sr) + lg_size = int(lg_num*audio_sr) + lg_size_r = int(lg_size*lgr_num) + lg_size_c_l = (lg_size-lg_size_r)//2 + lg_size_c_r = lg_size-lg_size_r-lg_size_c_l + lg = np.linspace(0,1,lg_size_r) if lg_size!=0 else 0 + audio = [] for (slice_tag, data) in audio_data: print(f'#=====segment start, 
{round(len(data) / audio_sr, 3)}s======') # padd - pad_len = int(audio_sr * pad_seconds) - data = np.concatenate([np.zeros([pad_len]), data, np.zeros([pad_len])]) length = int(np.ceil(len(data) / audio_sr * self.target_sample)) - raw_path = io.BytesIO() - soundfile.write(raw_path, data, audio_sr, format="wav") - raw_path.seek(0) if slice_tag: print('jump empty segment') _audio = np.zeros(length) + audio.extend(list(pad_array(_audio, length))) + continue + if per_size != 0: + datas = split_list_by_n(data, per_size,lg_size) else: + datas = [data] + for k,dat in enumerate(datas): + per_length = int(np.ceil(len(dat) / audio_sr * self.target_sample)) if clip_seconds!=0 else length + if clip_seconds!=0: print(f'###=====segment clip start, {round(len(dat) / audio_sr, 3)}s======') + # padd + pad_len = int(audio_sr * pad_seconds) + dat = np.concatenate([np.zeros([pad_len]), dat, np.zeros([pad_len])]) + raw_path = io.BytesIO() + soundfile.write(raw_path, dat, audio_sr, format="wav") + raw_path.seek(0) out_audio, out_sr = self.infer(spk, tran, raw_path, cluster_infer_ratio=cluster_infer_ratio, auto_predict_f0=auto_predict_f0, noice_scale=noice_scale ) _audio = out_audio.cpu().numpy() - - pad_len = int(self.target_sample * pad_seconds) - _audio = _audio[pad_len:-pad_len] - audio.extend(list(_audio)) + pad_len = int(self.target_sample * pad_seconds) + _audio = _audio[pad_len:-pad_len] + _audio = pad_array(_audio, per_length) + if lg_size!=0 and k!=0: + lg1 = audio[-(lg_size_r+lg_size_c_r):-lg_size_c_r] if lgr_num != 1 else audio[-lg_size:] + lg2 = _audio[lg_size_c_l:lg_size_c_l+lg_size_r] if lgr_num != 1 else _audio[0:lg_size] + lg_pre = lg1*(1-lg)+lg2*lg + audio = audio[0:-(lg_size_r+lg_size_c_r)] if lgr_num != 1 else audio[0:-lg_size] + audio.extend(lg_pre) + _audio = _audio[lg_size_c_l+lg_size_r:] if lgr_num != 1 else _audio[lg_size:] + audio.extend(list(_audio)) return np.array(audio) - class RealTimeVC: def __init__(self): self.last_chunk = None diff --git a/webUI.py 
b/webUI.py new file mode 100644 index 0000000..1795982 --- /dev/null +++ b/webUI.py @@ -0,0 +1,94 @@ +import io +import os + +# os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt") +import gradio as gr +import librosa +import numpy as np +import soundfile +from inference.infer_tool import Svc +import logging + +logging.getLogger('numba').setLevel(logging.WARNING) +logging.getLogger('markdown_it').setLevel(logging.WARNING) +logging.getLogger('urllib3').setLevel(logging.WARNING) +logging.getLogger('matplotlib').setLevel(logging.WARNING) +logging.getLogger('multipart').setLevel(logging.WARNING) + +model = None +spk = None + +def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num): + global model + try: + if input_audio is None: + return "You need to upload an audio", None + if model is None: + return "You need to upload an model", None + sampling_rate, audio = input_audio + # print(audio.shape,sampling_rate) + audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32) + if len(audio.shape) > 1: + audio = librosa.to_mono(audio.transpose(1, 0)) + temp_path = "temp.wav" + soundfile.write(temp_path, audio, model.target_sample, format="wav") + _audio = model.slice_inference(temp_path, sid, vc_transform, slice_db, cluster_ratio, auto_f0, noise_scale,pad_seconds,cl_num,lg_num,lgr_num) + os.remove(temp_path) + return "Success", (model.target_sample, _audio) + except Exception as e: + return "异常信息:"+str(e)+"\n请排障后重试",None + +app = gr.Blocks() +with app: + with gr.Tabs(): + with gr.TabItem("Sovits4.0"): + gr.Markdown(value=""" + Sovits4.0 WebUI + """) + + gr.Markdown(value=""" + 下面是模型文件选择: + """) + model_path = gr.File(label="模型文件") + gr.Markdown(value=""" + 下面是配置文件选择: + """) + config_path = gr.File(label="配置文件") + gr.Markdown(value=""" + 下面是聚类模型文件选择,没有可以不填: + """) + cluster_model_path = gr.File(label="聚类模型文件") + device = 
gr.Dropdown(label="推理设备,留白则为自动选择cpu和gpu",choices=[None,"gpu","cpu"],value=None) + gr.Markdown(value=""" + 全部上传完毕后(全部文件模块显示download),点击模型解析进行解析: + """) + model_analysis_button = gr.Button(value="模型解析") + sid = gr.Dropdown(label="音色(说话人)") + sid_output = gr.Textbox(label="Output Message") + vc_input3 = gr.Audio(label="上传音频") + vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0) + cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0) + auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False) + slice_db = gr.Number(label="切片阈值", value=-40) + noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4) + cl_num = gr.Number(label="音频自动切片,0为不切片,单位为秒/s", value=0) + pad_seconds = gr.Number(label="推理音频pad秒数,由于未知原因开头结尾会有异响,pad一小段静音段后就不会出现", value=0.5) + lg_num = gr.Number(label="两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,注意,该设置会影响推理速度,单位为秒/s", value=0) + lgr_num = gr.Number(label="自动音频切片后,需要舍弃每段切片的头尾。该参数设置交叉长度保留的比例,范围0-1,左开右闭", value=0.75,interactive=True) + vc_submit = gr.Button("转换", variant="primary") + vc_output1 = gr.Textbox(label="Output Message") + vc_output2 = gr.Audio(label="Output Audio") + def modelAnalysis(model_path,config_path,cluster_model_path,device): + try: + global model + model = Svc(model_path.name, config_path.name,device=device if device!="" else None,cluster_model_path= cluster_model_path.name if cluster_model_path!=None else "") + spks = list(model.spk2id.keys()) + return sid.update(choices = spks,value=spks[0]),"ok" + except Exception as e: + return "","异常信息:"+str(e)+"\n请排障后重试" + vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num], [vc_output1, vc_output2]) + model_analysis_button.click(modelAnalysis,[model_path,config_path,cluster_model_path,device],[sid,sid_output]) + app.launch() + + + From 34d7c4a7a145beb6aa7beeaa6f03a231f2c552e6 Mon Sep 17 
00:00:00 2001 From: ylzz1997 Date: Thu, 23 Mar 2023 20:10:33 +0800 Subject: [PATCH 25/61] Update clear cuda cache --- webUI.py | 1 + 1 file changed, 1 insertion(+) diff --git a/webUI.py b/webUI.py index 1795982..648e88c 100644 --- a/webUI.py +++ b/webUI.py @@ -33,6 +33,7 @@ def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise temp_path = "temp.wav" soundfile.write(temp_path, audio, model.target_sample, format="wav") _audio = model.slice_inference(temp_path, sid, vc_transform, slice_db, cluster_ratio, auto_f0, noise_scale,pad_seconds,cl_num,lg_num,lgr_num) + model.clear_empty() os.remove(temp_path) return "Success", (model.target_sample, _audio) except Exception as e: From 7998c78a614f6b89e2205ae738008a5e7a669858 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Thu, 23 Mar 2023 20:11:27 +0800 Subject: [PATCH 26/61] Update requirement.txt --- requirements.txt | 1 + requirements_win.txt | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6f2894f..cce7439 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ onnxoptimizer fairseq==0.12.2 librosa==0.8.1 tensorboard +tensorboardX diff --git a/requirements_win.txt b/requirements_win.txt index 26103d8..5a69429 100644 --- a/requirements_win.txt +++ b/requirements_win.txt @@ -2,8 +2,8 @@ librosa==0.9.2 fairseq==0.12.2 Flask==2.1.2 Flask_Cors==3.0.10 -gradio==3.4.1 -numpy==1.20.0 +gradio +numpy playsound==1.3.0 PyAudio==0.2.12 pydub==0.25.1 @@ -19,3 +19,4 @@ praat-parselmouth onnx onnxsim onnxoptimizer +tensorboardX From 53779711b5c1b552cc65f00ad6de632d002342ab Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Thu, 23 Mar 2023 23:25:56 +0800 Subject: [PATCH 27/61] add template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 107 ++++++++++++++++++++++++ .github/ISSUE_TEMPLATE/none.md | 7 ++ .github/no-response.yml | 3 + 3 files changed, 117 insertions(+) create mode 100644 
.github/ISSUE_TEMPLATE/help_wanted.yaml create mode 100644 .github/ISSUE_TEMPLATE/none.md create mode 100644 .github/no-response.yml diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml new file mode 100644 index 0000000..c6ca1d3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -0,0 +1,107 @@ +name: 请求帮助 +description: 遇到了无法自行解决的错误 +title: '[Bug]: ' +labels: [ "bug?" ] + +body: + - type: markdown + attributes: + value: | + #### 提问前建议先自己去尝试解决,可以借助一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issues,在提issues之前,请先仔细阅读《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》 + ### 什么样的issues会被close + 1. 伸手党 + 2. 一键包/环境包相关 + 3. 提供的信息不全 + 4. 所用的数据集是无授权数据集(游戏角色/二次元人物暂不归为此类,但是训练时候也要小心谨慎。如果能联系到官方,必须先和官方联系并核实清楚) + --- + - type: checkboxes + id: Clause + attributes: + label: 请勾选下方的确认框。 + options: + - label: "我已仔细阅读README.md" + required: true + - label: "我已通过各种搜索引擎排查问题,我要提出的问题并不常见" + required: true + - label: "我未在使用由第三方用户提供的一键包/环境包" + required: true + + - type: markdown + attributes: + value: | + # 请根据实际使用环境填写以下信息 + --- + + - type: input + id: System + attributes: + label: 系统平台版本号 + description: Windows请执行`winver`,Linux请执行`uname -a` + validations: + required: true + + - type: input + id: PythonVersion + attributes: + label: Python版本 + description: 执行`python -V`以查看 + validations: + required: true + + - type: input + id: PyTorchVersion + attributes: + label: PyTorch版本 + description: 执行`pip show torch`以查看 + validations: + required: true + + - type: dropdown + id: Branch + attributes: + label: sovits分支 + options: + - 4.0 + - 4.0-v2 + - 3.0-32k + - 3.0-48k + validations: + required: true + + - type: input + id: DatasetSource + attributes: + label: 数据集来源(用于判断数据集质量) + description: 如:UVR处理过的vtb直播音频、录音棚录制 + validations: + required: true + + - type: input + id: WhereOccurs + attributes: + label: 出现问题的环节或执行的命令 + description: 如:预处理、训练、`python preprocess_hubert_f0.py` + validations: + 
required: true + + - type: textarea + id: Description + attributes: + label: 问题描述 + description: 在这里描述自己的问题,越详细越好 + validations: + required: true + + - type: textarea + id: Log + attributes: + label: 日志 + description: 从执行命令到执行完毕输出的所有信息 + render: python + validations: + required: true + + - type: textarea + id: Supplementary + attributes: + label: 补充说明 diff --git a/.github/ISSUE_TEMPLATE/none.md b/.github/ISSUE_TEMPLATE/none.md new file mode 100644 index 0000000..8309857 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/none.md @@ -0,0 +1,7 @@ +--- +name: default issue +about: 如果模板中没有你想发起的issue类型,可以选择此项 / If there is no issue type you want to raise, you can start with this one. +title: '' +labels: '' +assignees: '' +--- diff --git a/.github/no-response.yml b/.github/no-response.yml new file mode 100644 index 0000000..0b79c87 --- /dev/null +++ b/.github/no-response.yml @@ -0,0 +1,3 @@ +daysUntilClose: 7 +responseRequiredLabel: waiting response +closeComment: 由于缺少必要信息且没有回应,该 issue 已被自动关闭,如有需要补充的内容请回复并自行重新打开该 issue \ No newline at end of file From 42718706baee8f9c7411cd37ec2ca59174ad52e9 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Thu, 23 Mar 2023 23:52:40 +0800 Subject: [PATCH 28/61] add template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index c6ca1d3..1c1a5ab 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -30,7 +30,6 @@ body: attributes: value: | # 请根据实际使用环境填写以下信息 - --- - type: input id: System From 7a67d6724e4a778f72174458fbb5bbc9ec550ee6 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 01:20:53 +0800 Subject: [PATCH 29/61] add template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 
1c1a5ab..2b06e48 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -100,6 +100,13 @@ body: validations: required: true + - type: textarea + id: ValidOneClick + attributes: + label: 截图so-vits-svc/文件夹并粘贴到此处 + validations: + required: true + - type: textarea id: Supplementary attributes: From 1bcfbd516142ea2b76fc90861df586ef3dc54845 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 01:24:12 +0800 Subject: [PATCH 30/61] update template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 2b06e48..5101122 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -1,7 +1,7 @@ name: 请求帮助 description: 遇到了无法自行解决的错误 -title: '[Bug]: ' -labels: [ "bug?" ] +title: '[Help]: ' +labels: [ "help wanted" ] body: - type: markdown From 6964709e5a7be22fbda206a2e6eef4b103b0100f Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 01:42:51 +0800 Subject: [PATCH 31/61] Update template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 2 ++ .github/ISSUE_TEMPLATE/none.md | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 5101122..90340c4 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -8,10 +8,12 @@ body: attributes: value: | #### 提问前建议先自己去尝试解决,可以借助一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issues,在提issues之前,请先仔细阅读《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》 + --- ### 什么样的issues会被close 1. 伸手党 2. 一键包/环境包相关 3. 提供的信息不全 + 4. 低级的如缺少依赖而导致无法运行的问题 4. 
所用的数据集是无授权数据集(游戏角色/二次元人物暂不归为此类,但是训练时候也要小心谨慎。如果能联系到官方,必须先和官方联系并核实清楚) --- - type: checkboxes diff --git a/.github/ISSUE_TEMPLATE/none.md b/.github/ISSUE_TEMPLATE/none.md index 8309857..d79881a 100644 --- a/.github/ISSUE_TEMPLATE/none.md +++ b/.github/ISSUE_TEMPLATE/none.md @@ -1,7 +1,7 @@ --- -name: default issue -about: 如果模板中没有你想发起的issue类型,可以选择此项 / If there is no issue type you want to raise, you can start with this one. +name: Default issue +about: 如果模板中没有你想发起的issue类型,可以选择此项,但这个issue会获得一个较低的处理优先级 / If there is no issue type you want to raise, you can start with this one. But this issue will get a lower priority to deal with. title: '' -labels: '' +labels: 'lower priority' assignees: '' --- From 7ea3b4cf0f0394b8286b695e3301b4917b28f29b Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 01:44:34 +0800 Subject: [PATCH 32/61] Update template --- .github/ISSUE_TEMPLATE/config.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..25ceee5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: 讨论区 + url: https://github.com/Mrs4s/go-cqhttp/discussions/ + about: 若是想询问项目技术/原理等相关,请转至讨论区或发起一个低优先级的issue From df25222784842c9b3a6a157fa8f71aff2233d393 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 01:44:51 +0800 Subject: [PATCH 33/61] Update template --- .github/ISSUE_TEMPLATE/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 25ceee5..1f44c8b 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ -blank_issues_enabled: true +blank_issues_enabled: false contact_links: - name: 讨论区 url: https://github.com/Mrs4s/go-cqhttp/discussions/ From 
e9e0900810dba0226b288b7972c9cf10b23b2e3f Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 01:46:46 +0800 Subject: [PATCH 34/61] Update template --- .github/ISSUE_TEMPLATE/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 1f44c8b..e0ff070 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -2,4 +2,4 @@ blank_issues_enabled: false contact_links: - name: 讨论区 url: https://github.com/Mrs4s/go-cqhttp/discussions/ - about: 若是想询问项目技术/原理等相关,请转至讨论区或发起一个低优先级的issue + about: 简单的询问/讨论请转至讨论区或发起一个低优先级的Default issue From f86d3a275ffe3f980c766333704665a326aa3b9d Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 01:49:08 +0800 Subject: [PATCH 35/61] Update template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 90340c4..3fe093a 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -62,7 +62,7 @@ body: attributes: label: sovits分支 options: - - 4.0 + - 4.0(Default) - 4.0-v2 - 3.0-32k - 3.0-48k From 3c3b42a6c0f0f6edf303406cb1807bf0ff5a27d2 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 02:20:30 +0800 Subject: [PATCH 36/61] Update template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 9 +- .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml | 116 ++++++++++++++++++ 2 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 3fe093a..260415e 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -7,7 +7,7 @@ body: - type: markdown attributes: value: | - #### 
提问前建议先自己去尝试解决,可以借助一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issues,在提issues之前,请先仔细阅读《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》 + #### 提问前请先自己去尝试解决,可以借助chatgpt或一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issue,在提issue之前,请先了解《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》 --- ### 什么样的issues会被close 1. 伸手党 @@ -16,6 +16,7 @@ body: 4. 低级的如缺少依赖而导致无法运行的问题 4. 所用的数据集是无授权数据集(游戏角色/二次元人物暂不归为此类,但是训练时候也要小心谨慎。如果能联系到官方,必须先和官方联系并核实清楚) --- + - type: checkboxes id: Clause attributes: @@ -37,7 +38,7 @@ body: id: System attributes: label: 系统平台版本号 - description: Windows请执行`winver`,Linux请执行`uname -a` + description: Windows请执行`winver` | Linux请执行`uname -a` validations: required: true @@ -62,7 +63,7 @@ body: attributes: label: sovits分支 options: - - 4.0(Default) + - 4.0(默认) - 4.0-v2 - 3.0-32k - 3.0-48k @@ -105,7 +106,7 @@ body: - type: textarea id: ValidOneClick attributes: - label: 截图so-vits-svc/文件夹并粘贴到此处 + label: 截图`so-vits-svc/`文件夹并粘贴到此处 validations: required: true diff --git a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml new file mode 100644 index 0000000..f9b6ae3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml @@ -0,0 +1,116 @@ +name: Ask for help +description: Encountered an error cannot be resolved by self +title: '[Help]: ' +labels: [ "help wanted" ] + +body: + - type: markdown + attributes: + value: | + #### Please try to solve the problem yourself before asking for help,You can use chatgpt or some search engines like google, bing, new bing and StackOverflow until you really find that you can't solve it by yourself. And before you raise an issue, please understand *[How To Ask Questions The Smart Way](http://www.catb.org/~esr/faqs/smart-questions.html)* in advance + --- + ### What kind of issue will be close immediately + 1. Beggars or Free Riders + 2. 
One click package / Environment package (Not using `pip install -r requirement.txt`) + 3. Incomplete information + 4. Stupid issues such as miss a dependency package + 4. Using unauthorized dataset (Game characters / anime characters are not included in this category temporarily but you still need to pay attention. If you can contact the official, you must contact the official and verify it at first.) + --- + + - type: checkboxes + id: Clause + attributes: + label: Please check the checkboxes below. + options: + - label: "I have read README.md carefully" + required: true + - label: "I have been troubleshooting issues through various search engines. The questions I want to ask are not common" + required: true + - label: "I am NOT using one click package / environment package" + required: true + + - type: markdown + attributes: + value: | + # Please fill in the following information according to your actual environment + + - type: input + id: System + attributes: + label: OS version + description: Windows run `winver` | Linux run `uname -a` + validations: + required: true + + - type: input + id: PythonVersion + attributes: + label: Python version + description: Run `python -V` to check + validations: + required: true + + - type: input + id: PyTorchVersion + attributes: + label: PyTorch version + description: Run `pip show torch` to check + validations: + required: true + + - type: dropdown + id: Branch + attributes: + label: Branch of sovits + options: + - 4.0(Default) + - 4.0-v2 + - 3.0-32k + - 3.0-48k + validations: + required: true + + - type: input + id: DatasetSource + attributes: + label: Dataset source (Used to judge the dataset quality) + description: Like: UVR-processed streaming audio / Recorded in recording studio + validations: + required: true + + - type: input + id: WhereOccurs + attributes: + label: Where thr problem occurs or what command you executed + description: Like: Preprocessing / Training / `python preprocess_hubert_f0.py` + validations: + 
required: true + + - type: textarea + id: Description + attributes: + label: Problem description + description: Describe your problem here, the more detailed the better. + validations: + required: true + + - type: textarea + id: Log + attributes: + label: Log + description: All information output from the command you executed to the end of execution + render: python + validations: + required: true + + - type: textarea + id: ValidOneClick + attributes: + label: Screenshot `so-vits-svc/` folder and paste here + validations: + required: true + + - type: textarea + id: Supplementary + attributes: + label: Supplementary description From 2513cda7ca845f6aef5050f213ffda842e90b787 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 02:21:59 +0800 Subject: [PATCH 37/61] Update template --- .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml index f9b6ae3..88b45f5 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml @@ -74,7 +74,7 @@ body: id: DatasetSource attributes: label: Dataset source (Used to judge the dataset quality) - description: Like: UVR-processed streaming audio / Recorded in recording studio + description: Like UVR-processed streaming audio / Recorded in recording studio validations: required: true @@ -82,7 +82,7 @@ body: id: WhereOccurs attributes: label: Where thr problem occurs or what command you executed - description: Like: Preprocessing / Training / `python preprocess_hubert_f0.py` + description: Like Preprocessing / Training / `python preprocess_hubert_f0.py` validations: required: true From f11a03e36bb0f8ab88efe4811c736ed59d4f4243 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 02:29:26 +0800 Subject: [PATCH 38/61] Update template --- .github/ISSUE_TEMPLATE/config.yml | 5 ++++- 
1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index e0ff070..6318e03 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,8 @@ blank_issues_enabled: false contact_links: + - name: 不知道怎么做? + url: https://github.com/Mrs4s/go-cqhttp/issues/633 + about: 建议你先查看此教程 - name: 讨论区 url: https://github.com/Mrs4s/go-cqhttp/discussions/ - about: 简单的询问/讨论请转至讨论区或发起一个低优先级的Default issue + about: 使用中若遇到问题或有新点子新需求,请先在这里求助和征求意见。 From 6a318d731bd9dc2f738fcbf26204d786c5f40666 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 02:30:46 +0800 Subject: [PATCH 39/61] Update template --- .github/ISSUE_TEMPLATE/config.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 6318e03..8a9afa2 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,8 +1,5 @@ blank_issues_enabled: false contact_links: - - name: 不知道怎么做? 
- url: https://github.com/Mrs4s/go-cqhttp/issues/633 - about: 建议你先查看此教程 - - name: 讨论区 + - name: 讨论区 / Discussions url: https://github.com/Mrs4s/go-cqhttp/discussions/ - about: 使用中若遇到问题或有新点子新需求,请先在这里求助和征求意见。 + about: 简单的询问/讨论请转至讨论区或发起一个低优先级的Default issue / For simple inquiries / discussions, please go to the discussions or raise a low priority Default issue From 097e4e1ca78f6bdaa38a55b364132b2e31007dfd Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 06:58:00 +0800 Subject: [PATCH 40/61] Update issues template --- .github/ISSUE_TEMPLATE/config.yml | 2 +- .github/ISSUE_TEMPLATE/help_wanted.yaml | 2 +- .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 8a9afa2..8770971 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,5 @@ blank_issues_enabled: false contact_links: - name: 讨论区 / Discussions - url: https://github.com/Mrs4s/go-cqhttp/discussions/ + url: https://github.com/svc-develop-team/so-vits-svc/discussions about: 简单的询问/讨论请转至讨论区或发起一个低优先级的Default issue / For simple inquiries / discussions, please go to the discussions or raise a low priority Default issue diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 260415e..2da4929 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -106,7 +106,7 @@ body: - type: textarea id: ValidOneClick attributes: - label: 截图`so-vits-svc/`文件夹并粘贴到此处 + label: 截图`so-vits-svc`文件夹并粘贴到此处 validations: required: true diff --git a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml index 88b45f5..7a47f12 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml @@ -106,7 +106,7 @@ body: - type: textarea id: ValidOneClick attributes: - 
label: Screenshot `so-vits-svc/` folder and paste here + label: Screenshot `so-vits-svc` folder and paste here validations: required: true From 1c7b1532858fcd50212b166cf412c5167bdc95e6 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 12:27:43 +0800 Subject: [PATCH 41/61] Update issues template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 10 +++++----- .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 2da4929..070cd55 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -7,7 +7,7 @@ body: - type: markdown attributes: value: | - #### 提问前请先自己去尝试解决,可以借助chatgpt或一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issue,在提issue之前,请先了解《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》 + #### 提问前请先自己去尝试解决,可以借助chatgpt或一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issue,在提issue之前,请先了解《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》。 --- ### 什么样的issues会被close 1. 
伸手党 @@ -22,11 +22,11 @@ body: attributes: label: 请勾选下方的确认框。 options: - - label: "我已仔细阅读README.md" + - label: "我已仔细阅读README.md。" required: true - - label: "我已通过各种搜索引擎排查问题,我要提出的问题并不常见" + - label: "我已通过各种搜索引擎排查问题,我要提出的问题并不常见。" required: true - - label: "我未在使用由第三方用户提供的一键包/环境包" + - label: "我未在使用由第三方用户提供的一键包/环境包。" required: true - type: markdown @@ -98,7 +98,7 @@ body: id: Log attributes: label: 日志 - description: 从执行命令到执行完毕输出的所有信息 + description: 从执行命令到执行完毕输出的所有信息(包括你所执行的命令) render: python validations: required: true diff --git a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml index 7a47f12..2f12e2d 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml @@ -7,7 +7,7 @@ body: - type: markdown attributes: value: | - #### Please try to solve the problem yourself before asking for help,You can use chatgpt or some search engines like google, bing, new bing and StackOverflow until you really find that you can't solve it by yourself. And before you raise an issue, please understand *[How To Ask Questions The Smart Way](http://www.catb.org/~esr/faqs/smart-questions.html)* in advance + #### Please try to solve the problem yourself before asking for help. You can use chatgpt or some search engines like google, bing, new bing and StackOverflow until you really find that you can't solve it by yourself. And before you raise an issue, please understand *[How To Ask Questions The Smart Way](http://www.catb.org/~esr/faqs/smart-questions.html)* in advance. --- ### What kind of issue will be close immediately 1. Beggars or Free Riders @@ -22,17 +22,17 @@ body: attributes: label: Please check the checkboxes below. options: - - label: "I have read README.md carefully" + - label: "I have readed README.md carefully." required: true - - label: "I have been troubleshooting issues through various search engines. 
The questions I want to ask are not common" + - label: "I have been troubleshooting issues through various search engines. The questions I want to ask are not common." required: true - - label: "I am NOT using one click package / environment package" + - label: "I am NOT using one click package / environment package." required: true - type: markdown attributes: value: | - # Please fill in the following information according to your actual environment + # Please fill in the following information according to your actual environment. - type: input id: System @@ -74,7 +74,7 @@ body: id: DatasetSource attributes: label: Dataset source (Used to judge the dataset quality) - description: Like UVR-processed streaming audio / Recorded in recording studio + description: Such as UVR-processed streaming audio / Recorded in recording studio validations: required: true @@ -82,7 +82,7 @@ body: id: WhereOccurs attributes: label: Where thr problem occurs or what command you executed - description: Like Preprocessing / Training / `python preprocess_hubert_f0.py` + description: Such as Preprocessing / Training / `python preprocess_hubert_f0.py` validations: required: true @@ -98,7 +98,7 @@ body: id: Log attributes: label: Log - description: All information output from the command you executed to the end of execution + description: All information output from the command you executed to the end of execution (include the command) render: python validations: required: true From 4ce3a869f6eb46fcdcb4fac8a7de1bc05f7a6112 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 12:33:38 +0800 Subject: [PATCH 42/61] Update issues template --- .github/ISSUE_TEMPLATE/help_wanted.yaml | 2 +- .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/help_wanted.yaml index 070cd55..93db849 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ 
b/.github/ISSUE_TEMPLATE/help_wanted.yaml @@ -9,7 +9,7 @@ body: value: | #### 提问前请先自己去尝试解决,可以借助chatgpt或一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issue,在提issue之前,请先了解《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》。 --- - ### 什么样的issues会被close + ### 什么样的issue会被直接close 1. 伸手党 2. 一键包/环境包相关 3. 提供的信息不全 diff --git a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml index 2f12e2d..54b5a6a 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml @@ -9,7 +9,7 @@ body: value: | #### Please try to solve the problem yourself before asking for help. You can use chatgpt or some search engines like google, bing, new bing and StackOverflow until you really find that you can't solve it by yourself. And before you raise an issue, please understand *[How To Ask Questions The Smart Way](http://www.catb.org/~esr/faqs/smart-questions.html)* in advance. --- - ### What kind of issue will be close immediately + ### What kind of issue will be closed immediately 1. Beggars or Free Riders 2. One click package / Environment package (Not using `pip install -r requirement.txt`) 3. Incomplete information @@ -32,7 +32,7 @@ body: - type: markdown attributes: value: | - # Please fill in the following information according to your actual environment. 
+ # Please fill in the following information according to your actual environment - type: input id: System From eb8ef9a3059be84db93f31a227ae30bbe5def606 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 12:36:19 +0800 Subject: [PATCH 43/61] Update issues template --- .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml index 54b5a6a..67022df 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml @@ -22,7 +22,7 @@ body: attributes: label: Please check the checkboxes below. options: - - label: "I have readed README.md carefully." + - label: "I have read README.md carefully." required: true - label: "I have been troubleshooting issues through various search engines. The questions I want to ask are not common." required: true From f0ada336873fe91ac3194afa05ca8c39e4b8f5e2 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 12:41:56 +0800 Subject: [PATCH 44/61] Update issues template --- .github/ISSUE_TEMPLATE/none.md | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/none.md diff --git a/.github/ISSUE_TEMPLATE/none.md b/.github/ISSUE_TEMPLATE/none.md deleted file mode 100644 index d79881a..0000000 --- a/.github/ISSUE_TEMPLATE/none.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -name: Default issue -about: 如果模板中没有你想发起的issue类型,可以选择此项,但这个issue会获得一个较低的处理优先级 / If there is no issue type you want to raise, you can start with this one. But this issue will get a lower priority to deal with. 
-title: '' -labels: 'lower priority' -assignees: '' ---- From 2854013a8a480e5437eeb63af65569b7567e2c36 Mon Sep 17 00:00:00 2001 From: Lengyue Date: Fri, 24 Mar 2023 00:43:29 -0400 Subject: [PATCH 45/61] rm test dataset that is never used --- preprocess_flist_config.py | 12 +----------- spec_gen.py | 3 --- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/preprocess_flist_config.py b/preprocess_flist_config.py index 6a29726..2717e51 100644 --- a/preprocess_flist_config.py +++ b/preprocess_flist_config.py @@ -25,13 +25,11 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--train_list", type=str, default="./filelists/train.txt", help="path to train list") parser.add_argument("--val_list", type=str, default="./filelists/val.txt", help="path to val list") - parser.add_argument("--test_list", type=str, default="./filelists/test.txt", help="path to test list") parser.add_argument("--source_dir", type=str, default="./dataset/44k", help="path to source dir") args = parser.parse_args() train = [] val = [] - test = [] idx = 0 spk_dict = {} spk_id = 0 @@ -51,13 +49,11 @@ if __name__ == "__main__": new_wavs.append(file) wavs = new_wavs shuffle(wavs) - train += wavs[2:-2] + train += wavs[2:] val += wavs[:2] - test += wavs[-2:] shuffle(train) shuffle(val) - shuffle(test) print("Writing", args.train_list) with open(args.train_list, "w") as f: @@ -70,12 +66,6 @@ if __name__ == "__main__": for fname in tqdm(val): wavpath = fname f.write(wavpath + "\n") - - print("Writing", args.test_list) - with open(args.test_list, "w") as f: - for fname in tqdm(test): - wavpath = fname - f.write(wavpath + "\n") config_template["spk"] = spk_dict config_template["model"]["n_speakers"] = spk_id diff --git a/spec_gen.py b/spec_gen.py index 9476395..e7a5056 100644 --- a/spec_gen.py +++ b/spec_gen.py @@ -11,12 +11,9 @@ config = json.loads(data) hps = HParams(**config) train_dataset = TextAudioSpeakerLoader("filelists/train.txt", hps) -test_dataset = 
TextAudioSpeakerLoader("filelists/test.txt", hps) eval_dataset = TextAudioSpeakerLoader("filelists/val.txt", hps) for _ in tqdm(train_dataset): pass for _ in tqdm(eval_dataset): pass -for _ in tqdm(test_dataset): - pass \ No newline at end of file From 6a953317b9a699009fc256f591ef4e8ad98dec56 Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 12:47:31 +0800 Subject: [PATCH 46/61] Update issues template --- .github/ISSUE_TEMPLATE/default.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/default.md diff --git a/.github/ISSUE_TEMPLATE/default.md b/.github/ISSUE_TEMPLATE/default.md new file mode 100644 index 0000000..7ce057c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/default.md @@ -0,0 +1,7 @@ +--- +name: Default issue +about: 如果模板中没有你想发起的issue类型,可以选择此项,但这个issue也许会获得一个较低的处理优先级 / If there is no issue type you want to raise, you can start with this one. But this issue maybe will get a lower priority to deal with. +title: '' +labels: 'not urgent' +assignees: '' +--- From 75522a6ede246c9f09c31d25db6decf5688c45dd Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 12:58:22 +0800 Subject: [PATCH 47/61] Update issues template --- .github/ISSUE_TEMPLATE/{help_wanted.yaml => ask_for_help.yaml} | 2 +- .../{help_wanted_en_US.yaml => ask_for_help_en_US.yaml} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/ISSUE_TEMPLATE/{help_wanted.yaml => ask_for_help.yaml} (97%) rename .github/ISSUE_TEMPLATE/{help_wanted_en_US.yaml => ask_for_help_en_US.yaml} (97%) diff --git a/.github/ISSUE_TEMPLATE/help_wanted.yaml b/.github/ISSUE_TEMPLATE/ask_for_help.yaml similarity index 97% rename from .github/ISSUE_TEMPLATE/help_wanted.yaml rename to .github/ISSUE_TEMPLATE/ask_for_help.yaml index 93db849..e6ae629 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted.yaml +++ b/.github/ISSUE_TEMPLATE/ask_for_help.yaml @@ -106,7 +106,7 @@ body: - type: textarea id: ValidOneClick attributes: 
- label: 截图`so-vits-svc`文件夹并粘贴到此处 + label: 截图`so-vits-svc`、`logs/44k`文件夹并粘贴到此处 validations: required: true diff --git a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml b/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml similarity index 97% rename from .github/ISSUE_TEMPLATE/help_wanted_en_US.yaml rename to .github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml index 67022df..405cb98 100644 --- a/.github/ISSUE_TEMPLATE/help_wanted_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml @@ -106,7 +106,7 @@ body: - type: textarea id: ValidOneClick attributes: - label: Screenshot `so-vits-svc` folder and paste here + label: Screenshot `so-vits-svc` and `logs/44k` folders and paste here validations: required: true From 32cfec751e6e96cdc6a41eb99c7c8de324852a92 Mon Sep 17 00:00:00 2001 From: Lengyue Date: Fri, 24 Mar 2023 01:00:14 -0400 Subject: [PATCH 48/61] remove redundent spec_gen and fix related bug --- data_utils.py | 2 ++ preprocess_hubert_f0.py | 53 +++++++++++++++++++++++++++++++++++------ spec_gen.py | 19 --------------- 3 files changed, 48 insertions(+), 26 deletions(-) delete mode 100644 spec_gen.py diff --git a/data_utils.py b/data_utils.py index 5929dbc..93c3d5c 100644 --- a/data_utils.py +++ b/data_utils.py @@ -47,6 +47,8 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset): audio_norm = audio / self.max_wav_value audio_norm = audio_norm.unsqueeze(0) spec_filename = filename.replace(".wav", ".spec.pt") + + # Ideally, all data generated after Mar 25 should have .spec.pt if os.path.exists(spec_filename): spec = torch.load(spec_filename) else: diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 66bac7e..763fb0d 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -7,10 +7,12 @@ from random import shuffle import torch from glob import glob from tqdm import tqdm +from modules.mel_processing import spectrogram_torch import utils import logging -logging.getLogger('numba').setLevel(logging.WARNING) + 
+logging.getLogger("numba").setLevel(logging.WARNING) import librosa import numpy as np @@ -29,11 +31,42 @@ def process_one(filename, hmodel): wav16k = torch.from_numpy(wav16k).to(device) c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k) torch.save(c.cpu(), soft_path) + f0_path = filename + ".f0.npy" if not os.path.exists(f0_path): - f0 = utils.compute_f0_dio(wav, sampling_rate=sampling_rate, hop_length=hop_length) + f0 = utils.compute_f0_dio( + wav, sampling_rate=sampling_rate, hop_length=hop_length + ) np.save(f0_path, f0) + spec_path = filename.replace(".wav", ".spec.pt") + if not os.path.exists(spec_path): + # Process spectrogram + # The following code can't be replaced by torch.FloatTensor(wav) + # because load_wav_to_torch return a tensor that need to be normalized + + audio, sr = utils.load_wav_to_torch(filename) + if sr != hps.data.sampling_rate: + raise ValueError( + "{} SR doesn't match target {} SR".format( + sr, hps.data.sampling_rate + ) + ) + + audio_norm = audio / hps.data.max_wav_value + audio_norm = audio_norm.unsqueeze(0) + + spec = spectrogram_torch( + audio_norm, + hps.data.filter_length, + hps.data.sampling_rate, + hps.data.hop_length, + hps.data.win_length, + center=False, + ) + spec = torch.squeeze(spec, 0) + torch.save(spec, spec_path) + def process_batch(filenames): print("Loading hubert for content...") @@ -46,17 +79,23 @@ def process_batch(filenames): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--in_dir", type=str, default="dataset/44k", help="path to input dir") + parser.add_argument( + "--in_dir", type=str, default="dataset/44k", help="path to input dir" + ) args = parser.parse_args() - filenames = glob(f'{args.in_dir}/*/*.wav', recursive=True) # [:10] + filenames = glob(f"{args.in_dir}/*/*.wav", recursive=True) # [:10] shuffle(filenames) - multiprocessing.set_start_method('spawn',force=True) + multiprocessing.set_start_method("spawn", force=True) num_processes = 1 chunk_size = 
int(math.ceil(len(filenames) / num_processes)) - chunks = [filenames[i:i + chunk_size] for i in range(0, len(filenames), chunk_size)] + chunks = [ + filenames[i : i + chunk_size] for i in range(0, len(filenames), chunk_size) + ] print([len(c) for c in chunks]) - processes = [multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks] + processes = [ + multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks + ] for p in processes: p.start() diff --git a/spec_gen.py b/spec_gen.py deleted file mode 100644 index e7a5056..0000000 --- a/spec_gen.py +++ /dev/null @@ -1,19 +0,0 @@ -from data_utils import TextAudioSpeakerLoader -import json -from tqdm import tqdm - -from utils import HParams - -config_path = 'configs/config.json' -with open(config_path, "r") as f: - data = f.read() -config = json.loads(data) -hps = HParams(**config) - -train_dataset = TextAudioSpeakerLoader("filelists/train.txt", hps) -eval_dataset = TextAudioSpeakerLoader("filelists/val.txt", hps) - -for _ in tqdm(train_dataset): - pass -for _ in tqdm(eval_dataset): - pass From a0f7a031cbafa953a27b04277d024a8ccf58f6cb Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 13:42:38 +0800 Subject: [PATCH 49/61] Update README.md --- README.md | 4 ++-- README_zh_CN.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5bcb49c..f98fe0a 100644 --- a/README.md +++ b/README.md @@ -75,13 +75,13 @@ dataset_raw ## 🛠️ Preprocessing -1. Resample to 44100hz +1. Resample to 44100Hz and mono ```shell python resample.py ``` -2. Automatically split the dataset into training, validation, and test sets, and generate configuration files +2. 
Automatically split the dataset into training and validation sets, and generate configuration files ```shell python preprocess_flist_config.py diff --git a/README_zh_CN.md b/README_zh_CN.md index ace8ef4..01cf8f4 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -75,13 +75,13 @@ dataset_raw ## 🛠️ 数据预处理 -1. 重采样至 44100hz +1. 重采样至44100Hz单声道 ```shell python resample.py ``` -2. 自动划分训练集 验证集 测试集 以及自动生成配置文件 +2. 自动划分训练集、验证集,以及自动生成配置文件 ```shell python preprocess_flist_config.py From 58322242ac0e66f792f43016aa7a220b26dbfc0c Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 16:59:47 +0800 Subject: [PATCH 50/61] Update README.md --- README.md | 14 ++++++++++++-- README_zh_CN.md | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f98fe0a..3da7179 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ Although the pretrained model generally does not cause any copyright problems, p Simply place the dataset in the `dataset_raw` directory with the following file structure. -```shell +``` dataset_raw ├───speaker0 │ ├───xxx1-xxx1.wav @@ -73,6 +73,16 @@ dataset_raw └───xxx7-xxx007.wav ``` +You can customize the speaker name. + +``` +dataset_raw +└───suijiSUI + ├───1.wav + ├───... + └───25788785-20221210-200143-856_01_(Vocals)_0_0.wav +``` + ## 🛠️ Preprocessing 1. Resample to 44100Hz and mono @@ -170,7 +180,7 @@ Use [onnx_export.py](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/on Note: For Hubert Onnx models, please use the models provided by MoeSS. Currently, they cannot be exported on their own (Hubert in fairseq has many unsupported operators and things involving constants that can cause errors or result in problems with the input/output shape and results when exported.) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) -## Previous contributors +## ☀️ Previous contributors For some reason the author deleted the original repository. 
Because of the negligence of the organization members, the contributor list was cleared because all files were directly reuploaded to this repository at the beginning of the reconstruction of this repository. Now add a previous contributor list to README.md. diff --git a/README_zh_CN.md b/README_zh_CN.md index 01cf8f4..e3bdfb6 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -61,7 +61,7 @@ http://obs.cstcloud.cn/share/obs/sankagenkeshi/checkpoint_best_legacy_500.pt 仅需要以以下文件结构将数据集放入dataset_raw目录即可 -```shell +``` dataset_raw ├───speaker0 │ ├───xxx1-xxx1.wav @@ -73,6 +73,16 @@ dataset_raw └───xxx7-xxx007.wav ``` +可以自定义说话人名称 + +``` +dataset_raw +└───suijiSUI + ├───1.wav + ├───... + └───25788785-20221210-200143-856_01_(Vocals)_0_0.wav +``` + ## 🛠️ 数据预处理 1. 重采样至44100Hz单声道 @@ -170,7 +180,7 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " + 注意:Hubert Onnx模型请使用MoeSS提供的模型,目前无法自行导出(fairseq中Hubert有不少onnx不支持的算子和涉及到常量的东西,在导出时会报错或者导出的模型输入输出shape和结果都有问题) [Hubert4.0](https://huggingface.co/NaruseMioShirakana/MoeSS-SUBModel) -## 旧贡献者 +## ☀️ 旧贡献者 因为某些原因原作者进行了删库处理,本仓库重建之初由于组织成员疏忽直接重新上传了所有文件导致以前的contributors全部木大,现在在README里重新添加一个旧贡献者列表 From 5197749ce57cf7b03ab0ea3be367e1a95d63a9fd Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 18:03:39 +0800 Subject: [PATCH 51/61] Update issues template --- .github/ISSUE_TEMPLATE/ask_for_help.yaml | 4 ++-- .github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/ask_for_help.yaml b/.github/ISSUE_TEMPLATE/ask_for_help.yaml index e6ae629..d224ca8 100644 --- a/.github/ISSUE_TEMPLATE/ask_for_help.yaml +++ b/.github/ISSUE_TEMPLATE/ask_for_help.yaml @@ -7,7 +7,7 @@ body: - type: markdown attributes: value: | - #### 提问前请先自己去尝试解决,可以借助chatgpt或一些搜索引擎(谷歌/必应/New 
Bing/StackOverflow等等)。如果实在无法自己解决再发issue,在提issue之前,请先了解《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》。 + #### 提问前请先自己去尝试解决,比如查看[本仓库wiki中的Quick solution](https://github.com/svc-develop-team/so-vits-svc/wiki/Quick-solution),也可以借助chatgpt或一些搜索引擎(谷歌/必应/New Bing/StackOverflow等等)。如果实在无法自己解决再发issue,在提issue之前,请先了解《[提问的智慧](https://github.com/ryanhanwu/How-To-Ask-Questions-The-Smart-Way/blob/main/README-zh_CN.md)》。 --- ### 什么样的issue会被直接close 1. 伸手党 @@ -22,7 +22,7 @@ body: attributes: label: 请勾选下方的确认框。 options: - - label: "我已仔细阅读README.md。" + - label: "我已仔细阅读[README.md](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/README_zh_CN.md)和[wiki中的Quick solution](https://github.com/svc-develop-team/so-vits-svc/wiki/Quick-solution)。" required: true - label: "我已通过各种搜索引擎排查问题,我要提出的问题并不常见。" required: true diff --git a/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml b/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml index 405cb98..6f8b99f 100644 --- a/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml @@ -7,7 +7,7 @@ body: - type: markdown attributes: value: | - #### Please try to solve the problem yourself before asking for help. You can use chatgpt or some search engines like google, bing, new bing and StackOverflow until you really find that you can't solve it by yourself. And before you raise an issue, please understand *[How To Ask Questions The Smart Way](http://www.catb.org/~esr/faqs/smart-questions.html)* in advance. + #### Please try to solve the problem yourself before asking for help. At first you can read *[Quick solution in wiki](https://github.com/svc-develop-team/so-vits-svc/wiki/Quick-solution)*. Then you can use chatgpt or some search engines like google, bing, new bing and StackOverflow until you really find that you can't solve it by yourself. 
And before you raise an issue, please understand *[How To Ask Questions The Smart Way](http://www.catb.org/~esr/faqs/smart-questions.html)* in advance. --- ### What kind of issue will be closed immediately 1. Beggars or Free Riders @@ -22,7 +22,7 @@ body: attributes: label: Please check the checkboxes below. options: - - label: "I have read README.md carefully." + - label: "I have read *[README.md](https://github.com/svc-develop-team/so-vits-svc/blob/4.0/README.md)* and *[Quick solution in wiki](https://github.com/svc-develop-team/so-vits-svc/wiki/Quick-solution)* carefully." required: true - label: "I have been troubleshooting issues through various search engines. The questions I want to ask are not common." required: true From c6d95f9af97ba82f2ad6c71c7de2a6c3e3795451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Fri, 24 Mar 2023 20:22:52 +0800 Subject: [PATCH 52/61] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 3da7179..4e318ce 100644 --- a/README.md +++ b/README.md @@ -224,3 +224,6 @@ For some reason the author deleted the original repository. 
Because of the negli #### 《[中华人民共和国刑法](http://gongbao.court.gov.cn/Details/f8e30d0689b23f57bfc782d21035c3.html?sw=%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD%E5%88%91%E6%B3%95)》 #### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 + +## 💪 Thanks to all contributors for their efforts +![contributors](https://github.com/svc-develop-team/so-vits-svc/graphs/contributors) From 345e2d35c8c037d49ef2ff4598d2cda53447b41f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Fri, 24 Mar 2023 20:23:30 +0800 Subject: [PATCH 53/61] Update README_zh_CN.md --- README_zh_CN.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README_zh_CN.md b/README_zh_CN.md index e3bdfb6..602eba7 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -221,3 +221,6 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " #### 《[中华人民共和国刑法](http://gongbao.court.gov.cn/Details/f8e30d0689b23f57bfc782d21035c3.html?sw=中华人民共和国刑法)》 #### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 + +## 💪 感谢所有的贡献者 +![contributors](https://github.com/svc-develop-team/so-vits-svc/graphs/contributors) From 847ce184aa595e2d6bb02cada06e12a6d6ccd54c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Fri, 24 Mar 2023 20:24:53 +0800 Subject: [PATCH 54/61] Update README_zh_CN.md --- README_zh_CN.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 602eba7..f00141f 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -223,4 +223,6 @@ python inference_main.py -m "logs/44k/G_30400.pth" -c "configs/config.json" -n " #### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 ## 💪 感谢所有的贡献者 
-![contributors](https://github.com/svc-develop-team/so-vits-svc/graphs/contributors) + + + From 6122b3ee153a5dbdba2f196e369d898bedc546ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Fri, 24 Mar 2023 20:25:15 +0800 Subject: [PATCH 55/61] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4e318ce..7cc3567 100644 --- a/README.md +++ b/README.md @@ -226,4 +226,6 @@ For some reason the author deleted the original repository. Because of the negli #### 《[中华人民共和国民法典](http://gongbao.court.gov.cn/Details/51eb6750b8361f79be8f90d09bc202.html)》 ## 💪 Thanks to all contributors for their efforts -![contributors](https://github.com/svc-develop-team/so-vits-svc/graphs/contributors) + + + From 01a0ff9870aa46809925b1b75601c7b68a50d192 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Fri, 24 Mar 2023 23:13:44 +0800 Subject: [PATCH 56/61] readme: Python Version --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7cc3567..3863d90 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,10 @@ The singing voice conversion model uses SoftVC content encoder to extract source - Added an option 1: automatic pitch prediction for vc mode, which means that you don't need to manually enter the pitch key when converting speech, and the pitch of male and female voices can be automatically converted. However, this mode will cause pitch shift when converting songs. - Added option 2: reduce timbre leakage through k-means clustering scheme, making the timbre more similar to the target timbre. +## 💬 About Python Version + +After conducting tests, we believe that the project runs stably on Python version 3.8.9. 
+ ## 📥 Pre-trained Model Files #### **Required** From c3fee8918ed3e8320237ec04cd16013a0142cefc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Fri, 24 Mar 2023 23:15:47 +0800 Subject: [PATCH 57/61] Update README_zh_CN.md --- README_zh_CN.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README_zh_CN.md b/README_zh_CN.md index f00141f..a6b279b 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -35,6 +35,10 @@ + 增加了可选项 1:vc模式自动预测音高f0,即转换语音时不需要手动输入变调key,男女声的调能自动转换,但仅限语音转换,该模式转换歌声会跑调 + 增加了可选项 2:通过kmeans聚类方案减小音色泄漏,即使得音色更加像目标音色 +## 💬 关于 Python 版本问题 + +我们在进行测试后,认为 Python 3.8.9 版本能够稳定地运行该项目 + ## 📥 预先下载的模型文件 #### **必须项** From 3fb7fe31d314e7970b6ba545c3e9d67d3af33117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=9A=E7=8E=A9=E5=B9=BB=E7=81=B5qwq?= <94176676+HuanLinOTO@users.noreply.github.com> Date: Fri, 24 Mar 2023 23:30:49 +0800 Subject: [PATCH 58/61] Update ask_for_help.yaml --- .github/ISSUE_TEMPLATE/ask_for_help.yaml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/ask_for_help.yaml b/.github/ISSUE_TEMPLATE/ask_for_help.yaml index d224ca8..5418c69 100644 --- a/.github/ISSUE_TEMPLATE/ask_for_help.yaml +++ b/.github/ISSUE_TEMPLATE/ask_for_help.yaml @@ -41,6 +41,13 @@ body: description: Windows请执行`winver` | Linux请执行`uname -a` validations: required: true + - type: input + id: GPU + attributes: + label: GPU 型号 + description: 请执行`nvidia-smi` + validations: + required: true - type: input id: PythonVersion @@ -54,7 +61,7 @@ body: id: PyTorchVersion attributes: label: PyTorch版本 - description: 执行`pip show torch`以查看 + description: 执行`pip show torch`以查看 validations: required: true @@ -98,7 +105,7 @@ body: id: Log attributes: label: 日志 - description: 从执行命令到执行完毕输出的所有信息(包括你所执行的命令) + description: 将从执行命令到执行完毕输出的所有信息(包括你所执行的命令)粘贴到 https://pastebin.com/ 并把剪贴板链接贴到这里 render: python validations: required: true From 
b74337b7ec4f2769c9e09254127d4f1de0572c4f Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Fri, 24 Mar 2023 23:39:21 +0800 Subject: [PATCH 59/61] Update issues template --- .github/ISSUE_TEMPLATE/ask_for_help.yaml | 11 ++++++----- .github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml | 14 +++++++++++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/ask_for_help.yaml b/.github/ISSUE_TEMPLATE/ask_for_help.yaml index 5418c69..b20b072 100644 --- a/.github/ISSUE_TEMPLATE/ask_for_help.yaml +++ b/.github/ISSUE_TEMPLATE/ask_for_help.yaml @@ -38,14 +38,15 @@ body: id: System attributes: label: 系统平台版本号 - description: Windows请执行`winver` | Linux请执行`uname -a` + description: Windows执行`winver` | Linux执行`uname -a` validations: required: true + - type: input id: GPU attributes: label: GPU 型号 - description: 请执行`nvidia-smi` + description: 执行`nvidia-smi` validations: required: true @@ -53,7 +54,7 @@ body: id: PythonVersion attributes: label: Python版本 - description: 执行`python -V`以查看 + description: 执行`python -V` validations: required: true @@ -61,7 +62,7 @@ body: id: PyTorchVersion attributes: label: PyTorch版本 - description: 执行`pip show torch`以查看 + description: 执行`pip show torch` validations: required: true @@ -105,7 +106,7 @@ body: id: Log attributes: label: 日志 - description: 将从执行命令到执行完毕输出的所有信息(包括你所执行的命令)粘贴到 https://pastebin.com/ 并把剪贴板链接贴到这里 + description: 将从执行命令到执行完毕输出的所有信息(包括你所执行的命令)粘贴到[pastebin.com](https://pastebin.com/)并把剪贴板链接贴到这里 render: python validations: required: true diff --git a/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml b/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml index 6f8b99f..f838b5a 100644 --- a/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml +++ b/.github/ISSUE_TEMPLATE/ask_for_help_en_US.yaml @@ -14,7 +14,7 @@ body: 2. One click package / Environment package (Not using `pip install -r requirement.txt`) 3. Incomplete information 4. Stupid issues such as miss a dependency package - 4. 
Using unauthorized dataset (Game characters / anime characters are not included in this category temporarily but you still need to pay attention. If you can contact the official, you must contact the official and verify it at first.) + 4. Using unlicenced dataset (Game characters / anime characters are not included in this category temporarily but you still need to pay attention. If you can contact the official, you must contact the official and verify it at first.) --- - type: checkboxes @@ -42,11 +42,19 @@ body: validations: required: true + - type: input + id: GPU + attributes: + label: GPU + description: Run `nvidia-smi` + validations: + required: true + - type: input id: PythonVersion attributes: label: Python version - description: Run `python -V` to check + description: Run `python -V` validations: required: true @@ -54,7 +62,7 @@ body: id: PyTorchVersion attributes: label: PyTorch version - description: Run `pip show torch` to check + description: Run `pip show torch` validations: required: true From a8579e1b0291921e182d3f8c6fcc82b66b76af01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=9D=CE=B1=CF=81=CE=BF=CF=85=CF=83=CE=AD=C2=B7=CE=BC?= =?UTF-8?q?=C2=B7=CE=B3=CE=B9=CE=BF=CF=85=CE=BC=CE=B5=CE=BC=CE=AF=C2=B7?= =?UTF-8?q?=CE=A7=CE=B9=CE=BD=CE=B1=CE=BA=CE=AC=CE=BD=CE=BD=CE=B1?= <40709280+NaruseMioShirakana@users.noreply.github.com> Date: Sun, 26 Mar 2023 00:23:04 +0800 Subject: [PATCH 60/61] Update onnx_export.py --- onnx_export.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/onnx_export.py b/onnx_export.py index 7914d12..a70a912 100644 --- a/onnx_export.py +++ b/onnx_export.py @@ -16,11 +16,12 @@ def main(NetExport): for i in SVCVITS.parameters(): i.requires_grad = False - test_hidden_unit = torch.rand(1, 10, 256) - test_pitch = torch.rand(1, 10) - test_mel2ph = torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).unsqueeze(0) - test_uv = torch.ones(1, 10, dtype=torch.float32) - test_noise = torch.randn(1, 192, 10) + n_frame = 10 + 
test_hidden_unit = torch.rand(1, n_frame, 256) + test_pitch = torch.rand(1, n_frame) + test_mel2ph = torch.arange(0, n_frame, dtype=torch.int64)[None] # torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).unsqueeze(0) + test_uv = torch.ones(1, n_frame, dtype=torch.float32) + test_noise = torch.randn(1, 192, n_frame) test_sid = torch.LongTensor([0]) input_names = ["c", "f0", "mel2ph", "uv", "noise", "sid"] output_names = ["audio", ] From b7696fb9ef07af0706d0dcc3bb567db499a2da74 Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Mon, 27 Mar 2023 21:34:26 +0800 Subject: [PATCH 61/61] Debug audio samplerate --- webUI.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/webUI.py b/webUI.py index 648e88c..47e99fe 100644 --- a/webUI.py +++ b/webUI.py @@ -8,6 +8,7 @@ import numpy as np import soundfile from inference.infer_tool import Svc import logging +import torch logging.getLogger('numba').setLevel(logging.WARNING) logging.getLogger('markdown_it').setLevel(logging.WARNING) @@ -17,6 +18,10 @@ logging.getLogger('multipart').setLevel(logging.WARNING) model = None spk = None +cuda = [] +if torch.cuda.is_available(): + for i in range(torch.cuda.device_count()): + cuda.append("cuda:{}".format(i)) def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num): global model @@ -31,7 +36,7 @@ def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise if len(audio.shape) > 1: audio = librosa.to_mono(audio.transpose(1, 0)) temp_path = "temp.wav" - soundfile.write(temp_path, audio, model.target_sample, format="wav") + soundfile.write(temp_path, audio, sampling_rate, format="wav") _audio = model.slice_inference(temp_path, sid, vc_transform, slice_db, cluster_ratio, auto_f0, noise_scale,pad_seconds,cl_num,lg_num,lgr_num) model.clear_empty() os.remove(temp_path) @@ -59,7 +64,7 @@ with app: 下面是聚类模型文件选择,没有可以不填: """) cluster_model_path = gr.File(label="聚类模型文件") - device = 
gr.Dropdown(label="推理设备,留白则为自动选择cpu和gpu",choices=[None,"gpu","cpu"],value=None) + device = gr.Dropdown(label="推理设备,默认为自动选择cpu和gpu",choices=["Auto",*cuda,"cpu"],value="Auto") gr.Markdown(value=""" 全部上传完毕后(全部文件模块显示download),点击模型解析进行解析: """) @@ -82,9 +87,10 @@ with app: def modelAnalysis(model_path,config_path,cluster_model_path,device): try: global model - model = Svc(model_path.name, config_path.name,device=device if device!="" else None,cluster_model_path= cluster_model_path.name if cluster_model_path!=None else "") + model = Svc(model_path.name, config_path.name,device=device if device!="Auto" else None,cluster_model_path= cluster_model_path.name if cluster_model_path!=None else "") spks = list(model.spk2id.keys()) - return sid.update(choices = spks,value=spks[0]),"ok" + device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev) + return sid.update(choices = spks,value=spks[0]),"ok,模型被加载到了设备{}之上".format(device_name) except Exception as e: return "","异常信息:"+str(e)+"\n请排障后重试" vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num], [vc_output1, vc_output2])