From 8dfc86742d0a898e7d384ddba721d7949fb9b13f Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Sat, 22 Jul 2023 20:38:13 +0800 Subject: [PATCH 01/23] Debug Rmvpe --- modules/F0Predictor/rmvpe/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/F0Predictor/rmvpe/inference.py b/modules/F0Predictor/rmvpe/inference.py index 40b6e94..02d2188 100644 --- a/modules/F0Predictor/rmvpe/inference.py +++ b/modules/F0Predictor/rmvpe/inference.py @@ -28,7 +28,7 @@ class RMVPE: def mel2hidden(self, mel): with torch.no_grad(): n_frames = mel.shape[-1] - mel = F.pad(mel, (0, 32 * ((n_frames - 1) // 32 + 1) - n_frames), mode='reflect') + mel = F.pad(mel, (0, 32 * ((n_frames - 1) // 32 + 1) - n_frames), mode='constant') hidden = self.model(mel) return hidden[:, :n_frames] From ba5d2c80bac5c06a01752eed479f7bf27bdbb301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 21:22:04 +0800 Subject: [PATCH 02/23] Update preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 15556c6..20f33b9 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -111,7 +111,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None): if torch.cuda.is_available(): gpu_id = rank % torch.cuda.device_count() device = torch.device(f"cuda:{gpu_id}") - print("Rank {rank} uses device {device}") + print(f"Rank {rank} uses device {device}") hmodel = utils.get_speech_encoder(speech_encoder, device=device) print("Loaded speech encoder.") for filename in tqdm(file_chunk): From 75988d007cbecb2c4f1b350cf752f39e4df5cbf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 21:24:15 +0800 Subject: [PATCH 03/23] Update preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 20f33b9..9ab35ee 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -32,8 +32,12 @@ def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None): wav, sr = librosa.load(filename, sr=sampling_rate) audio_norm = torch.FloatTensor(wav) audio_norm = audio_norm.unsqueeze(0) - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - + rank = mp.current_process()._identity + rank = rank[0] if len(rank) > 0 else 0 + if torch.cuda.is_available(): + gpu_id = rank % torch.cuda.device_count() + device = torch.device(f"cuda:{gpu_id}") + print(f"Rank {rank} uses device {device}") soft_path = filename + ".soft.pt" if not os.path.exists(soft_path): wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000) From 989b7194f6181c3f4fd09caf812b471696604c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 21:25:50 +0800 Subject: [PATCH 04/23] Update preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 1 - 1 file changed, 1 deletion(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 9ab35ee..1f6cab4 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -37,7 +37,6 @@ def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None): if torch.cuda.is_available(): gpu_id = rank % torch.cuda.device_count() device = torch.device(f"cuda:{gpu_id}") - print(f"Rank {rank} uses device {device}") soft_path = filename + ".soft.pt" if not os.path.exists(soft_path): wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000) From 61c78b45608e4845d5669d225eec23aa706b68a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 21:34:11 +0800 Subject: [PATCH 05/23] Update preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 1f6cab4..72f8df4 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -27,16 +27,12 @@ hop_length = hps.data.hop_length speech_encoder = hps["model"]["speech_encoder"] -def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None): +def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None,rank): # print(filename) wav, sr = librosa.load(filename, sr=sampling_rate) audio_norm = torch.FloatTensor(wav) audio_norm = audio_norm.unsqueeze(0) - rank = mp.current_process()._identity - rank = rank[0] if len(rank) > 0 else 0 - if torch.cuda.is_available(): - gpu_id = rank % torch.cuda.device_count() - device = torch.device(f"cuda:{gpu_id}") + device = torch.device(f"cuda:{rank}") soft_path = filename + ".soft.pt" if not os.path.exists(soft_path): wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000) @@ -118,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None): hmodel = utils.get_speech_encoder(speech_encoder, device=device) print("Loaded speech encoder.") for filename in tqdm(file_chunk): - process_one(filename, hmodel, f0p, diff, mel_extractor) + process_one(filename, hmodel, f0p, diff, mel_extractor, rank) def parallel_process(filenames, num_processes, f0p, diff, mel_extractor): with ProcessPoolExecutor(max_workers=num_processes) as executor: From 7423deb26b6390c152416f1a52d0227a3169d95e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 21:35:52 +0800 Subject: [PATCH 06/23] Update preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 72f8df4..8909257 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -27,7 +27,7 @@ hop_length = hps.data.hop_length speech_encoder = hps["model"]["speech_encoder"] -def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None,rank): +def process_one(filename, hmodel,f0p,diff=False,rank,mel_extractor=None): # print(filename) wav, sr = librosa.load(filename, sr=sampling_rate) audio_norm = torch.FloatTensor(wav) @@ -114,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None): hmodel = utils.get_speech_encoder(speech_encoder, device=device) print("Loaded speech encoder.") for filename in tqdm(file_chunk): - process_one(filename, hmodel, f0p, diff, mel_extractor, rank) + process_one(filename, hmodel, f0p, diff, rank, mel_extractor) def parallel_process(filenames, num_processes, f0p, diff, mel_extractor): with ProcessPoolExecutor(max_workers=num_processes) as executor: From 0f5847a64c37ae2c3f28439f9f78222855ab5e1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 21:36:27 +0800 Subject: [PATCH 07/23] Update preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 8909257..1e59d1e 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -27,7 +27,7 @@ hop_length = hps.data.hop_length speech_encoder = hps["model"]["speech_encoder"] -def process_one(filename, hmodel,f0p,diff=False,rank,mel_extractor=None): +def process_one(filename, hmodel,f0p,rank,diff=False,mel_extractor=None): # print(filename) wav, sr = librosa.load(filename, sr=sampling_rate) audio_norm = torch.FloatTensor(wav) @@ -114,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None): hmodel = utils.get_speech_encoder(speech_encoder, device=device) print("Loaded speech encoder.") for filename in tqdm(file_chunk): - process_one(filename, hmodel, f0p, diff, rank, mel_extractor) + process_one(filename, hmodel, f0p, rank, diff, mel_extractor) def parallel_process(filenames, num_processes, f0p, diff, mel_extractor): with ProcessPoolExecutor(max_workers=num_processes) as executor: From d07d92b61adaa29869b3c645de44bde3058ed599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 21:56:02 +0800 Subject: [PATCH 08/23] Update preprocess_flist_config.py --- preprocess_flist_config.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/preprocess_flist_config.py b/preprocess_flist_config.py index f8ade33..de4df41 100644 --- a/preprocess_flist_config.py +++ b/preprocess_flist_config.py @@ -4,7 +4,7 @@ import os import re import wave from random import shuffle - +from loguru import logger from tqdm import tqdm import diffusion.logger.utils as du @@ -46,9 +46,9 @@ if __name__ == "__main__": if not file.endswith("wav"): continue if not pattern.match(file): - print(f"warning:文件名{file}中包含非字母数字下划线,可能会导致错误。(也可能不会)") + logger.warning(f"文件名{file}中包含非字母数字下划线,可能会导致错误。(也可能不会)") if get_wav_duration(file) < 0.3: - print("skip too short audio:", file) + logger.info("Skip too short audio:" + file) continue new_wavs.append(file) wavs = new_wavs @@ -59,13 +59,13 @@ if __name__ == "__main__": shuffle(train) shuffle(val) - print("Writing", args.train_list) + logger.info("Writing" + args.train_list) with open(args.train_list, "w") as f: for fname in tqdm(train): wavpath = fname f.write(wavpath + "\n") - print("Writing", args.val_list) + logger.info("Writing" + args.val_list) with open(args.val_list, "w") as f: for fname in tqdm(val): wavpath = fname @@ -97,8 +97,8 @@ if __name__ == "__main__": if args.vol_aug: config_template["train"]["vol_aug"] = config_template["model"]["vol_embedding"] = True - print("Writing configs/config.json") + logger.info("Writing to configs/config.json") with open("configs/config.json", "w") as f: json.dump(config_template, f, indent=2) - print("Writing configs/diffusion.yaml") + logger.info("Writing to configs/diffusion.yaml") du.save_config("configs/diffusion.yaml",d_config_template) From 1cdccce44ac5447cf14fe847a1dacc2ad83ad2c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 22:01:44 +0800 Subject: [PATCH 09/23] Update preprocess_hubert_f0.py --- preprocess_hubert_f0.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 1e59d1e..ec6dcc3 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -5,6 +5,7 @@ import random from concurrent.futures import ProcessPoolExecutor from glob import glob from random import shuffle +from loguru import logger import librosa import numpy as np @@ -28,7 +29,6 @@ speech_encoder = hps["model"]["speech_encoder"] def process_one(filename, hmodel,f0p,rank,diff=False,mel_extractor=None): - # print(filename) wav, sr = librosa.load(filename, sr=sampling_rate) audio_norm = torch.FloatTensor(wav) audio_norm = audio_norm.unsqueeze(0) @@ -104,15 +104,15 @@ def process_one(filename, hmodel,f0p,rank,diff=False,mel_extractor=None): np.save(aug_vol_path,aug_vol.to('cpu').numpy()) def process_batch(file_chunk, f0p, diff=False, mel_extractor=None): - print("Loading speech encoder for content...") + logger.info("Loading speech encoder for content...") rank = mp.current_process()._identity rank = rank[0] if len(rank) > 0 else 0 if torch.cuda.is_available(): gpu_id = rank % torch.cuda.device_count() device = torch.device(f"cuda:{gpu_id}") - print(f"Rank {rank} uses device {device}") + logger.info(f"Rank {rank} uses device {device}") hmodel = utils.get_speech_encoder(speech_encoder, device=device) - print("Loaded speech encoder.") + logger.info(f"Loaded speech encoder for rank {rank}") for filename in tqdm(file_chunk): process_one(filename, hmodel, f0p, rank, diff, mel_extractor) @@ -144,7 +144,9 @@ if __name__ == "__main__": args = parser.parse_args() f0p = args.f0_predictor print(speech_encoder) - print(f0p) + logger.info("Using " + speech_encoder + " SpeechEncoder") + logger.info("Using " + f0p + "f0 extractor") + logger.info("Using diff Mode:") print(args.use_diff) if args.use_diff: print("use_diff") From 36c24022584bc9fcf9e66c455bbe4daa8af896c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 22:02:02 +0800 Subject: [PATCH 10/23] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 5b6b5f8..9cc8149 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,3 +25,4 @@ langdetect pyyaml pynvml faiss-cpu +loguru From ff07b3d9e6628ed6eac824c2a0fa80b65f4c70bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 22:02:14 +0800 Subject: [PATCH 11/23] Update requirements_win.txt --- requirements_win.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements_win.txt b/requirements_win.txt index 924a641..06ac4ab 100644 --- a/requirements_win.txt +++ b/requirements_win.txt @@ -29,3 +29,4 @@ langdetect pyyaml pynvml faiss-cpu +loguru From 12a3ba587e34470a1e30294cf73c4186b65063be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 22:04:44 +0800 Subject: [PATCH 12/23] Update train_diff.py --- train_diff.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/train_diff.py b/train_diff.py index adf5fb3..4cdc0eb 100644 --- a/train_diff.py +++ b/train_diff.py @@ -8,7 +8,7 @@ from diffusion.logger import utils from diffusion.solver import train from diffusion.unit2mel import Unit2Mel from diffusion.vocoder import Vocoder - +from loguru import logger def parse_args(args=None, namespace=None): """Parse command-line arguments.""" @@ -28,8 +28,8 @@ if __name__ == '__main__': # load config args = utils.load_config(cmd.config) - print(' > config:', cmd.config) - print(' > exp:', args.env.expdir) + logger.info(' > config:'+ cmd.config) + logger.info(' > exp:'+ args.env.expdir) # load vocoder vocoder = Vocoder(args.vocoder.type, args.vocoder.ckpt, device=args.device) @@ -47,7 +47,7 @@ if __name__ == '__main__': args.model.k_step_max ) - print(f' > INFO: now model timesteps is {model.timesteps}, and k_step_max is {model.k_step_max}') + logger.info(f' > Now model timesteps is {model.timesteps}, and k_step_max is {model.k_step_max}') # load parameters optimizer = torch.optim.AdamW(model.parameters()) From 76974269b39a2d2730d540bbd2ebed08c97348ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 22:06:22 +0800 Subject: [PATCH 13/23] Update preprocess_flist_config.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- preprocess_flist_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/preprocess_flist_config.py b/preprocess_flist_config.py index de4df41..30a59d6 100644 --- a/preprocess_flist_config.py +++ b/preprocess_flist_config.py @@ -4,6 +4,7 @@ import os import re import wave from random import shuffle + from loguru import logger from tqdm import tqdm From befc4593c3c9e082f1690c2f7509dc6a47ecf3aa Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Sat, 22 Jul 2023 23:02:52 +0800 Subject: [PATCH 14/23] ruff fix --- preprocess_hubert_f0.py | 2 +- train_diff.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index ec6dcc3..b5d1f76 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -5,12 +5,12 @@ import random from concurrent.futures import ProcessPoolExecutor from glob import glob from random import shuffle -from loguru import logger import librosa import numpy as np import torch import torch.multiprocessing as mp +from loguru import logger from tqdm import tqdm import diffusion.logger.utils as du diff --git a/train_diff.py b/train_diff.py index 4cdc0eb..65ba382 100644 --- a/train_diff.py +++ b/train_diff.py @@ -1,6 +1,7 @@ import argparse import torch +from loguru import logger from torch.optim import lr_scheduler from diffusion.data_loaders import get_data_loaders @@ -8,7 +9,7 @@ from diffusion.logger import utils from diffusion.solver import train from diffusion.unit2mel import Unit2Mel from diffusion.vocoder import Vocoder -from loguru import logger + def parse_args(args=None, namespace=None): """Parse command-line arguments.""" From 40777bbeb45e28e9e6163f1999c92c74716244f5 Mon Sep 17 00:00:00 2001 From: YuChuXi <81864000+yuxibenxi@users.noreply.github.com> Date: Sat, 22 Jul 2023 23:04:49 +0800 Subject: [PATCH 15/23] Update README_zh_CN.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修改了加速预处理的部分 --- README_zh_CN.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 4adbab6..84252c8 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -259,13 +259,6 @@ wavlmbase+ ```shell python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug ``` - -**加速预处理** -如若您的数据集比较大,可以尝试添加`--num_processes`参数: -```shell -python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug --num_processes 8 -``` -所有的Workers会被自动分配到多个GPU上(如果您有多个GPU的话) 使用后训练出的模型将匹配到输入源响度,否则为训练集响度。 #### 此时可以在生成的 config.json 与 diffusion.yaml 修改部分参数 @@ -325,6 +318,13 @@ rmvpe python preprocess_hubert_f0.py --f0_predictor dio --use_diff ``` +**加速预处理** +如若您的数据集比较大,可以尝试添加`--num_processes`参数: +```shell +python preprocess_hubert_f0.py --f0_predictor dio --use_diff --num_processes 8 +``` +所有的Workers会被自动分配到多个线程上 + 执行完以上步骤后 dataset 目录便是预处理完成的数据,可以删除 dataset_raw 文件夹了 ## 🏋️‍ 训练 From 968e80b7b45725aa0339d0a5956b3674eab22c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 23:48:47 +0800 Subject: [PATCH 16/23] Update README.md --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 53f4944..fd36413 100644 --- a/README.md +++ b/README.md @@ -258,15 +258,6 @@ Add `--vol_aug` if you want to enable loudness embedding: python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug ``` -**Speed Up preprocess** - -If your dataset is pretty large,you can increase the param `--num_processes` like that: - -```shell -python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug --num_processes 8 -``` -All the worker will be assigned to different GPU if you have more than one GPUs. - After enabling loudness embedding, the trained model will match the loudness of the input source; otherwise, it will match the loudness of the training set. #### You can modify some parameters in the generated config.json and diffusion.yaml @@ -324,6 +315,15 @@ If you want shallow diffusion (optional), you need to add the `--use_diff` param python preprocess_hubert_f0.py --f0_predictor dio --use_diff ``` +**Speed Up preprocess** + +If your dataset is pretty large,you can increase the param `--num_processes` like that: + +```shell +python preprocess_hubert_f0.py --speech_encoder vec768l12 --vol_aug --num_processes 8 +``` +All the worker will be assigned to different GPU if you have more than one GPUs. + After completing the above steps, the dataset directory will contain the preprocessed data, and the dataset_raw folder can be deleted. ## 🏋️‍ Training From 85ef9ab7f9126c40215172ac907f6d82057e52bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 23:50:52 +0800 Subject: [PATCH 17/23] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fd36413..c45e554 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
- +LOGO # SoftVC VITS Singing Voice Conversion [**English**](./README.md) | [**中文简体**](./README_zh_CN.md) From 54c9473abb17f2f27cee035bd368a4470616f016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 23:51:03 +0800 Subject: [PATCH 18/23] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c45e554..9855f79 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@
LOGO + # SoftVC VITS Singing Voice Conversion [**English**](./README.md) | [**中文简体**](./README_zh_CN.md) From e50786e6270c383d2c57c0e9274cc1c3cc82a306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 22 Jul 2023 23:51:58 +0800 Subject: [PATCH 19/23] Update README_zh_CN.md --- README_zh_CN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 84252c8..d153a88 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -1,5 +1,6 @@
- +LOGO + # SoftVC VITS Singing Voice Conversion [**English**](./README.md) | [**中文简体**](./README_zh_CN.md) From 4c4093eef37369cbb0687ff317d349efe40f816e Mon Sep 17 00:00:00 2001 From: Miuzarte <982809597@qq.com> Date: Sun, 23 Jul 2023 05:56:27 +0800 Subject: [PATCH 20/23] why not rich.progress --- resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resample.py b/resample.py index 50eacc1..809cf49 100644 --- a/resample.py +++ b/resample.py @@ -7,7 +7,7 @@ from multiprocessing import cpu_count import librosa import numpy as np from scipy.io import wavfile -from tqdm import tqdm +from rich.progress import track def load_wav(wav_path): @@ -81,7 +81,7 @@ def process_all_speakers(): if os.path.isdir(spk_dir): print(spk_dir) futures = [executor.submit(process, (spk_dir, i, args)) for i in os.listdir(spk_dir) if i.endswith("wav")] - for _ in tqdm(concurrent.futures.as_completed(futures), total=len(futures)): + for _ in track(concurrent.futures.as_completed(futures), total=len(futures), description="resampling:"): pass From 3ab6f1f1749c8a9586d1636d1d6c6399fb5ae2ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AC=AC=E7=B4=97=E7=89=B9?= <66856838+Miuzarte@users.noreply.github.com> Date: Sun, 23 Jul 2023 09:25:05 +0800 Subject: [PATCH 21/23] update requirements.txt --- requirements.txt | 2 ++ requirements_onnx_encoder.txt | 2 ++ requirements_win.txt | 2 ++ 3 files changed, 6 insertions(+) diff --git a/requirements.txt b/requirements.txt index 9cc8149..2670ea6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,8 @@ torch torchaudio torchcrepe tqdm +rich.progress +loguru scikit-maad praat-parselmouth onnx diff --git a/requirements_onnx_encoder.txt b/requirements_onnx_encoder.txt index a624622..cfde17c 100644 --- a/requirements_onnx_encoder.txt +++ b/requirements_onnx_encoder.txt @@ -9,6 +9,8 @@ torch==1.13.1 torchaudio==0.13.1 torchcrepe tqdm +rich.progress +loguru scikit-maad praat-parselmouth onnx diff --git a/requirements_win.txt b/requirements_win.txt index 06ac4ab..7112b0b 100644 --- a/requirements_win.txt +++ b/requirements_win.txt @@ -15,6 +15,8 @@ sounddevice==0.4.5 SoundFile==0.10.3.post1 starlette==0.19.1 tqdm==4.63.0 +rich.progress +loguru torchcrepe scikit-maad praat-parselmouth From 174cb333f861533832503f8e7778a941bcd3a99b Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Sun, 23 Jul 2023 09:43:00 +0800 Subject: [PATCH 22/23] Debug rank GPU --- preprocess_hubert_f0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index b5d1f76..04397d7 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -114,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None): hmodel = utils.get_speech_encoder(speech_encoder, device=device) logger.info(f"Loaded speech encoder for rank {rank}") for filename in tqdm(file_chunk): - process_one(filename, hmodel, f0p, rank, diff, mel_extractor) + process_one(filename, hmodel, f0p, gpu_id, diff, mel_extractor) def parallel_process(filenames, num_processes, f0p, diff, mel_extractor): with ProcessPoolExecutor(max_workers=num_processes) as executor: From 691486fd55ac9a91a3a8920f4e0ed82fff685b5e Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Sun, 23 Jul 2023 09:47:21 +0800 Subject: [PATCH 23/23] Debug requirements and ruff fix --- requirements.txt | 5 ++--- requirements_win.txt | 3 +-- resample.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2670ea6..f86b81e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ torch torchaudio torchcrepe tqdm -rich.progress +rich loguru scikit-maad praat-parselmouth @@ -26,5 +26,4 @@ edge_tts langdetect pyyaml pynvml -faiss-cpu -loguru +faiss-cpu \ No newline at end of file diff --git a/requirements_win.txt b/requirements_win.txt index 7112b0b..461a992 100644 --- a/requirements_win.txt +++ b/requirements_win.txt @@ -15,7 +15,7 @@ sounddevice==0.4.5 SoundFile==0.10.3.post1 starlette==0.19.1 tqdm==4.63.0 -rich.progress +rich loguru torchcrepe scikit-maad @@ -31,4 +31,3 @@ langdetect pyyaml pynvml faiss-cpu -loguru diff --git a/resample.py b/resample.py index 809cf49..af421fd 100644 --- a/resample.py +++ b/resample.py @@ -6,8 +6,8 @@ from multiprocessing import cpu_count import librosa import numpy as np -from scipy.io import wavfile from rich.progress import track +from scipy.io import wavfile def load_wav(wav_path):