From 8dfc86742d0a898e7d384ddba721d7949fb9b13f Mon Sep 17 00:00:00 2001
From: ylzz1997 <ylzz1997@outlook.com>
Date: Sat, 22 Jul 2023 20:38:13 +0800
Subject: [PATCH 01/23] Debug Rmvpe

---
 modules/F0Predictor/rmvpe/inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/F0Predictor/rmvpe/inference.py b/modules/F0Predictor/rmvpe/inference.py
index 40b6e94..02d2188 100644
--- a/modules/F0Predictor/rmvpe/inference.py
+++ b/modules/F0Predictor/rmvpe/inference.py
@@ -28,7 +28,7 @@ class RMVPE:
     def mel2hidden(self, mel):
         with torch.no_grad():
             n_frames = mel.shape[-1]
-            mel = F.pad(mel, (0, 32 * ((n_frames - 1) // 32 + 1) - n_frames), mode='reflect')
+            mel = F.pad(mel, (0, 32 * ((n_frames - 1) // 32 + 1) - n_frames), mode='constant')
             hidden = self.model(mel)
             return hidden[:, :n_frames]
 

From ba5d2c80bac5c06a01752eed479f7bf27bdbb301 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 21:22:04 +0800
Subject: [PATCH 02/23] Update preprocess_hubert_f0.py

---
 preprocess_hubert_f0.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index 15556c6..20f33b9 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -111,7 +111,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None):
     if torch.cuda.is_available():
         gpu_id = rank % torch.cuda.device_count()
         device = torch.device(f"cuda:{gpu_id}")
-    print("Rank {rank} uses device {device}")
+    print(f"Rank {rank} uses device {device}")
     hmodel = utils.get_speech_encoder(speech_encoder, device=device)
     print("Loaded speech encoder.")
     for filename in tqdm(file_chunk):

From 75988d007cbecb2c4f1b350cf752f39e4df5cbf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 21:24:15 +0800
Subject: [PATCH 03/23] Update preprocess_hubert_f0.py

---
 preprocess_hubert_f0.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index 20f33b9..9ab35ee 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -32,8 +32,12 @@ def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None):
     wav, sr = librosa.load(filename, sr=sampling_rate)
     audio_norm = torch.FloatTensor(wav)
     audio_norm = audio_norm.unsqueeze(0)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
+    rank = mp.current_process()._identity
+    rank = rank[0] if len(rank) > 0 else 0
+    if torch.cuda.is_available():
+        gpu_id = rank % torch.cuda.device_count()
+        device = torch.device(f"cuda:{gpu_id}")
+    print(f"Rank {rank} uses device {device}")
     soft_path = filename + ".soft.pt"
     if not os.path.exists(soft_path):
         wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)

From 989b7194f6181c3f4fd09caf812b471696604c73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 21:25:50 +0800
Subject: [PATCH 04/23] Update preprocess_hubert_f0.py

---
 preprocess_hubert_f0.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index 9ab35ee..1f6cab4 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -37,7 +37,6 @@ def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None):
     if torch.cuda.is_available():
         gpu_id = rank % torch.cuda.device_count()
         device = torch.device(f"cuda:{gpu_id}")
-    print(f"Rank {rank} uses device {device}")
     soft_path = filename + ".soft.pt"
     if not os.path.exists(soft_path):
         wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)

From 61c78b45608e4845d5669d225eec23aa706b68a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 21:34:11 +0800
Subject: [PATCH 05/23] Update preprocess_hubert_f0.py

---
 preprocess_hubert_f0.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index 1f6cab4..72f8df4 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -27,16 +27,12 @@ hop_length = hps.data.hop_length
 speech_encoder = hps["model"]["speech_encoder"]
 
 
-def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None):
+def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None,rank):
     # print(filename)
     wav, sr = librosa.load(filename, sr=sampling_rate)
     audio_norm = torch.FloatTensor(wav)
     audio_norm = audio_norm.unsqueeze(0)
-    rank = mp.current_process()._identity
-    rank = rank[0] if len(rank) > 0 else 0
-    if torch.cuda.is_available():
-        gpu_id = rank % torch.cuda.device_count()
-        device = torch.device(f"cuda:{gpu_id}")
+    device = torch.device(f"cuda:{rank}")
     soft_path = filename + ".soft.pt"
     if not os.path.exists(soft_path):
         wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)
@@ -118,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None):
     hmodel = utils.get_speech_encoder(speech_encoder, device=device)
     print("Loaded speech encoder.")
     for filename in tqdm(file_chunk):
-        process_one(filename, hmodel, f0p, diff, mel_extractor)
+        process_one(filename, hmodel, f0p, diff, mel_extractor, rank)
 
 def parallel_process(filenames, num_processes, f0p, diff, mel_extractor):
     with ProcessPoolExecutor(max_workers=num_processes) as executor:

From 7423deb26b6390c152416f1a52d0227a3169d95e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 21:35:52 +0800
Subject: [PATCH 06/23] Update preprocess_hubert_f0.py

---
 preprocess_hubert_f0.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index 72f8df4..8909257 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -27,7 +27,7 @@ hop_length = hps.data.hop_length
 speech_encoder = hps["model"]["speech_encoder"]
 
 
-def process_one(filename, hmodel,f0p,diff=False,mel_extractor=None,rank):
+def process_one(filename, hmodel,f0p,diff=False,rank,mel_extractor=None):
     # print(filename)
     wav, sr = librosa.load(filename, sr=sampling_rate)
     audio_norm = torch.FloatTensor(wav)
@@ -114,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None):
     hmodel = utils.get_speech_encoder(speech_encoder, device=device)
     print("Loaded speech encoder.")
     for filename in tqdm(file_chunk):
-        process_one(filename, hmodel, f0p, diff, mel_extractor, rank)
+        process_one(filename, hmodel, f0p, diff, rank, mel_extractor)
 
 def parallel_process(filenames, num_processes, f0p, diff, mel_extractor):
     with ProcessPoolExecutor(max_workers=num_processes) as executor:

From 0f5847a64c37ae2c3f28439f9f78222855ab5e1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 21:36:27 +0800
Subject: [PATCH 07/23] Update preprocess_hubert_f0.py

---
 preprocess_hubert_f0.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index 8909257..1e59d1e 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -27,7 +27,7 @@ hop_length = hps.data.hop_length
 speech_encoder = hps["model"]["speech_encoder"]
 
 
-def process_one(filename, hmodel,f0p,diff=False,rank,mel_extractor=None):
+def process_one(filename, hmodel,f0p,rank,diff=False,mel_extractor=None):
     # print(filename)
     wav, sr = librosa.load(filename, sr=sampling_rate)
     audio_norm = torch.FloatTensor(wav)
@@ -114,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None):
     hmodel = utils.get_speech_encoder(speech_encoder, device=device)
     print("Loaded speech encoder.")
     for filename in tqdm(file_chunk):
-        process_one(filename, hmodel, f0p, diff, rank, mel_extractor)
+        process_one(filename, hmodel, f0p, rank, diff, mel_extractor)
 
 def parallel_process(filenames, num_processes, f0p, diff, mel_extractor):
     with ProcessPoolExecutor(max_workers=num_processes) as executor:

From d07d92b61adaa29869b3c645de44bde3058ed599 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 21:56:02 +0800
Subject: [PATCH 08/23] Update preprocess_flist_config.py

---
 preprocess_flist_config.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/preprocess_flist_config.py b/preprocess_flist_config.py
index f8ade33..de4df41 100644
--- a/preprocess_flist_config.py
+++ b/preprocess_flist_config.py
@@ -4,7 +4,7 @@ import os
 import re
 import wave
 from random import shuffle
-
+from loguru import logger
 from tqdm import tqdm
 
 import diffusion.logger.utils as du
@@ -46,9 +46,9 @@ if __name__ == "__main__":
             if not file.endswith("wav"):
                 continue
             if not pattern.match(file):
-                print(f"warning：文件名{file}中包含非字母数字下划线，可能会导致错误。（也可能不会）")
+                logger.warning(f"文件名{file}中包含非字母数字下划线，可能会导致错误。（也可能不会）")
             if get_wav_duration(file) < 0.3:
-                print("skip too short audio:", file)
+                logger.info("Skip too short audio:" + file)
                 continue
             new_wavs.append(file)
         wavs = new_wavs
@@ -59,13 +59,13 @@ if __name__ == "__main__":
     shuffle(train)
     shuffle(val)
             
-    print("Writing", args.train_list)
+    logger.info("Writing" + args.train_list)
     with open(args.train_list, "w") as f:
         for fname in tqdm(train):
             wavpath = fname
             f.write(wavpath + "\n")
         
-    print("Writing", args.val_list)
+    logger.info("Writing" + args.val_list)
     with open(args.val_list, "w") as f:
         for fname in tqdm(val):
             wavpath = fname
@@ -97,8 +97,8 @@ if __name__ == "__main__":
     if args.vol_aug:
         config_template["train"]["vol_aug"] = config_template["model"]["vol_embedding"] = True
 
-    print("Writing configs/config.json")
+    logger.info("Writing to configs/config.json")
     with open("configs/config.json", "w") as f:
         json.dump(config_template, f, indent=2)
-    print("Writing configs/diffusion.yaml")
+    logger.info("Writing to configs/diffusion.yaml")
     du.save_config("configs/diffusion.yaml",d_config_template)

From 1cdccce44ac5447cf14fe847a1dacc2ad83ad2c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 22:01:44 +0800
Subject: [PATCH 09/23] Update preprocess_hubert_f0.py

---
 preprocess_hubert_f0.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index 1e59d1e..ec6dcc3 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -5,6 +5,7 @@ import random
 from concurrent.futures import ProcessPoolExecutor
 from glob import glob
 from random import shuffle
+from loguru import logger
 
 import librosa
 import numpy as np
@@ -28,7 +29,6 @@ speech_encoder = hps["model"]["speech_encoder"]
 
 
 def process_one(filename, hmodel,f0p,rank,diff=False,mel_extractor=None):
-    # print(filename)
     wav, sr = librosa.load(filename, sr=sampling_rate)
     audio_norm = torch.FloatTensor(wav)
     audio_norm = audio_norm.unsqueeze(0)
@@ -104,15 +104,15 @@ def process_one(filename, hmodel,f0p,rank,diff=False,mel_extractor=None):
             np.save(aug_vol_path,aug_vol.to('cpu').numpy())
 
 def process_batch(file_chunk, f0p, diff=False, mel_extractor=None):
-    print("Loading speech encoder for content...")
+    logger.info("Loading speech encoder for content...")
     rank = mp.current_process()._identity
     rank = rank[0] if len(rank) > 0 else 0
     if torch.cuda.is_available():
         gpu_id = rank % torch.cuda.device_count()
         device = torch.device(f"cuda:{gpu_id}")
-    print(f"Rank {rank} uses device {device}")
+    logger.info(f"Rank {rank} uses device {device}")
     hmodel = utils.get_speech_encoder(speech_encoder, device=device)
-    print("Loaded speech encoder.")
+    logger.info(f"Loaded speech encoder for rank {rank}")
     for filename in tqdm(file_chunk):
         process_one(filename, hmodel, f0p, rank, diff, mel_extractor)
 
@@ -144,7 +144,9 @@ if __name__ == "__main__":
     args = parser.parse_args()
     f0p = args.f0_predictor
     print(speech_encoder)
-    print(f0p)
+    logger.info("Using " + speech_encoder + " SpeechEncoder")
+    logger.info("Using " + f0p + "f0 extractor")
+    logger.info("Using diff Mode:")
     print(args.use_diff)
     if args.use_diff:
         print("use_diff")

From 36c24022584bc9fcf9e66c455bbe4daa8af896c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 22:02:02 +0800
Subject: [PATCH 10/23] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 5b6b5f8..9cc8149 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,3 +25,4 @@ langdetect
 pyyaml
 pynvml
 faiss-cpu
+loguru

From ff07b3d9e6628ed6eac824c2a0fa80b65f4c70bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 22:02:14 +0800
Subject: [PATCH 11/23] Update requirements_win.txt

---
 requirements_win.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements_win.txt b/requirements_win.txt
index 924a641..06ac4ab 100644
--- a/requirements_win.txt
+++ b/requirements_win.txt
@@ -29,3 +29,4 @@ langdetect
 pyyaml
 pynvml
 faiss-cpu
+loguru

From 12a3ba587e34470a1e30294cf73c4186b65063be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 22:04:44 +0800
Subject: [PATCH 12/23] Update train_diff.py

---
 train_diff.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/train_diff.py b/train_diff.py
index adf5fb3..4cdc0eb 100644
--- a/train_diff.py
+++ b/train_diff.py
@@ -8,7 +8,7 @@ from diffusion.logger import utils
 from diffusion.solver import train
 from diffusion.unit2mel import Unit2Mel
 from diffusion.vocoder import Vocoder
-
+from loguru import logger
 
 def parse_args(args=None, namespace=None):
     """Parse command-line arguments."""
@@ -28,8 +28,8 @@ if __name__ == '__main__':
     
     # load config
     args = utils.load_config(cmd.config)
-    print(' > config:', cmd.config)
-    print(' >    exp:', args.env.expdir)
+    logger.info(' > config:'+ cmd.config)
+    logger.info(' > exp:'+ args.env.expdir)
     
     # load vocoder
     vocoder = Vocoder(args.vocoder.type, args.vocoder.ckpt, device=args.device)
@@ -47,7 +47,7 @@ if __name__ == '__main__':
                 args.model.k_step_max
                 )
     
-    print(f' > INFO: now model timesteps is {model.timesteps}, and k_step_max is {model.k_step_max}')
+    logger.info(f' > Now model timesteps is {model.timesteps}, and k_step_max is {model.k_step_max}')
     
     # load parameters
     optimizer = torch.optim.AdamW(model.parameters())

From 76974269b39a2d2730d540bbd2ebed08c97348ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 22:06:22 +0800
Subject: [PATCH 13/23] Update preprocess_flist_config.py

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 preprocess_flist_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/preprocess_flist_config.py b/preprocess_flist_config.py
index de4df41..30a59d6 100644
--- a/preprocess_flist_config.py
+++ b/preprocess_flist_config.py
@@ -4,6 +4,7 @@ import os
 import re
 import wave
 from random import shuffle
+
 from loguru import logger
 from tqdm import tqdm
 

From befc4593c3c9e082f1690c2f7509dc6a47ecf3aa Mon Sep 17 00:00:00 2001
From: ylzz1997 <ylzz1997@outlook.com>
Date: Sat, 22 Jul 2023 23:02:52 +0800
Subject: [PATCH 14/23] ruff fix

---
 preprocess_hubert_f0.py | 2 +-
 train_diff.py           | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index ec6dcc3..b5d1f76 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -5,12 +5,12 @@ import random
 from concurrent.futures import ProcessPoolExecutor
 from glob import glob
 from random import shuffle
-from loguru import logger
 
 import librosa
 import numpy as np
 import torch
 import torch.multiprocessing as mp
+from loguru import logger
 from tqdm import tqdm
 
 import diffusion.logger.utils as du
diff --git a/train_diff.py b/train_diff.py
index 4cdc0eb..65ba382 100644
--- a/train_diff.py
+++ b/train_diff.py
@@ -1,6 +1,7 @@
 import argparse
 
 import torch
+from loguru import logger
 from torch.optim import lr_scheduler
 
 from diffusion.data_loaders import get_data_loaders
@@ -8,7 +9,7 @@ from diffusion.logger import utils
 from diffusion.solver import train
 from diffusion.unit2mel import Unit2Mel
 from diffusion.vocoder import Vocoder
-from loguru import logger
+
 
 def parse_args(args=None, namespace=None):
     """Parse command-line arguments."""

From 40777bbeb45e28e9e6163f1999c92c74716244f5 Mon Sep 17 00:00:00 2001
From: YuChuXi <81864000+yuxibenxi@users.noreply.github.com>
Date: Sat, 22 Jul 2023 23:04:49 +0800
Subject: [PATCH 15/23] Update README_zh_CN.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

修改了加速预处理的部分
---
 README_zh_CN.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/README_zh_CN.md b/README_zh_CN.md
index 4adbab6..84252c8 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -259,13 +259,6 @@ wavlmbase+
 ```shell
 python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug
 ```
-
-**加速预处理**
-如若您的数据集比较大，可以尝试添加`--num_processes`参数：
-```shell
-python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug --num_processes 8
-```
-所有的Workers会被自动分配到多个GPU上（如果您有多个GPU的话）
 使用后训练出的模型将匹配到输入源响度，否则为训练集响度。
 
 #### 此时可以在生成的 config.json 与 diffusion.yaml 修改部分参数
@@ -325,6 +318,13 @@ rmvpe
 python preprocess_hubert_f0.py --f0_predictor dio --use_diff
 ```
 
+**加速预处理**
+如若您的数据集比较大，可以尝试添加`--num_processes`参数：
+```shell
+python preprocess_hubert_f0.py --f0_predictor dio --use_diff --num_processes 8
+```
+所有的Workers会被自动分配到多个线程上
+
 执行完以上步骤后 dataset 目录便是预处理完成的数据，可以删除 dataset_raw 文件夹了
 
 ## 🏋️‍ 训练

From 968e80b7b45725aa0339d0a5956b3674eab22c6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 23:48:47 +0800
Subject: [PATCH 16/23] Update README.md

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 53f4944..fd36413 100644
--- a/README.md
+++ b/README.md
@@ -258,15 +258,6 @@ Add `--vol_aug` if you want to enable loudness embedding:
 python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug
 ```
 
-**Speed Up preprocess**
-
-If your dataset is pretty large,you can increase the param `--num_processes` like that:
-
-```shell
-python preprocess_flist_config.py --speech_encoder vec768l12 --vol_aug --num_processes 8
-```
-All the worker will be assigned to different GPU if you have more than one GPUs.
-
 After enabling loudness embedding, the trained model will match the loudness of the input source; otherwise, it will match the loudness of the training set.
 
 #### You can modify some parameters in the generated config.json and diffusion.yaml
@@ -324,6 +315,15 @@ If you want shallow diffusion (optional), you need to add the `--use_diff` param
 python preprocess_hubert_f0.py --f0_predictor dio --use_diff
 ```
 
+**Speed up preprocessing**
+
+If your dataset is large, you can speed up preprocessing by adding the `--num_processes` parameter, for example:
+
+```shell
+python preprocess_hubert_f0.py --f0_predictor dio --use_diff --num_processes 8
+```
+All workers will be distributed across the available GPUs if you have more than one GPU.
+
 After completing the above steps, the dataset directory will contain the preprocessed data, and the dataset_raw folder can be deleted.
 
 ## 🏋️‍ Training

From 85ef9ab7f9126c40215172ac907f6d82057e52bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 23:50:52 +0800
Subject: [PATCH 17/23] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index fd36413..c45e554 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <div align="center">
-  
+<img alt="LOGO" src="https://avatars.githubusercontent.com/u/127122328?s=400&u=5395a98a4f945a3a50cb0cc96c2747505d190dbc&v=4" width="300" height="300" />
 # SoftVC VITS Singing Voice Conversion
 
 [**English**](./README.md) | [**中文简体**](./README_zh_CN.md)

From 54c9473abb17f2f27cee035bd368a4470616f016 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 23:51:03 +0800
Subject: [PATCH 18/23] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c45e554..9855f79 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 <div align="center">
 <img alt="LOGO" src="https://avatars.githubusercontent.com/u/127122328?s=400&u=5395a98a4f945a3a50cb0cc96c2747505d190dbc&v=4" width="300" height="300" />
+  
 # SoftVC VITS Singing Voice Conversion
 
 [**English**](./README.md) | [**中文简体**](./README_zh_CN.md)

From e50786e6270c383d2c57c0e9274cc1c3cc82a306 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= <star_dust_chen@foxmail.com>
Date: Sat, 22 Jul 2023 23:51:58 +0800
Subject: [PATCH 19/23] Update README_zh_CN.md

---
 README_zh_CN.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README_zh_CN.md b/README_zh_CN.md
index 84252c8..d153a88 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -1,5 +1,6 @@
 <div align="center">
-  
+<img alt="LOGO" src="https://avatars.githubusercontent.com/u/127122328?s=400&u=5395a98a4f945a3a50cb0cc96c2747505d190dbc&v=4" width="300" height="300" />
+
 # SoftVC VITS Singing Voice Conversion
 
 [**English**](./README.md) | [**中文简体**](./README_zh_CN.md)

From 4c4093eef37369cbb0687ff317d349efe40f816e Mon Sep 17 00:00:00 2001
From: Miuzarte <982809597@qq.com>
Date: Sun, 23 Jul 2023 05:56:27 +0800
Subject: [PATCH 20/23] why not rich.progress

---
 resample.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resample.py b/resample.py
index 50eacc1..809cf49 100644
--- a/resample.py
+++ b/resample.py
@@ -7,7 +7,7 @@ from multiprocessing import cpu_count
 import librosa
 import numpy as np
 from scipy.io import wavfile
-from tqdm import tqdm
+from rich.progress import track
 
 
 def load_wav(wav_path):
@@ -81,7 +81,7 @@ def process_all_speakers():
             if os.path.isdir(spk_dir):
                 print(spk_dir)
                 futures = [executor.submit(process, (spk_dir, i, args)) for i in os.listdir(spk_dir) if i.endswith("wav")]
-                for _ in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
+                for _ in track(concurrent.futures.as_completed(futures), total=len(futures), description="resampling:"):
                     pass
 
 

From 3ab6f1f1749c8a9586d1636d1d6c6399fb5ae2ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=AC=AC=E7=B4=97=E7=89=B9?=
 <66856838+Miuzarte@users.noreply.github.com>
Date: Sun, 23 Jul 2023 09:25:05 +0800
Subject: [PATCH 21/23] update requirements.txt

---
 requirements.txt              | 2 ++
 requirements_onnx_encoder.txt | 2 ++
 requirements_win.txt          | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 9cc8149..2670ea6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,8 @@ torch
 torchaudio
 torchcrepe
 tqdm
+rich.progress
+loguru
 scikit-maad
 praat-parselmouth
 onnx
diff --git a/requirements_onnx_encoder.txt b/requirements_onnx_encoder.txt
index a624622..cfde17c 100644
--- a/requirements_onnx_encoder.txt
+++ b/requirements_onnx_encoder.txt
@@ -9,6 +9,8 @@ torch==1.13.1
 torchaudio==0.13.1
 torchcrepe
 tqdm
+rich.progress
+loguru
 scikit-maad
 praat-parselmouth
 onnx
diff --git a/requirements_win.txt b/requirements_win.txt
index 06ac4ab..7112b0b 100644
--- a/requirements_win.txt
+++ b/requirements_win.txt
@@ -15,6 +15,8 @@ sounddevice==0.4.5
 SoundFile==0.10.3.post1
 starlette==0.19.1
 tqdm==4.63.0
+rich.progress
+loguru
 torchcrepe
 scikit-maad
 praat-parselmouth

From 174cb333f861533832503f8e7778a941bcd3a99b Mon Sep 17 00:00:00 2001
From: ylzz1997 <ylzz1997@outlook.com>
Date: Sun, 23 Jul 2023 09:43:00 +0800
Subject: [PATCH 22/23] Debug rank GPU

---
 preprocess_hubert_f0.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py
index b5d1f76..04397d7 100644
--- a/preprocess_hubert_f0.py
+++ b/preprocess_hubert_f0.py
@@ -114,7 +114,7 @@ def process_batch(file_chunk, f0p, diff=False, mel_extractor=None):
     hmodel = utils.get_speech_encoder(speech_encoder, device=device)
     logger.info(f"Loaded speech encoder for rank {rank}")
     for filename in tqdm(file_chunk):
-        process_one(filename, hmodel, f0p, rank, diff, mel_extractor)
+        process_one(filename, hmodel, f0p, gpu_id, diff, mel_extractor)
 
 def parallel_process(filenames, num_processes, f0p, diff, mel_extractor):
     with ProcessPoolExecutor(max_workers=num_processes) as executor:

From 691486fd55ac9a91a3a8920f4e0ed82fff685b5e Mon Sep 17 00:00:00 2001
From: ylzz1997 <ylzz1997@outlook.com>
Date: Sun, 23 Jul 2023 09:47:21 +0800
Subject: [PATCH 23/23] Debug requirements and ruff fix

---
 requirements.txt     | 5 ++---
 requirements_win.txt | 3 +--
 resample.py          | 2 +-
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 2670ea6..f86b81e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ torch
 torchaudio
 torchcrepe
 tqdm
-rich.progress
+rich
 loguru
 scikit-maad
 praat-parselmouth
@@ -26,5 +26,4 @@ edge_tts
 langdetect
 pyyaml
 pynvml
-faiss-cpu
-loguru
+faiss-cpu
\ No newline at end of file
diff --git a/requirements_win.txt b/requirements_win.txt
index 7112b0b..461a992 100644
--- a/requirements_win.txt
+++ b/requirements_win.txt
@@ -15,7 +15,7 @@ sounddevice==0.4.5
 SoundFile==0.10.3.post1
 starlette==0.19.1
 tqdm==4.63.0
-rich.progress
+rich
 loguru
 torchcrepe
 scikit-maad
@@ -31,4 +31,3 @@ langdetect
 pyyaml
 pynvml
 faiss-cpu
-loguru
diff --git a/resample.py b/resample.py
index 809cf49..af421fd 100644
--- a/resample.py
+++ b/resample.py
@@ -6,8 +6,8 @@ from multiprocessing import cpu_count
 
 import librosa
 import numpy as np
-from scipy.io import wavfile
 from rich.progress import track
+from scipy.io import wavfile
 
 
 def load_wav(wav_path):