From 46e0fa48ddf00e4873d61963296900ec72d16f3e Mon Sep 17 00:00:00 2001 From: umoubuton <127150330+umoufuton@users.noreply.github.com> Date: Fri, 2 Jun 2023 12:41:25 +0800 Subject: [PATCH] Update resample.py --- README.md | 28 +++------------------------- README_zh_CN.md | 28 +++------------------------- resample.py | 6 ++++-- 3 files changed, 10 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 538ed74..853bcda 100644 --- a/README.md +++ b/README.md @@ -182,32 +182,10 @@ python resample.py #### Attention -Although this project has the script resample.py for resampling, to mono and loudness matching, the default loudness matching is to match to 0db. This may cause damage to the sound quality. While python's loudness matching package pyloudnorm is unable to limit the level, this results in a burst. Therefore, it is suggested to consider using professional sound processing software such as `adobe audition` for resampling, to mono and loudness matching processing. If you use other software for resampling, to mono and loudness matching, do not run the preceding command. +Although this project provides the script resample.py for resampling, mono conversion, and loudness matching, the default loudness matching normalizes to 0 dB, which may damage the sound quality. In addition, Python's loudness-matching package pyloudnorm cannot limit the level, which can result in clipping (burst noise). It is therefore recommended to use professional audio processing software such as `Adobe Audition` for loudness matching. If you have already used other software for loudness matching, run the command with the argument `--skip_loudnorm`: -To manually process the audio, you need to put the dataset into the Dataset directory with the following file structure. If the directory does not exist, you can create it yourself. - -``` -dataset -└───44k - ├───speaker0 - │ ├───xxx1-xxx1.wav - │ ├───... - │ └───Lxx-0xx8.wav - └───speaker1 - ├───xx2-0xxx2.wav - ├───... 
- └───xxx7-xxx007.wav -``` - -You can customize the speaker name. - -``` -dataset -└───44k - └───suijiSUI - ├───1.wav - ├───... - └───25788785-20221210-200143-856_01_(Vocals)_0_0.wav +```shell +python resample.py --skip_loudnorm ``` ### 2. Automatically split the dataset into training and validation sets, and generate configuration files. diff --git a/README_zh_CN.md b/README_zh_CN.md index 456a616..7fb2410 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -184,32 +184,10 @@ python resample.py #### 注意 -虽然本项目拥有重采样、转换单声道与响度匹配的脚本resample.py,但是默认的响度匹配是匹配到0db。这可能会造成音质的受损。而python的响度匹配包pyloudnorm无法对电平进行压限,这会导致爆音。所以建议可以考虑使用专业声音处理软件如`adobe audition`等软件做重采样、转换单声道与响度匹配处理。若使用其他软件做重采样、转换单声道与响度匹配,则可以不运行上述命令。 +虽然本项目拥有重采样、转换单声道与响度匹配的脚本resample.py,但是默认的响度匹配是匹配到0db。这可能会造成音质的受损。而python的响度匹配包pyloudnorm无法对电平进行压限,这会导致爆音。所以建议可以考虑使用专业声音处理软件如`adobe audition`等软件做响度匹配处理。若已经使用其他软件做响度匹配,可以在运行上述命令时添加`--skip_loudnorm`跳过响度匹配步骤。如: -若手动处理音频,需要以以下文件结构将数据集放入dataset目录即可。若无该目录可以自行创建。 - -``` -dataset -└───44k - ├───speaker0 - │ ├───xxx1-xxx1.wav - │ ├───... - │ └───Lxx-0xx8.wav - └───speaker1 - ├───xx2-0xxx2.wav - ├───... - └───xxx7-xxx007.wav -``` - -可以自定义说话人名称 - -``` -dataset -└───44k - └───suijiSUI - ├───1.wav - ├───... - └───25788785-20221210-200143-856_01_(Vocals)_0_0.wav +```shell +python resample.py --skip_loudnorm ``` ### 2. 
自动划分训练集、验证集,以及自动生成配置文件 diff --git a/resample.py b/resample.py index 03e8891..301e292 100644 --- a/resample.py +++ b/resample.py @@ -15,12 +15,13 @@ def process(item): if os.path.exists(wav_path) and '.wav' in wav_path: os.makedirs(os.path.join(args.out_dir2, speaker), exist_ok=True) wav, sr = librosa.load(wav_path, sr=None) - wav, _ = librosa.effects.trim(wav, top_db=20) + wav, _ = librosa.effects.trim(wav, top_db=40) peak = np.abs(wav).max() if peak > 1.0: wav = 0.98 * wav / peak wav2 = librosa.resample(wav, orig_sr=sr, target_sr=args.sr2) - wav2 /= max(wav2.max(), -wav2.min()) + if not args.skip_loudnorm: + wav2 /= max(wav2.max(), -wav2.min()) save_name = wav_name save_path2 = os.path.join(args.out_dir2, speaker, save_name) wavfile.write( @@ -35,6 +36,7 @@ if __name__ == "__main__": parser.add_argument("--sr2", type=int, default=44100, help="sampling rate") parser.add_argument("--in_dir", type=str, default="./dataset_raw", help="path to source dir") parser.add_argument("--out_dir2", type=str, default="./dataset/44k", help="path to target dir") + parser.add_argument("--skip_loudnorm", action="store_true", help="Skip loudness matching if you have done it") args = parser.parse_args() processs = 30 if cpu_count() > 60 else (cpu_count()-2 if cpu_count() > 4 else 1) pool = Pool(processes=processs)