From 7e7b3a068e79a5e00d5af26330946b5c13283ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BA=A2=E8=A1=80=E7=90=83AE3803?= <2544390577@qq.com> Date: Fri, 10 Mar 2023 19:20:48 +0900 Subject: [PATCH] Add files via upload --- configs/config.json | 65 ++++++++++++++++++++++++++++++ dataset_raw/wav_structure.txt | 20 +++++++++ filelists/test.txt | 4 ++ filelists/train.txt | 15 +++++++ filelists/val.txt | 4 ++ logs/44k/put_pretrained_model_here | 0 raw/put_raw_wav_here | 0 7 files changed, 108 insertions(+) create mode 100644 configs/config.json create mode 100644 dataset_raw/wav_structure.txt create mode 100644 filelists/test.txt create mode 100644 filelists/train.txt create mode 100644 filelists/val.txt create mode 100644 logs/44k/put_pretrained_model_here create mode 100644 raw/put_raw_wav_here diff --git a/configs/config.json b/configs/config.json new file mode 100644 index 0000000..f19d46d --- /dev/null +++ b/configs/config.json @@ -0,0 +1,65 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 800, + "seed": 1234, + "epochs": 10000, + "learning_rate": 0.0001, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 6, + "fp16_run": false, + "lr_decay": 0.999875, + "segment_size": 10240, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0, + "use_sr": true, + "max_speclen": 512, + "port": "8001", + "keep_ckpts": 3 + }, + "data": { + "training_files": "filelists/train.txt", + "validation_files": "filelists/val.txt", + "max_wav_value": 32768.0, + "sampling_rate": 44100, + "filter_length": 2048, + "hop_length": 512, + "win_length": 2048, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": 22050 + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [ 8, 8, 2, 2, 2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16, 4, 4, 4], + "n_layers_q": 3, + "use_spectral_norm": false, + "gin_channels": 256, + "ssl_dim": 256, + "n_speakers": 200 + }, + "spk": { + "nyaru": 0, + "huiyu": 1, + "nen": 2, + "paimon": 3, + "yunhao": 4 + } +} \ No newline at end of file diff --git a/dataset_raw/wav_structure.txt b/dataset_raw/wav_structure.txt new file mode 100644 index 0000000..68cee4e --- /dev/null +++ b/dataset_raw/wav_structure.txt @@ -0,0 +1,20 @@ +数据集准备 + +raw +├───speaker0 +│ ├───xxx1-xxx1.wav +│ ├───... +│ └───Lxx-0xx8.wav +└───speaker1 + ├───xx2-0xxx2.wav + ├───... + └───xxx7-xxx007.wav + +此外还需要编辑config.json + +"n_speakers": 10 + +"spk":{ + "speaker0": 0, + "speaker1": 1, +} diff --git a/filelists/test.txt b/filelists/test.txt new file mode 100644 index 0000000..be640cf --- /dev/null +++ b/filelists/test.txt @@ -0,0 +1,4 @@ +./dataset/44k/taffy/000562.wav +./dataset/44k/nyaru/000011.wav +./dataset/44k/nyaru/000008.wav +./dataset/44k/taffy/000563.wav diff --git a/filelists/train.txt b/filelists/train.txt new file mode 100644 index 0000000..acdb3cc --- /dev/null +++ b/filelists/train.txt @@ -0,0 +1,15 @@ +./dataset/44k/taffy/000549.wav +./dataset/44k/nyaru/000004.wav +./dataset/44k/nyaru/000006.wav +./dataset/44k/taffy/000551.wav +./dataset/44k/nyaru/000009.wav +./dataset/44k/taffy/000561.wav +./dataset/44k/nyaru/000001.wav +./dataset/44k/taffy/000553.wav +./dataset/44k/nyaru/000002.wav +./dataset/44k/taffy/000560.wav +./dataset/44k/taffy/000557.wav +./dataset/44k/nyaru/000005.wav +./dataset/44k/taffy/000554.wav +./dataset/44k/taffy/000550.wav +./dataset/44k/taffy/000559.wav diff --git a/filelists/val.txt b/filelists/val.txt new file mode 100644 index 0000000..262dfc9 --- /dev/null +++ b/filelists/val.txt @@ -0,0 +1,4 @@ +./dataset/44k/nyaru/000003.wav +./dataset/44k/nyaru/000007.wav +./dataset/44k/taffy/000558.wav +./dataset/44k/taffy/000556.wav diff --git a/logs/44k/put_pretrained_model_here b/logs/44k/put_pretrained_model_here new file mode 100644 index 0000000..e69de29 diff --git a/raw/put_raw_wav_here b/raw/put_raw_wav_here new file mode 100644 index 0000000..e69de29