# Hyperparameter configuration: one flat mapping of settings, plus the nested
# `binarization_args` mapping and three list-valued keys.
# Key order is kept as found; values are unchanged.
K_step: 1000
accumulate_grad_batches: 1
audio_num_mel_bins: 128
audio_sample_rate: 44100
binarization_args:
  shuffle: false
  with_spk_embed: false
binarizer_cls: preprocessing.svc_binarizer.SvcBinarizer
check_val_every_n_epoch: 10
choose_test_manually: false
clip_grad_norm: 1
content_cond_steps: []
dec_ffn_kernel_size: 9
dec_layers: 4
decoder_type: fft
dict_dir: ''
diff_decoder_type: wavenet
diff_loss_type: l2
dilation_cycle_length: 4
dropout: 0.1
ds_workers: 4
# Each list entry below is a single comma-separated scalar (loaded as a string),
# not a nested list; the consumer presumably splits it -- TODO confirm.
dur_enc_hidden_stride_kernel:
  - 0,2,3
  - 0,2,3
  - 0,1,3
dur_loss: mse
dur_predictor_kernel: 3
dur_predictor_layers: 5
enc_ffn_kernel_size: 9
enc_layers: 4
encoder_K: 8
encoder_type: fft
endless_ds: false
f0_bin: 256
f0_max: 1100.0
f0_min: 40.0
ffn_act: gelu
ffn_padding: SAME
fft_size: 2048
fmax: 16000
fmin: 40
fs2_ckpt: ''
gaussian_start: true
gen_dir_name: ''
gen_tgt_spk_id: -1
hidden_size: 256
hop_size: 512
hubert_gpu: true
infer: false
keep_bins: 128
lambda_commit: 0.25
lambda_energy: 0.0
lambda_f0: 1.0
lambda_ph_dur: 0.3
lambda_sent_dur: 1.0
lambda_uv: 1.0
lambda_word_dur: 1.0
load_ckpt: ''
log_interval: 100
loud_norm: false
max_beta: 0.02
max_epochs: 3000
max_eval_sentences: 1
max_eval_tokens: 60000
max_frames: 42000
max_input_tokens: 60000
max_updates: 1000000
# Plain scalar containing ':' and '|'; it stays one string because neither
# character is followed by whitespace.
mel_loss: ssim:0.5|l1:0.5
mel_vmax: 1.5
mel_vmin: -6.0
min_level_db: -120
norm_type: gn
num_heads: 2
num_sanity_val_steps: 1
num_spk: 1
num_test_samples: 0
num_valid_plots: 10
optimizer_adam_beta1: 0.9
optimizer_adam_beta2: 0.98
out_wav_norm: false
pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
pe_enable: false
perform_enhance: true
pitch_ar: false
# Same comma-separated scalar form as dur_enc_hidden_stride_kernel.
pitch_enc_hidden_stride_kernel:
  - 0,2,5
  - 0,2,5
  - 0,2,5
pitch_extractor: parselmouth
pitch_loss: l2
pitch_norm: log
pitch_type: frame
pndm_speedup: 10
predictor_dropout: 0.5
predictor_grad: 0.1
predictor_hidden: -1
predictor_kernel: 5
predictor_layers: 5
prenet_dropout: 0.5
prenet_hidden_size: 256
pretrain_fs_ckpt: ''
# NOTE(review): 'xxx' looks like a placeholder -- presumably must be set to a
# real directory before use; confirm against the code that reads it.
processed_data_dir: xxx
profile_infer: false
ref_norm_layer: bn
rel_pos: true
reset_phone_dict: true
save_best: false
save_ckpt: true
save_codes:
  - configs
  - modules
  - src
  - utils
save_f0: true
save_gt: false
schedule_type: linear
seed: 1234
sort_by_len: true
spk_cond_steps: []
speaker_id: single
stop_token_weight: 5.0
task_cls: training.svc_task.SvcTask
test_ids: []
test_input_dir: ''
timesteps: 1000
train_set_name: train
test_set_name: test
use_denoise: false
use_energy_embed: false
use_gt_dur: false
use_gt_f0: false
use_nsf: true
use_pitch_embed: true
use_pos_embed: true
use_spk_embed: false
use_spk_id: false
use_split_spk_id: false
use_uv: false
use_var_enc: false
valid_num: 0
valid_set_name: valid
vocoder: modules.vocoders.nsf_hifigan.NsfHifiGAN
vocoder_ckpt: checkpoints/nsf_hifigan/model
warmup_updates: 2000
# NOTE(review): YAML 1.1 loaders (e.g. PyYAML) resolve '1e-6' as a string, not
# a float, because it has no decimal point. Left unchanged here; if the reader
# needs a number, verify it converts explicitly or write 1.0e-6.
wav2spec_eps: 1e-6
weight_decay: 0
win_size: 2048