# diff-svc/batch.py

import io
import os.path
from pathlib import Path

import numpy as np
import soundfile

from infer_tools import infer_tool
from infer_tools.infer_tool import Svc
from utils.hparams import hparams


def run_clip(raw_audio_path, svc_model, key, acc, use_crepe, spk_id=0, auto_key=False, units_mode=False):
    """Run one clip through ``svc_model`` and export the result.

    The input is first passed to ``infer_tool.format_wav``, the key is
    resolved with ``svc_model.evaluate_key`` and the clip is inferred with
    ``svc_model.infer`` (called with ``singer=not units_mode``).

    Args:
        raw_audio_path: Path of the input audio file.
        svc_model: Model object providing ``evaluate_key``, ``infer`` and a
            ``hubert`` attribute with an ``encode`` method.
        key: Requested key value, forwarded to ``svc_model.evaluate_key``.
        acc: Forwarded to ``svc_model.infer`` as ``acc``.
        use_crepe: Forwarded to ``svc_model.infer`` as ``use_crepe``.
        spk_id: Forwarded to ``svc_model.infer`` as ``spk_id``.
        auto_key: Forwarded to ``svc_model.evaluate_key``.
        units_mode: When true, the inferred audio is encoded with
            ``svc_model.hubert.encode`` and saved as a ``.npy`` file next to
            the input; otherwise the audio is written as a 16-bit PCM file
            under ``./singer_data`` using the input's file name.

    Returns:
        None. The function is used for its file output only.
    """
    infer_tool.format_wav(raw_audio_path)
    resolved_key = svc_model.evaluate_key(raw_audio_path, key, auto_key)
    # Only the third element (the audio) of the inference result is used.
    _, _, converted = svc_model.infer(
        raw_audio_path,
        key=resolved_key,
        acc=acc,
        use_crepe=use_crepe,
        spk_id=spk_id,
        singer=not units_mode,
    )
    sample_rate = hparams["audio_sample_rate"]

    if not units_mode:
        # NOTE: ./singer_data is not created here; the caller must ensure it exists.
        target = f'./singer_data/{Path(raw_audio_path).name}'
        soundfile.write(target, converted, sample_rate, subtype='PCM_16')
        return

    # Units mode: keep the converted audio in memory, rewind the buffer and
    # hand it to the hubert encoder; only the encoded result reaches disk.
    wav_buffer = io.BytesIO()
    soundfile.write(wav_buffer, converted, sample_rate, format='wav')
    wav_buffer.seek(0)
    encoded = svc_model.hubert.encode(wav_buffer)
    units_file = Path(raw_audio_path).with_suffix(".npy")
    np.save(str(units_file), encoded)


if __name__ == '__main__':
    # Project folder name -- the one that was used for training.
    project_name = "fox_cn"
    model_path = f'./checkpoints/{project_name}/model_ckpt_steps_370000.ckpt'
    config_path = f'./checkpoints/{project_name}/config.yaml'

    # Original author's note: this script batch-exports short clips (up to
    # 30s) and also produces f0/mel for use with diffsinger.
    # Input: wav files (with extension) placed in the ./batch folder.
    wav_paths = infer_tool.get_end_file("./batch", "wav")

    # Pitch shift in semitones; may be positive or negative.
    trans = -6
    # Speaker id; leave unchanged for single-speaker models.
    spk_id = 0
    # Special-purpose switch: when enabled, only the units of the converted
    # voice are exported (as .npy next to each input) and nothing else is
    # written; when disabled, the audio is exported to ./singer_data for the
    # diffsinger pipeline.
    units = True
    # Automatic key adaptation; leave off unless you know what it does.
    auto_key = False
    # Acceleration factor.
    accelerate = 10

    # ---- Nothing below this line needs to be edited ----
    os.makedirs("./singer_data", exist_ok=True)
    model = Svc(project_name, config_path, hubert_gpu=True, model_path=model_path)

    for count, audio_path in enumerate(wav_paths, start=1):
        units_file = Path(audio_path).with_suffix(".npy")
        # In units mode, clips whose units file already exists are skipped.
        if units and os.path.exists(units_file):
            print(f"{audio_path}:units已存在，跳过")
            continue

        run_clip(
            audio_path,
            model,
            trans,
            accelerate,
            use_crepe=False,
            spk_id=spk_id,
            auto_key=auto_key,
            units_mode=units,
        )
        progress = round(count * 100 / len(wav_paths), 2)
        print(f"\r\nnum:{count}\r\ntotal process:{progress}%\r\n")