From fe6bc7bcf33a4aa4ed98f9a61e70a2773715f9db Mon Sep 17 00:00:00 2001 From: 112292454 <92578848+112292454@users.noreply.github.com> Date: Sat, 18 Mar 2023 19:53:49 +0800 Subject: [PATCH 01/21] update steps read --- train.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index e499528..3c6880b 100644 --- a/train.py +++ b/train.py @@ -100,7 +100,9 @@ def run(rank, n_gpus, hps): _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d, skip_optimizer) epoch_str = max(epoch_str, 1) - global_step = (epoch_str - 1) * len(train_loader) + name=utils.latest_checkpoint_path(hps.model_dir, "D_*.pth") + global_step=int(name[name.rfind("_")+1:name.rfind(".")])+1 + #global_step = (epoch_str - 1) * len(train_loader) except: print("load old checkpoint failed...") epoch_str = 1 From 05395d8bbe0d082a92f4c9a2e517f35c40e44311 Mon Sep 17 00:00:00 2001 From: 112292454 <92578848+112292454@users.noreply.github.com> Date: Sat, 18 Mar 2023 20:10:51 +0800 Subject: [PATCH 02/21] Update train.py --- train.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 3c6880b..b7f2c9d 100644 --- a/train.py +++ b/train.py @@ -208,10 +208,14 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade if global_step % hps.train.log_interval == 0: lr = optim_g.param_groups[0]['lr'] losses = [loss_disc, loss_gen, loss_fm, loss_mel, loss_kl] + reference_loss=0 + for i in losses: + reference_loss += math.log(i, 10) + reference_loss*=10 logger.info('Train Epoch: {} [{:.0f}%]'.format( epoch, 100. 
* batch_idx / len(train_loader))) - logger.info(f"Losses: {[x.item() for x in losses]}, step: {global_step}, lr: {lr}") + logger.info(f"Losses: {[x.item() for x in losses]}, step: {global_step}, lr: {lr}, reference_loss={reference_loss}") scalar_dict = {"loss/g/total": loss_gen_all, "loss/d/total": loss_disc_all, "learning_rate": lr, "grad_norm_d": grad_norm_d, "grad_norm_g": grad_norm_g} From f1903611913d19b782098c9dcf06091e4759833a Mon Sep 17 00:00:00 2001 From: 112292454 <92578848+112292454@users.noreply.github.com> Date: Sat, 18 Mar 2023 20:11:54 +0800 Subject: [PATCH 03/21] Update train.py --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index b7f2c9d..8a32e14 100644 --- a/train.py +++ b/train.py @@ -215,7 +215,7 @@ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loade logger.info('Train Epoch: {} [{:.0f}%]'.format( epoch, 100. * batch_idx / len(train_loader))) - logger.info(f"Losses: {[x.item() for x in losses]}, step: {global_step}, lr: {lr}, reference_loss={reference_loss}") + logger.info(f"Losses: {[x.item() for x in losses]}, step: {global_step}, lr: {lr}, reference_loss: {reference_loss}") scalar_dict = {"loss/g/total": loss_gen_all, "loss/d/total": loss_disc_all, "learning_rate": lr, "grad_norm_d": grad_norm_d, "grad_norm_g": grad_norm_g} From e330c191885ba7363962b866d0cae59740775f8e Mon Sep 17 00:00:00 2001 From: kenwaytis <40118038+kenwaytis@users.noreply.github.com> Date: Mon, 10 Apr 2023 17:23:46 +0800 Subject: [PATCH 04/21] Update requirements.txt --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a2b60c1..23b5736 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ Flask Flask_Cors gradio -numpy +numpy==1.23.0 pyworld==0.2.5 -scipy==1.7.3 +scipy==1.10.0 SoundFile==0.12.1 torch==1.13.1 torchaudio==0.13.1 From 1bebe913258c8e859bf675c4b44601c712474637 Mon Sep 17 00:00:00 2001 
From: YuriHead Date: Mon, 10 Apr 2023 20:04:49 +0800 Subject: [PATCH 05/21] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c3234df..4cb78af 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ The singing voice conversion model uses SoftVC content encoder to extract source - The dataset creation and training process are consistent with version 3.0, but the model is completely non-universal, and the data set needs to be fully pre-processed again. - Added an option 1: automatic pitch prediction for vc mode, which means that you don't need to manually enter the pitch key when converting speech, and the pitch of male and female voices can be automatically converted. However, this mode will cause pitch shift when converting songs. - Added option 2: reduce timbre leakage through k-means clustering scheme, making the timbre more similar to the target timbre. -- Added option 3: Added [NFS-HIFIGAN Enhancer](https://github.com/yxlllc/DDSP-SVC), which has certain sound quality enhancement effect on some models with few train-sets, but has negative effect on well-trained models, so it is closed by default +- Added option 3: Added [NSF-HIFIGAN Enhancer](https://github.com/yxlllc/DDSP-SVC), which has certain sound quality enhancement effect on some models with few train-sets, but has negative effect on well-trained models, so it is closed by default ## 💬 About Python Version From 7846711d90b5210d4f39a4d6fab50d1d7bbd8d73 Mon Sep 17 00:00:00 2001 From: YuriHead Date: Mon, 10 Apr 2023 20:04:59 +0800 Subject: [PATCH 06/21] Update README_zh_CN.md --- README_zh_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 624f95e..e7ee3bb 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -34,7 +34,7 @@ + 数据集制作、训练过程和3.0保持一致,但模型完全不通用,数据集也需要全部重新预处理 + 增加了可选项 1:vc模式自动预测音高f0,即转换语音时不需要手动输入变调key,男女声的调能自动转换,但仅限语音转换,该模式转换歌声会跑调 + 增加了可选项 
2:通过kmeans聚类方案减小音色泄漏,即使得音色更加像目标音色 -+ 增加了可选项 3:增加了[NFS-HIFIGAN增强器](https://github.com/yxlllc/DDSP-SVC),对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭 ++ 增加了可选项 3:增加了[NSF-HIFIGAN增强器](https://github.com/yxlllc/DDSP-SVC),对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭 ## 💬 关于 Python 版本问题 From 8cff6c5cb0f12b4fec46f0da2bd5f9c1406ff74c Mon Sep 17 00:00:00 2001 From: Jared <78630856+Jared-02@users.noreply.github.com> Date: Tue, 11 Apr 2023 11:49:41 +0800 Subject: [PATCH 07/21] Revise the warm-up setup process --- train.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index 9f6e743..dda30d9 100644 --- a/train.py +++ b/train.py @@ -114,20 +114,30 @@ def run(rank, n_gpus, hps): epoch_str = 1 global_step = 0 + warmup_epoch = hps.train.warmup_epochs scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2) scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2) scaler = GradScaler(enabled=hps.train.fp16_run) for epoch in range(epoch_str, hps.train.epochs + 1): + # update learning rate + if epoch > 1: + scheduler_g.step() + scheduler_d.step() + # set up warm-up learning rate + if epoch <= warmup_epoch: + for param_group in optim_g.param_groups: + param_group['lr'] = hps.train.learning_rate / warmup_epoch * epoch + for param_group in optim_d.param_groups: + param_group['lr'] = hps.train.learning_rate / warmup_epoch * epoch + # training if rank == 0: train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, eval_loader], logger, [writer, writer_eval]) else: train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, [train_loader, None], None, None) - scheduler_g.step() - scheduler_d.step() def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers): From 
94b2063b4308d08812696206cf96811a0bce164d Mon Sep 17 00:00:00 2001 From: SherkeyXD <253294679@qq.com> Date: Tue, 11 Apr 2023 17:38:59 +0800 Subject: [PATCH 08/21] Beautify webui and remove app.py --- app.py | 69 ---------------------- webUI.py | 177 ++++++++++++++++++++++++++++++++----------------------- 2 files changed, 102 insertions(+), 144 deletions(-) delete mode 100644 app.py diff --git a/app.py b/app.py deleted file mode 100644 index 0ff0c88..0000000 --- a/app.py +++ /dev/null @@ -1,69 +0,0 @@ -import io -import os - -# os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt") -import gradio as gr -import librosa -import numpy as np -import soundfile -from inference.infer_tool import Svc -import logging - -logging.getLogger('numba').setLevel(logging.WARNING) -logging.getLogger('markdown_it').setLevel(logging.WARNING) -logging.getLogger('urllib3').setLevel(logging.WARNING) -logging.getLogger('matplotlib').setLevel(logging.WARNING) - -config_path = "configs/config.json" - -model = Svc("logs/44k/G_114400.pth", "configs/config.json", cluster_model_path="logs/44k/kmeans_10000.pt") - - - -def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale): - if input_audio is None: - return "You need to upload an audio", None - sampling_rate, audio = input_audio - # print(audio.shape,sampling_rate) - duration = audio.shape[0] / sampling_rate - if duration > 90: - return "请上传小于90s的音频,需要转换长音频请本地进行转换", None - audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32) - if len(audio.shape) > 1: - audio = librosa.to_mono(audio.transpose(1, 0)) - if sampling_rate != 16000: - audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000) - print(audio.shape) - out_wav_path = "temp.wav" - soundfile.write(out_wav_path, audio, 16000, format="wav") - print( cluster_ratio, auto_f0, noise_scale) - _audio = model.slice_inference(out_wav_path, sid, vc_transform, slice_db, cluster_ratio, 
auto_f0, noise_scale) - return "Success", (44100, _audio) - - -app = gr.Blocks() -with app: - with gr.Tabs(): - with gr.TabItem("Basic"): - gr.Markdown(value=""" - sovits4.0 在线demo - - 此demo为预训练底模在线demo,使用数据:云灏 即霜 辉宇·星AI 派蒙 绫地宁宁 - """) - spks = list(model.spk2id.keys()) - sid = gr.Dropdown(label="音色", choices=spks, value=spks[0]) - vc_input3 = gr.Audio(label="上传音频(长度小于90秒)") - vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0) - cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0) - auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False) - slice_db = gr.Number(label="切片阈值", value=-40) - noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4) - vc_submit = gr.Button("转换", variant="primary") - vc_output1 = gr.Textbox(label="Output Message") - vc_output2 = gr.Audio(label="Output Audio") - vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale], [vc_output1, vc_output2]) - - app.launch() - - - diff --git a/webUI.py b/webUI.py index c0467ba..fb81089 100644 --- a/webUI.py +++ b/webUI.py @@ -9,7 +9,6 @@ import numpy as np import soundfile from inference.infer_tool import Svc import logging -import traceback import subprocess import edge_tts @@ -27,20 +26,50 @@ logging.getLogger('multipart').setLevel(logging.WARNING) model = None spk = None -debug=False +debug = False cuda = [] if torch.cuda.is_available(): for i in range(torch.cuda.device_count()): - cuda.append("cuda:{}".format(i)) + device_name = torch.cuda.get_device_properties(i).name + cuda.append(f"CUDA:{i} {device_name}") + +def modelAnalysis(model_path,config_path,cluster_model_path,device,enhance): + global model + try: + model = Svc(model_path.name, config_path.name, device=device if device!="Auto" else None, cluster_model_path = cluster_model_path.name if cluster_model_path != None else "",nsf_hifigan_enhance=enhance) + spks = 
list(model.spk2id.keys()) + device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev) + msg = f"成功加载模型到设备{device_name}上\n" + if cluster_model_path is None: + msg += "未加载聚类模型\n" + else: + msg += f"聚类模型{cluster_model_path.name}加载成功\n" + msg += "当前模型的可用音色:\n" + for i in spks: + msg += i + " " + return sid.update(choices = spks,value=spks[0]), msg + except Exception as e: + raise gr.Error(e) + + +def modelUnload(): + global model + if model is None: + return sid.update(choices = [],value=""),"没有模型需要卸载!" + else: + model = None + torch.cuda.empty_cache() + return sid.update(choices = [],value=""),"模型卸载完毕!" + def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,F0_mean_pooling,enhancer_adaptive_key): global model try: if input_audio is None: - return "You need to upload an audio", None + raise gr.Error("你需要上传音频") if model is None: - return "You need to upload an model", None + raise gr.Error("你需要指定模型") sampling_rate, audio = input_audio # print(audio.shape,sampling_rate) audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32) @@ -54,16 +83,16 @@ def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise #构建保存文件的路径,并保存到results文件夹内 try: timestamp = str(int(time.time())) - output_file = os.path.join("./results", sid + "_" + timestamp + ".wav") + filename = sid + "_" + timestamp + ".wav" + output_file = os.path.join("./results", filename) soundfile.write(output_file, _audio, model.target_sample, format="wav") - return "Success", (model.target_sample, _audio) + return f"推理成功,音频文件保存为results/{filename}", (model.target_sample, _audio) except Exception as e: - if debug:traceback.print_exc() - return "自动保存失败,请手动保存,音乐输出见下", (model.target_sample, _audio) + raise gr.Error(e) except Exception as e: - if debug:traceback.print_exc() - return "异常信息:"+str(e)+"\n请排障后重试",None - + raise gr.Error(e) + + def tts_func(_text,_rate): 
#使用edge-tts把文字转成音频 # voice = "zh-CN-XiaoyiNeural"#女性,较高音 @@ -88,6 +117,7 @@ def tts_func(_text,_rate): p.wait() return output_file + def vc_fn2(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,text2tts,tts_rate,F0_mean_pooling,enhancer_adaptive_key): #使用edge-tts把文字转成音频 output_file=tts_func(text2tts,tts_rate) @@ -110,76 +140,73 @@ def vc_fn2(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, nois os.remove(save_path2) return a,b -app = gr.Blocks() -with app: + +with gr.Blocks( + theme=gr.themes.Base( + primary_hue = gr.themes.colors.green, + font=["Source Sans Pro", "Arial", "sans-serif"], + font_mono=['JetBrains mono', "Consolas", 'Courier New'] + ), +) as app: with gr.Tabs(): - with gr.TabItem("Sovits4.0"): + with gr.TabItem("Inference"): gr.Markdown(value=""" - Sovits4.0 WebUI + So-vits-svc 4.0 推理 webui """) - - gr.Markdown(value=""" - 下面是模型文件选择: - """) - model_path = gr.File(label="模型文件") - gr.Markdown(value=""" - 下面是配置文件选择: - """) - config_path = gr.File(label="配置文件") - gr.Markdown(value=""" - 下面是聚类模型文件选择,没有可以不填: - """) - cluster_model_path = gr.File(label="聚类模型文件") - device = gr.Dropdown(label="推理设备,默认为自动选择cpu和gpu",choices=["Auto",*cuda,"cpu"],value="Auto") - enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False) - gr.Markdown(value=""" - 全部上传完毕后(全部文件模块显示download),点击模型解析进行解析: - """) - model_analysis_button = gr.Button(value="模型解析") - model_unload_button = gr.Button(value="模型卸载") - sid = gr.Dropdown(label="音色(说话人)") - sid_output = gr.Textbox(label="Output Message") + with gr.Row(variant="panel"): + with gr.Column(): + gr.Markdown(value=""" + 模型设置 + """) + model_path = gr.File(label="选择模型文件") + config_path = gr.File(label="选择配置文件") + cluster_model_path = gr.File(label="选择聚类模型文件(没有可以不选)") + device = gr.Dropdown(label="推理设备,默认为自动选择CPU和GPU", choices=["Auto",*cuda,"CPU"], value="Auto") + enhance = 
gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False) + with gr.Column(): + gr.Markdown(value=""" + 左侧文件全部选择完毕后(全部文件模块显示download),点击“加载模型”进行解析: + """) + model_load_button = gr.Button(value="加载模型", variant="primary") + model_unload_button = gr.Button(value="卸载模型", variant="primary") + sid = gr.Dropdown(label="音色(说话人)") + sid_output = gr.Textbox(label="Output Message") - text2tts=gr.Textbox(label="在此输入要转译的文字。注意,使用该功能建议打开F0预测,不然会很怪") - tts_rate = gr.Number(label="tts语速", value=0) + + with gr.Row(variant="panel"): + with gr.Column(): + gr.Markdown(value=""" + 推理设置 + """) + auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声勾选此项会究极跑调)", value=False) + F0_mean_pooling = gr.Checkbox(label="是否对F0使用均值滤波器(池化),对部分哑音有改善。注意,启动该选项会导致推理速度下降,默认关闭", value=False) + vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0) + cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,0即不启用聚类。使用聚类模型能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0) + slice_db = gr.Number(label="切片阈值", value=-40) + noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4) + with gr.Column(): + pad_seconds = gr.Number(label="推理音频pad秒数,由于未知原因开头结尾会有异响,pad一小段静音段后就不会出现", value=0.5) + cl_num = gr.Number(label="音频自动切片,0为不切片,单位为秒(s)", value=0) + lg_num = gr.Number(label="两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,注意,该设置会影响推理速度,单位为秒/s", value=0) + lgr_num = gr.Number(label="自动音频切片后,需要舍弃每段切片的头尾。该参数设置交叉长度保留的比例,范围0-1,左开右闭", value=0.75) + enhancer_adaptive_key = gr.Number(label="使增强器适应更高的音域(单位为半音数)|默认为0", value=0) + with gr.Tabs(): + with gr.TabItem("音频转音频"): + vc_input3 = gr.Audio(label="选择音频") + vc_submit = gr.Button("音频转换", variant="primary") + with gr.TabItem("文字转音频"): + text2tts=gr.Textbox(label="在此输入要转译的文字。注意,使用该功能建议打开F0预测,不然会很怪") + tts_rate = gr.Number(label="tts语速", value=0) + vc_submit2 = gr.Button("文字转换", variant="primary") + with gr.Row(): + with gr.Column(): + vc_output1 = gr.Textbox(label="Output Message") + with gr.Column(): + 
vc_output2 = gr.Audio(label="Output Audio", interactive=False) - vc_input3 = gr.Audio(label="上传音频") - vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)", value=0) - cluster_ratio = gr.Number(label="聚类模型混合比例,0-1之间,默认为0不启用聚类,能提升音色相似度,但会导致咬字下降(如果使用建议0.5左右)", value=0) - auto_f0 = gr.Checkbox(label="自动f0预测,配合聚类模型f0预测效果更好,会导致变调功能失效(仅限转换语音,歌声不要勾选此项会究极跑调)", value=False) - F0_mean_pooling = gr.Checkbox(label="是否对F0使用均值滤波器(池化),对部分哑音有改善。注意,启动该选项会导致推理速度下降,默认关闭", value=False) - slice_db = gr.Number(label="切片阈值", value=-40) - noise_scale = gr.Number(label="noise_scale 建议不要动,会影响音质,玄学参数", value=0.4) - cl_num = gr.Number(label="音频自动切片,0为不切片,单位为秒/s", value=0) - pad_seconds = gr.Number(label="推理音频pad秒数,由于未知原因开头结尾会有异响,pad一小段静音段后就不会出现", value=0.5) - lg_num = gr.Number(label="两端音频切片的交叉淡入长度,如果自动切片后出现人声不连贯可调整该数值,如果连贯建议采用默认值0,注意,该设置会影响推理速度,单位为秒/s", value=0) - lgr_num = gr.Number(label="自动音频切片后,需要舍弃每段切片的头尾。该参数设置交叉长度保留的比例,范围0-1,左开右闭", value=0.75,interactive=True) - enhancer_adaptive_key = gr.Number(label="使增强器适应更高的音域(单位为半音数)|默认为0", value=0,interactive=True) - vc_submit = gr.Button("音频直接转换", variant="primary") - vc_submit2 = gr.Button("文字转音频+转换", variant="primary") - vc_output1 = gr.Textbox(label="Output Message") - vc_output2 = gr.Audio(label="Output Audio") - def modelAnalysis(model_path,config_path,cluster_model_path,device,enhance): - global model - try: - model = Svc(model_path.name, config_path.name,device=device if device!="Auto" else None,cluster_model_path= cluster_model_path.name if cluster_model_path!=None else "",nsf_hifigan_enhance=enhance) - spks = list(model.spk2id.keys()) - device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev) - return sid.update(choices = spks,value=spks[0]),"ok,模型被加载到了设备{}之上".format(device_name) - except Exception as e: - if debug:traceback.print_exc() - return "","异常信息:"+str(e)+"\n请排障后重试" - def modelUnload(): - global model - if model is None: - return sid.update(choices = 
[],value=""),"没有模型需要卸载!" - else: - model = None - torch.cuda.empty_cache() - return sid.update(choices = [],value=""),"模型卸载完毕!" vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,F0_mean_pooling,enhancer_adaptive_key], [vc_output1, vc_output2]) vc_submit2.click(vc_fn2, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,text2tts,tts_rate,F0_mean_pooling,enhancer_adaptive_key], [vc_output1, vc_output2]) - model_analysis_button.click(modelAnalysis,[model_path,config_path,cluster_model_path,device,enhance],[sid,sid_output]) + model_load_button.click(modelAnalysis,[model_path,config_path,cluster_model_path,device,enhance],[sid,sid_output]) model_unload_button.click(modelUnload,[],[sid,sid_output]) app.launch() From 131c7774eec3571aee7135f8d78887beb41d1bfa Mon Sep 17 00:00:00 2001 From: SherkeyXD <253294679@qq.com> Date: Tue, 11 Apr 2023 17:42:29 +0800 Subject: [PATCH 09/21] specify gradio version fix #68 --- requirements.txt | 2 +- requirements_win.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 23b5736..9dd41d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ Flask Flask_Cors -gradio +gradio>=3.7.0 numpy==1.23.0 pyworld==0.2.5 scipy==1.10.0 diff --git a/requirements_win.txt b/requirements_win.txt index 2c57f89..8201f6d 100644 --- a/requirements_win.txt +++ b/requirements_win.txt @@ -2,7 +2,7 @@ librosa==0.9.1 fairseq==0.12.2 Flask==2.1.2 Flask_Cors==3.0.10 -gradio +gradio>=3.7.0 numpy playsound==1.3.0 PyAudio==0.2.12 From b86da35d9cd5284620d35938b96dc7535c530fe4 Mon Sep 17 00:00:00 2001 From: magic-akari Date: Tue, 11 Apr 2023 19:45:51 +0800 Subject: [PATCH 10/21] fix: cast f0_coarse to int --- utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils.py b/utils.py index 457ae8f..548be1c 100644 --- a/utils.py +++ b/utils.py 
@@ -205,7 +205,7 @@ def f0_to_coarse(f0): f0_mel[f0_mel <= 1] = 1 f0_mel[f0_mel > f0_bin - 1] = f0_bin - 1 - f0_coarse = (f0_mel + 0.5).long() if is_torch else np.rint(f0_mel).astype(np.int) + f0_coarse = (f0_mel + 0.5).int() if is_torch else np.rint(f0_mel).astype(np.int) assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (f0_coarse.max(), f0_coarse.min()) return f0_coarse From c0850e14528c727c7df5337e88ec941ea2f462e4 Mon Sep 17 00:00:00 2001 From: YuriHead Date: Tue, 11 Apr 2023 20:06:50 +0800 Subject: [PATCH 11/21] Create LICENSE --- LICENSE | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/LICENSE b/LICENSE index c7202d4..28bac26 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,28 @@ -MIT License +BSD 3-Clause License -Copyright (c) 2021 Jingyi Li +Copyright (c) 2023, SVC Develop Team -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. From ed405f677b2b6f149281d598e84025681df208d6 Mon Sep 17 00:00:00 2001 From: YuriHead Date: Tue, 11 Apr 2023 20:19:59 +0800 Subject: [PATCH 12/21] Update README_zh_CN.md --- README_zh_CN.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index e7ee3bb..c5a871d 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -14,8 +14,7 @@ 2. 任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 3. 
由输入源造成的侵权问题需自行承担全部责任和一切后果。使用其他商用歌声合成软件作为输入源时,请确保遵守该软件的使用条例,注意,许多歌声合成引擎使用条例中明确指明不可用于输入源进行转换! 4. 继续使用视为已同意本仓库 README 所述相关条例,本仓库 README 已进行劝导义务,不对后续可能存在问题负责。 -5. 如将本仓库代码二次分发,或将由此项目产出的任何结果公开发表 (包括但不限于视频网站投稿),请注明原作者及代码来源 (此仓库)。 -6. 如果将此项目用于任何其他企划,请提前联系并告知本仓库作者,十分感谢。 +5. 如果将此项目用于任何其他企划,请提前联系并告知本仓库作者,十分感谢。 ## 🆕 Update! From 2de6779d9926e564e8d5b0ba5baaa76fc792a63d Mon Sep 17 00:00:00 2001 From: YuriHead Date: Tue, 11 Apr 2023 20:20:17 +0800 Subject: [PATCH 13/21] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 4cb78af..8b3f444 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,7 @@ 2. Any videos based on sovits that are published on video platforms must clearly indicate in the description that they are used for voice changing and specify the input source of the voice or audio, for example, using videos or audios published by others and separating the vocals as input source for conversion, which must provide clear original video or music links. If your own voice or other synthesized voices from other commercial vocal synthesis software are used as the input source for conversion, you must also explain it in the description. 3. You shall be solely responsible for any infringement problems caused by the input source. When using other commercial vocal synthesis software as input source, please ensure that you comply with the terms of use of the software. Note that many vocal synthesis engines clearly state in their terms of use that they cannot be used for input source conversion. 4. Continuing to use this project is deemed as agreeing to the relevant provisions stated in this repository README. This repository README has the obligation to persuade, and is not responsible for any subsequent problems that may arise. -5. 
If you distribute this repository's code or publish any results produced by this project publicly (including but not limited to video sharing platforms), please indicate the original author and code source (this repository). -6. If you use this project for any other plan, please contact and inform the author of this repository in advance. Thank you very much. +5. If you use this project for any other plan, please contact and inform the author of this repository in advance. Thank you very much. ## 🆕 Update! From c9dce1349e62f4ad17c765d8f5a5d44e829e73ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=9D=CE=B1=CF=81=CE=BF=CF=85=CF=83=CE=AD=C2=B7=CE=BC?= =?UTF-8?q?=C2=B7=CE=B3=CE=B9=CE=BF=CF=85=CE=BC=CE=B5=CE=BC=CE=AF=C2=B7?= =?UTF-8?q?=CE=A7=CE=B9=CE=BD=CE=B1=CE=BA=CE=AC=CE=BD=CE=BD=CE=B1?= <40709280+NaruseMioShirakana@users.noreply.github.com> Date: Tue, 11 Apr 2023 21:21:26 +0800 Subject: [PATCH 14/21] Update README_zh_CN.md --- README_zh_CN.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index c5a871d..08471e9 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -6,6 +6,10 @@ #### ✨ 支持实时转换的一个客户端:[w-okada/voice-changer](https://github.com/w-okada/voice-changer) +## 声明 + +本项目为开源,离线的项目,SvcDevelopTeam的所有成员与本项目的所有开发者以及维护者(以下简称贡献者)对本项目没有控制力。本项目的贡献者从未向任何组织或个人提供包括但不限于数据集提取、数据集加工、算力支持、训练支持、推理等一切形式的帮助;本项目的贡献者不知晓也无法知晓使用者使用该项目的用途。故一切基于本项目训练的AI模型和合成的音频都与本项目贡献者无关。一切由此造成的问题由使用者自行承担。 + ## 📏 使用规约 # Warning:请自行解决数据集授权问题,禁止使用非授权数据集进行训练!任何由于使用非授权数据集进行训练造成的问题,需自行承担全部责任和后果!与仓库、仓库维护者、svc develop team 无关! @@ -13,8 +17,9 @@ 1. 本项目是基于学术交流目的建立,仅供交流与学习使用,并非为生产环境准备。 2. 任何发布到视频平台的基于 sovits 制作的视频,都必须要在简介明确指明用于变声器转换的输入源歌声、音频,例如:使用他人发布的视频 / 音频,通过分离的人声作为输入源进行转换的,必须要给出明确的原视频、音乐链接;若使用是自己的人声,或是使用其他歌声合成引擎合成的声音作为输入源进行转换的,也必须在简介加以说明。 3. 由输入源造成的侵权问题需自行承担全部责任和一切后果。使用其他商用歌声合成软件作为输入源时,请确保遵守该软件的使用条例,注意,许多歌声合成引擎使用条例中明确指明不可用于输入源进行转换! -4. 继续使用视为已同意本仓库 README 所述相关条例,本仓库 README 已进行劝导义务,不对后续可能存在问题负责。 -5. 如果将此项目用于任何其他企划,请提前联系并告知本仓库作者,十分感谢。 +4. 
禁止使用该项目从事违法行为与宗教、政治等活动,该项目维护者坚决抵制上述行为,不同意此条则禁止使用该项目。 +5. 继续使用视为已同意本仓库 README 所述相关条例,本仓库 README 已进行劝导义务,不对后续可能存在问题负责。 +6. 如果将此项目用于任何其他企划,请提前联系并告知本仓库作者,十分感谢。 ## 🆕 Update! From 8dd3cc8118897216b6f405c4dc2c43258f1ca176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=9D=CE=B1=CF=81=CE=BF=CF=85=CF=83=CE=AD=C2=B7=CE=BC?= =?UTF-8?q?=C2=B7=CE=B3=CE=B9=CE=BF=CF=85=CE=BC=CE=B5=CE=BC=CE=AF=C2=B7?= =?UTF-8?q?=CE=A7=CE=B9=CE=BD=CE=B1=CE=BA=CE=AC=CE=BD=CE=BD=CE=B1?= <40709280+NaruseMioShirakana@users.noreply.github.com> Date: Tue, 11 Apr 2023 21:21:47 +0800 Subject: [PATCH 15/21] Update README_zh_CN.md --- README_zh_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 08471e9..9bb1a0d 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -8,7 +8,7 @@ ## 声明 -本项目为开源,离线的项目,SvcDevelopTeam的所有成员与本项目的所有开发者以及维护者(以下简称贡献者)对本项目没有控制力。本项目的贡献者从未向任何组织或个人提供包括但不限于数据集提取、数据集加工、算力支持、训练支持、推理等一切形式的帮助;本项目的贡献者不知晓也无法知晓使用者使用该项目的用途。故一切基于本项目训练的AI模型和合成的音频都与本项目贡献者无关。一切由此造成的问题由使用者自行承担。 +本项目为开源、离线的项目,SvcDevelopTeam的所有成员与本项目的所有开发者以及维护者(以下简称贡献者)对本项目没有控制力。本项目的贡献者从未向任何组织或个人提供包括但不限于数据集提取、数据集加工、算力支持、训练支持、推理等一切形式的帮助;本项目的贡献者不知晓也无法知晓使用者使用该项目的用途。故一切基于本项目训练的AI模型和合成的音频都与本项目贡献者无关。一切由此造成的问题由使用者自行承担。 ## 📏 使用规约 From 90bdcb8665299d687f7c4e8431ceb5e3377b2d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=9D=CE=B1=CF=81=CE=BF=CF=85=CF=83=CE=AD=C2=B7=CE=BC?= =?UTF-8?q?=C2=B7=CE=B3=CE=B9=CE=BF=CF=85=CE=BC=CE=B5=CE=BC=CE=AF=C2=B7?= =?UTF-8?q?=CE=A7=CE=B9=CE=BD=CE=B1=CE=BA=CE=AC=CE=BD=CE=BD=CE=B1?= <40709280+NaruseMioShirakana@users.noreply.github.com> Date: Tue, 11 Apr 2023 21:24:55 +0800 Subject: [PATCH 16/21] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8b3f444..6834883 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,10 @@ #### ✨ A client supports real-time conversion: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) +## + +This project is an open source, offline project, 
and all members of SvcDevelopTeam and all developers and maintainers of this project (hereinafter referred to as contributors) have no control over this project. The contributor of this project has never provided any organization or individual with any form of assistance, including but not limited to data set extraction, data set processing, computing support, training support, infering, etc. Contributors to the project do not and cannot know what users are using the project for. Therefore, all AI models and synthesized audio based on the training of this project have nothing to do with the contributors of this project. All problems arising therefrom shall be borne by the user. + ## 📏 Terms of Use # Warning: Please solve the authorization problem of the dataset on your own. You shall be solely responsible for any problems caused by the use of non-authorized datasets for training and all consequences thereof.The repository and its maintainer, svc develop team, have nothing to do with the consequences! @@ -13,8 +17,9 @@ 1. This project is established for academic exchange purposes only and is intended for communication and learning purposes. It is not intended for production environments. 2. Any videos based on sovits that are published on video platforms must clearly indicate in the description that they are used for voice changing and specify the input source of the voice or audio, for example, using videos or audios published by others and separating the vocals as input source for conversion, which must provide clear original video or music links. If your own voice or other synthesized voices from other commercial vocal synthesis software are used as the input source for conversion, you must also explain it in the description. 3. You shall be solely responsible for any infringement problems caused by the input source. When using other commercial vocal synthesis software as input source, please ensure that you comply with the terms of use of the software. 
Note that many vocal synthesis engines clearly state in their terms of use that they cannot be used for input source conversion. -4. Continuing to use this project is deemed as agreeing to the relevant provisions stated in this repository README. This repository README has the obligation to persuade, and is not responsible for any subsequent problems that may arise. -5. If you use this project for any other plan, please contact and inform the author of this repository in advance. Thank you very much. +4. It is forbidden to use this project to engage in illegal, religious, or political activities. The project developers firmly oppose such activities. If you do not agree with this clause, you are prohibited from using the project. +5. Continuing to use this project is deemed as agreeing to the relevant provisions stated in this repository README. This repository README has fulfilled its obligation to advise, and is not responsible for any subsequent problems that may arise. +6. If you use this project for any other plan, please contact and inform the author of this repository in advance. Thank you very much. ## 🆕 Update! From cfd930709b84fdd2a546e676d026fd4a8bbc073e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=9D=CE=B1=CF=81=CE=BF=CF=85=CF=83=CE=AD=C2=B7=CE=BC?= =?UTF-8?q?=C2=B7=CE=B3=CE=B9=CE=BF=CF=85=CE=BC=CE=B5=CE=BC=CE=AF=C2=B7?= =?UTF-8?q?=CE=A7=CE=B9=CE=BD=CE=B1=CE=BA=CE=AC=CE=BD=CE=BD=CE=B1?= <40709280+NaruseMioShirakana@users.noreply.github.com> Date: Tue, 11 Apr 2023 21:25:29 +0800 Subject: [PATCH 17/21] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6834883..3253df2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ #### ✨ A client supports real-time conversion: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) -## +## Warning!!! 
This project is an open source, offline project, and all members of SvcDevelopTeam and all developers and maintainers of this project (hereinafter referred to as contributors) have no control over this project. The contributor of this project has never provided any organization or individual with any form of assistance, including but not limited to data set extraction, data set processing, computing support, training support, infering, etc. Contributors to the project do not and cannot know what users are using the project for. Therefore, all AI models and synthesized audio based on the training of this project have nothing to do with the contributors of this project. All problems arising therefrom shall be borne by the user. From 31bc93cf2b646e3036138f5783a531231bbc34fa Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Tue, 11 Apr 2023 23:41:21 +0800 Subject: [PATCH 18/21] Debug --- inference/infer_tool.py | 5 +++++ webUI.py | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/inference/infer_tool.py b/inference/infer_tool.py index 5328c54..10c481d 100644 --- a/inference/infer_tool.py +++ b/inference/infer_tool.py @@ -221,6 +221,11 @@ class Svc(object): # 清理显存 torch.cuda.empty_cache() + def unload_model(self): + # 卸载模型 + del self.net_g_ms + if self.enhancer!=None: del self.enhancer + def slice_inference(self, raw_audio_path, spk, diff --git a/webUI.py b/webUI.py index fb81089..7d5054d 100644 --- a/webUI.py +++ b/webUI.py @@ -28,15 +28,16 @@ model = None spk = None debug = False -cuda = [] +cuda = {} if torch.cuda.is_available(): for i in range(torch.cuda.device_count()): device_name = torch.cuda.get_device_properties(i).name - cuda.append(f"CUDA:{i} {device_name}") + cuda[f"CUDA:{i} {device_name}"] = f"cuda:{i}" def modelAnalysis(model_path,config_path,cluster_model_path,device,enhance): global model try: + device = cuda[device] if "CUDA" in device else device model = Svc(model_path.name, config_path.name, device=device if device!="Auto" else 
None, cluster_model_path = cluster_model_path.name if cluster_model_path != None else "",nsf_hifigan_enhance=enhance) spks = list(model.spk2id.keys()) device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev) @@ -58,6 +59,7 @@ def modelUnload(): if model is None: return sid.update(choices = [],value=""),"没有模型需要卸载!" else: + model.unload_model() model = None torch.cuda.empty_cache() return sid.update(choices = [],value=""),"模型卸载完毕!" @@ -161,7 +163,7 @@ with gr.Blocks( model_path = gr.File(label="选择模型文件") config_path = gr.File(label="选择配置文件") cluster_model_path = gr.File(label="选择聚类模型文件(没有可以不选)") - device = gr.Dropdown(label="推理设备,默认为自动选择CPU和GPU", choices=["Auto",*cuda,"CPU"], value="Auto") + device = gr.Dropdown(label="推理设备,默认为自动选择CPU和GPU", choices=["Auto",*cuda.keys(),"CPU"], value="Auto") enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False) with gr.Column(): gr.Markdown(value=""" From f2b4515d72a00c9f89a52e58ead1e6f798351020 Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Wed, 12 Apr 2023 00:05:24 +0800 Subject: [PATCH 19/21] Debug --- inference/infer_tool.py | 2 +- webUI.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/inference/infer_tool.py b/inference/infer_tool.py index 10c481d..6a2f4df 100644 --- a/inference/infer_tool.py +++ b/inference/infer_tool.py @@ -224,7 +224,7 @@ class Svc(object): def unload_model(self): # 卸载模型 del self.net_g_ms - if self.enhancer!=None: del self.enhancer + if hasattr(self,"enhancer"): del self.enhancer def slice_inference(self, raw_audio_path, diff --git a/webUI.py b/webUI.py index 7d5054d..ad9042f 100644 --- a/webUI.py +++ b/webUI.py @@ -17,6 +17,7 @@ from scipy.io import wavfile import librosa import torch import time +import traceback logging.getLogger('numba').setLevel(logging.WARNING) logging.getLogger('markdown_it').setLevel(logging.WARNING) @@ -51,6 +52,7 @@ def 
modelAnalysis(model_path,config_path,cluster_model_path,device,enhance): msg += i + " " return sid.update(choices = spks,value=spks[0]), msg except Exception as e: + if debug: traceback.print_exc() raise gr.Error(e) @@ -90,8 +92,10 @@ def vc_fn(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise soundfile.write(output_file, _audio, model.target_sample, format="wav") return f"推理成功,音频文件保存为results/{filename}", (model.target_sample, _audio) except Exception as e: + if debug: traceback.print_exc() raise gr.Error(e) except Exception as e: + if debug: traceback.print_exc() raise gr.Error(e) @@ -142,6 +146,8 @@ def vc_fn2(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, nois os.remove(save_path2) return a,b +def debug_change(): + debug = debug_button.value with gr.Blocks( theme=gr.themes.Base( @@ -205,9 +211,15 @@ with gr.Blocks( vc_output1 = gr.Textbox(label="Output Message") with gr.Column(): vc_output2 = gr.Audio(label="Output Audio", interactive=False) - + with gr.Row(variant="panel"): + with gr.Column(): + gr.Markdown(value=""" + WebUI设置 + """) + debug_button = gr.Checkbox(label="Debug模式,如果向社区反馈BUG需要打开,打开后控制台可以显示具体错误提示", value=debug) vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,F0_mean_pooling,enhancer_adaptive_key], [vc_output1, vc_output2]) vc_submit2.click(vc_fn2, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,text2tts,tts_rate,F0_mean_pooling,enhancer_adaptive_key], [vc_output1, vc_output2]) + debug_button.change(debug_change,[],[]) model_load_button.click(modelAnalysis,[model_path,config_path,cluster_model_path,device,enhance],[sid,sid_output]) model_unload_button.click(modelUnload,[],[sid,sid_output]) app.launch() From 6d3d4ea06e6718ae209bad757aa4821b72f84170 Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Wed, 12 Apr 2023 00:11:02 +0800 Subject: [PATCH 20/21] Debug --- webUI.py | 1 
+ 1 file changed, 1 insertion(+) diff --git a/webUI.py b/webUI.py index ad9042f..ba2b361 100644 --- a/webUI.py +++ b/webUI.py @@ -147,6 +147,7 @@ def vc_fn2(sid, input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, nois return a,b def debug_change(): + global debug debug = debug_button.value with gr.Blocks( From 8aa849e270f6ccfb16a13894ecbf9ae09a7bf8af Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Wed, 12 Apr 2023 00:44:16 +0800 Subject: [PATCH 21/21] Debug --- inference/infer_tool.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/inference/infer_tool.py b/inference/infer_tool.py index 6a2f4df..94b3ca6 100644 --- a/inference/infer_tool.py +++ b/inference/infer_tool.py @@ -6,6 +6,7 @@ import os import time from pathlib import Path from inference import slicer +import gc import librosa import numpy as np @@ -223,8 +224,13 @@ class Svc(object): def unload_model(self): # 卸载模型 + self.net_g_ms = self.net_g_ms.to("cpu") del self.net_g_ms - if hasattr(self,"enhancer"): del self.enhancer + if hasattr(self,"enhancer"): + self.enhancer.enhancer = self.enhancer.enhancer.to("cpu") + del self.enhancer.enhancer + del self.enhancer + gc.collect() def slice_inference(self, raw_audio_path,