This commit is contained in:
ylzz1997 2023-08-23 03:05:25 +08:00
parent fd8e717112
commit 08617333ce
2 changed files with 4 additions and 2 deletions

View File

@@ -6,6 +6,7 @@ import logging
import os
import pickle
import time
import typing
from pathlib import Path
import librosa
@@ -199,6 +200,7 @@ class Svc(object):
_ = self.net_g_ms.half().eval().to(self.dev)
else:
_ = self.net_g_ms.eval().to(self.dev)
del self.net_g_ms.enc_q
if spk_mix_enable:
self.net_g_ms.EnableCharacterMix(len(self.spk2id), self.dev)
@@ -268,7 +270,7 @@ class Svc(object):
second_encoding = False,
loudness_envelope_adjustment = 1
):
-if isinstance(raw_path, str):
+if isinstance(raw_path, str) or isinstance(raw_path, io.BytesIO):
wav, sr = torchaudio.load(raw_path)
if not hasattr(self,"audio_resample_transform") or self.audio16k_resample_transform.orig_freq != sr:
self.audio_resample_transform = torchaudio.transforms.Resample(sr,self.target_sample)

View File

@@ -523,7 +523,7 @@ class SynthesizerTrn(nn.Module):
if self.use_automatic_f0_prediction and predict_f0:
lf0 = 2595. * torch.log10(1. + f0.unsqueeze(1) / 700.) / 500
norm_lf0 = utils.normalize_f0(lf0, x_mask, uv, random_scale=False)
-pred_lf0 = self.f0_decoder(x, norm_lf0, x_mask, spk_emb=g)
+pred_lf0 = self.f0_decoder(x, norm_lf0, x_mask, spk_emb=g).to(f0)
f0 = (700 * (torch.pow(10, pred_lf0 * 500 / 2595) - 1)).squeeze(1)
z_p, m_p, logs_p, c_mask = self.enc_p(x, x_mask, f0=f0_to_coarse(f0), noice_scale=noice_scale)