This commit is contained in:
ylzz1997 2023-05-22 19:06:42 +08:00
parent 9ae948cb0c
commit 746448033f
1 changed file with 15 additions and 28 deletions

View File

@@ -238,37 +238,24 @@ class Svc(object):
audio = audio[0,0].data.float() audio = audio[0,0].data.float()
if self.shallow_diffusion: if self.shallow_diffusion:
audio_mel = self.vocoder.extract(audio[None,:],self.target_sample) audio_mel = self.vocoder.extract(audio[None,:],self.target_sample)
vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev)
f0 = f0[:,:,None]
c = c.transpose(-1,-2)
audio_mel = self.diffusion_model(
c,
f0,
vol,
spk_id = sid,
spk_mix_dict = None,
gt_spec=audio_mel,
infer=True,
infer_speedup=self.diffusion_args.infer.speedup,
method=self.diffusion_args.infer.method,
k_step=k_step)
audio = self.vocoder.infer(audio_mel, f0).squeeze()
else: else:
wav = torch.FloatTensor(wav).to(self.dev) audio = torch.FloatTensor(wav).to(self.dev)
vol = self.volume_extractor.extract(wav[None,:])[None,:,None].to(self.dev) audio_mel = None
c = c.transpose(-1,-2) if self.only_diffusion or self.shallow_diffusion:
vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev)
f0 = f0[:,:,None] f0 = f0[:,:,None]
c = c.transpose(-1,-2)
audio_mel = self.diffusion_model( audio_mel = self.diffusion_model(
c, c,
f0, f0,
vol, vol,
spk_id = sid, spk_id = sid,
spk_mix_dict = None, spk_mix_dict = None,
gt_spec=None, gt_spec=audio_mel,
infer=True, infer=True,
infer_speedup=self.diffusion_args.infer.speedup, infer_speedup=self.diffusion_args.infer.speedup,
method=self.diffusion_args.infer.method, method=self.diffusion_args.infer.method,
k_step=k_step) k_step=k_step)
audio = self.vocoder.infer(audio_mel, f0).squeeze() audio = self.vocoder.infer(audio_mel, f0).squeeze()
if self.nsf_hifigan_enhance: if self.nsf_hifigan_enhance:
audio, _ = self.enhancer.enhance( audio, _ = self.enhancer.enhance(