Merge pull request #83 from svc-develop-team/optimize-some-code
删除了一些无意义代码
This commit is contained in:
commit
27ef997952
|
@ -47,6 +47,8 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
|
||||||
audio_norm = audio / self.max_wav_value
|
audio_norm = audio / self.max_wav_value
|
||||||
audio_norm = audio_norm.unsqueeze(0)
|
audio_norm = audio_norm.unsqueeze(0)
|
||||||
spec_filename = filename.replace(".wav", ".spec.pt")
|
spec_filename = filename.replace(".wav", ".spec.pt")
|
||||||
|
|
||||||
|
# Ideally, all data generated after Mar 25 should have .spec.pt
|
||||||
if os.path.exists(spec_filename):
|
if os.path.exists(spec_filename):
|
||||||
spec = torch.load(spec_filename)
|
spec = torch.load(spec_filename)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -25,13 +25,11 @@ if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--train_list", type=str, default="./filelists/train.txt", help="path to train list")
|
parser.add_argument("--train_list", type=str, default="./filelists/train.txt", help="path to train list")
|
||||||
parser.add_argument("--val_list", type=str, default="./filelists/val.txt", help="path to val list")
|
parser.add_argument("--val_list", type=str, default="./filelists/val.txt", help="path to val list")
|
||||||
parser.add_argument("--test_list", type=str, default="./filelists/test.txt", help="path to test list")
|
|
||||||
parser.add_argument("--source_dir", type=str, default="./dataset/44k", help="path to source dir")
|
parser.add_argument("--source_dir", type=str, default="./dataset/44k", help="path to source dir")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
train = []
|
train = []
|
||||||
val = []
|
val = []
|
||||||
test = []
|
|
||||||
idx = 0
|
idx = 0
|
||||||
spk_dict = {}
|
spk_dict = {}
|
||||||
spk_id = 0
|
spk_id = 0
|
||||||
|
@ -51,13 +49,11 @@ if __name__ == "__main__":
|
||||||
new_wavs.append(file)
|
new_wavs.append(file)
|
||||||
wavs = new_wavs
|
wavs = new_wavs
|
||||||
shuffle(wavs)
|
shuffle(wavs)
|
||||||
train += wavs[2:-2]
|
train += wavs[2:]
|
||||||
val += wavs[:2]
|
val += wavs[:2]
|
||||||
test += wavs[-2:]
|
|
||||||
|
|
||||||
shuffle(train)
|
shuffle(train)
|
||||||
shuffle(val)
|
shuffle(val)
|
||||||
shuffle(test)
|
|
||||||
|
|
||||||
print("Writing", args.train_list)
|
print("Writing", args.train_list)
|
||||||
with open(args.train_list, "w") as f:
|
with open(args.train_list, "w") as f:
|
||||||
|
@ -71,12 +67,6 @@ if __name__ == "__main__":
|
||||||
wavpath = fname
|
wavpath = fname
|
||||||
f.write(wavpath + "\n")
|
f.write(wavpath + "\n")
|
||||||
|
|
||||||
print("Writing", args.test_list)
|
|
||||||
with open(args.test_list, "w") as f:
|
|
||||||
for fname in tqdm(test):
|
|
||||||
wavpath = fname
|
|
||||||
f.write(wavpath + "\n")
|
|
||||||
|
|
||||||
config_template["spk"] = spk_dict
|
config_template["spk"] = spk_dict
|
||||||
config_template["model"]["n_speakers"] = spk_id
|
config_template["model"]["n_speakers"] = spk_id
|
||||||
|
|
||||||
|
|
|
@ -7,10 +7,12 @@ from random import shuffle
|
||||||
import torch
|
import torch
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
from modules.mel_processing import spectrogram_torch
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
import logging
|
import logging
|
||||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
|
||||||
|
logging.getLogger("numba").setLevel(logging.WARNING)
|
||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
@ -29,11 +31,42 @@ def process_one(filename, hmodel):
|
||||||
wav16k = torch.from_numpy(wav16k).to(device)
|
wav16k = torch.from_numpy(wav16k).to(device)
|
||||||
c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
|
c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
|
||||||
torch.save(c.cpu(), soft_path)
|
torch.save(c.cpu(), soft_path)
|
||||||
|
|
||||||
f0_path = filename + ".f0.npy"
|
f0_path = filename + ".f0.npy"
|
||||||
if not os.path.exists(f0_path):
|
if not os.path.exists(f0_path):
|
||||||
f0 = utils.compute_f0_dio(wav, sampling_rate=sampling_rate, hop_length=hop_length)
|
f0 = utils.compute_f0_dio(
|
||||||
|
wav, sampling_rate=sampling_rate, hop_length=hop_length
|
||||||
|
)
|
||||||
np.save(f0_path, f0)
|
np.save(f0_path, f0)
|
||||||
|
|
||||||
|
spec_path = filename.replace(".wav", ".spec.pt")
|
||||||
|
if not os.path.exists(spec_path):
|
||||||
|
# Process spectrogram
|
||||||
|
# The following code can't be replaced by torch.FloatTensor(wav)
|
||||||
|
# because load_wav_to_torch returns a tensor that needs to be normalized
|
||||||
|
|
||||||
|
audio, sr = utils.load_wav_to_torch(filename)
|
||||||
|
if sr != hps.data.sampling_rate:
|
||||||
|
raise ValueError(
|
||||||
|
"{} SR doesn't match target {} SR".format(
|
||||||
|
sr, hps.data.sampling_rate
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
audio_norm = audio / hps.data.max_wav_value
|
||||||
|
audio_norm = audio_norm.unsqueeze(0)
|
||||||
|
|
||||||
|
spec = spectrogram_torch(
|
||||||
|
audio_norm,
|
||||||
|
hps.data.filter_length,
|
||||||
|
hps.data.sampling_rate,
|
||||||
|
hps.data.hop_length,
|
||||||
|
hps.data.win_length,
|
||||||
|
center=False,
|
||||||
|
)
|
||||||
|
spec = torch.squeeze(spec, 0)
|
||||||
|
torch.save(spec, spec_path)
|
||||||
|
|
||||||
|
|
||||||
def process_batch(filenames):
|
def process_batch(filenames):
|
||||||
print("Loading hubert for content...")
|
print("Loading hubert for content...")
|
||||||
|
@ -46,17 +79,23 @@ def process_batch(filenames):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--in_dir", type=str, default="dataset/44k", help="path to input dir")
|
parser.add_argument(
|
||||||
|
"--in_dir", type=str, default="dataset/44k", help="path to input dir"
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
filenames = glob(f'{args.in_dir}/*/*.wav', recursive=True) # [:10]
|
filenames = glob(f"{args.in_dir}/*/*.wav", recursive=True) # [:10]
|
||||||
shuffle(filenames)
|
shuffle(filenames)
|
||||||
multiprocessing.set_start_method('spawn',force=True)
|
multiprocessing.set_start_method("spawn", force=True)
|
||||||
|
|
||||||
num_processes = 1
|
num_processes = 1
|
||||||
chunk_size = int(math.ceil(len(filenames) / num_processes))
|
chunk_size = int(math.ceil(len(filenames) / num_processes))
|
||||||
chunks = [filenames[i:i + chunk_size] for i in range(0, len(filenames), chunk_size)]
|
chunks = [
|
||||||
|
filenames[i : i + chunk_size] for i in range(0, len(filenames), chunk_size)
|
||||||
|
]
|
||||||
print([len(c) for c in chunks])
|
print([len(c) for c in chunks])
|
||||||
processes = [multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks]
|
processes = [
|
||||||
|
multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks
|
||||||
|
]
|
||||||
for p in processes:
|
for p in processes:
|
||||||
p.start()
|
p.start()
|
||||||
|
|
22
spec_gen.py
22
spec_gen.py
|
@ -1,22 +0,0 @@
|
||||||
from data_utils import TextAudioSpeakerLoader
|
|
||||||
import json
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from utils import HParams
|
|
||||||
|
|
||||||
config_path = 'configs/config.json'
|
|
||||||
with open(config_path, "r") as f:
|
|
||||||
data = f.read()
|
|
||||||
config = json.loads(data)
|
|
||||||
hps = HParams(**config)
|
|
||||||
|
|
||||||
train_dataset = TextAudioSpeakerLoader("filelists/train.txt", hps)
|
|
||||||
test_dataset = TextAudioSpeakerLoader("filelists/test.txt", hps)
|
|
||||||
eval_dataset = TextAudioSpeakerLoader("filelists/val.txt", hps)
|
|
||||||
|
|
||||||
for _ in tqdm(train_dataset):
|
|
||||||
pass
|
|
||||||
for _ in tqdm(eval_dataset):
|
|
||||||
pass
|
|
||||||
for _ in tqdm(test_dataset):
|
|
||||||
pass
|
|
Loading…
Reference in New Issue