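# Preprocessing: for every .wav under --in_dir, cache speech-encoder content
# features (<wav>.soft.pt), F0 / voicing (<wav>.f0.npy), and linear
# spectrograms (<wav>.spec.pt) so later stages can reuse them.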
import math
import multiprocessing
import os
import argparse
from random import shuffle
import torch
from glob import glob
from tqdm import tqdm
from modules.mel_processing import spectrogram_torch

import json
import utils
import logging

logging.getLogger("numba").setLevel(logging.WARNING)

import librosa
import numpy as np

hps = utils.get_hparams_from_file("configs/config.json")
sampling_rate = hps.data.sampling_rate
hop_length = hps.data.hop_length
speech_encoder = hps["model"]["speech_encoder"]

def process_one(filename, hmodel, f0p):
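    # Cache content features, F0/voicing, and the spectrogram for one file,
    # skipping any artifact that already exists on disk.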
    wav, sr = librosa.load(filename, sr=sampling_rate)

    soft_path = filename + ".soft.pt"
    if not os.path.exists(soft_path):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)
        wav16k = torch.from_numpy(wav16k).to(device)
        c = hmodel.encoder(wav16k)
        torch.save(c.cpu(), soft_path)

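    # F0 and voiced/unvoiced flags from the predictor chosen via --f0_predictor,
    # saved together as a single object array.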
    f0_path = filename + ".f0.npy"
    if not os.path.exists(f0_path):
        f0_predictor = utils.get_f0_predictor(
            f0p,
            sampling_rate=sampling_rate,
            hop_length=hop_length,
            device=None,
            threshold=0.05,
        )
        f0, uv = f0_predictor.compute_f0_uv(wav)
        np.save(f0_path, np.asanyarray((f0, uv), dtype=object))

    spec_path = filename.replace(".wav", ".spec.pt")
    if not os.path.exists(spec_path):
        # Compute the linear spectrogram.
        # This cannot be replaced by torch.FloatTensor(wav), because
        # load_wav_to_torch returns a tensor that still needs to be
        # normalized by max_wav_value.
        audio, sr = utils.load_wav_to_torch(filename)
        if sr != hps.data.sampling_rate:
            raise ValueError(
                "{} SR doesn't match target {} SR".format(sr, hps.data.sampling_rate)
            )
        audio_norm = audio / hps.data.max_wav_value
        audio_norm = audio_norm.unsqueeze(0)
        spec = spectrogram_torch(
            audio_norm,
            hps.data.filter_length,
            hps.data.sampling_rate,
            hps.data.hop_length,
            hps.data.win_length,
            center=False,
        )
        spec = torch.squeeze(spec, 0)
        torch.save(spec, spec_path)

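# Worker entry point: each process loads its own copy of the speech encoder
# and runs process_one over its shard of the file list.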
def process_batch(filenames, f0p):
    print("Loading hubert for content...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    hmodel = utils.get_speech_encoder(speech_encoder, device=device)
    print("Loaded hubert.")
    for filename in tqdm(filenames):
        process_one(filename, hmodel, f0p)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--in_dir", type=str, default="dataset/44k", help="path to input dir"
    )
    parser.add_argument(
        "--f0_predictor",
        type=str,
        default="dio",
        help="Select F0 predictor: crepe, pm, dio, or harvest (default: dio; "
        "note: crepe is the original F0 with a mean filter)",
    )

    args = parser.parse_args()
    f0p = args.f0_predictor
    print(speech_encoder)
    print(f0p)

    filenames = glob(f"{args.in_dir}/*/*.wav", recursive=True)
    shuffle(filenames)
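    # "spawn" lets each worker initialize CUDA itself; forked processes
    # cannot safely reuse the parent's CUDA state.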
    multiprocessing.set_start_method("spawn", force=True)

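    # One contiguous chunk of files per worker. Each worker loads its own
    # encoder, so raising num_processes also multiplies model memory use.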
    num_processes = 1
    chunk_size = int(math.ceil(len(filenames) / num_processes))
    chunks = [
        filenames[i : i + chunk_size] for i in range(0, len(filenames), chunk_size)
    ]
    print([len(c) for c in chunks])
    processes = [
        multiprocessing.Process(target=process_batch, args=(chunk, f0p))
        for chunk in chunks
    ]
    for p in processes:
        p.start()

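# Example invocation (the script filename here is an assumption, not from the
# source; the argument values are the argparse defaults above):
#   python preprocess.py --in_dir dataset/44k --f0_predictor dio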