diff --git a/.ruff.toml b/.ruff.toml index 8b13789..ba961ac 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -1 +1,4 @@ +select = ["E", "F", "I"] +# Never enforce `E501` (line length violations). +ignore = ["E501"] diff --git a/cluster/__init__.py b/cluster/__init__.py index 68758d0..ae00ea6 100644 --- a/cluster/__init__.py +++ b/cluster/__init__.py @@ -1,6 +1,7 @@ import torch from sklearn.cluster import KMeans + def get_cluster_model(ckpt_path): checkpoint = torch.load(ckpt_path) kmeans_dict = {} diff --git a/cluster/kmeans.py b/cluster/kmeans.py index 0b78ed6..4d0ac69 100644 --- a/cluster/kmeans.py +++ b/cluster/kmeans.py @@ -1,7 +1,11 @@ -import torch,pynvml -from torch.nn.functional import normalize from time import time + import numpy as np +import pynvml +import torch +from torch.nn.functional import normalize + + # device=torch.device("cuda:0") def _kpp(data: torch.Tensor, k: int, sample_size: int = -1): """ Picks k points in the data based on the kmeans++ method. diff --git a/cluster/train_cluster.py b/cluster/train_cluster.py index 4858192..7e42de4 100644 --- a/cluster/train_cluster.py +++ b/cluster/train_cluster.py @@ -1,17 +1,17 @@ -import time -import tqdm -import os -from pathlib import Path -import logging import argparse -from kmeans import KMeansGPU -import torch +import logging +import os +import time +from pathlib import Path + import numpy as np -from sklearn.cluster import KMeans,MiniBatchKMeans +import torch +import tqdm +from kmeans import KMeansGPU +from sklearn.cluster import KMeans, MiniBatchKMeans logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -import torch def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=False):#gpu_minibatch真拉,虽然库支持但是也不考虑 logger.info(f"Loading features from {in_dir}") diff --git a/data_utils.py b/data_utils.py index 0b3c385..3edf0c7 100644 --- a/data_utils.py +++ b/data_utils.py @@ -1,12 +1,13 @@ import os import random + import numpy as np import torch import torch.utils.data import utils -from modules.mel_processing import spectrogram_torch, spectrogram_torch -from utils import load_wav_to_torch, load_filepaths_and_text +from modules.mel_processing import spectrogram_torch +from utils import load_filepaths_and_text, load_wav_to_torch # import h5py diff --git a/diffusion/data_loaders.py b/diffusion/data_loaders.py index ea802ef..b4ed93b 100644 --- a/diffusion/data_loaders.py +++ b/diffusion/data_loaders.py @@ -1,12 +1,14 @@ import os import random -import numpy as np + import librosa +import numpy as np import torch -import random -from utils import repeat_expand_2d -from tqdm import tqdm from torch.utils.data import Dataset +from tqdm import tqdm + +from utils import repeat_expand_2d + def traverse_dir( root_dir, diff --git a/diffusion/diffusion.py b/diffusion/diffusion.py index b95d305..646234b 100644 --- a/diffusion/diffusion.py +++ b/diffusion/diffusion.py @@ -1,9 +1,10 @@ from collections import deque from functools import partial from inspect import isfunction -import torch.nn.functional as F + import numpy as np import torch +import torch.nn.functional as F from torch import nn from tqdm import tqdm @@ -254,7 +255,11 @@ class GaussianDiffusion(nn.Module): if method is not None and infer_speedup > 1: if method == 'dpm-solver' or method == 'dpm-solver++': - from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver + from .dpm_solver_pytorch import ( + DPM_Solver, + NoiseScheduleVP, + model_wrapper, + ) # 1. Define the noise schedule. noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t]) @@ -332,7 +337,7 @@ class GaussianDiffusion(nn.Module): infer_speedup, cond=cond ) elif method == 'unipc': - from .uni_pc import NoiseScheduleVP, model_wrapper, UniPC + from .uni_pc import NoiseScheduleVP, UniPC, model_wrapper # 1. Define the noise schedule. noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t]) diff --git a/diffusion/diffusion_onnx.py b/diffusion/diffusion_onnx.py index 1d60edf..6282288 100644 --- a/diffusion/diffusion_onnx.py +++ b/diffusion/diffusion_onnx.py @@ -1,14 +1,14 @@ +import math from collections import deque from functools import partial from inspect import isfunction -import torch.nn.functional as F + import numpy as np -from torch.nn import Conv1d -from torch.nn import Mish import torch +import torch.nn.functional as F from torch import nn +from torch.nn import Conv1d, Mish from tqdm import tqdm -import math def exists(x): @@ -390,7 +390,11 @@ class GaussianDiffusion(nn.Module): if method is not None and infer_speedup > 1: if method == 'dpm-solver': - from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver + from .dpm_solver_pytorch import ( + DPM_Solver, + NoiseScheduleVP, + model_wrapper, + ) # 1. Define the noise schedule. noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t]) diff --git a/diffusion/infer_gt_mel.py b/diffusion/infer_gt_mel.py index da5f36c..0bdf1fe 100644 --- a/diffusion/infer_gt_mel.py +++ b/diffusion/infer_gt_mel.py @@ -1,5 +1,6 @@ import torch import torch.nn.functional as F + from diffusion.unit2mel import load_model_vocoder diff --git a/diffusion/logger/saver.py b/diffusion/logger/saver.py index 4233613..954ce99 100644 --- a/diffusion/logger/saver.py +++ b/diffusion/logger/saver.py @@ -2,14 +2,16 @@ author: wayn391@mastertones ''' +import datetime import os import time -import yaml -import datetime -import torch + import matplotlib.pyplot as plt +import torch +import yaml from torch.utils.tensorboard import SummaryWriter + class Saver(object): def __init__( self, diff --git a/diffusion/logger/utils.py b/diffusion/logger/utils.py index 1420076..a907de7 100644 --- a/diffusion/logger/utils.py +++ b/diffusion/logger/utils.py @@ -1,7 +1,9 @@ -import os -import yaml import json +import os + import torch +import yaml + def traverse_dir( root_dir, diff --git a/diffusion/onnx_export.py b/diffusion/onnx_export.py index 3663cfd..6a4ea22 100644 --- a/diffusion/onnx_export.py +++ b/diffusion/onnx_export.py @@ -1,10 +1,12 @@ -from diffusion_onnx import GaussianDiffusion import os -import yaml + +import numpy as np import torch import torch.nn as nn -import numpy as np import torch.nn.functional as F +import yaml +from diffusion_onnx import GaussianDiffusion + class DotDict(dict): def __getattr__(*args): diff --git a/diffusion/solver.py b/diffusion/solver.py index 8b38900..52657cc 100644 --- a/diffusion/solver.py +++ b/diffusion/solver.py @@ -1,12 +1,15 @@ import time + +import librosa import numpy as np import torch -import librosa -from diffusion.logger.saver import Saver -from diffusion.logger import utils from torch import autocast from torch.cuda.amp import GradScaler +from diffusion.logger import utils +from diffusion.logger.saver import Saver + + def test(args, model, vocoder, loader_test, saver): print(' [*] testing...') model.eval() diff --git a/diffusion/uni_pc.py b/diffusion/uni_pc.py index c920f92..72d8f51 100644 --- a/diffusion/uni_pc.py +++ b/diffusion/uni_pc.py @@ -1,6 +1,7 @@ -import torch import math +import torch + class NoiseScheduleVP: def __init__( diff --git a/diffusion/unit2mel.py b/diffusion/unit2mel.py index 0f40c0e..19090fe 100644 --- a/diffusion/unit2mel.py +++ b/diffusion/unit2mel.py @@ -1,11 +1,14 @@ import os -import yaml + +import numpy as np import torch import torch.nn as nn -import numpy as np +import yaml + from .diffusion import GaussianDiffusion -from .wavenet import WaveNet from .vocoder import Vocoder +from .wavenet import WaveNet + class DotDict(dict): def __getattr__(*args): diff --git a/diffusion/vocoder.py b/diffusion/vocoder.py index bbaa47f..ec9c80e 100644 --- a/diffusion/vocoder.py +++ b/diffusion/vocoder.py @@ -1,9 +1,10 @@ import torch -from vdecoder.nsf_hifigan.nvSTFT import STFT -from vdecoder.nsf_hifigan.models import load_model,load_config from torchaudio.transforms import Resample - +from vdecoder.nsf_hifigan.models import load_config, load_model +from vdecoder.nsf_hifigan.nvSTFT import STFT + + class Vocoder: def __init__(self, vocoder_type, vocoder_ckpt, device = None): if device is None: diff --git a/flask_api.py b/flask_api.py index b3f1e06..5547e00 100644 --- a/flask_api.py +++ b/flask_api.py @@ -7,7 +7,7 @@ import torchaudio from flask import Flask, request, send_file from flask_cors import CORS -from inference.infer_tool import Svc, RealTimeVC +from inference.infer_tool import RealTimeVC, Svc app = Flask(__name__) diff --git a/flask_api_full_song.py b/flask_api_full_song.py index 9dbf66a..29fbd72 100644 --- a/flask_api_full_song.py +++ b/flask_api_full_song.py @@ -1,10 +1,10 @@ import io + import numpy as np import soundfile from flask import Flask, request, send_file -from inference import infer_tool -from inference import slicer +from inference import infer_tool, slicer app = Flask(__name__) diff --git a/inference/infer_tool.py b/inference/infer_tool.py index 5dddb41..3ae57c7 100644 --- a/inference/infer_tool.py +++ b/inference/infer_tool.py @@ -1,15 +1,16 @@ +import gc import hashlib import io import json import logging import os +import pickle import time from pathlib import Path -from inference import slicer -import gc import librosa import numpy as np + # import onnxruntime import soundfile import torch @@ -17,10 +18,9 @@ import torchaudio import cluster import utils -from models import SynthesizerTrn -import pickle - from diffusion.unit2mel import load_model_vocoder +from inference import slicer +from models import SynthesizerTrn logging.getLogger('matplotlib').setLevel(logging.WARNING) diff --git a/inference/infer_tool_grad.py b/inference/infer_tool_grad.py index 0b3e72c..136e904 100644 --- a/inference/infer_tool_grad.py +++ b/inference/infer_tool_grad.py @@ -1,16 +1,18 @@ +import io import logging import os -import io + import librosa import numpy as np -from inference import slicer import parselmouth import soundfile import torch import torchaudio import utils +from inference import slicer from models import SynthesizerTrn + logging.getLogger('numba').setLevel(logging.WARNING) logging.getLogger('matplotlib').setLevel(logging.WARNING) diff --git a/inference_main.py b/inference_main.py index 3705172..d446649 100644 --- a/inference_main.py +++ b/inference_main.py @@ -1,8 +1,10 @@ import logging -from spkmix import spk_mix_map + import soundfile + from inference import infer_tool from inference.infer_tool import Svc +from spkmix import spk_mix_map logging.getLogger('numba').setLevel(logging.WARNING) chunks_dict = infer_tool.read_temp("inference/chunks_temp.json") diff --git a/models.py b/models.py index a125f7a..67909a5 100644 --- a/models.py +++ b/models.py @@ -1,18 +1,17 @@ import torch from torch import nn +from torch.nn import Conv1d, Conv2d from torch.nn import functional as F +from torch.nn.utils import spectral_norm, weight_norm import modules.attentions as attentions import modules.commons as commons import modules.modules as modules - -from torch.nn import Conv1d, Conv2d -from torch.nn.utils import weight_norm, spectral_norm - import utils from modules.commons import get_padding from utils import f0_to_coarse + class ResidualCouplingBlock(nn.Module): def __init__(self, channels, diff --git a/modules/F0Predictor/CrepeF0Predictor.py b/modules/F0Predictor/CrepeF0Predictor.py index e005288..086ca10 100644 --- a/modules/F0Predictor/CrepeF0Predictor.py +++ b/modules/F0Predictor/CrepeF0Predictor.py @@ -1,7 +1,9 @@ -from modules.F0Predictor.F0Predictor import F0Predictor -from modules.F0Predictor.crepe import CrepePitchExtractor import torch +from modules.F0Predictor.crepe import CrepePitchExtractor +from modules.F0Predictor.F0Predictor import F0Predictor + + class CrepeF0Predictor(F0Predictor): def __init__(self,hop_length=512,f0_min=50,f0_max=1100,device=None,sampling_rate=44100,threshold=0.05,model="full"): self.F0Creper = CrepePitchExtractor(hop_length=hop_length,f0_min=f0_min,f0_max=f0_max,device=device,threshold=threshold,model=model) diff --git a/modules/F0Predictor/DioF0Predictor.py b/modules/F0Predictor/DioF0Predictor.py index 27483a9..ef470a4 100644 --- a/modules/F0Predictor/DioF0Predictor.py +++ b/modules/F0Predictor/DioF0Predictor.py @@ -1,6 +1,8 @@ -from modules.F0Predictor.F0Predictor import F0Predictor -import pyworld import numpy as np +import pyworld + +from modules.F0Predictor.F0Predictor import F0Predictor + class DioF0Predictor(F0Predictor): def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100): diff --git a/modules/F0Predictor/HarvestF0Predictor.py b/modules/F0Predictor/HarvestF0Predictor.py index d382082..fe279f6 100644 --- a/modules/F0Predictor/HarvestF0Predictor.py +++ b/modules/F0Predictor/HarvestF0Predictor.py @@ -1,6 +1,8 @@ -from modules.F0Predictor.F0Predictor import F0Predictor -import pyworld import numpy as np +import pyworld + +from modules.F0Predictor.F0Predictor import F0Predictor + class HarvestF0Predictor(F0Predictor): def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100): diff --git a/modules/F0Predictor/PMF0Predictor.py b/modules/F0Predictor/PMF0Predictor.py index 486a801..cb7355f 100644 --- a/modules/F0Predictor/PMF0Predictor.py +++ b/modules/F0Predictor/PMF0Predictor.py @@ -1,6 +1,8 @@ -from modules.F0Predictor.F0Predictor import F0Predictor -import parselmouth import numpy as np +import parselmouth + +from modules.F0Predictor.F0Predictor import F0Predictor + class PMF0Predictor(F0Predictor): def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100): diff --git a/modules/F0Predictor/crepe.py b/modules/F0Predictor/crepe.py index 4b00419..e68f19c 100644 --- a/modules/F0Predictor/crepe.py +++ b/modules/F0Predictor/crepe.py @@ -1,4 +1,5 @@ -from typing import Optional,Union +from typing import Optional, Union + try: from typing import Literal except Exception: diff --git a/modules/attentions.py b/modules/attentions.py index bb591e9..9086e0e 100644 --- a/modules/attentions.py +++ b/modules/attentions.py @@ -1,4 +1,5 @@ import math + import torch from torch import nn from torch.nn import functional as F diff --git a/modules/commons.py b/modules/commons.py index abb20ac..756234c 100644 --- a/modules/commons.py +++ b/modules/commons.py @@ -1,7 +1,9 @@ import math + import torch from torch.nn import functional as F + def slice_pitch_segments(x, ids_str, segment_size=4): ret = torch.zeros_like(x[:, :segment_size]) for i in range(x.size(0)): diff --git a/modules/enhancer.py b/modules/enhancer.py index 3767631..a3f0dd0 100644 --- a/modules/enhancer.py +++ b/modules/enhancer.py @@ -1,10 +1,12 @@ import numpy as np import torch import torch.nn.functional as F -from vdecoder.nsf_hifigan.nvSTFT import STFT -from vdecoder.nsf_hifigan.models import load_model from torchaudio.transforms import Resample +from vdecoder.nsf_hifigan.models import load_model +from vdecoder.nsf_hifigan.nvSTFT import STFT + + class Enhancer: def __init__(self, enhancer_type, enhancer_ckpt, device=None): if device is None: diff --git a/modules/losses.py b/modules/losses.py index 4a489cd..494e979 100644 --- a/modules/losses.py +++ b/modules/losses.py @@ -1,5 +1,4 @@ -import torch - +import torch def feature_loss(fmap_r, fmap_g): diff --git a/modules/modules.py b/modules/modules.py index 3f11cc0..6af6227 100644 --- a/modules/modules.py +++ b/modules/modules.py @@ -1,13 +1,11 @@ import torch from torch import nn -from torch.nn import functional as F - from torch.nn import Conv1d -from torch.nn.utils import weight_norm, remove_weight_norm +from torch.nn import functional as F +from torch.nn.utils import remove_weight_norm, weight_norm import modules.commons as commons -from modules.commons import init_weights, get_padding - +from modules.commons import get_padding, init_weights LRELU_SLOPE = 0.1 diff --git a/onnx_export.py b/onnx_export.py index a70a912..27f49dd 100644 --- a/onnx_export.py +++ b/onnx_export.py @@ -1,6 +1,8 @@ import torch -from onnxexport.model_onnx import SynthesizerTrn + import utils +from onnxexport.model_onnx import SynthesizerTrn + def main(NetExport): path = "SoVits4.0" diff --git a/onnx_export_speaker_mix.py b/onnx_export_speaker_mix.py index 742ca39..aa3c9f0 100644 --- a/onnx_export_speaker_mix.py +++ b/onnx_export_speaker_mix.py @@ -1,8 +1,11 @@ -import torch -from onnxexport.model_onnx_speaker_mix import SynthesizerTrn -import utils import json +import torch + +import utils +from onnxexport.model_onnx_speaker_mix import SynthesizerTrn + + def main(): path = "crs" diff --git a/onnxexport/model_onnx.py b/onnxexport/model_onnx.py index 09e69ae..0f83c03 100644 --- a/onnxexport/model_onnx.py +++ b/onnxexport/model_onnx.py @@ -1,18 +1,16 @@ import torch from torch import nn +from torch.nn import Conv1d, Conv2d from torch.nn import functional as F +from torch.nn.utils import spectral_norm, weight_norm import modules.attentions as attentions import modules.commons as commons import modules.modules as modules - -from torch.nn import Conv1d, Conv2d -from torch.nn.utils import weight_norm, spectral_norm - import utils from modules.commons import get_padding -from vdecoder.hifigan.models import Generator from utils import f0_to_coarse +from vdecoder.hifigan.models import Generator class ResidualCouplingBlock(nn.Module): diff --git a/onnxexport/model_onnx_speaker_mix.py b/onnxexport/model_onnx_speaker_mix.py index b188998..e6505c8 100644 --- a/onnxexport/model_onnx_speaker_mix.py +++ b/onnxexport/model_onnx_speaker_mix.py @@ -4,10 +4,9 @@ from torch.nn import functional as F import modules.attentions as attentions import modules.modules as modules - - from utils import f0_to_coarse + class ResidualCouplingBlock(nn.Module): def __init__(self, channels, diff --git a/preprocess_flist_config.py b/preprocess_flist_config.py index b4ba15f..517435d 100644 --- a/preprocess_flist_config.py +++ b/preprocess_flist_config.py @@ -1,11 +1,11 @@ -import os import argparse +import json +import os import re +import wave +from random import shuffle from tqdm import tqdm -from random import shuffle -import json -import wave import diffusion.logger.utils as du diff --git a/preprocess_hubert_f0.py b/preprocess_hubert_f0.py index 2452198..578014b 100644 --- a/preprocess_hubert_f0.py +++ b/preprocess_hubert_f0.py @@ -1,19 +1,20 @@ -import os -import utils -import torch -import random -import librosa -import logging import argparse +import logging import multiprocessing -import numpy as np -import diffusion.logger.utils as du - -from glob import glob -from tqdm import tqdm -from random import shuffle -from diffusion.vocoder import Vocoder +import os +import random from concurrent.futures import ProcessPoolExecutor +from glob import glob +from random import shuffle + +import librosa +import numpy as np +import torch +from tqdm import tqdm + +import diffusion.logger.utils as du +import utils +from diffusion.vocoder import Vocoder from modules.mel_processing import spectrogram_torch logging.getLogger("numba").setLevel(logging.WARNING) diff --git a/resample.py b/resample.py index a99c509..50eacc1 100644 --- a/resample.py +++ b/resample.py @@ -1,10 +1,11 @@ -import os import argparse -import librosa -import numpy as np import concurrent.futures +import os from concurrent.futures import ProcessPoolExecutor from multiprocessing import cpu_count + +import librosa +import numpy as np from scipy.io import wavfile from tqdm import tqdm diff --git a/train.py b/train.py index 0139ffa..202a135 100644 --- a/train.py +++ b/train.py @@ -6,27 +6,24 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING) logging.getLogger('numba').setLevel(logging.WARNING) import os + import torch +import torch.distributed as dist +import torch.multiprocessing as mp +from torch.cuda.amp import GradScaler, autocast from torch.nn import functional as F +from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.data import DataLoader from torch.utils.tensorboard import SummaryWriter -import torch.multiprocessing as mp -import torch.distributed as dist -from torch.nn.parallel import DistributedDataParallel as DDP -from torch.cuda.amp import autocast, GradScaler import modules.commons as commons import utils -from data_utils import TextAudioSpeakerLoader, TextAudioCollate +from data_utils import TextAudioCollate, TextAudioSpeakerLoader from models import ( - SynthesizerTrn, MultiPeriodDiscriminator, + SynthesizerTrn, ) -from modules.losses import ( - kl_loss, - generator_loss, discriminator_loss, feature_loss -) - +from modules.losses import discriminator_loss, feature_loss, generator_loss, kl_loss from modules.mel_processing import mel_spectrogram_torch, spec_to_mel_torch torch.backends.cudnn.benchmark = True diff --git a/train_diff.py b/train_diff.py index 8bfd907..adf5fb3 100644 --- a/train_diff.py +++ b/train_diff.py @@ -1,8 +1,10 @@ import argparse + import torch from torch.optim import lr_scheduler -from diffusion.logger import utils + from diffusion.data_loaders import get_data_loaders +from diffusion.logger import utils from diffusion.solver import train from diffusion.unit2mel import Unit2Mel from diffusion.vocoder import Vocoder diff --git a/train_index.py b/train_index.py index a8d8cae..13d66d3 100644 --- a/train_index.py +++ b/train_index.py @@ -1,8 +1,8 @@ -import utils -import pickle -import os import argparse +import os +import pickle +import utils if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/utils.py b/utils.py index 691cd0b..c3336b1 100644 --- a/utils.py +++ b/utils.py @@ -1,17 +1,18 @@ -import os -import glob -import re -import sys import argparse -import logging +import glob import json +import logging +import os +import re import subprocess +import sys + +import faiss import librosa import numpy as np -from scipy.io.wavfile import read import torch +from scipy.io.wavfile import read from torch.nn import functional as F -import faiss MATPLOTLIB_FLAG = False diff --git a/vdecoder/hifigan/models.py b/vdecoder/hifigan/models.py index c94a367..99d533e 100644 --- a/vdecoder/hifigan/models.py +++ b/vdecoder/hifigan/models.py @@ -1,13 +1,15 @@ -import os import json -from .env import AttrDict +import os + import numpy as np import torch -import torch.nn.functional as F import torch.nn as nn -from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d -from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm -from .utils import init_weights, get_padding +import torch.nn.functional as F +from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d +from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm + +from .env import AttrDict +from .utils import get_padding, init_weights LRELU_SLOPE = 0.1 diff --git a/vdecoder/hifigan/nvSTFT.py b/vdecoder/hifigan/nvSTFT.py index 87d1511..b3321b2 100644 --- a/vdecoder/hifigan/nvSTFT.py +++ b/vdecoder/hifigan/nvSTFT.py @@ -1,11 +1,13 @@ import os -os.environ["LRU_CACHE_CAPACITY"] = "3" + +import librosa +import numpy as np +import soundfile as sf import torch import torch.utils.data -import numpy as np -import librosa from librosa.filters import mel as librosa_mel_fn -import soundfile as sf + +os.environ["LRU_CACHE_CAPACITY"] = "3" def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False): sampling_rate = None diff --git a/vdecoder/hifigan/utils.py b/vdecoder/hifigan/utils.py index 89ccf5f..e519e2b 100644 --- a/vdecoder/hifigan/utils.py +++ b/vdecoder/hifigan/utils.py @@ -1,9 +1,10 @@ import glob import os -import torch -from torch.nn.utils import weight_norm + # matplotlib.use("Agg") import matplotlib.pylab as plt +import torch +from torch.nn.utils import weight_norm def plot_spectrogram(spectrogram): diff --git a/vdecoder/hifiganwithsnake/alias/__init__.py b/vdecoder/hifiganwithsnake/alias/__init__.py index a2318b6..117e5ac 100644 --- a/vdecoder/hifiganwithsnake/alias/__init__.py +++ b/vdecoder/hifiganwithsnake/alias/__init__.py @@ -1,6 +1,6 @@ # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 # LICENSE is in incl_licenses directory. +from .act import * from .filter import * from .resample import * -from .act import * \ No newline at end of file diff --git a/vdecoder/hifiganwithsnake/alias/act.py b/vdecoder/hifiganwithsnake/alias/act.py index 1465d1c..e46b346 100644 --- a/vdecoder/hifiganwithsnake/alias/act.py +++ b/vdecoder/hifiganwithsnake/alias/act.py @@ -4,10 +4,10 @@ import torch import torch.nn as nn import torch.nn.functional as F - -from torch import sin, pow +from torch import pow, sin from torch.nn import Parameter -from .resample import UpSample1d, DownSample1d + +from .resample import DownSample1d, UpSample1d class Activation1d(nn.Module): diff --git a/vdecoder/hifiganwithsnake/alias/filter.py b/vdecoder/hifiganwithsnake/alias/filter.py index d2ccf1a..45dccd3 100644 --- a/vdecoder/hifiganwithsnake/alias/filter.py +++ b/vdecoder/hifiganwithsnake/alias/filter.py @@ -1,10 +1,11 @@ # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 # LICENSE is in incl_licenses directory. +import math + import torch import torch.nn as nn import torch.nn.functional as F -import math if 'sinc' in dir(torch): sinc = torch.sinc diff --git a/vdecoder/hifiganwithsnake/alias/resample.py b/vdecoder/hifiganwithsnake/alias/resample.py index 53773c7..3ef9481 100644 --- a/vdecoder/hifiganwithsnake/alias/resample.py +++ b/vdecoder/hifiganwithsnake/alias/resample.py @@ -3,8 +3,8 @@ import torch.nn as nn from torch.nn import functional as F -from .filter import LowPassFilter1d -from .filter import kaiser_sinc_filter1d + +from .filter import LowPassFilter1d, kaiser_sinc_filter1d class UpSample1d(nn.Module): diff --git a/vdecoder/hifiganwithsnake/models.py b/vdecoder/hifiganwithsnake/models.py index 1d3a0c0..04277bd 100644 --- a/vdecoder/hifiganwithsnake/models.py +++ b/vdecoder/hifiganwithsnake/models.py @@ -1,15 +1,18 @@ -import os import json -from .env import AttrDict +import os + import numpy as np import torch -import torch.nn.functional as F import torch.nn as nn -from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d -from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm -from .utils import init_weights, get_padding +import torch.nn.functional as F +from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d +from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm + from vdecoder.hifiganwithsnake.alias.act import SnakeAlias +from .env import AttrDict +from .utils import get_padding, init_weights + LRELU_SLOPE = 0.1 diff --git a/vdecoder/hifiganwithsnake/nvSTFT.py b/vdecoder/hifiganwithsnake/nvSTFT.py index 87d1511..b3321b2 100644 --- a/vdecoder/hifiganwithsnake/nvSTFT.py +++ b/vdecoder/hifiganwithsnake/nvSTFT.py @@ -1,11 +1,13 @@ import os -os.environ["LRU_CACHE_CAPACITY"] = "3" + +import librosa +import numpy as np +import soundfile as sf import torch import torch.utils.data -import numpy as np -import librosa from librosa.filters import mel as librosa_mel_fn -import soundfile as sf + +os.environ["LRU_CACHE_CAPACITY"] = "3" def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False): sampling_rate = None diff --git a/vdecoder/hifiganwithsnake/utils.py b/vdecoder/hifiganwithsnake/utils.py index 89ccf5f..e519e2b 100644 --- a/vdecoder/hifiganwithsnake/utils.py +++ b/vdecoder/hifiganwithsnake/utils.py @@ -1,9 +1,10 @@ import glob import os -import torch -from torch.nn.utils import weight_norm + # matplotlib.use("Agg") import matplotlib.pylab as plt +import torch +from torch.nn.utils import weight_norm def plot_spectrogram(spectrogram): diff --git a/vdecoder/nsf_hifigan/models.py b/vdecoder/nsf_hifigan/models.py index 4fa33a1..8a35b13 100644 --- a/vdecoder/nsf_hifigan/models.py +++ b/vdecoder/nsf_hifigan/models.py @@ -1,13 +1,15 @@ -import os import json -from .env import AttrDict +import os + import numpy as np import torch -import torch.nn.functional as F import torch.nn as nn -from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d -from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm -from .utils import init_weights, get_padding +import torch.nn.functional as F +from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d +from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm + +from .env import AttrDict +from .utils import get_padding, init_weights LRELU_SLOPE = 0.1 diff --git a/vdecoder/nsf_hifigan/nvSTFT.py b/vdecoder/nsf_hifigan/nvSTFT.py index 369c7be..e756cca 100644 --- a/vdecoder/nsf_hifigan/nvSTFT.py +++ b/vdecoder/nsf_hifigan/nvSTFT.py @@ -1,12 +1,14 @@ import os -os.environ["LRU_CACHE_CAPACITY"] = "3" -import torch -import torch.utils.data -import numpy as np + import librosa -from librosa.filters import mel as librosa_mel_fn +import numpy as np import soundfile as sf +import torch import torch.nn.functional as F +import torch.utils.data +from librosa.filters import mel as librosa_mel_fn + +os.environ["LRU_CACHE_CAPACITY"] = "3" def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False): sampling_rate = None diff --git a/vdecoder/nsf_hifigan/utils.py b/vdecoder/nsf_hifigan/utils.py index 84bff02..58d0e70 100644 --- a/vdecoder/nsf_hifigan/utils.py +++ b/vdecoder/nsf_hifigan/utils.py @@ -1,10 +1,12 @@ import glob import os + import matplotlib +import matplotlib.pylab as plt import torch from torch.nn.utils import weight_norm + matplotlib.use("Agg") -import matplotlib.pylab as plt def plot_spectrogram(spectrogram): diff --git a/vencoder/CNHubertLarge.py b/vencoder/CNHubertLarge.py index e8eacf3..f436947 100644 --- a/vencoder/CNHubertLarge.py +++ b/vencoder/CNHubertLarge.py @@ -1,7 +1,8 @@ -from vencoder.encoder import SpeechEncoder import torch from fairseq import checkpoint_utils +from vencoder.encoder import SpeechEncoder + class CNHubertLarge(SpeechEncoder): def __init__(self, vec_path="pretrain/chinese-hubert-large-fairseq-ckpt.pt", device=None): diff --git a/vencoder/ContentVec256L12_Onnx.py b/vencoder/ContentVec256L12_Onnx.py index 6663c06..466e6c1 100644 --- a/vencoder/ContentVec256L12_Onnx.py +++ b/vencoder/ContentVec256L12_Onnx.py @@ -1,7 +1,8 @@ -from vencoder.encoder import SpeechEncoder import onnxruntime import torch +from vencoder.encoder import SpeechEncoder + class ContentVec256L12_Onnx(SpeechEncoder): def __init__(self, vec_path="pretrain/vec-256-layer-12.onnx", device=None): diff --git a/vencoder/ContentVec256L9.py b/vencoder/ContentVec256L9.py index fef12cb..c973090 100644 --- a/vencoder/ContentVec256L9.py +++ b/vencoder/ContentVec256L9.py @@ -1,7 +1,8 @@ -from vencoder.encoder import SpeechEncoder import torch from fairseq import checkpoint_utils +from vencoder.encoder import SpeechEncoder + class ContentVec256L9(SpeechEncoder): def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None): diff --git a/vencoder/ContentVec256L9_Onnx.py b/vencoder/ContentVec256L9_Onnx.py index 27f7a93..a27e1f7 100644 --- a/vencoder/ContentVec256L9_Onnx.py +++ b/vencoder/ContentVec256L9_Onnx.py @@ -1,7 +1,9 @@ -from vencoder.encoder import SpeechEncoder import onnxruntime import torch +from vencoder.encoder import SpeechEncoder + + class ContentVec256L9_Onnx(SpeechEncoder): def __init__(self, vec_path="pretrain/vec-256-layer-9.onnx", device=None): super().__init__() diff --git a/vencoder/ContentVec768L12.py b/vencoder/ContentVec768L12.py index b9f1856..066b824 100644 --- a/vencoder/ContentVec768L12.py +++ b/vencoder/ContentVec768L12.py @@ -1,7 +1,8 @@ -from vencoder.encoder import SpeechEncoder import torch from fairseq import checkpoint_utils +from vencoder.encoder import SpeechEncoder + class ContentVec768L12(SpeechEncoder): def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None): diff --git a/vencoder/ContentVec768L12_Onnx.py b/vencoder/ContentVec768L12_Onnx.py index 0562623..e737594 100644 --- a/vencoder/ContentVec768L12_Onnx.py +++ b/vencoder/ContentVec768L12_Onnx.py @@ -1,7 +1,8 @@ -from vencoder.encoder import SpeechEncoder import onnxruntime import torch +from vencoder.encoder import SpeechEncoder + class ContentVec768L12_Onnx(SpeechEncoder): def __init__(self, vec_path="pretrain/vec-768-layer-12.onnx", device=None): diff --git a/vencoder/ContentVec768L9_Onnx.py b/vencoder/ContentVec768L9_Onnx.py index 40d6329..3bd0f33 100644 --- a/vencoder/ContentVec768L9_Onnx.py +++ b/vencoder/ContentVec768L9_Onnx.py @@ -1,7 +1,8 @@ -from vencoder.encoder import SpeechEncoder import onnxruntime import torch +from vencoder.encoder import SpeechEncoder + class ContentVec768L9_Onnx(SpeechEncoder): def __init__(self,vec_path = "pretrain/vec-768-layer-9.onnx",device=None): diff --git a/vencoder/DPHubert.py b/vencoder/DPHubert.py index a62cbac..130064f 100644 --- a/vencoder/DPHubert.py +++ b/vencoder/DPHubert.py @@ -1,6 +1,7 @@ -from vencoder.encoder import SpeechEncoder import torch + from vencoder.dphubert.model import wav2vec2_model +from vencoder.encoder import SpeechEncoder class DPHubert(SpeechEncoder): diff --git a/vencoder/HubertSoft.py b/vencoder/HubertSoft.py index 9847a7b..423c159 100644 --- a/vencoder/HubertSoft.py +++ b/vencoder/HubertSoft.py @@ -1,5 +1,6 @@ -from vencoder.encoder import SpeechEncoder import torch + +from vencoder.encoder import SpeechEncoder from vencoder.hubert import hubert_model diff --git a/vencoder/HubertSoft_Onnx.py b/vencoder/HubertSoft_Onnx.py index 9b502d8..038d78e 100644 --- a/vencoder/HubertSoft_Onnx.py +++ b/vencoder/HubertSoft_Onnx.py @@ -1,7 +1,8 @@ -from vencoder.encoder import SpeechEncoder import onnxruntime import torch +from vencoder.encoder import SpeechEncoder + class HubertSoft_Onnx(SpeechEncoder): def __init__(self, vec_path="pretrain/hubert-soft.onnx", device=None): diff --git a/vencoder/WavLMBasePlus.py b/vencoder/WavLMBasePlus.py index 8d45a35..99df15b 100644 --- a/vencoder/WavLMBasePlus.py +++ b/vencoder/WavLMBasePlus.py @@ -1,5 +1,6 @@ -from vencoder.encoder import SpeechEncoder import torch + +from vencoder.encoder import SpeechEncoder from vencoder.wavlm.WavLM import WavLM, WavLMConfig diff --git a/vencoder/WhisperPPG.py b/vencoder/WhisperPPG.py index 5d156cc..86af53e 100644 --- a/vencoder/WhisperPPG.py +++ b/vencoder/WhisperPPG.py @@ -1,8 +1,8 @@ -from vencoder.encoder import SpeechEncoder import torch -from vencoder.whisper.model import Whisper, ModelDimensions -from vencoder.whisper.audio import pad_or_trim, log_mel_spectrogram +from vencoder.encoder import SpeechEncoder +from vencoder.whisper.audio import log_mel_spectrogram, pad_or_trim +from vencoder.whisper.model import ModelDimensions, Whisper class WhisperPPG(SpeechEncoder): diff --git a/vencoder/WhisperPPGLarge.py b/vencoder/WhisperPPGLarge.py index 4494c08..cd0ff76 100644 --- a/vencoder/WhisperPPGLarge.py +++ b/vencoder/WhisperPPGLarge.py @@ -1,8 +1,8 @@ -from vencoder.encoder import SpeechEncoder import torch -from vencoder.whisper.model import Whisper, ModelDimensions -from vencoder.whisper.audio import pad_or_trim, log_mel_spectrogram +from vencoder.encoder import SpeechEncoder +from vencoder.whisper.audio import log_mel_spectrogram, pad_or_trim +from vencoder.whisper.model import ModelDimensions, Whisper class WhisperPPGLarge(SpeechEncoder): diff --git a/vencoder/dphubert/components.py b/vencoder/dphubert/components.py index 1f8ae27..be5cc8c 100644 --- a/vencoder/dphubert/components.py +++ b/vencoder/dphubert/components.py @@ -5,19 +5,19 @@ https://github.com/pytorch/audio/blob/main/torchaudio/models/wav2vec2/components """ +import math from collections import defaultdict from typing import List, Optional, Tuple -import math import torch -from torch import nn, Tensor +from torch import Tensor, nn from torch.nn import Module from .hardconcrete import HardConcrete from .pruning_utils import ( - prune_linear_layer, prune_conv1d_layer, prune_layer_norm, + prune_linear_layer, ) diff --git a/vencoder/dphubert/utils/import_huggingface_wavlm.py b/vencoder/dphubert/utils/import_huggingface_wavlm.py index 1a2ea31..24a3f38 100644 --- a/vencoder/dphubert/utils/import_huggingface_wavlm.py +++ b/vencoder/dphubert/utils/import_huggingface_wavlm.py @@ -10,7 +10,7 @@ from typing import Any, Dict from torch.nn import Module -from ..model import wav2vec2_model, Wav2Vec2Model, wavlm_model +from ..model import Wav2Vec2Model, wav2vec2_model, wavlm_model _LG = logging.getLogger(__name__) diff --git a/vencoder/wavlm/WavLM.py b/vencoder/wavlm/WavLM.py index 656e504..5a3986f 100644 --- a/vencoder/wavlm/WavLM.py +++ b/vencoder/wavlm/WavLM.py @@ -7,26 +7,26 @@ # https://github.com/pytorch/fairseq # -------------------------------------------------------- -import math import logging +import math from typing import List, Optional, Tuple import numpy as np - import torch import torch.nn as nn import torch.nn.functional as F from torch.nn import LayerNorm + from vencoder.wavlm.modules import ( Fp32GroupNorm, Fp32LayerNorm, + GLU_Linear, GradMultiply, MultiheadAttention, SamePad, - init_bert_params, - get_activation_fn, TransposeLast, - GLU_Linear, + get_activation_fn, + init_bert_params, ) logger = logging.getLogger(__name__) diff --git a/vencoder/wavlm/modules.py b/vencoder/wavlm/modules.py index 1dcfc6f..add4a1a 100644 --- a/vencoder/wavlm/modules.py +++ b/vencoder/wavlm/modules.py @@ -10,10 +10,11 @@ import math import warnings from typing import Dict, Optional, Tuple + import torch +import torch.nn.functional as F from torch import Tensor, nn from torch.nn import Parameter -import torch.nn.functional as F class TransposeLast(nn.Module): diff --git a/vencoder/whisper/audio.py b/vencoder/whisper/audio.py index 7b3b796..05890dc 100644 --- a/vencoder/whisper/audio.py +++ b/vencoder/whisper/audio.py @@ -5,11 +5,10 @@ import ffmpeg import numpy as np import torch import torch.nn.functional as F +from librosa.filters import mel as librosa_mel_fn from .utils import exact_div -from librosa.filters import mel as librosa_mel_fn - # hard-coded audio hyperparameters SAMPLE_RATE = 16000 N_FFT = 400 diff --git a/vencoder/whisper/decoding.py b/vencoder/whisper/decoding.py index 133c2e7..45e50b1 100644 --- a/vencoder/whisper/decoding.py +++ b/vencoder/whisper/decoding.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Dict, List, Tuple, Iterable, Optional, Sequence, Union, TYPE_CHECKING +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Tuple, Union import numpy as np import torch diff --git a/vencoder/whisper/model.py b/vencoder/whisper/model.py index cb3781c..f3de4d3 100644 --- a/vencoder/whisper/model.py +++ b/vencoder/whisper/model.py @@ -1,14 +1,13 @@ from dataclasses import dataclass -from typing import Dict -from typing import Iterable, Optional +from typing import Dict, Iterable, Optional import numpy as np import torch import torch.nn.functional as F -from torch import Tensor -from torch import nn +from torch import Tensor, nn -from .decoding import detect_language as detect_language_function, decode as decode_function +from .decoding import decode as decode_function +from .decoding import detect_language as detect_language_function @dataclass diff --git a/wav_upload.py b/wav_upload.py index cac679d..fffe12a 100644 --- a/wav_upload.py +++ b/wav_upload.py @@ -1,7 +1,9 @@ -from google.colab import files -import shutil -import os import argparse +import os +import shutil + +from google.colab import files + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--type", type=str, required=True, help="type of file to upload") diff --git a/webUI.py b/webUI.py index b6a4f01..5f4f1e3 100644 --- a/webUI.py +++ b/webUI.py @@ -1,4 +1,11 @@ +import json +import logging import os +import re +import subprocess +import time +import traceback +from itertools import chain # os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt") import gradio as gr @@ -6,20 +13,12 @@ import gradio.processing_utils as gr_pu import librosa import numpy as np import soundfile -from inference.infer_tool import Svc -import logging -import re -import json - -import subprocess -from scipy.io import wavfile -import librosa import torch -import time -import traceback -from itertools import chain -from utils import mix_model +from scipy.io import wavfile + from compress_model import removeOptimizer +from inference.infer_tool import Svc +from utils import mix_model logging.getLogger('numba').setLevel(logging.WARNING) logging.getLogger('markdown_it').setLevel(logging.WARNING)