From 9c2e463a81ee603fe4944a0c29c6d131e4fd924d Mon Sep 17 00:00:00 2001
From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Date: Thu, 18 May 2023 00:01:20 +0800
Subject: [PATCH] updated_kmeans_pytorch_gpu_version

updated_kmeans_pytorch_gpu_version
---
 cluster/km_train.py |  55 ++++++++++++
 cluster/kmeans.py   | 201 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 256 insertions(+)
 create mode 100644 cluster/km_train.py
 create mode 100644 cluster/kmeans.py

diff --git a/cluster/km_train.py b/cluster/km_train.py
new file mode 100644
index 0000000..002c35e
--- /dev/null
+++ b/cluster/km_train.py
@@ -0,0 +1,55 @@
+import time,pdb
+import tqdm
+from time import time as ttime
+import os
+from pathlib import Path
+import logging
+import argparse
+from cluster.kmeans import KMeansGPU
+import torch
+import numpy as np
+from sklearn.cluster import KMeans, MiniBatchKMeans
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False, use_gpu=False):  # GPU minibatch is not worth it; the library supports it, but it is not considered here
+    logger.info(f"Loading features from {in_dir}")
+    features = []
+    nums = 0
+    for path in tqdm.tqdm(in_dir.glob("*.soft.pt")):
+        # for name in os.listdir(in_dir):
+        #     path = "%s/%s" % (in_dir, name)
+        features.append(torch.load(path, map_location="cpu").squeeze(0).numpy().T)
+        # print(features[-1].shape)
+    features = np.concatenate(features, axis=0)
+    print(nums, features.nbytes / 1024**2, "MB , shape:", features.shape, features.dtype)
+    features = features.astype(np.float32)
+    logger.info(f"Clustering features of shape: {features.shape}")
+    t = time.time()
+    if use_gpu == False:
+        if use_minibatch:
+            kmeans = MiniBatchKMeans(n_clusters=n_clusters, verbose=verbose, batch_size=4096, max_iter=80).fit(features)
+        else:
+            kmeans = KMeans(n_clusters=n_clusters, verbose=verbose).fit(features)
+    else:
+        kmeans = KMeansGPU(n_clusters=n_clusters, mode='euclidean', verbose=2 if verbose else 0, max_iter=500, tol=1e-2)
+        features = torch.from_numpy(features)  # .to(device)
+        labels = kmeans.fit_predict(features)
+
+    print(time.time() - t, "s")
+
+    x = {
+        "n_features_in_": kmeans.n_features_in_ if use_gpu == False else features.shape[1],
+        "_n_threads": kmeans._n_threads if use_gpu == False else 4,
+        "cluster_centers_": kmeans.cluster_centers_ if use_gpu == False else kmeans.centroids.cpu().numpy(),
+    }
+    print("end")
+
+    return x
+
+if __name__ == "__main__":
+    res = train_cluster(Path("/data/docker/dataset/12b-co256tensor"), 1000, use_minibatch=False, verbose=False, use_gpu=True)
+    pdb.set_trace()
diff --git a/cluster/kmeans.py b/cluster/kmeans.py
new file mode 100644
index 0000000..8e133ab
--- /dev/null
+++ b/cluster/kmeans.py
@@ -0,0 +1,201 @@
+import math,pdb
+import torch,pynvml
+from torch.nn.functional import normalize
+from time import time
+import numpy as np
+
+# device=torch.device("cuda:0")
+
+def _kpp(data: torch.Tensor, k: int, sample_size: int = -1):
+    """ Picks k points in the data based on the kmeans++ method.
+
+    Parameters
+    ----------
+    data : torch.Tensor
+        Expect a rank 1 or 2 array. Rank 1 is assumed to describe 1-D
+        data, rank 2 multidimensional data, in which case one
+        row is one observation.
+    k : int
+        Number of samples to generate.
+    sample_size : int
+        Sub-sample size used to avoid memory overflow during the calculation.
+
+    Returns
+    -------
+    init : torch.Tensor
+        A 'k' by 'N' tensor containing the initial centroids.
+
+    References
+    ----------
+    .. [1] D. Arthur and S. Vassilvitskii, "k-means++: the advantages of
+       careful seeding", Proceedings of the Eighteenth Annual ACM-SIAM Symposium
+       on Discrete Algorithms, 2007.
+    .. [2] scipy/cluster/vq.py: _kpp
+    """
+    batch_size = data.shape[0]
+    if batch_size > sample_size:
+        data = data[torch.randint(0, batch_size, [sample_size], device=data.device)]
+    dims = data.shape[1] if len(data.shape) > 1 else 1
+    init = torch.zeros((k, dims)).to(data.device)
+    r = torch.distributions.uniform.Uniform(0, 1)
+    for i in range(k):
+        if i == 0:
+            init[i, :] = data[torch.randint(data.shape[0], [1])]
+        else:
+            D2 = torch.cdist(init[:i, :][None, :], data[None, :], p=2)[0].amin(dim=0)
+            probs = D2 / torch.sum(D2)
+            cumprobs = torch.cumsum(probs, dim=0)
+            init[i, :] = data[torch.searchsorted(cumprobs, r.sample([1]).to(data.device))]
+    return init
+
+class KMeansGPU:
+    '''
+    K-means clustering implemented with PyTorch on the GPU
+
+    Parameters:
+      n_clusters: int
+        Number of clusters
+
+      max_iter: int, default: 200
+        Maximum number of iterations
+
+      tol: float, default: 1e-4
+        Tolerance
+
+      verbose: int, default: 0
+        Verbosity
+
+      mode: {'euclidean', 'cosine'}, default: 'euclidean'
+        Type of distance measure
+
+      device: torch.device, default: torch.device("cuda:0")
+        Device the clustering runs on
+
+    Attributes:
+      centroids: torch.Tensor, shape: [n_clusters, n_features]
+        Cluster centroids
+
+      minibatch: int
+        Minibatch size, derived at construction time from the free GPU memory
+    '''
+    def __init__(self, n_clusters, max_iter=200, tol=1e-4, verbose=0, mode="euclidean", device=torch.device("cuda:0")):
+        self.n_clusters = n_clusters
+        self.max_iter = max_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.mode = mode
+        self.device = device
+        # size the minibatch from the currently free GPU memory
+        pynvml.nvmlInit()
+        gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device.index)
+        info = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle)
+        self.minibatch = int(33e6 / self.n_clusters * info.free / 1024 / 1024 / 1024)
+        print("free_mem/GB:", info.free / 1024 / 1024 / 1024, "minibatch:", self.minibatch)
+
+    @staticmethod
+    def cos_sim(a, b):
+        """
+        Compute cosine similarity of 2 sets of vectors
+
+        Parameters:
+          a: torch.Tensor, shape: [m, n_features]
+          b: torch.Tensor, shape: [n, n_features]
+        """
+        return normalize(a, dim=-1) @ normalize(b, dim=-1).transpose(-2, -1)
+
+    @staticmethod
+    def euc_sim(a, b):
+        """
+        Compute euclidean similarity of 2 sets of vectors
+        (the negative squared Euclidean distance, so larger means closer)
+
+        Parameters:
+          a: torch.Tensor, shape: [m, n_features]
+          b: torch.Tensor, shape: [n, n_features]
+        """
+        return 2 * a @ b.transpose(-2, -1) - (a**2).sum(dim=1)[..., :, None] - (b**2).sum(dim=1)[..., None, :]
+
+    def max_sim(self, a, b):
+        """
+        Compute maximum similarity (or minimum distance) of each vector
+        in a with all of the vectors in b
+
+        Parameters:
+          a: torch.Tensor, shape: [m, n_features]
+          b: torch.Tensor, shape: [n, n_features]
+        """
+        if self.mode == 'cosine':
+            sim_func = self.cos_sim
+        elif self.mode == 'euclidean':
+            sim_func = self.euc_sim
+        sim = sim_func(a, b)
+        max_sim_v, max_sim_i = sim.max(dim=-1)
+        return max_sim_v, max_sim_i
+
+    def fit_predict(self, X):
+        """
+        Combination of fit() and predict() methods.
+        This is faster than calling fit() and predict() separately.
+
+        Parameters:
+          X: torch.Tensor, shape: [n_samples, n_features]
+
+        Return:
+          labels: torch.Tensor, shape: [n_samples]
+
+        Sizing notes (k = n_clusters, remain = free GPU memory in GB,
+        fea_shape = n_samples):
+          mini_=33kk/k*remain
+          mini=min(mini_,fea_shape)
+          offset=log2(k/1000)*1.5
+          kpp_all=min(mini_*10/offset,fea_shape)
+          kpp_sample=min(mini_/12/offset,fea_shape)
+        """
+        assert isinstance(X, torch.Tensor), "input must be torch.Tensor"
+        assert X.dtype in [torch.half, torch.float, torch.double], "input must be floating point"
+        assert X.ndim == 2, "input must be a 2d tensor with shape: [n_samples, n_features]"
+
+        offset = np.power(1.5, np.log(self.n_clusters / 1000)) / np.log(2)
+        with torch.no_grad():
+            batch_size = X.shape[0]
+            start_time = time()
+            # kmeans++ initialization on a (possibly sub-sampled) copy of X
+            if (self.minibatch * 10 // offset < batch_size):
+                x = X[torch.randint(0, batch_size, [int(self.minibatch * 10 / offset)])].to(self.device)
+            else:
+                x = X.to(self.device)
+            self.centroids = _kpp(x, self.n_clusters, min(int(self.minibatch / 12 / offset), batch_size))
+            del x
+            torch.cuda.empty_cache()
+            num_points_in_clusters = torch.ones(self.n_clusters, device=self.device, dtype=X.dtype)  # all ones
+            closest = None  # will hold the [n_samples] int64 label tensor
+            if (self.minibatch >= batch_size // 2 and self.minibatch < batch_size):  # at least half fits: sample one fixed minibatch onto the GPU
+                X = X[torch.randint(0, batch_size, [self.minibatch])].to(self.device)
+            elif (self.minibatch >= batch_size):  # everything fits: move all features onto the GPU
+                X = X.to(self.device)
+            for i in range(self.max_iter):
+                iter_time = time()
+                if self.minibatch < batch_size // 2:  # minibatch is small: resample from host memory every iteration
+                    x = X[torch.randint(0, batch_size, [self.minibatch])].to(self.device)
+                else:
+                    x = X
+                # assign every sample to its nearest centroid
+                closest = self.max_sim(a=x, b=self.centroids)[1]
+                matched_clusters, counts = closest.unique(return_counts=True)
+                # per-cluster mean of the assigned samples
+                expanded_closest = closest[None].expand(self.n_clusters, -1)
+                mask = (expanded_closest == torch.arange(self.n_clusters, device=self.device)[:, None]).to(X.dtype)
+                c_grad = mask @ x / mask.sum(-1)[..., :, None]
+                c_grad[c_grad != c_grad] = 0  # empty clusters produce NaNs; zero them out
+                error = (c_grad - self.centroids).pow(2).sum()
+                # per-cluster learning rate decays with the number of points a cluster has absorbed
+                lr = 1 / num_points_in_clusters[:, None] * 0.9 + 0.1
+                num_points_in_clusters[matched_clusters] += counts
+                self.centroids = self.centroids * (1 - lr) + c_grad * lr
+                if self.verbose >= 2:
+                    print('iter:', i, 'error:', error.item(), 'time spent:', round(time() - iter_time, 4))
+                if error <= self.tol:
+                    break
+
+            if self.verbose >= 1:
+                print(f'used {i+1} iterations ({round(time()-start_time, 4)}s) to cluster {batch_size} items into {self.n_clusters} clusters')
+            return closest
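
For context, a minimal usage sketch of the KMeansGPU class added by this patch. The feature shape, cluster count, and output file name below are illustrative assumptions, not part of the patch; the sketch needs a CUDA device and pynvml installed, since the constructor sizes its minibatch from free GPU memory.

    import torch
    from cluster.kmeans import KMeansGPU

    # Illustrative input: 100k frames of 256-dim content features (assumed shape).
    features = torch.randn(100_000, 256, dtype=torch.float32)

    # Same settings the GPU path of train_cluster() uses.
    kmeans = KMeansGPU(n_clusters=1000, mode="euclidean", verbose=0, max_iter=500, tol=1e-2)
    labels = kmeans.fit_predict(features)        # [n_samples], cluster index per frame
    centroids = kmeans.centroids.cpu().numpy()   # [n_clusters, n_features]

    # Persist in the same dict layout train_cluster() returns (hypothetical file name).
    torch.save(
        {
            "n_features_in_": features.shape[1],
            "_n_threads": 4,
            "cluster_centers_": centroids,
        },
        "kmeans_1000.pt",
    )

Because the minibatch size is derived from the free GPU memory at construction time, the effective batch size, and therefore the exact centroids, can vary slightly between machines for the same input.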