From 2809e5b157662ac663d06ded3e77b453fd8b2bd0 Mon Sep 17 00:00:00 2001 From: ylzz1997 Date: Fri, 26 May 2023 21:21:14 +0800 Subject: [PATCH] Updata Mel --- vencoder/whisper/assets/mel_filters.npz | Bin 2048 -> 0 bytes vencoder/whisper/audio.py | 5 +++-- 2 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 vencoder/whisper/assets/mel_filters.npz diff --git a/vencoder/whisper/assets/mel_filters.npz b/vencoder/whisper/assets/mel_filters.npz deleted file mode 100644 index 1a7839244dfb6b1cc02e4f3cfe12e4817a073bc7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2048 zcmZ{ldpr|t8^`A`DO<+otT3lH(m_`ITt zB|;Oynx)HJT`(1EF>38(a0=aik_wy4J{lY+B#Bb-2ffpu*yJ{j!qm{t z^xMtm2Y$T#4*xq|STK_Bn9hLgL(WcH=%@WYxHDaPwEEIhuyv@h^Db}I$OqvWOB2NL z9X<7CRj&D~xo5)OmJz)I%U zkqrV5CK)ikMW)(OKUdr{dmUe=eJMloLF>s8*;&VMlJ*DNvtmAby$w&owv{^-#*jy_ zl48d&xIAmG7BO0JHJ6_M%wWjAKQ9ypKNO3a<)zkCRVOz1iaFi!K=wf)9!9o+0|$1c zLNd9&GovRa-1K^bDWjo5Dn_kBA`9tBg_Y}gVeCAO8KzK_Cv_D`o}#u!WyDmupof?h zX9Ts&DRcf=GgUf)0;da`PprFOaNuv`+HBl{W46HWXpIW|5ardLmBl!}jWJ&>pXk2Z zEXcf7f+vz9U|i^HtnIZjOcAXeE^!H3#J%LlrB?=Y^XYydKfT_mIl5a(WmffC`T5Dw z2k*V9tMN!{aOt1-szXI+_ZYsEm?>V5f@jBm-_NiHO=A^|3m0jtgCZ>bRK;-Hk;KlD zcgJfnB`#hSs5EYbzD;Ev$fCW`*lv@ZqJ#+ERE@NX!x(kdZWmCLuI?E30ZkF~di6`A z;Hkg*C%W^V#PDvPF)R0N9m0G`^WH(Cc3P0|y*{1^LEHP0V}cnm96Vhuh1EJQOAK+j z%!rc*`!I$SJZzYR1D7>NHr+P`2)Fn!UE|X0NsT|r9J&&sDb73hQzG;pXV~xMh(KQJ zB+KRgBh!VXat#;hT54}r`bteh@?*pr{v2nwp5U1z*>Sq^9WQ^&nJum?5a<<8cne%o z>rA+=7j|rtqsY4}(3x8JwKeyL$l+L72-7-I_K%j~D<2>TEAB~F!u;T#<;YR_@rOE( z!=j~8p0Sn;{|^Dwe2>Ujx7c593PrgxbKban*_`uC=DE9>wa`J32eh!h-F8fg!Ee2Y z_>tB#ZhIL-4ASQ zB_h=;c&TlyPo>3%-QpA=6JHBuwY4MRQz}uMhe=kv`AZHbQd+mO+2;ko0==-HW)(FL z@;D^Xl!8Ya?@Urcsb{>=3^w9Scux7j?UxMKY?yWnqk(t}OKJ^>|JSv2%{3x@zj0bebHU1c1LTFejo6z~93MmrCb z57vI#AdVaN)R)8+_`EMG_l~ufd4pAKSx)+|`*l54nZDrk#2(pz@d%Y$hb~5sLAXbL zbwW~-x8cuD@8P!$C_v&+8s?}fcHTTq>6I$vqaqE>0&SeTIR_TnBdH<9%IOy_(GK)D zw%VM%TO7@|>|oq-j=ToC5${4Ms5;y{yfjmJY}L1?i*jOqu=6WF5)6;%fKc)@WtU5L z%FFxRDH}ak_2iLEazKX~Crl-DcaIiaTaI3kE>dl|a^eBrPWrRwOjEDXZlH!Qn>%|4 zF7h5!&)J`B_%p=8`33K}xkH)DU8u^WF;B|X(~LE6*jx7f7RFZj_LCrndK%b>g}+8= zSTWHyV%k)2YLpQ9|9RGd`ZLtXZwnLZ837ZVN&qWIuN0>Y7zK zkL-B_&B2iQ8d^Q!^@RmAMMEO^VwiKi6n{=kvWcU;PDe+!o(5+gL$%o31 z5HGnVUbofaprVI<-&15~ZJ5PmVr$CDzsqWss5W2vzM>uKBj3HQT1AD_GmDL7n{@kWa8>c3U| EFD(_kqyPW_ diff --git a/vencoder/whisper/audio.py b/vencoder/whisper/audio.py index de8a195..3bdb70b 100644 --- a/vencoder/whisper/audio.py +++ b/vencoder/whisper/audio.py @@ -9,6 +9,8 @@ import torch.nn.functional as F from .utils import exact_div +from librosa.filters import mel as librosa_mel_fn + # hard-coded audio hyperparameters SAMPLE_RATE = 16000 N_FFT = 400 @@ -85,8 +87,7 @@ def mel_filters(device, n_mels: int = N_MELS) -> torch.Tensor: ) """ assert n_mels == 80, f"Unsupported n_mels: {n_mels}" - with np.load(os.path.join(os.path.dirname(__file__), "assets", "mel_filters.npz")) as f: - return torch.from_numpy(f[f"mel_{n_mels}"]).to(device) + return torch.from_numpy(librosa_mel_fn(sr=SAMPLE_RATE,n_fft=N_FFT,n_mels=n_mels)).to(device) def log_mel_spectrogram(audio: Union[str, np.ndarray, torch.Tensor], n_mels: int = N_MELS):