对python中Librosa的mfcc步骤详解

Kamilia ·
更新时间:2024-09-21
· 699 次阅读

1.对语音数据归一化

如16000hz的数据,会将每个点/32768

2.计算窗函数:(*注意librosa中不进行预处理)

3.进行数据扩展填充,他进行的是镜像填充("reflect")

如原数据为 12345 -》 填充为4的,左右各填充4 即:5432123454321 即:5432-12345-4321

4.分帧

5.加窗:对每一帧进行加窗,

6.进行fft傅里叶变换

librosa中fft计算,可以使用.net中的System.Numerics

MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame, FourierOptions.Matlab) 计算,结果相同

7.mel计算(每一帧取20个特征点)

Imports System.Numerics Imports MathNet.Numerics Imports MathNet.Numerics.IntegralTransforms Module mfcc_module Public Class Librosa End Class Dim pi As Double = 3.1415926535897931 Public Function spectrum(fft_data(,) As Complex) As Double(,) Dim new_data(fft_data.GetLength(0) - 1, fft_data.GetLength(1) - 1) As Double For n = 0 To fft_data.GetLength(0) - 1 ' Debug.Print("////////////////////////spectrum//////////////////") ' Debug.Print("////////////////////////spectrum//////////////////") For i = 0 To fft_data.GetLength(1) - 1 new_data(n, i) = fft_data(n, i).MagnitudeSquared ' Debug.Write(new_data(n, i) & " ") Next Next Return new_data End Function Public Function FFT(data As Double(,)) As Complex(,) Dim result(data.GetLength(0) - 1, 1024) As Complex '2049 加了一个 数组类型 0 开始 Dim FFT_frame As Complex() = New Complex(data.GetLength(1) - 1) {} For n = 0 To data.GetLength(0) - 1 For i As Integer = 0 To data.GetLength(1) - 1 FFT_frame(i) = data(n, i) Next MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame, FourierOptions.Matlab) For k = 0 To 1024 result(n, k) = FFT_frame(k) Next 'Debug.Print("fft **************") 'For Each mem In FFT_frame ' Debug.Print(mem.ToString & " ") 'Next Next n Return result End Function Public Function _mfcc(dct_ As Double(,), power_to_db_ As Double(,)) As Double(,) 'dct 20,128 'power_to_db 5,128 'result = 20,5 Dim result(dct_.GetLength(0) - 1, power_to_db_.GetLength(1) - 1) As Double Dim r1, r2 As Double For n = 0 To dct_.GetLength(0) - 1 '20 For i = 0 To power_to_db_.GetLength(1) - 1 '5 r2 = 0 For k = 0 To dct_.GetLength(1) - 1 '128 r1 = dct_(n, k) * power_to_db_(k, i) r2 = r2 + r1 Next result(n, i) = r2 Next Next Return result End Function Public Function Dct(n_filters As Integer, n_input As Integer) As Double(,) Dim t1 As Double = 2 * n_input Dim samples(n_input - 1) As Double Dim basis(n_filters - 1, n_input - 1) As Double Dim n As Integer = 1 For i = 0 To n_input - 1 samples(i) = n * pi / (2 * n_input) n = n + 2 Next i For i = 0 To n_input - 1 basis(0, i) = 1 / Math.Sqrt(n_input) Next For n = 1 To n_filters - 1 For i = 0 To n_input - 1 basis(n, i) = Math.Cos(n * samples(i)) * Math.Sqrt(2 / n_input) Next Next Return basis End Function '1e-10 = 0.0000000001 Public Function power_to_db(S As Double(,), Optional ref As Double = 1, Optional admin As Double = 0.0000000001, Optional top_db As Double = 80) As Double(,) Dim result(S.GetLength(0) - 1, S.GetLength(1) - 1) As Double Dim log_spec As Double For n = 0 To S.GetLength(0) - 1 For i = 0 To S.GetLength(1) - 1 log_spec = 10 * Math.Log10(Math.Max(admin, S(n, i))) result(n, i) = log_spec - 10 * Math.Log10(Math.Max(admin, ref)) Next Next 'If top_db <> 0 Then ' For n = 0 To S.GetLength(0) - 1 ' For i = 0 To S.GetLength(1) - 1 ' 'result(n, i) = Math.Max(result(n, i), result(n, i) - top_db) ' Next ' Next 'End If Return result End Function Public Function melspectrogram(mel_basis(,) As Double, s(,) As Double) As Double(,) 'mel_basis 128,1025 's 5 ,1025 -> 1025,5 ' result 128,5 Dim result(mel_basis.GetLength(0) - 3, s.GetLength(0) - 1) As Double Dim r1, r2 As Double For n = 0 To mel_basis.GetLength(0) - 3 For i = 0 To s.GetLength(0) - 1 For k = 0 To mel_basis.GetLength(1) - 1 r1 = mel_basis(n, k) * s(i, k) r2 = r2 + r1 Next result(n, i) = r2 r2 = 0 Next Next Return result End Function Public Function normal(mel_f As Double(), weights(,) As Double) As Double(,) Dim enorm(mel_f.Length - 2) As Double ' Debug.Print("*************normal//////////////") ' Debug.Print("*************normal//////////////") For i = 0 To mel_f.Length - 3 enorm(i) = 2 / (mel_f(2 + i) - mel_f(i)) Next For i = 0 To weights.GetLength(1) - 1 For n = 0 To weights.GetLength(0) - 2 weights(n, i) = weights(n, i) * enorm(n) Next Next Return weights End Function Public Function weight(a As Double(,), fdiff As Double()) As Double(,) Dim lower, upper As Double Dim data(a.GetLength(0) - 1, a.GetLength(1) - 1) As Double For n = 0 To a.GetLength(0) - 3 For i = 0 To a.GetLength(1) - 1 lower = -(a(n, i) / fdiff(n)) upper = a(n + 2, i) / fdiff(n + 1) data(n, i) = Math.Max(0, Math.Min(lower, upper)) Next Next Return data End Function Public Function ramps(A As Double(), B As Double()) As Double(,) Dim data(A.Length - 1, B.Length - 1) As Double ' Debug.Print("ramps*********************") For n = 0 To A.Length - 1 'Debug.Print("******") 'Debug.Print("------") For i = 0 To B.Length - 1 data(n, i) = A(n) - B(i) 'Debug.Write(data(n, i) & " ") Next Next Return data End Function Public Function diff(arr As Double()) As Double() Dim data(arr.Length - 2) As Double For i = 1 To arr.Length - 1 data(i - 1) = arr(i) - arr(i - 1) 'Debug.Print(data(i - 1)) Next Return data End Function '分帧 算法2 Public Function Frame2(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double(,) Dim tim As Integer = Math.Floor((y.Length - n_ftt) / hop) + 1 Dim new_buff(tim - 1, n_ftt - 1) As Double Dim copypos As Integer = 0 For i = 0 To tim - 1 For k = 0 To n_ftt - 1 new_buff(i, k) = y(copypos + k) Next copypos = copypos + hop Next 'For k = 0 To tim - 1 ' Debug.Print("//////////////////////////////////////") ' Debug.Print("///////////////fram2///////////////////////" & k) ' For i = 0 To n_ftt - 1 ' Debug.Print(new_buff(k, i) & " ") ' Next 'Next k Return new_buff End Function ' Public Function Frame(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double() Dim tim As Integer = Math.Floor((y.Length - n_ftt) / hop) + 1 Dim new_buff(tim * n_ftt) As Double Dim pos As Integer = 0 Dim copypos As Integer = 0 For i = 0 To tim - 1 Array.Copy(y, copypos, new_buff, pos, n_ftt) 'Buffer.BlockCopy(y, 0, new_buff, pos, n_ftt) copypos = copypos + hop pos = pos + n_ftt Next For k = 0 To tim - 1 'Debug.Print("//////////////////////////////////////") 'Debug.Print("//////////////////////////////////////") For i = 0 To n_ftt - 1 Debug.Write(new_buff(k * n_ftt + i) & " ") Next Next k Return new_buff End Function Public Function MelFilter() As Double() Dim filter_points(128 + 1) As Integer '40个滤波器,需要41点 Const sampleRate As Integer = 16000 '采样频率 16000 Const filterNum As Integer = 128 '滤波器数量 取40个 Const frameSize As Integer = 512 '帧长512 Dim freMax As Double = sampleRate / 2 '实际最大频率 Dim freMin As Double = 0 '实际最小频率 Dim melFremax As Double = hz_to_mel(freMax) '将实际频率转换成梅尔频率 Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700) Dim k As Double = (melFremax - melFremin) / (filterNum + 1) Dim m As Double() = New Double(filterNum + 1) {} Dim h As Double() = New Double(filterNum + 1) {} For i As Integer = 0 To filterNum + 1 m(i) = melFremin + k * i 'h(i) = 700 * (Math.Exp(m(i) / 1125) - 1) '将梅尔频率转换成实际频率 filter_points(i) = mel_to_hz(m(i)) 'Debug.Print(m(i)) Next Dim hzs As Double() = mel_to_hz2(m) 'For i = 0 To filterNum + 1 ' ' Debug.Print(hzs(i)) 'Next Return hzs End Function Public Function hz_to_mel(frequencies As Double, Optional htk As Boolean = False) As Double Dim mels As Double If htk Then mels = 1125 * Math.Log(1 + frequencies / 700) Else Dim f_min As Double = 0.0 Dim f_sp As Double = 200.0 / 3 Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz) Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels) Dim logstep As Double = Math.Log(6.4) / 27.0 ' step size for log region mels = min_log_mel + Math.Log(frequencies / min_log_hz) / logstep End If Return mels End Function Public Function mel_to_hz2(mel() As Double, Optional htk As Boolean = False) As Double() Dim hz(mel.Length - 1) As Double Dim f_min As Double = 0.0 Dim f_sp As Double = 200.0 / 3 Dim freqs(mel.Length - 1) As Double For i = 0 To mel.Length - 1 freqs(i) = f_min + f_sp * mel(i) Next i Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz) Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels) Dim logstep As Double = Math.Log(6.4) / 27.0 For i = 0 To mel.Length - 1 If (mel(i) > min_log_mel) Then freqs(i) = min_log_hz * Math.Exp(logstep * (mel(i) - min_log_mel)) End If Next 'hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel)) Return freqs End Function Public Function mel_to_hz(mel As Double, Optional htk As Boolean = False) As Double Dim hz As Double If htk Then hz = 700 * (Math.Exp(mel) / 1125) - 1 Else Dim f_min As Double = 0.0 Dim f_sp As Double = 200.0 / 3 Dim freqs = f_min + f_sp * mel Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz) Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels) Dim logstep As Double = Math.Log(6.4) / 27.0 hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel)) 'hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel)) End If Return hz End Function Public Function fft_frequencies(sr As Integer, n_fft As Integer) As Double() Dim fft_data(n_fft / 2) As Double For i = 0 To n_fft / 2 fft_data(i) = i * sr / n_fft Next Return fft_data End Function '左右填充,优化 Public Function PadReflect2(data() As Double, num As Integer) 'pad 10 ,10 Dim tim(data.Length - 3) As Double For i = 0 To data.Length - 3 tim(i) = data(data.Length - 2 - i) Next Dim dump() As Double = data.Concat(tim).ToArray() 'For Each i In dump ' Debug.Write(i) End Function Public Function PadReflect(data() As Double, num As Integer) 'pad 10 ,10 Dim tim(data.Length - 3) As Double For i = 0 To data.Length - 3 tim(i) = data(data.Length - 2 - i) Next Dim dump() As Double = data.Concat(tim).ToArray() 'For Each i In dump ' Debug.Write(i) 'Next 'left_edge ' Debug.Print("***************************") Dim left_edge(num - 1) As Double _CopyDup(left_edge, dump, True) 'For i = 0 To num - 1 ' Debug.Write(left_edge(i)) 'Next 'right_edge 'Debug.Print("***************************") Dim right_edge(num + data.Length) As Double _CopyDup(right_edge, dump, False) 'For i = 0 To num - 1 ' Debug.Write(right_edge(i)) 'Next 'Debug.Print("***************************") Dim result As Double() = left_edge.Concat(right_edge).ToArray() Return result End Function 'copy tim to data dumply Public Function _CopyDup(data() As Double, tim() As Double, Optional left As Boolean = True) Dim last As Integer = data.Length Mod tim.Length Dim times As Integer = Math.Floor(data.Length / tim.Length) Dim pos As Integer If left Then Array.Copy(tim, tim.Length - last, data, 0, last) pos = last For i = 0 To times - 1 Array.Copy(tim, 0, data, pos, tim.Length) pos = pos + tim.Length Next Else 'RIGHT pos = 0 For i = 0 To times - 1 Array.Copy(tim, 0, data, pos, tim.Length) pos = pos + tim.Length Next Array.Copy(tim, 0, data, pos, last) End If End Function Public Function General_cosine(M As Integer, alpha As Double(), sym As Boolean) As Double() If Not sym Then M = M + 1 End If Dim tim As Double = (2 * pi) / (M - 1) Dim x(M) As Double Dim w(M) As Double 'Debug.Print("ine") For i = 0 To M - 1 x(i) = -pi + tim * i 'Debug.Write(x(i) & " ") Next 'Debug.Print("******") For i = 0 To alpha.GetLength(0) - 1 For k = 0 To M - 1 w(k) = w(k) + alpha(i) * Math.Cos(i * x(k)) 'Debug.Write(w(k) & " ") Next Next Return w End Function ''' <summary> ''' 汉明窗 ''' </summary> ''' <param name="M"> 窗长</param> ''' <returns></returns> Public Function General_hamming(M As Integer) As Double() Dim db As Double() = {0.5, 1 - 0.5} Return General_cosine(M, db, False) '进行加1 ,若sys为false End Function Public Function Get_window(M As Integer) As Double() Return General_hamming(M) End Function End Module Imports System.IO Imports System.Numerics Imports TensorFlow 'Install-Package TensorFlowSharp Public Class KeyWordDetect Dim graph As TFGraph Dim session As TFSession '加载模型 Public Sub New() Dim model As Byte() = File.ReadAllBytes("f:\graph1.pb") '导入GraphDef graph = New TFGraph() graph.Import(model, "") session = New TFSession(graph) ' Threading.ThreadPool.SetMaxThreads(5, 5) End Sub Protected Overrides Sub finalize() session.CloseSession() End Sub '将声音数据变为mfcc byte数据 Public Function DataBToMFCC(dataB() As Byte) As Double(,) Dim buff16(dataB.Length / 2 - 1) As Int16 Buffer.BlockCopy(dataB, 0, buff16, 0, dataB.Length - 1) Dim result(,) As Double = MFCC(buff16) Return result End Function '将声音数据变为mfcc Public Function DataToMFCC(dataI() As Int16) As Double(,) Dim result(,) As Double = MFCC(dataI) Return result End Function '将mfcc变为输入数据格式 Public Function MFCCToVect(mfcc As Double(,)) As Double(,,) Dim data(0, 1, 129) As Double Dim n As Integer = 0, m As Integer = 0 For i = 0 To mfcc.GetLength(0) - 1 For k = 0 To mfcc.GetLength(1) - 1 data(0, m, n) = mfcc(i, k) n = n + 1 Next If n = 130 Then m = 1 n = 0 End If Next Return data End Function Dim output Dim runner As TFSession.Runner Dim result Dim rshape '关键字检测 Public Function Detected(Data(,,) As Double) As Double ' Dim tensor As TFTensor = New TFTensor(Data) runner = session.GetRunner() runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0)) output = runner.Run() result = output(0) rshape = result.Shape Dim rt As Double rt = result.GetValue(True)(0)(0) 'For k = 0 To rshape.GetValue(0) - 1 ' rt = result.GetValue(True)(k)(0) ' 'Debug.Print(rt) ' If (rt > 0.8) Then ' Debug.Print("-----------recogxili") ' ' MsgBox("recgo") ' End If 'Next Return RT End Function 'Public Function RunB(dataB() As Byte) ' Dim mfccd As Double(,) = DataBToMFCC(dataB) ' Dim inputx As Double(,,) = MFCCToVect(mfccd) ' Detected(inputx) 'End Function 'Public Function ThreadPoolRun(dataI() As Int16) ' Threading.ThreadPool.QueueUserWorkItem(Run(dataI), dataI) ' ' Dim thrd1 As New Threading.Thread(New Threading.ParameterizedThreadStart(AddressOf Run)) ' ' thrd1.Start(dataI) 'End Function 'Delegate Function DelgRun(dataI() As Int16) 'Public Function ThreadRun(dataI() As Int16) ' ' Dim drun As New DelgRun(AddressOf Run) ' Dim thrd1 As New Threading.Thread(New Threading.ParameterizedThreadStart(AddressOf Run)) ' thrd1.Start(dataI) 'End Function Public Function Run(dataI() As Int16) As Double ' Debug.Print("thread *****1") Dim mfccd As Double(,) = DataToMFCC(dataI) Dim inputx As Double(,,) = MFCCToVect(mfccd) Return Detected(inputx) End Function Public Function MFCC(buff16() As Int16) As Double(,) Dim datalen As Integer = buff16.Length * 2 Dim double_buff(datalen / 2 - 1) As Double Dim len As Integer = datalen / 2 Array.Copy(buff16, double_buff, len) '****************** For i = 0 To double_buff.Length - 1 double_buff(i) = double_buff(i) / 32768 ' Debug.Print(double_buff(i)) Next '汉明窗create Dim hann_window As Double() = Get_window(2048) 'Debug.Print("--------------------------") 'Debug.Print("hann_window**************") For Each i In hann_window 'Debug.Print(i & " ") Next 'Debug.Print("--------------------------") 'Debug.Print("*************pad reflect**************") Dim y As Double() = PadReflect(double_buff, 1024) ' Dim y As Double() = double_buff 'For Each i In y ' 'Debug.Print(i & " ") 'Next 'Debug.Print("--------------------------") 'Debug.Print("***************frame************") Dim frams As Double(,) = Frame2(y) Dim tim As Integer = frams.GetLength(0) 'Debug.Print("--------------------------") 'Debug.Print("**********hann * data**************") Dim hannData(tim - 1, 2047) As Double For n = 0 To tim - 1 For i = 0 To 2048 - 1 hannData(n, i) = frams(n, i) * hann_window(i) ' Debug.Print(hannData(i) & " ") Next Next n '\\\\\\\\\\\\\\\\melspecture Dim specturm1(,) As Complex = FFT(hannData) 'For i = 0 To specturm1.GetLength(0) - 1 ' Debug.Print("--------------------------------------") ' Debug.Print("--------------------------------------") ' For k = 0 To specturm1.GetLength(1) - 1 ' Debug.Print(specturm1(i, k).Real & " " & specturm1(i, k).Imaginary) ' Next 'Next Dim s As Double(,) = spectrum(specturm1) Dim fftfreqs() As Double = fft_frequencies(16000, 2048) 'Debug.Print("***************fftfreqs*****************") 'Debug.Print("***************fftfreqs*****************") 'Debug.Print("fftfreqs.shape", fftfreqs.Length) 'For i = 0 To fftfreqs.Length - 1 ' 'Debug.Write(fftfreqs(i) & " ") 'Next ''''''''''''''''mel * specturm1 'Debug.Print("**************") 'Debug.Print("****滤波器创建**********") Dim mel_f As Double() = MelFilter() 'Debug.Print("--------------------------") 'Debug.Print("hann_window**************") 'Debug.Print("diff") Dim fdiff As Double() = diff(mel_f) Dim ramps_ As Double(,) = ramps(mel_f, fftfreqs) Dim weights(,) As Double = weight(ramps_, fdiff) normal(mel_f, weights) 'S*WEIGHT = melspectrogram 'weight 128,1025 's 5 ,1025 Dim melspectrogram_(,) As Double = melspectrogram(weights, s) Dim power_to_db_ As Double(,) = power_to_db(melspectrogram_) Dim dct_ As Double(,) = Dct(20, 128) Return _mfcc(dct_, power_to_db_) End Function End Class

以上这篇对python中Librosa的mfcc步骤详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持软件开发网。

您可能感兴趣的文章:利用python提取wav文件的mfcc方法对Python使用mfcc的两种方式详解



mfcc Python

需要 登录 后方可回复, 如果你还没有账号请 注册新账号