1.对语音数据归一化
如16000hz的数据,会将每个点/32768
2.计算窗函数:(*注意librosa中不进行预处理)
3.进行数据扩展填充,他进行的是镜像填充("reflect")
如原数据为 12345 -》 填充为4的,左右各填充4 即:5432123454321 即:5432-12345-4321
4.分帧
5.加窗:对每一帧进行加窗,
6.进行fft傅里叶变换
librosa中fft计算,可以使用.net中的System.Numerics
MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame, FourierOptions.Matlab) 计算,结果相同
7.mel计算(每一帧取20个特征点)
Imports System.Numerics
Imports MathNet.Numerics
Imports MathNet.Numerics.IntegralTransforms
Module mfcc_module
Public Class Librosa
End Class
Dim pi As Double = 3.1415926535897931
Public Function spectrum(fft_data(,) As Complex) As Double(,)
Dim new_data(fft_data.GetLength(0) - 1, fft_data.GetLength(1) - 1) As Double
For n = 0 To fft_data.GetLength(0) - 1
' Debug.Print("////////////////////////spectrum//////////////////")
' Debug.Print("////////////////////////spectrum//////////////////")
For i = 0 To fft_data.GetLength(1) - 1
new_data(n, i) = fft_data(n, i).MagnitudeSquared
' Debug.Write(new_data(n, i) & " ")
Next
Next
Return new_data
End Function
Public Function FFT(data As Double(,)) As Complex(,)
Dim result(data.GetLength(0) - 1, 1024) As Complex
'2049 加了一个 数组类型 0 开始
Dim FFT_frame As Complex() = New Complex(data.GetLength(1) - 1) {}
For n = 0 To data.GetLength(0) - 1
For i As Integer = 0 To data.GetLength(1) - 1
FFT_frame(i) = data(n, i)
Next
MathNet.Numerics.IntegralTransforms.Fourier.Forward(FFT_frame, FourierOptions.Matlab)
For k = 0 To 1024
result(n, k) = FFT_frame(k)
Next
'Debug.Print("fft **************")
'For Each mem In FFT_frame
' Debug.Print(mem.ToString & " ")
'Next
Next n
Return result
End Function
Public Function _mfcc(dct_ As Double(,), power_to_db_ As Double(,)) As Double(,)
'dct 20,128
'power_to_db 5,128
'result = 20,5
Dim result(dct_.GetLength(0) - 1, power_to_db_.GetLength(1) - 1) As Double
Dim r1, r2 As Double
For n = 0 To dct_.GetLength(0) - 1 '20
For i = 0 To power_to_db_.GetLength(1) - 1 '5
r2 = 0
For k = 0 To dct_.GetLength(1) - 1 '128
r1 = dct_(n, k) * power_to_db_(k, i)
r2 = r2 + r1
Next
result(n, i) = r2
Next
Next
Return result
End Function
Public Function Dct(n_filters As Integer, n_input As Integer) As Double(,)
Dim t1 As Double = 2 * n_input
Dim samples(n_input - 1) As Double
Dim basis(n_filters - 1, n_input - 1) As Double
Dim n As Integer = 1
For i = 0 To n_input - 1
samples(i) = n * pi / (2 * n_input)
n = n + 2
Next i
For i = 0 To n_input - 1
basis(0, i) = 1 / Math.Sqrt(n_input)
Next
For n = 1 To n_filters - 1
For i = 0 To n_input - 1
basis(n, i) = Math.Cos(n * samples(i)) * Math.Sqrt(2 / n_input)
Next
Next
Return basis
End Function
'1e-10 = 0.0000000001
Public Function power_to_db(S As Double(,), Optional ref As Double = 1, Optional admin As Double = 0.0000000001, Optional top_db As Double = 80) As Double(,)
Dim result(S.GetLength(0) - 1, S.GetLength(1) - 1) As Double
Dim log_spec As Double
For n = 0 To S.GetLength(0) - 1
For i = 0 To S.GetLength(1) - 1
log_spec = 10 * Math.Log10(Math.Max(admin, S(n, i)))
result(n, i) = log_spec - 10 * Math.Log10(Math.Max(admin, ref))
Next
Next
'If top_db <> 0 Then
' For n = 0 To S.GetLength(0) - 1
' For i = 0 To S.GetLength(1) - 1
' 'result(n, i) = Math.Max(result(n, i), result(n, i) - top_db)
' Next
' Next
'End If
Return result
End Function
Public Function melspectrogram(mel_basis(,) As Double, s(,) As Double) As Double(,)
'mel_basis 128,1025
's 5 ,1025 -> 1025,5
' result 128,5
Dim result(mel_basis.GetLength(0) - 3, s.GetLength(0) - 1) As Double
Dim r1, r2 As Double
For n = 0 To mel_basis.GetLength(0) - 3
For i = 0 To s.GetLength(0) - 1
For k = 0 To mel_basis.GetLength(1) - 1
r1 = mel_basis(n, k) * s(i, k)
r2 = r2 + r1
Next
result(n, i) = r2
r2 = 0
Next
Next
Return result
End Function
Public Function normal(mel_f As Double(), weights(,) As Double) As Double(,)
Dim enorm(mel_f.Length - 2) As Double
' Debug.Print("*************normal//////////////")
' Debug.Print("*************normal//////////////")
For i = 0 To mel_f.Length - 3
enorm(i) = 2 / (mel_f(2 + i) - mel_f(i))
Next
For i = 0 To weights.GetLength(1) - 1
For n = 0 To weights.GetLength(0) - 2
weights(n, i) = weights(n, i) * enorm(n)
Next
Next
Return weights
End Function
Public Function weight(a As Double(,), fdiff As Double()) As Double(,)
Dim lower, upper As Double
Dim data(a.GetLength(0) - 1, a.GetLength(1) - 1) As Double
For n = 0 To a.GetLength(0) - 3
For i = 0 To a.GetLength(1) - 1
lower = -(a(n, i) / fdiff(n))
upper = a(n + 2, i) / fdiff(n + 1)
data(n, i) = Math.Max(0, Math.Min(lower, upper))
Next
Next
Return data
End Function
Public Function ramps(A As Double(), B As Double()) As Double(,)
Dim data(A.Length - 1, B.Length - 1) As Double
' Debug.Print("ramps*********************")
For n = 0 To A.Length - 1
'Debug.Print("******")
'Debug.Print("------")
For i = 0 To B.Length - 1
data(n, i) = A(n) - B(i)
'Debug.Write(data(n, i) & " ")
Next
Next
Return data
End Function
Public Function diff(arr As Double()) As Double()
Dim data(arr.Length - 2) As Double
For i = 1 To arr.Length - 1
data(i - 1) = arr(i) - arr(i - 1)
'Debug.Print(data(i - 1))
Next
Return data
End Function
'分帧 算法2
Public Function Frame2(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double(,)
Dim tim As Integer = Math.Floor((y.Length - n_ftt) / hop) + 1
Dim new_buff(tim - 1, n_ftt - 1) As Double
Dim copypos As Integer = 0
For i = 0 To tim - 1
For k = 0 To n_ftt - 1
new_buff(i, k) = y(copypos + k)
Next
copypos = copypos + hop
Next
'For k = 0 To tim - 1
' Debug.Print("//////////////////////////////////////")
' Debug.Print("///////////////fram2///////////////////////" & k)
' For i = 0 To n_ftt - 1
' Debug.Print(new_buff(k, i) & " ")
' Next
'Next k
Return new_buff
End Function
'
Public Function Frame(y As Double(), Optional n_ftt As Integer = 2048, Optional hop As Integer = 512) As Double()
Dim tim As Integer = Math.Floor((y.Length - n_ftt) / hop) + 1
Dim new_buff(tim * n_ftt) As Double
Dim pos As Integer = 0
Dim copypos As Integer = 0
For i = 0 To tim - 1
Array.Copy(y, copypos, new_buff, pos, n_ftt)
'Buffer.BlockCopy(y, 0, new_buff, pos, n_ftt)
copypos = copypos + hop
pos = pos + n_ftt
Next
For k = 0 To tim - 1
'Debug.Print("//////////////////////////////////////")
'Debug.Print("//////////////////////////////////////")
For i = 0 To n_ftt - 1
Debug.Write(new_buff(k * n_ftt + i) & " ")
Next
Next k
Return new_buff
End Function
Public Function MelFilter() As Double()
Dim filter_points(128 + 1) As Integer '40个滤波器,需要41点
Const sampleRate As Integer = 16000 '采样频率 16000
Const filterNum As Integer = 128 '滤波器数量 取40个
Const frameSize As Integer = 512 '帧长512
Dim freMax As Double = sampleRate / 2 '实际最大频率
Dim freMin As Double = 0 '实际最小频率
Dim melFremax As Double = hz_to_mel(freMax) '将实际频率转换成梅尔频率
Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)
Dim k As Double = (melFremax - melFremin) / (filterNum + 1)
Dim m As Double() = New Double(filterNum + 1) {}
Dim h As Double() = New Double(filterNum + 1) {}
For i As Integer = 0 To filterNum + 1
m(i) = melFremin + k * i
'h(i) = 700 * (Math.Exp(m(i) / 1125) - 1)
'将梅尔频率转换成实际频率
filter_points(i) = mel_to_hz(m(i))
'Debug.Print(m(i))
Next
Dim hzs As Double() = mel_to_hz2(m)
'For i = 0 To filterNum + 1
' ' Debug.Print(hzs(i))
'Next
Return hzs
End Function
Public Function hz_to_mel(frequencies As Double, Optional htk As Boolean = False) As Double
Dim mels As Double
If htk Then
mels = 1125 * Math.Log(1 + frequencies / 700)
Else
Dim f_min As Double = 0.0
Dim f_sp As Double = 200.0 / 3
Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz)
Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels)
Dim logstep As Double = Math.Log(6.4) / 27.0 ' step size for log region
mels = min_log_mel + Math.Log(frequencies / min_log_hz) / logstep
End If
Return mels
End Function
Public Function mel_to_hz2(mel() As Double, Optional htk As Boolean = False) As Double()
Dim hz(mel.Length - 1) As Double
Dim f_min As Double = 0.0
Dim f_sp As Double = 200.0 / 3
Dim freqs(mel.Length - 1) As Double
For i = 0 To mel.Length - 1
freqs(i) = f_min + f_sp * mel(i)
Next i
Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz)
Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels)
Dim logstep As Double = Math.Log(6.4) / 27.0
For i = 0 To mel.Length - 1
If (mel(i) > min_log_mel) Then
freqs(i) = min_log_hz * Math.Exp(logstep * (mel(i) - min_log_mel))
End If
Next
'hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel))
Return freqs
End Function
Public Function mel_to_hz(mel As Double, Optional htk As Boolean = False) As Double
Dim hz As Double
If htk Then
hz = 700 * (Math.Exp(mel) / 1125) - 1
Else
Dim f_min As Double = 0.0
Dim f_sp As Double = 200.0 / 3
Dim freqs = f_min + f_sp * mel
Dim min_log_hz As Double = 1000.0 ' beginning of log region (Hz)
Dim min_log_mel As Double = (min_log_hz - f_min) / f_sp ' same (Mels)
Dim logstep As Double = Math.Log(6.4) / 27.0
hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel))
'hz = min_log_hz * Math.Exp(logstep * (mel - min_log_mel))
End If
Return hz
End Function
Public Function fft_frequencies(sr As Integer, n_fft As Integer) As Double()
Dim fft_data(n_fft / 2) As Double
For i = 0 To n_fft / 2
fft_data(i) = i * sr / n_fft
Next
Return fft_data
End Function
'左右填充,优化
Public Function PadReflect2(data() As Double, num As Integer)
'pad 10 ,10
Dim tim(data.Length - 3) As Double
For i = 0 To data.Length - 3
tim(i) = data(data.Length - 2 - i)
Next
Dim dump() As Double = data.Concat(tim).ToArray()
'For Each i In dump
' Debug.Write(i)
End Function
Public Function PadReflect(data() As Double, num As Integer)
'pad 10 ,10
Dim tim(data.Length - 3) As Double
For i = 0 To data.Length - 3
tim(i) = data(data.Length - 2 - i)
Next
Dim dump() As Double = data.Concat(tim).ToArray()
'For Each i In dump
' Debug.Write(i)
'Next
'left_edge
' Debug.Print("***************************")
Dim left_edge(num - 1) As Double
_CopyDup(left_edge, dump, True)
'For i = 0 To num - 1
' Debug.Write(left_edge(i))
'Next
'right_edge
'Debug.Print("***************************")
Dim right_edge(num + data.Length) As Double
_CopyDup(right_edge, dump, False)
'For i = 0 To num - 1
' Debug.Write(right_edge(i))
'Next
'Debug.Print("***************************")
Dim result As Double() = left_edge.Concat(right_edge).ToArray()
Return result
End Function
'copy tim to data dumply
Public Function _CopyDup(data() As Double, tim() As Double, Optional left As Boolean = True)
Dim last As Integer = data.Length Mod tim.Length
Dim times As Integer = Math.Floor(data.Length / tim.Length)
Dim pos As Integer
If left Then
Array.Copy(tim, tim.Length - last, data, 0, last)
pos = last
For i = 0 To times - 1
Array.Copy(tim, 0, data, pos, tim.Length)
pos = pos + tim.Length
Next
Else
'RIGHT
pos = 0
For i = 0 To times - 1
Array.Copy(tim, 0, data, pos, tim.Length)
pos = pos + tim.Length
Next
Array.Copy(tim, 0, data, pos, last)
End If
End Function
Public Function General_cosine(M As Integer, alpha As Double(), sym As Boolean) As Double()
If Not sym Then
M = M + 1
End If
Dim tim As Double = (2 * pi) / (M - 1)
Dim x(M) As Double
Dim w(M) As Double
'Debug.Print("ine")
For i = 0 To M - 1
x(i) = -pi + tim * i
'Debug.Write(x(i) & " ")
Next
'Debug.Print("******")
For i = 0 To alpha.GetLength(0) - 1
For k = 0 To M - 1
w(k) = w(k) + alpha(i) * Math.Cos(i * x(k))
'Debug.Write(w(k) & " ")
Next
Next
Return w
End Function
''' <summary>
''' 汉明窗
''' </summary>
''' <param name="M"> 窗长</param>
''' <returns></returns>
Public Function General_hamming(M As Integer) As Double()
Dim db As Double() = {0.5, 1 - 0.5}
Return General_cosine(M, db, False) '进行加1 ,若sys为false
End Function
Public Function Get_window(M As Integer) As Double()
Return General_hamming(M)
End Function
End Module
Imports System.IO
Imports System.Numerics
Imports TensorFlow
'Install-Package TensorFlowSharp
Public Class KeyWordDetect
Dim graph As TFGraph
Dim session As TFSession
'加载模型
Public Sub New()
Dim model As Byte() = File.ReadAllBytes("f:\graph1.pb")
'导入GraphDef
graph = New TFGraph()
graph.Import(model, "")
session = New TFSession(graph)
' Threading.ThreadPool.SetMaxThreads(5, 5)
End Sub
Protected Overrides Sub finalize()
session.CloseSession()
End Sub
'将声音数据变为mfcc byte数据
Public Function DataBToMFCC(dataB() As Byte) As Double(,)
Dim buff16(dataB.Length / 2 - 1) As Int16
Buffer.BlockCopy(dataB, 0, buff16, 0, dataB.Length - 1)
Dim result(,) As Double = MFCC(buff16)
Return result
End Function
'将声音数据变为mfcc
Public Function DataToMFCC(dataI() As Int16) As Double(,)
Dim result(,) As Double = MFCC(dataI)
Return result
End Function
'将mfcc变为输入数据格式
Public Function MFCCToVect(mfcc As Double(,)) As Double(,,)
Dim data(0, 1, 129) As Double
Dim n As Integer = 0, m As Integer = 0
For i = 0 To mfcc.GetLength(0) - 1
For k = 0 To mfcc.GetLength(1) - 1
data(0, m, n) = mfcc(i, k)
n = n + 1
Next
If n = 130 Then
m = 1
n = 0
End If
Next
Return data
End Function
Dim output
Dim runner As TFSession.Runner
Dim result
Dim rshape
'关键字检测
Public Function Detected(Data(,,) As Double) As Double
' Dim tensor As TFTensor = New TFTensor(Data)
runner = session.GetRunner()
runner.AddInput(graph("input")(0), Data).Fetch(graph("out")(0))
output = runner.Run()
result = output(0)
rshape = result.Shape
Dim rt As Double
rt = result.GetValue(True)(0)(0)
'For k = 0 To rshape.GetValue(0) - 1
' rt = result.GetValue(True)(k)(0)
' 'Debug.Print(rt)
' If (rt > 0.8) Then
' Debug.Print("-----------recogxili")
' ' MsgBox("recgo")
' End If
'Next
Return RT
End Function
'Public Function RunB(dataB() As Byte)
' Dim mfccd As Double(,) = DataBToMFCC(dataB)
' Dim inputx As Double(,,) = MFCCToVect(mfccd)
' Detected(inputx)
'End Function
'Public Function ThreadPoolRun(dataI() As Int16)
' Threading.ThreadPool.QueueUserWorkItem(Run(dataI), dataI)
' ' Dim thrd1 As New Threading.Thread(New Threading.ParameterizedThreadStart(AddressOf Run))
' ' thrd1.Start(dataI)
'End Function
'Delegate Function DelgRun(dataI() As Int16)
'Public Function ThreadRun(dataI() As Int16)
' ' Dim drun As New DelgRun(AddressOf Run)
' Dim thrd1 As New Threading.Thread(New Threading.ParameterizedThreadStart(AddressOf Run))
' thrd1.Start(dataI)
'End Function
Public Function Run(dataI() As Int16) As Double
' Debug.Print("thread *****1")
Dim mfccd As Double(,) = DataToMFCC(dataI)
Dim inputx As Double(,,) = MFCCToVect(mfccd)
Return Detected(inputx)
End Function
Public Function MFCC(buff16() As Int16) As Double(,)
Dim datalen As Integer = buff16.Length * 2
Dim double_buff(datalen / 2 - 1) As Double
Dim len As Integer = datalen / 2
Array.Copy(buff16, double_buff, len)
'******************
For i = 0 To double_buff.Length - 1
double_buff(i) = double_buff(i) / 32768
' Debug.Print(double_buff(i))
Next
'汉明窗create
Dim hann_window As Double() = Get_window(2048)
'Debug.Print("--------------------------")
'Debug.Print("hann_window**************")
For Each i In hann_window
'Debug.Print(i & " ")
Next
'Debug.Print("--------------------------")
'Debug.Print("*************pad reflect**************")
Dim y As Double() = PadReflect(double_buff, 1024)
' Dim y As Double() = double_buff
'For Each i In y
' 'Debug.Print(i & " ")
'Next
'Debug.Print("--------------------------")
'Debug.Print("***************frame************")
Dim frams As Double(,) = Frame2(y)
Dim tim As Integer = frams.GetLength(0)
'Debug.Print("--------------------------")
'Debug.Print("**********hann * data**************")
Dim hannData(tim - 1, 2047) As Double
For n = 0 To tim - 1
For i = 0 To 2048 - 1
hannData(n, i) = frams(n, i) * hann_window(i)
' Debug.Print(hannData(i) & " ")
Next
Next n
'\\\\\\\\\\\\\\\\melspecture
Dim specturm1(,) As Complex = FFT(hannData)
'For i = 0 To specturm1.GetLength(0) - 1
' Debug.Print("--------------------------------------")
' Debug.Print("--------------------------------------")
' For k = 0 To specturm1.GetLength(1) - 1
' Debug.Print(specturm1(i, k).Real & " " & specturm1(i, k).Imaginary)
' Next
'Next
Dim s As Double(,) = spectrum(specturm1)
Dim fftfreqs() As Double = fft_frequencies(16000, 2048)
'Debug.Print("***************fftfreqs*****************")
'Debug.Print("***************fftfreqs*****************")
'Debug.Print("fftfreqs.shape", fftfreqs.Length)
'For i = 0 To fftfreqs.Length - 1
' 'Debug.Write(fftfreqs(i) & " ")
'Next
''''''''''''''''mel * specturm1
'Debug.Print("**************")
'Debug.Print("****滤波器创建**********")
Dim mel_f As Double() = MelFilter()
'Debug.Print("--------------------------")
'Debug.Print("hann_window**************")
'Debug.Print("diff")
Dim fdiff As Double() = diff(mel_f)
Dim ramps_ As Double(,) = ramps(mel_f, fftfreqs)
Dim weights(,) As Double = weight(ramps_, fdiff)
normal(mel_f, weights)
'S*WEIGHT = melspectrogram
'weight 128,1025
's 5 ,1025
Dim melspectrogram_(,) As Double = melspectrogram(weights, s)
Dim power_to_db_ As Double(,) = power_to_db(melspectrogram_)
Dim dct_ As Double(,) = Dct(20, 128)
Return _mfcc(dct_, power_to_db_)
End Function
End Class
以上这篇对python中Librosa的mfcc步骤详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持软件开发网。
您可能感兴趣的文章:利用python提取wav文件的mfcc方法对Python使用mfcc的两种方式详解