Skr-Eric's Machine Learning Class (9) -- Speech Recognition, Image Recognition and Face Recognition


Speech Recognition

1. The Mel-Frequency Cepstral Coefficient (MFCC) Matrix

The audio input is first split into segments in chronological order, and a Fourier transform is applied to each segment to obtain its frequency distribution. From this distribution, the energy at the 13 characteristic frequencies most strongly correlated with the content of human speech is extracted to form one sample. The samples obtained from all segments are stacked row by row into a matrix, the Mel-Frequency Cepstral Coefficient (MFCC) matrix. The MFCC matrix captures the content features of the audio input and can be used to recognize what was said.
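A minimal sketch of the shape of such a matrix, using a synthetic one-second signal in place of a real recording (the 440 Hz tone and the 16 kHz sample rate are illustrative assumptions, not values from the course data); the full example below then computes and visualizes the MFCC matrix of an actual recording:

import numpy as np
import python_speech_features as sf

sample_rate = 16000                       # 16 kHz sampling rate (illustrative)
t = np.arange(sample_rate) / sample_rate  # one second of audio
sigs = np.sin(2 * np.pi * 440 * t)        # a 440 Hz tone stands in for real speech

mfcc = sf.mfcc(sigs, sample_rate)
# One row per ~25 ms frame, 13 cepstral coefficients per row by default.
print(mfcc.shape)                         # e.g. (99, 13)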

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
import scipy.io.wavfile as wf
import python_speech_features as sf
import matplotlib.pyplot as mp

sample_rate, sigs = wf.read(
    '../../data/speeches/training/banana/banana01.wav')
mfcc = sf.mfcc(sigs, sample_rate)
mp.matshow(mfcc.T, cmap='jet', fignum='MFCC')
mp.title('MFCC', fontsize=20)
mp.xlabel('Sample', fontsize=14)
mp.ylabel('Feature', fontsize=14)
mp.tick_params(which='both', top=False, labeltop=False,
               labelbottom=True, labelsize=10)
mp.show()

2. Speech Recognition

For each word in the training set (for example apple and lime), the MFCC matrices of all its recordings are stacked and used to train one hidden Markov model (HMM). To recognize an unknown recording, its MFCC matrix is scored against every word's HMM, and the word whose model gives the highest score is the prediction; a minimal sketch of this decision rule follows the diagram.

    apple
      xxx.wav -> MFCC \
      xxx.wav -> MFCC  > MFCC -> HMM --- score 0.8
      ...             /                       \
                                               MFCC <- xxx.wav
    lime                                      /
      xxx.wav -> MFCC \                      /
      xxx.wav -> MFCC  > MFCC -> HMM --- score 0.9  (max -> predicted label: lime)
      ...             /
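A minimal sketch of this decision rule (the random arrays stand in for real MFCC matrices and are purely illustrative): one GaussianHMM is fitted per word, and the test sample is assigned to the best-scoring model. The complete training and testing script below applies the same pattern to real recordings.

import numpy as np
import hmmlearn.hmm as hl

models = {}
for label in ('apple', 'lime'):
    mfccs = np.random.randn(200, 13)      # stand-in for stacked training MFCCs
    model = hl.GaussianHMM(n_components=4,
                           covariance_type='diag', n_iter=1000)
    models[label] = model.fit(mfccs)

test_mfccs = np.random.randn(50, 13)      # stand-in for one test recording
# model.score returns the log-likelihood; pick the best-scoring label.
pred_label = max(models, key=lambda label: models[label].score(test_mfccs))
print(pred_label)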

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import warnings
import numpy as np
import scipy.io.wavfile as wf
import python_speech_features as sf
import hmmlearn.hmm as hl

warnings.filterwarnings('ignore', category=DeprecationWarning)
np.seterr(all='ignore')


def search_speeches(directory, speeches):
    # Recursively collect .wav files, keyed by the name of their parent directory.
    directory = os.path.normpath(directory)
    if not os.path.isdir(directory):
        raise IOError("The directory '" + directory + "' doesn't exist!")
    for entry in os.listdir(directory):
        label = directory[directory.rfind(os.path.sep) + 1:]
        path = os.path.join(directory, entry)
        if os.path.isdir(path):
            search_speeches(path, speeches)
        elif os.path.isfile(path) and path.endswith('.wav'):
            if label not in speeches:
                speeches[label] = []
            speeches[label].append(path)


train_speeches = {}
search_speeches('../../data/speeches/training', train_speeches)
train_x, train_y = [], []
for label, filenames in train_speeches.items():
    # Stack the MFCC matrices of all recordings of one word into a single array.
    mfccs = np.array([])
    for filename in filenames:
        sample_rate, sigs = wf.read(filename)
        mfcc = sf.mfcc(sigs, sample_rate)
        if len(mfccs) == 0:
            mfccs = mfcc
        else:
            mfccs = np.append(mfccs, mfcc, axis=0)
    train_x.append(mfccs)
    train_y.append(label)
# Train one Gaussian HMM per word.
models = {}
for mfccs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag',
                           n_iter=1000)
    models[label] = model.fit(mfccs)
test_speeches = {}
search_speeches('../../data/speeches/testing', test_speeches)
test_x, test_y = [], []
for label, filenames in test_speeches.items():
    mfccs = np.array([])
    for filename in filenames:
        sample_rate, sigs = wf.read(filename)
        mfcc = sf.mfcc(sigs, sample_rate)
        if len(mfccs) == 0:
            mfccs = mfcc
        else:
            mfccs = np.append(mfccs, mfcc, axis=0)
    test_x.append(mfccs)
    test_y.append(label)
# Predict the word whose HMM gives the highest score for each test sample.
pred_test_y = []
for mfccs in test_x:
    best_score, best_label = None, None
    for label, model in models.items():
        score = model.score(mfccs)
        if (best_score is None) or (best_score < score):
            best_score, best_label = score, label
    pred_test_y.append(best_label)
print(test_y)
print(pred_test_y)

 

Image Recognition

1. OpenCV, the Open Source Computer Vision Library

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import numpy as np
import cv2 as cv

original = cv.imread('../../data/forest.jpg')
print(original.shape)
cv.imshow('Original', original)
# Keep one BGR channel at a time.
blue = np.zeros_like(original)
blue[..., 0] = original[..., 0]
cv.imshow('Blue', blue)
green = np.zeros_like(original)
green[..., 1] = original[..., 1]
cv.imshow('Green', green)
red = np.zeros_like(original)
red[..., 2] = original[..., 2]
cv.imshow('Red', red)
# Crop the central quarter of the image.
h, w = original.shape[:2]
l, t = int(w / 4), int(h / 4)
r, b = int(w * 3 / 4), int(h * 3 / 4)
cropped = original[t:b, l:r]
cv.imshow('Cropped', cropped)
'''
scaled = cv.resize(
    original, (int(w / 2), int(h / 2)))
'''
scaled = cv.resize(original, None, fx=0.5, fy=0.5)
cv.imshow('Scaled', scaled)
cv.waitKey()
cv.imwrite('../../data/blue.jpg', blue)
cv.imwrite('../../data/green.jpg', green)
cv.imwrite('../../data/red.jpg', red)
cv.imwrite('../../data/cropped.jpg', cropped)
cv.imwrite('../../data/scaled.jpg', scaled)

2. Edge Detection

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv

original = cv.imread('../../data/chair.jpg')
cv.imshow('Original', original)
# Canny edge detection with hysteresis thresholds 50 and 240.
canny = cv.Canny(original, 50, 240)
cv.imshow('Canny', canny)
cv.waitKey()

3. Histogram Equalization

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv

original = cv.imread('../../data/sunrise.jpg')
cv.imshow('Original', original)
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
equalized_gray = cv.equalizeHist(gray)
cv.imshow('Equalized Gray', equalized_gray)
# For a color image, equalize only the luminance (Y) channel in YUV space.
yuv = cv.cvtColor(original, cv.COLOR_BGR2YUV)
yuv[..., 0] = cv.equalizeHist(yuv[..., 0])
equalized_color = cv.cvtColor(yuv, cv.COLOR_YUV2BGR)
cv.imshow('Equalized Color', equalized_color)
cv.waitKey()

4. Structural Features

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv

original = cv.imread('../../data/table.jpg')
cv.imshow('Original', original)
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
# Create a STAR keypoint detector
star = cv.xfeatures2d.StarDetector_create()
keypoints = star.detect(gray)
mixture = original.copy()
cv.drawKeypoints(original, keypoints, mixture,
                 flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv.imshow('Mixture', mixture)
cv.waitKey()

5. The Feature Description Matrix of an Image

The feature description matrix of an image is derived from spatial histograms computed around the keypoints that capture the structural features of the objects in the image.
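Before the full example below, a minimal sketch of what this matrix looks like, assuming the opencv-contrib (xfeatures2d) build of OpenCV and reusing the table.jpg image from above: each row describes one keypoint, and its 128 values can be viewed as a 4x4 grid of spatial cells with an 8-bin gradient-orientation histogram per cell.

import cv2 as cv

gray = cv.cvtColor(cv.imread('../../data/table.jpg'), cv.COLOR_BGR2GRAY)
keypoints = cv.xfeatures2d.StarDetector_create().detect(gray)
_, desc = cv.xfeatures2d.SIFT_create().compute(gray, keypoints)
print(desc.shape)                  # (number of keypoints, 128)
# Roughly: one keypoint's descriptor viewed as 4x4 cells x 8 orientation bins.
print(desc[0].reshape(4, 4, 8))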

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv
import matplotlib.pyplot as mp

original = cv.imread('../../data/table.jpg')
cv.imshow('Original', original)
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
# Create a STAR keypoint detector
star = cv.xfeatures2d.StarDetector_create()
keypoints = star.detect(gray)
# Create a SIFT descriptor extractor
sift = cv.xfeatures2d.SIFT_create()
_, desc = sift.compute(gray, keypoints)
mp.matshow(desc, cmap='gray', fignum='Description')
mp.title('Description', fontsize=20)
mp.xlabel('Feature', fontsize=14)
mp.ylabel('Sample', fontsize=14)
mp.tick_params(which='both', top=False, labeltop=False,
               labelbottom=True, labelsize=10)
mp.show()

6. Object Recognition Based on the Feature Description Matrix

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import warnings
import numpy as np
import cv2 as cv
import hmmlearn.hmm as hl

warnings.filterwarnings('ignore', category=DeprecationWarning)
np.seterr(all='ignore')


def search_objects(directory):
    # Collect .jpg files, keyed by the name of their parent directory.
    directory = os.path.normpath(directory)
    if not os.path.isdir(directory):
        raise IOError("The directory '" + directory + "' doesn't exist!")
    objects = {}
    for curdir, subdirs, files in os.walk(directory):
        for jpeg in (file for file in files if file.endswith('.jpg')):
            path = os.path.join(curdir, jpeg)
            label = path.split(os.path.sep)[-2]
            if label not in objects:
                objects[label] = []
            objects[label].append(path)
    return objects


train_objects = search_objects('../../data/objects/training/')
train_x, train_y = [], []
for label, filenames in train_objects.items():
    # Stack the SIFT description matrices of all images of one object class.
    descs = np.array([])
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        h, w = gray.shape[:2]
        f = 200 / min(h, w)
        gray = cv.resize(gray, None, fx=f, fy=f)
        star = cv.xfeatures2d.StarDetector_create()
        keypoints = star.detect(gray)
        sift = cv.xfeatures2d.SIFT_create()
        _, desc = sift.compute(gray, keypoints)
        if len(descs) == 0:
            descs = desc
        else:
            descs = np.append(descs, desc, axis=0)
    train_x.append(descs)
    train_y.append(label)
# Train one Gaussian HMM per object class.
models = {}
for descs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag',
                           n_iter=1000)
    models[label] = model.fit(descs)
test_objects = search_objects('../../data/objects/testing/')
test_x, test_y, test_z = [], [], []
for label, filenames in test_objects.items():
    test_z.append([])
    descs = np.array([])
    for filename in filenames:
        image = cv.imread(filename)
        test_z[-1].append(image)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        h, w = gray.shape[:2]
        f = 200 / min(h, w)
        gray = cv.resize(gray, None, fx=f, fy=f)
        star = cv.xfeatures2d.StarDetector_create()
        keypoints = star.detect(gray)
        sift = cv.xfeatures2d.SIFT_create()
        _, desc = sift.compute(gray, keypoints)
        if len(descs) == 0:
            descs = desc
        else:
            descs = np.append(descs, desc, axis=0)
    test_x.append(descs)
    test_y.append(label)
# Predict the class whose HMM gives the highest score for each test sample.
pred_test_y = []
for descs in test_x:
    best_score, best_label = None, None
    for label, model in models.items():
        score = model.score(descs)
        if (best_score is None) or (best_score < score):
            best_score, best_label = score, label
    pred_test_y.append(best_label)
i = 0
for label, pred_label, images in zip(test_y, pred_test_y, test_z):
    for image in images:
        i += 1
        cv.imshow('{} - {} {} {}'.format(
            i, label, '==' if label == pred_label else '!=', pred_label),
            image)
cv.waitKey()

 

Face Recognition

1. Video Capture

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv

vc = cv.VideoCapture(0)
while True:
    frame = vc.read()[1]
    cv.imshow('VideoCapture', frame)
    # Exit when the Esc key (27) is pressed.
    if cv.waitKey(33) == 27:
        break
vc.release()
cv.destroyAllWindows()

2. Face Localization

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cv2 as cv

# Build cascade classifiers from Haar description files
fd = cv.CascadeClassifier('../../data/haar/face.xml')
ed = cv.CascadeClassifier('../../data/haar/eye.xml')
nd = cv.CascadeClassifier('../../data/haar/nose.xml')
vc = cv.VideoCapture(0)
while True:
    frame = vc.read()[1]
    faces = fd.detectMultiScale(frame, 1.3, 5)
    for l, t, w, h in faces:
        a, b = int(w / 2), int(h / 2)
        cv.ellipse(frame, (l + a, t + b), (a, b),
                   0, 0, 360, (255, 0, 255), 2)
        face = frame[t:t + h, l:l + w]
        eyes = ed.detectMultiScale(face, 1.3, 5)
        for l, t, w, h in eyes:
            a, b = int(w / 2), int(h / 2)
            cv.ellipse(face, (l + a, t + b), (a, b),
                       0, 0, 360, (0, 255, 0), 2)
        noses = nd.detectMultiScale(face, 1.3, 5)
        for l, t, w, h in noses:
            a, b = int(w / 2), int(h / 2)
            cv.ellipse(face, (l + a, t + b), (a, b),
                       0, 0, 360, (0, 255, 255), 2)
    cv.imshow('VideoCapture', frame)
    if cv.waitKey(33) == 27:
        break
vc.release()
cv.destroyAllWindows()

3. Face Recognition

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import numpy as np
import cv2 as cv
import sklearn.preprocessing as sp

fd = cv.CascadeClassifier('../../data/haar/face.xml')


def search_faces(directory):
    # Collect .jpg files, keyed by the name of their parent directory.
    directory = os.path.normpath(directory)
    if not os.path.isdir(directory):
        raise IOError("The directory '" + directory + "' doesn't exist!")
    faces = {}
    for curdir, subdirs, files in os.walk(directory):
        for jpeg in (file for file in files if file.endswith('.jpg')):
            path = os.path.join(curdir, jpeg)
            label = path.split(os.path.sep)[-2]
            if label not in faces:
                faces[label] = []
            faces[label].append(path)
    return faces


train_faces = search_faces('../../data/faces/training')
# Encode the person names as integer codes.
codec = sp.LabelEncoder()
codec.fit(list(train_faces.keys()))
train_x, train_y = [], []
for label, filenames in train_faces.items():
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        faces = fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100))
        for l, t, w, h in faces:
            train_x.append(gray[t:t + h, l:l + w])
            train_y.append(int(codec.transform([label])[0]))
train_y = np.array(train_y)
# Create a Local Binary Patterns Histograms (LBPH) face recognizer
model = cv.face.LBPHFaceRecognizer_create()
model.train(train_x, train_y)
test_faces = search_faces('../../data/faces/testing')
test_x, test_y, test_z = [], [], []
for label, filenames in test_faces.items():
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        faces = fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100))
        for l, t, w, h in faces:
            test_x.append(gray[t:t + h, l:l + w])
            test_y.append(int(codec.transform([label])[0]))
            a, b = int(w / 2), int(h / 2)
            cv.ellipse(image, (l + a, t + b), (a, b),
                       0, 0, 360, (255, 0, 255), 2)
        test_z.append(image)
test_y = np.array(test_y)
pred_test_y = []
for face in test_x:
    pred_code = model.predict(face)[0]
    pred_test_y.append(pred_code)
escape = False
while not escape:
    for code, pred_code, image in zip(test_y, pred_test_y, test_z):
        label, pred_label = codec.inverse_transform([code, pred_code])
        text = '{} {} {}'.format(
            label, '==' if code == pred_code else '!=', pred_label)
        cv.putText(image, text, (10, 60),
                   cv.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 6)
        cv.imshow('Recognizing Face...', image)
        if cv.waitKey(1000) == 27:
            escape = True
            break

For more lessons, follow SkrEric's Programming Class on WeChat.


Author: Skr-Eric



