【机器学习】PCA主成分分析应用 手写数字识别

Judith ·
更新时间:2024-11-11
· 975 次阅读

# Handwritten-digit recognition demo: compares a variance filter + KNN
# against PCA + linear SVC on bitmap images loaded from ./knn_num_data.
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

# Fixed seed so the split (and any randomized PCA solver) is reproducible.
RANDOM_STATE = 0

# Image path template: ./knn_num_data/<digit>/<digit>_<index>.bmp
path = "./knn_num_data/%s/%s_%s.bmp"

# Load the images of folders 0-9, files numbered 1-500; the folder digit
# is used as the label.
data = []
target = []
for i in range(10):
    for j in range(1, 501):
        data.append(plt.imread(path % (i, i, j)))
        target.append(i)

# The training data must be two-dimensional: flatten every image into a
# single feature row.
data = np.array(data)
data = data.reshape(data.shape[0], -1)
target = np.array(target)

# Split the raw pixels BEFORE fitting any transformer, so that the feature
# filter and the PCA basis are learned from training samples only and the
# test score is not inflated by information leaked from the test set.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    data, target, random_state=RANDOM_STATE
)

# --- Variance filtering + KNN ---------------------------------------------
# VarianceThreshold with its default threshold drops zero-variance features.
var = VarianceThreshold()
X_train = var.fit_transform(X_train_raw)
X_test = var.transform(X_test_raw)

knn = KNeighborsClassifier(n_jobs=6).fit(X_train, y_train)
# Print explicitly: a bare expression is discarded when run as a script.
print(knn.score(X_train, y_train), knn.score(X_test, y_test))
# The original post reported (0.8013333333333333, 0.7096) for its unseeded
# run that fitted the filter on all samples before splitting.

# --- Principal component analysis + linear SVC ----------------------------
pca = PCA(n_components=150, whiten=True, random_state=RANDOM_STATE)
X_train = pca.fit_transform(X_train_raw)
X_test = pca.transform(X_test_raw)

svc = SVC(kernel='linear').fit(X_train, y_train)
print(svc.score(X_train, y_train), svc.score(X_test, y_test))
# The original post reported (1.0, 0.8776) for its unseeded run that fitted
# PCA on all samples before splitting.
作者:♚木思风



学习 pca 机器学习

需要 登录 后方可回复, 如果你还没有账号请 注册新账号