from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
# Image files are laid out as ./knn_num_data/<label>/<label>_<index>.bmp
path = "./knn_num_data/%s/%s_%s.bmp"
data = []
target = []
for i in range(10):  # i is both the sub-directory name and the class label
    for j in range(1, 501):  # per-label image index, 1..500 inclusive
        data.append(plt.imread(path % (i, i, j)))
        target.append(i)
# The training data must be two-dimensional: keep the sample axis and
# flatten every remaining axis of each image into one feature axis.
data = np.array(data)
data = data.reshape(data.shape[0], -1)
target = np.array(target)
# Variance filtering followed by a k-nearest-neighbours classifier.
from sklearn.feature_selection import VarianceThreshold
# Split BEFORE fitting the selector so the held-out rows cannot influence
# which features are kept; the fixed seed makes the split reproducible.
raw_train, raw_test, y_train, y_test = train_test_split(
    data, target, random_state=0
)
var = VarianceThreshold()
X_train = var.fit_transform(raw_train)
X_test = var.transform(raw_test)
# Kept so later code that reads `new_data` still finds the filtered full set.
new_data = var.transform(data)
knn = KNeighborsClassifier(n_jobs=6).fit(X_train, y_train)
# Print the accuracies: a bare expression is only displayed when it is the
# last statement of a notebook cell and is silently discarded in a script.
print(knn.score(X_train, y_train), knn.score(X_test, y_test))
# Output pasted from an earlier run of the original code (unseeded split,
# selector fitted on all rows): (0.8013333333333333, 0.7096)
# Principal component analysis followed by a linear support vector classifier.
from sklearn.decomposition import PCA
# Split BEFORE fitting PCA so the projection and whitening statistics are
# learned from the training rows only; the fixed seed makes the split
# reproducible.
raw_train, raw_test, y_train, y_test = train_test_split(
    data, target, random_state=0
)
# random_state also pins the result if PCA picks its randomized SVD solver.
pca = PCA(n_components=150, whiten=True, random_state=0)
X_train = pca.fit_transform(raw_train)
X_test = pca.transform(raw_test)
# Kept so later code that reads `new_data` still finds the projected full set.
new_data = pca.transform(data)
svc = SVC(kernel='linear').fit(X_train, y_train)
# Print the accuracies: a bare expression is only displayed when it is the
# last statement of a notebook cell and is silently discarded in a script.
print(svc.score(X_train, y_train), svc.score(X_test, y_test))
# Output pasted from an earlier run of the original code (unseeded split,
# PCA fitted on all rows): (1.0, 0.8776)