from time import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn import (manifold,datasets,decomposition,ensemble,discriminant_analysis,random_projection)
# Load the handwritten-digits dataset, restricted to six digit classes.
digits = datasets.load_digits(n_class=6)

# X is the feature matrix (one row per sample), y the matching class labels.
X, y = digits.data, digits.target
n_samples, n_features = X.shape

# Neighbourhood size; not used in the part of the script shown here.
n_neighbors = 30
# Scale and visualize the embedding vectors
def plot_embedding(X, title=None):
    """Draw a 2-D embedding of the digits with labels and image thumbnails.

    Each column of ``X`` is min-max scaled to [0, 1]. A new figure is
    created, every sample is drawn as its digit label (coloured by class)
    at its scaled position, and thumbnails of the digit images are overlaid
    wherever they do not crowd an already-shown thumbnail.

    Relies on the module-level ``digits`` dataset for the labels and images,
    so row ``i`` of ``X`` is expected to correspond to ``digits.target[i]``
    and ``digits.images[i]``.

    Args:
        X: 2-D array of embedded points; columns 0 and 1 are used as the
            x and y coordinates.
        title: Optional figure title; nothing is set when ``None``.
    """
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    span = x_max - x_min
    # A constant column has zero range; divide by 1 there so it maps to 0
    # instead of producing NaN coordinates.
    span = np.where(span == 0, 1.0, span)
    X = (X - x_min) / span

    plt.figure()
    ax = plt.subplot(111)
    labels = digits.target
    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(labels[i]),
                 color=plt.cm.Set1(labels[i] / 10.),
                 fontdict={'weight': 'bold', 'size': 9})

    if hasattr(offsetbox, 'AnnotationBbox'):
        # Seeded with the (1, 1) corner so the distance computation always
        # has at least one row; as a side effect, points very close to that
        # corner never get a thumbnail.
        shown_images = np.array([[1., 1.]])
        for i in range(X.shape[0]):
            dist = np.sum((X[i] - shown_images) ** 2, 1)
            if np.min(dist) < 4e-3:
                # Too close to a thumbnail that is already displayed.
                continue
            shown_images = np.r_[shown_images, [X[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r),
                X[i])
            ax.add_artist(imagebox)

    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)
# Plot a selection of the digit images as one mosaic
n_img_per_row = 20
img = np.zeros((10 * n_img_per_row, 10 * n_img_per_row))
for i in range(n_img_per_row):
    # Tiles sit on a 10-pixel pitch, offset by 1 pixel, leaving a blank
    # border around each 8x8 image.
    ix = 10 * i + 1
    for j in range(n_img_per_row):
        iy = 10 * j + 1
        # Row-major fill: sample i * n_img_per_row + j is reshaped from its
        # flat feature vector back into an 8x8 image.
        img[ix:ix + 8, iy:iy + 8] = X[i * n_img_per_row + j].reshape((8, 8))

plt.imshow(img, cmap=plt.cm.binary)
plt.xticks([])
plt.yticks([])
plt.title('选择64维数字数据集')
# MDS embedding of the digits dataset
print("计算MDS嵌入")
clf = manifold.MDS(
    n_components=2,
    n_init=1,
    max_iter=100,
)

# The timer starts just before fitting; the elapsed time shown in the plot
# title is read after the stress value has been printed.
t0 = time()
X_mds = clf.fit_transform(X)
print("Done. Stress: %f" % clf.stress_)
plot_embedding(
    X_mds,
    title="MDS嵌入数字数据集 (时间 %.2fs)" % (time() - t0),
)
plt.show()
# 算法:MDS(多维缩放)是一种基于欧氏距离、能够保留全局结构的非线性降维方法。
# 链接:https://github.com/wepe/MachineLearning/tree/master/ManifoldLearning/DimensionalityReduction_DataVisualizing
# 本文分享自 图像处理与模式识别研究所 微信公众号,前往查看
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。
# 本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!