import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

try:
    from sklearn.datasets import make_blobs
except ImportError:  # fallback for very old scikit-learn layouts
    from sklearn.datasets.samples_generator import make_blobs
# Global matplotlib settings for every figure drawn by this script:
# route sans-serif text through the SimHei font (presumably so the Chinese
# plot title renders) and turn off the Unicode minus sign on axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
class DBScan(object):
    """Fit scikit-learn's DBSCAN on a point set, then report and plot the result.

    Attributes:
        point: The samples as a NumPy array; ``draw`` plots columns 0 and 1.
        labels_stats: Reference labels the clustering is scored against.
        db: The fitted ``sklearn.cluster.DBSCAN`` estimator.
    """

    def __init__(self, p, l_stauts, eps=0.2, min_samples=10):
        """Store the inputs and fit DBSCAN on them immediately.

        Args:
            p: Samples to cluster, one row per sample (array-like).
            l_stauts: Reference label for each sample; only used for the
                scores printed by ``draw``.
            eps: Neighbourhood radius passed to DBSCAN. The default is the
                value that used to be hard-coded.
            min_samples: Minimum neighbourhood size passed to DBSCAN. The
                default is the value that used to be hard-coded.
        """
        # draw() indexes the samples with boolean masks, which a plain list
        # does not support, so normalise to an ndarray up front.
        self.point = np.asarray(p)
        self.labels_stats = l_stauts
        self.db = DBSCAN(eps=eps, min_samples=min_samples).fit(self.point)

    def draw(self):
        """Print clustering metrics, then show a scatter plot of the clusters.

        Core samples are drawn with large markers and every other member of
        the same label with small markers; label -1 (noise) is drawn in black.
        """
        labels = self.db.labels_
        core_mask = np.zeros_like(labels, dtype=bool)
        core_mask[self.db.core_sample_indices_] = True
        nclusters = denoise(labels)

        self._print_report(labels, nclusters)

        # Sorted so that the label -> colour assignment is the same on every
        # run instead of depending on set iteration order.
        unique_labels = sorted(set(labels))
        colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
        for k, col in zip(unique_labels, colors):
            # Black is reserved for noise; other labels get their colormap
            # entry, passed as a tuple so it is read as one RGBA colour.
            face = 'k' if k == -1 else tuple(col)
            members = (labels == k)

            # Core samples of this label: large markers.
            xy = self.point[members & core_mask]
            plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=face,
                     markeredgecolor='k', markersize=6)

            # Remaining (non-core) samples of this label: small markers.
            xy = self.point[members & ~core_mask]
            plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=face,
                     markeredgecolor='k', markersize=3)

        plt.title('集群的估计数量: %d' % nclusters)
        plt.show()

    def _print_report(self, labels, nclusters):
        """Print the cluster count and the evaluation scores for ``labels``."""
        reference = self.labels_stats
        print('Estimated number of clusters: %d' % nclusters)

        # Scores that compare the predicted labels with the reference labels.
        supervised_scores = (
            ("Homogeneity: %0.3f", metrics.homogeneity_score),
            ("Completeness: %0.3f", metrics.completeness_score),
            ("V-measure: %0.3f", metrics.v_measure_score),
            ("Adjusted Rand Index: %0.3f", metrics.adjusted_rand_score),
            ("Adjusted Mutual Information: %0.3f",
             metrics.adjusted_mutual_info_score),
        )
        for template, score in supervised_scores:
            print(template % score(reference, labels))

        # silhouette_score raises ValueError unless the number of distinct
        # labels lies strictly between 1 and the number of samples (e.g. it
        # fails when every point is noise), so only call it when it is defined.
        n_labels = len(set(labels))
        if 1 < n_labels < len(labels):
            print("Silhouette Coefficient: %0.3f"
                  % metrics.silhouette_score(self.point, labels))
        else:
            print("Silhouette Coefficient: undefined "
                  "(needs between 2 and n_samples - 1 distinct labels)")
def denoise(labels):
    """Return how many distinct labels ``labels`` holds, not counting -1.

    The label -1 is treated as the noise marker and is excluded from the
    count when it is present.
    """
    distinct_count = len(set(labels))
    if -1 in labels:
        # One of the distinct values is the noise marker, so drop it.
        return distinct_count - 1
    return distinct_count
def standar_scaler(points):
    """Return ``points`` after fitting and applying a fresh ``StandardScaler``."""
    return StandardScaler().fit_transform(points)
if __name__ == "__main__":
    # Demo run: generate 1000 samples around four blob centres, standardise
    # them, cluster with DBScan, then print the scores and show the plot.
    blob_centers = [[1, 1], [-1, -1], [-1, 1], [1, -1]]
    raw_points, true_labels = make_blobs(
        n_samples=1000,
        centers=blob_centers,
        cluster_std=0.4,
        random_state=0,
    )
    model = DBScan(standar_scaler(raw_points), true_labels)
    model.draw()
算法:密度聚类(Density-Based Spatial Clustering of Applications with Noise,DBSCAN)是一种依据样本分布的紧密程度来确定聚类结构的聚类方法。 文献:Bi, F. M., Wang, W. K., & Long, C. (2012). DBSCAN: Density-based spatial clustering of applications with noise. Journal of Nanjing University (Natural Sciences), 48(4), 491-498.
本文分享自 图像处理与模式识别研究所 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!