import math
import pylab as pl
# Data set: comma-separated "id,x,y" triples, one triple per sample
# (30 samples). The id is only a row label; it is dropped during parsing.
# Fixed: the 9th and 19th samples were labelled "6" and "16", duplicating
# earlier ids in an otherwise consecutive 1..30 sequence.
# NOTE(review): the feature values are kept exactly as found. If the id
# typos came from a 9->6 transcription slip, some values may carry the same
# error -- verify against the original data table.
data = """
1,0.697,0.46,2,0.774,0.376,3,0.634,0.264,4,0.608,0.318,5,0.556,0.215,
6,0.403,0.237,7,0.481,0.149,8,0.437,0.211,9,0.666,0.061,10,0.243,0.267,
11,0.245,0.057,12,0.343,0.066,13,0.636,0.161,14,0.657,0.168,15,0.36,0.37,
16,0.593,0.042,17,0.716,0.103,18,0.356,0.188,19,0.336,0.241,20,0.282,0.257,
21,0.748,0.232,22,0.714,0.346,23,0.483,0.312,24,0.478,0.437,25,0.525,0.366,
26,0.751,0.486,27,0.532,0.472,28,0.473,0.376,29,0.725,0.445,30,0.446,0.456"""
# Data processing: split into tokens (dropping the newlines embedded in the
# literal), then keep the two feature values of every (id, x, y) triple.
a = [token.strip() for token in data.strip().split(',')]
if len(a) % 3 != 0:
    # A stray or missing field would silently misalign every later sample.
    raise ValueError("data must consist of complete (id, x, y) triples")
dataset = [(float(a[i + 1]), float(a[i + 2])) for i in range(0, len(a), 3)]
# Compute the Euclidean distance; a and b are two tuples.
def dist(a, b):
    """Return the Euclidean distance between two 2-D points.

    Only the first two coordinates of each point are used.

    Args:
        a: First point, indexable as ``a[0]`` and ``a[1]``.
        b: Second point, indexable as ``b[0]`` and ``b[1]``.

    Returns:
        The straight-line distance between ``a`` and ``b`` as a float.
    """
    # math.hypot computes sqrt(dx*dx + dy*dy) without intermediate
    # overflow/underflow of the squared terms.
    return math.hypot(a[0] - b[0], a[1] - b[1])
def dist_min(Ci, Cj):
    """Return the single-linkage distance between two clusters.

    This is the smallest ``dist`` value over every pair made of one point
    from ``Ci`` and one point from ``Cj``.

    Args:
        Ci: Iterable of points forming the first cluster.
        Cj: Iterable of points forming the second cluster; it is iterated
            once per point of ``Ci``, so it must be re-iterable.

    Returns:
        The minimum pairwise distance.

    Raises:
        ValueError: If either cluster is empty (no pair to compare).
    """
    return min(dist(i, j) for i in Ci for j in Cj)
def dist_max(Ci, Cj):
    """Return the complete-linkage distance between two clusters.

    This is the largest ``dist`` value over every pair made of one point
    from ``Ci`` and one point from ``Cj``.

    Args:
        Ci: Iterable of points forming the first cluster.
        Cj: Iterable of points forming the second cluster; it is iterated
            once per point of ``Ci``, so it must be re-iterable.

    Returns:
        The maximum pairwise distance.

    Raises:
        ValueError: If either cluster is empty (no pair to compare).
    """
    return max(dist(i, j) for i in Ci for j in Cj)
def dist_avg(Ci, Cj):
    """Return the average-linkage distance between two clusters.

    This is the mean of ``dist`` over every pair made of one point from
    ``Ci`` and one point from ``Cj``.

    Args:
        Ci: Sized, re-iterable collection of points (first cluster).
        Cj: Sized, re-iterable collection of points (second cluster).

    Returns:
        The sum of all pairwise distances divided by ``len(Ci) * len(Cj)``.

    Raises:
        ZeroDivisionError: If either cluster is empty.
    """
    pair_count = len(Ci) * len(Cj)
    return sum(dist(i, j) for i in Ci for j in Cj) / pair_count
# Indices of the smallest distance.
def find_Min(M):
    """Locate the smallest off-diagonal entry of a distance matrix.

    Entries on the diagonal (``i == j``) are skipped. When several entries
    share the smallest value, the first one in row-major order wins.

    Args:
        M: Sequence of rows, each a sequence of comparable distances.

    Returns:
        A tuple ``(x, y, value)`` where ``M[x][y] == value`` is the smallest
        off-diagonal entry. If there is no off-diagonal entry (empty or
        1x1 matrix), ``(0, 0, float("inf"))`` is returned.
    """
    # Start from +infinity rather than a fixed cap so that arbitrarily
    # large distances are still found.
    smallest = float("inf")
    x = y = 0
    for i, row in enumerate(M):
        for j, value in enumerate(row):
            if i != j and value < smallest:
                smallest, x, y = value, i, j
    return (x, y, smallest)
def AGNES(dataset, dist, k):
    """Cluster ``dataset`` bottom-up (agglomeratively) into ``k`` clusters.

    Every point starts as its own cluster. While more than ``k`` clusters
    remain, the pair with the smallest inter-cluster distance (as reported
    by ``find_Min`` on the full distance matrix) is merged.

    Args:
        dataset: Iterable of points.
        dist: Callable ``dist(Ci, Cj)`` returning the distance between two
            clusters (lists of points), e.g. ``dist_min``, ``dist_max`` or
            ``dist_avg``.
        k: Target number of clusters; must be at least 1.

    Returns:
        A list of clusters, each a list of points. If ``dataset`` has no
        more than ``k`` points, every point stays in its own cluster.

    Raises:
        ValueError: If ``k`` is less than 1, or if no pair of distinct
            clusters can be selected for merging.
    """
    if k < 1:
        # With k < 1 the loop would end up merging a cluster with itself
        # and then dropping it, silently losing data.
        raise ValueError("k must be at least 1")

    def build_matrix(clusters):
        # Full pairwise cluster-distance matrix (diagonal included).
        return [[dist(ci, cj) for cj in clusters] for ci in clusters]

    # Initialise the clusters (one per point) and the distance matrix.
    clusters = [[point] for point in dataset]
    matrix = build_matrix(clusters)

    # Merge the closest pair and refresh the matrix until k clusters remain.
    while len(clusters) > k:
        x, y, _ = find_Min(matrix)
        if x == y:
            # find_Min found no usable off-diagonal entry; merging a
            # cluster with itself would duplicate and then delete it.
            raise ValueError("no pair of distinct clusters to merge")
        clusters[x].extend(clusters[y])
        # Delete by position: removing by value could drop a different
        # cluster that merely compares equal.
        del clusters[y]
        matrix = build_matrix(clusters)
    return clusters
def draw(C):
    """Scatter-plot the clusters, one colour per cluster, and show the figure.

    Args:
        C: Sequence of clusters; each cluster is a sequence of points whose
            first two items are used as the x and y coordinates.

    The palette holds seven colours, so colours repeat from the eighth
    cluster onwards. Each cluster is labelled with its index in the legend.
    """
    colValue = ['r', 'y', 'g', 'b', 'c', 'k', 'm']
    for i, cluster in enumerate(C):
        coo_X = [point[0] for point in cluster]
        coo_Y = [point[1] for point in cluster]
        # Wrap around the palette when there are more clusters than colours.
        colour = colValue[i % len(colValue)]
        pl.scatter(coo_X, coo_Y, marker='x', color=colour, label=i)
    pl.legend(loc='upper right')
    pl.show()
# Run the demo: cluster the sample points into 3 groups using the
# average-linkage distance, then plot the resulting clusters.
C = AGNES(dataset=dataset, dist=dist_avg, k=3)
draw(C)
# 算法:层次聚类(AGNES)先将每个对象各自看作一个簇,再根据某种距离准则将这些簇逐步合并,直到簇的数量达到预设值。
# 文献:周爱武, 潘勇, 崔丹丹, & 肖云. (2011). Agnes算法在k-means算法中的应用. 微型机与应用, 30(23), 3.
# 本文分享自 图像处理与模式识别研究所 微信公众号,前往查看
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。
# 本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!