python中聚类的函数 python 聚类函数( 六 ) _数据

for i in xrange(1, len(cluster_centers)):# i=1...len(c_c)-1
sum = 0
for j, p in enumerate(points):
d[j] = nearest_cluster_center(p, cluster_centers[:i])[1] #第j个数据点p与各个中心点距离的最小值
sum += d[j]
sum *= random()
for j, di in enumerate(d):
sum -= di
if sum0:
continue
cluster_centers[i] = copy(points[j])
break
for p in points:
p.group = nearest_cluster_center(p, cluster_centers)[0]
def lloyd(points, nclusters):
    """Cluster ``points`` into ``nclusters`` groups with Lloyd's k-means iteration.

    The centers are seeded by ``kpp`` (k-means++ initialisation) and then
    refined until at most ``len(points) >> 10`` points (about 0.1 %) change
    cluster during one pass.

    Args:
        points: the data points. Each point's ``x`` and ``y`` are read and its
            ``group`` attribute is rewritten with the index of its cluster.
        nclusters: the requested number of clusters.

    Returns:
        The list of cluster centers. On return ``center.group`` equals the
        center's index in the list.
    """
    cluster_centers = [Point() for _ in range(nclusters)]

    # k-means++ seeding: kpp() replaces entries of cluster_centers with copies
    # of data points and assigns every point to its nearest seed.
    kpp(points, cluster_centers)

    # Convergence threshold: len(points) / 1024, i.e. roughly 0.1 % of the data.
    lenpts10 = len(points) >> 10

    while True:
        # Remember where each center was so that a cluster which lost all of
        # its points can stay put instead of dividing by zero below.
        previous = [(cc.x, cc.y) for cc in cluster_centers]

        # During the iteration the centers double as accumulators:
        # x / y hold coordinate sums and group holds the member count.
        for cc in cluster_centers:
            cc.x = 0.0
            cc.y = 0.0
            cc.group = 0

        for p in points:
            owner = cluster_centers[p.group]
            owner.group += 1
            owner.x += p.x
            owner.y += p.y

        # Turn the sums into means to obtain the new centers.
        for cc, (old_x, old_y) in zip(cluster_centers, previous):
            if cc.group:
                cc.x /= cc.group
                cc.y /= cc.group
            else:
                cc.x, cc.y = old_x, old_y

        # Re-assign every point to its closest center and count the moves.
        changed = 0
        for p in points:
            min_i = nearest_cluster_center(p, cluster_centers)[0]
            if min_i != p.group:
                changed += 1
                p.group = min_i

        # Stop when 99.9% of points are good.
        if changed <= lenpts10:
            break

    # group was used as a counter above; make it the cluster index again.
    for i, cc in enumerate(cluster_centers):
        cc.group = i

    return cluster_centers
def print_eps(points, cluster_centers, W=400, H=400):
    """Print the clustering to standard output as a PostScript (EPS) drawing.

    Every point is drawn as a small filled circle in the colour of its
    cluster and every cluster center as a diamond marker. The drawing is
    scaled uniformly so that the bounding box of the points fits into a
    ``W`` x ``H`` area.

    Args:
        points: objects with ``x``, ``y`` and ``group`` attributes; ``group``
            is the index of the point's cluster in ``cluster_centers``.
        cluster_centers: objects with ``x`` and ``y`` attributes.
        W: width of the drawing area.
        H: height of the drawing area.
    """
    Color = namedtuple("Color", "r g b")

    # One colour per cluster, spread over the RGB cube with modular steps.
    colors = [
        Color((3 * (i + 1) % 11) / 11.0,
              (7 * i % 11) / 11.0,
              (9 * i % 11) / 11.0)
        for i in range(len(cluster_centers))
    ]

    # Bounding box of the data; fall back to the origin when there are no points.
    xs = [p.x for p in points] or [0.0]
    ys = [p.y for p in points] or [0.0]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    # Uniform scale that fits both axes. An axis with zero extent (all points
    # share that coordinate) is ignored instead of dividing by zero.
    ratios = []
    if max_x > min_x:
        ratios.append(float(W) / (max_x - min_x))
    if max_y > min_y:
        ratios.append(float(H) / (max_y - min_y))
    scale = min(ratios) if ratios else 1.0

    cx = (max_x + min_x) / 2.0
    cy = (max_y + min_y) / 2.0
    half_w = W / 2.0
    half_h = H / 2.0

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%%!PS-Adobe-3.0\n%%%%BoundingBox: -5 -5 %d %d" % (W + 10, H + 10))

    # PostScript helpers: l/m = relative lineto/moveto, c = point dot,
    # s = cluster-center marker.
    print("/l {rlineto} def /m {rmoveto} def\n" +
          "/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def\n" +
          "/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath " +
          "gsave 1 setgray fill grestore gsave 3 setlinewidth" +
          " 1 setgray stroke grestore 0 setgray stroke }def")

    for i, cc in enumerate(cluster_centers):
        print("%g %g %g setrgbcolor" %
              (colors[i].r, colors[i].g, colors[i].b))

        for p in points:
            if p.group != i:
                continue
            print("%.3f %.3f c" % ((p.x - cx) * scale + half_w,
                                   (p.y - cy) * scale + half_h))

        print("\n0 setgray %g %g s" % ((cc.x - cx) * scale + half_w,
                                       (cc.y - cy) * scale + half_h))

    # This string is not %-formatted, so the DSC trailer is written literally.
    print("\n%%EOF")
def main():
    """Run the demo: generate points, cluster them and print the EPS drawing."""
    cluster_count = 7  # number of clusters
    sample_size = 30000
    # The second argument of generate_points is passed through unchanged;
    # presumably a radius for the generated cloud - TODO confirm.
    sample = generate_points(sample_size, 10)
    print_eps(sample, lloyd(sample, cluster_count))
# Script entry point: runs the demo unconditionally, so it also runs when this
# file is imported (there is no ``__main__`` guard).
main()
上述代码实现的算法是针对二维数据的，所以Point对象有三个属性，分别是在x轴上的值、在y轴上的值、以及所属的簇的标识。函数lloyd是
kmeans++算法的整体实现，其先是通过kpp函数选取合适的种子点，然后对数据集实行kmeans算法进行聚类。kpp函数的实现完全符合上述
kmeans++的基本思路的2、3、4步。
谱聚类（Spectral clustering）（python实现）谱聚类概念：
谱聚类是一种基于图论的聚类方法，通过对样本数据的拉普拉斯矩阵的特征向量进行聚类，从而达到对样本数据聚类的目的。谱聚类可以理解为将高维空间的数据映射到低维，然后在低维空间用其它聚类算法（如KMeans）进行聚类。

python中聚类的函数 python 聚类函数( 六 )

推荐阅读

金桔冰糖一天吃多少合适呢

我国帮助信息网络犯罪活动犯罪立案追诉标准是什么

点痣后要注意什么祛痣几天可以洗脸化妆

服务器扩容很难吗服务器扩容mongodb

为何恋爱中会出现抑郁情况为何恋爱中会出现抑郁情况呢

3种情况要慎用甘草

鸿蒙系统振动小，鸿蒙系统震动自己变响铃

番禺建设银行社保卡广州社保卡即时制卡建设银行网点

幻塔喷气滑板怎么获得喷气滑板获得方法

来大姨妈可以打新冠疫苗吗打了新冠疫苗大姨妈不来怎么办

手机最好图文编辑软件有哪些，手机上用的PS软件有哪些最好用的是

春分到咋防春困

泡凤爪能不能带上动车可以带泡凤爪做动车吗

提高语文成绩的五种方法值得收藏起来

【codevs1576】最长严格上升子序列

redis线程安全问题 redis守护线程锁

redis中sdiff命令，redis sadd命令

手写英文用什么字体？手写发票是正规发票吗

redis集群三种方式 redis集群iocp

你们家乡有哪些有趣的防疫顺口溜？分享一下？