Python fuzzy c-means demo

OStack程序员社区-中国程序员成长平台 › 门户 › 编程› C++›C++教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

摘自：http://pythonhosted.org/scikit-fuzzy/auto_examples/plot_cmeans.html#example-plot-cmeans-py，加入了自己的理解！

# coding: utf-8

from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
import skfuzzy as fuzz

# Matplotlib colors, indexed by cluster label when the test data is plotted.
colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']

# Define three cluster centers
centers = [[4, 2],
           [1, 7],
           [5, 6]]

# Define three cluster sigmas in x and y, respectively
sigmas = [[0.8, 0.3],
          [0.3, 0.5],
          [1.1, 0.7]]

# Generate test data
np.random.seed(42)  # Set seed for reproducibility
points_per_cluster = 200

# Fake one Gaussian blob per (center, sigma) pair. The per-cluster chunks are
# collected in lists and concatenated once at the end, so the result contains
# exactly the sampled points: no placeholder element is needed to start the
# arrays, and therefore no stray sample at (0, 0) ends up in cluster 0.
x_chunks, y_chunks, label_chunks = [], [], []
for i, ((xmu, ymu), (xsigma, ysigma)) in enumerate(zip(centers, sigmas)):
    # x is drawn before y for every cluster so the seeded random stream is
    # consumed in a fixed, reproducible order.
    x_chunks.append(
        np.random.standard_normal(points_per_cluster) * xsigma + xmu)
    y_chunks.append(
        np.random.standard_normal(points_per_cluster) * ysigma + ymu)
    # Ground-truth label of every point in this blob (float, like the data).
    label_chunks.append(np.full(points_per_cluster, float(i)))

xpts = np.concatenate(x_chunks)
ypts = np.concatenate(y_chunks)
labels = np.concatenate(label_chunks)

# Visualize the test data: one dotted series per ground-truth label, each
# drawn in the color stored at the label's index in `colors`.
fig0, ax0 = plt.subplots()
for label in range(3):
    in_cluster = labels == label  # boolean mask selecting this blob's points
    ax0.plot(xpts[in_cluster], ypts[in_cluster], '.', color=colors[label])
ax0.set_title('Test data: 200 points x3 clusters.')

# Stack the coordinates into a single 2-D array: one row per feature (x, y),
# one column per sample.
alldata = np.vstack([xpts, ypts])

# Regenerate fuzzy model with 3 cluster centers - note that center ordering
# is random in this clustering algorithm, so the centers may change places.
# This is the FCM training step; the fitted cluster centers end up in `cntr`.
# The original author's run reported these centers (exact values depend on
# the input data and the random state):
#   [ 5.26724628  6.14961671]
#   [ 1.01594428  6.98518109]
#   [ 3.95895105  2.05785626]
n_clusters = 3
exponent_m = 2  # third positional argument of cmeans (see scikit-fuzzy docs)
fit = fuzz.cluster.cmeans(
    alldata, n_clusters, exponent_m, error=0.005, maxiter=1000)
# Only the centers and the membership matrix are used further down.
cntr, u_orig, _, _, _, _, _ = fit

# Show 3-cluster model
fig2, ax2 = plt.subplots()
ax2.set_title('Trained model')

# For every sample (column of u_orig) take the index of the cluster with the
# largest membership; computed once and reused for all three series.
hard_labels = u_orig.argmax(axis=0)
for j in range(3):
    members = hard_labels == j
    ax2.plot(alldata[0, members], alldata[1, members], 'o',
             label='series ' + str(j))

# Print each cluster center and mark it on the plot with a square.
for pt in cntr:
    print(pt)
    ax2.plot(pt[0], pt[1], 's')

ax2.legend()


# Generate uniformly sampled data spread across the range [0, 10] in x and y
# (1100 samples, one row per sample).
newdata = 10 * np.random.uniform(0, 1, (1100, 2))

# Predict new cluster membership with `cmeans_predict` as well as
# `cntr` from the 3-cluster model. The data is transposed so that samples
# run along the columns, the same layout as the training data.
prediction = fuzz.cluster.cmeans_predict(
    newdata.T, cntr, 2, error=0.005, maxiter=1000)
u, u0, d, jm, p, fpc = prediction

# Plot the classified uniform data. Note for visualization the maximum
# membership value has been taken at each point (i.e. these are hardened,
# not fuzzy results visualized) but the full fuzzy result is the output
# from cmeans_predict.
#
# The argmax below runs along axis 0: for every column (sample) it returns
# the row index of the largest entry, so the indices range over 0..2.
# `u` looks like this:
#   [[ 0.54256489  0.0631068   0.00291562 ...,  0.15580619  0.17543005
#      0.15652909]
#    [ 0.35176643  0.02712891  0.99530463 ...,  0.2065651   0.31637093
#      0.22570475]
#    [ 0.10566868  0.90976429  0.00177975 ...,  0.63762871  0.50819901
#      0.61776617]]
# and the hardened result looks like:
#   [0 2 1 ..., 2 2 2]
# In other words `u` holds the per-cluster membership degrees: the values
# down any one column add up to 1. `cluster_membership` is the clustering
# result itself: 0 means the sample was assigned to cluster 0, 2 means
# cluster 2, and so on.
# Aside: np.argmax([[1,2,3],[4,1,4],[2,8,9]], axis=0) gives [1, 2, 2].
cluster_membership = u.argmax(axis=0)  # Hardening for visualization


fig3, ax3 = plt.subplots()
# Title shown to the user; spelling of "classified" corrected.
ax3.set_title('Random points classified according to known centers')

# Plot the predicted points, one series per predicted cluster.
for j in range(3):
    in_cluster = cluster_membership == j  # rows of newdata assigned to j
    ax3.plot(newdata[in_cluster, 0], newdata[in_cluster, 1], 'o',
             label='series ' + str(j))
ax3.legend()
# Display every figure created so far.
plt.show()

预测：

经过测试，FCM 同样可以对三维数据进行聚类：

    # Three cluster centers, now with a third (z) coordinate.
    centers = [[4, 2, 100],
               [1, 7, 200],
               [5, 6, 300]]
    # Standard deviation of each cluster along x, y and z, respectively.
    sigmas = [[0.8, 0.3, 0.1],
              [0.3, 0.5, 0.2],
              [1.1, 0.7, 0.9]]
    # Generate test data
    np.random.seed(42)  # Set seed for reproducibility
    # NOTE(review): every array starts with a single 0, so the data set
    # contains one extra sample at (0, 0, 0) labelled as cluster 0. It is
    # left in place because the output published with this snippet was
    # produced with it; drop the placeholders if exact blobs are wanted.
    xpts = np.zeros(1)
    ypts = np.zeros(1)
    zpts = np.zeros(1)
    labels = np.zeros(1)

    # print() calls instead of Python 2 print statements: the statement form
    # is a SyntaxError on Python 3 and under `print_function`.
    print("*********************")
    # Append 200 Gaussian samples per cluster to each coordinate array.
    for i, ((xmu, ymu, zmu), (xsigma, ysigma, zsigma)) in enumerate(zip(centers, sigmas)):
        xpts = np.hstack((xpts, np.random.standard_normal(200) * xsigma + xmu))
        ypts = np.hstack((ypts, np.random.standard_normal(200) * ysigma + ymu))
        zpts = np.hstack((zpts, np.random.standard_normal(200) * zsigma + zmu))
        labels = np.hstack((labels, np.ones(200) * i))
    # One row per feature (x, y, z), one column per sample.
    alldata = np.vstack((xpts, ypts, zpts))
    cntr, u_orig, _, _, _, _, _ = fuzz.cluster.cmeans(alldata, 3, 2, error=0.005, maxiter=1000)

    print("*********************")
    # Print the three fitted 3-D cluster centers.
    for pt in cntr:
        print(pt)
    print("*********************")

结果为：

*********************
*********************
[  3.95666441   2.02029976  99.72164117]
[   5.00568412    6.05765152  300.09246382]
[   1.00252037    7.06293498  199.99320473]
*********************