 
import os

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# Step 1: build a synthetic sample set with make_blobs() -- 750 points drawn
# around three fixed centres, with a fixed random_state so every run produces
# the same data. labels_true holds the generating blob index of each point.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(
    n_samples=750,
    centers=centers,
    cluster_std=0.4,
    random_state=0,
)
# Standardize the raw samples before clustering.
X = StandardScaler().fit_transform(X)


# Step 2: cluster the standardized samples with DBSCAN.
dbscan = DBSCAN(eps=0.3, min_samples=10)
dbscan.fit(X)
labels = dbscan.labels_

# Boolean mask over all samples: True where the sample is a core sample.
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[dbscan.core_sample_indices_] = True


# Count the clusters found. Outliers (noise) carry the label -1; they are not
# a cluster, so they are excluded from the cluster count and tallied apart.
n_noise_ = int(np.count_nonzero(labels == -1))
n_clusters_ = len(np.unique(labels[labels != -1]))

print("聚类结果中簇（组别）的数量  ：%d" % n_clusters_)
print("聚类结果中离群点(噪音)的数量：%d" % n_noise_)
print("--------------------------------度量指标")

# Metrics that compare the predicted labels with the ground-truth labels,
# listed in the order they are reported.
label_metrics = (
    ("同质性（Homogeneity） ：%0.3f", metrics.homogeneity_score),
    ("完整性（Completeness）：%0.3f", metrics.completeness_score),
    ("V-指标（V-measure）   ：%0.3f", metrics.v_measure_score),
    ("调整Rand指数：%0.3f", metrics.adjusted_rand_score),
    ("调整互信息  ：%0.3f", metrics.adjusted_mutual_info_score),
)
for template, score_fn in label_metrics:
    print(template % score_fn(labels_true, labels))

# The silhouette coefficient is computed from the samples and the predicted
# labels only; it does not use labels_true.
print("平均轮廓系数：%0.3f" % metrics.silhouette_score(X, labels))


# Visualization
# Build a font object so that pyplot can render the Chinese title.
# The SimHei file below only exists on Windows machines that ship that font.
# Pointing FontProperties at a missing file would fail once the title is
# drawn, so when the file is absent fall back to looking up a CJK-capable font
# by family name. If none of the listed families is installed, matplotlib
# falls back to its default font, which may not contain the Chinese glyphs.
simhei_path = 'C:\\Windows\\Fonts\\SimHei.ttf'
if os.path.isfile(simhei_path):
    font = FontProperties(fname=simhei_path)
else:
    font = FontProperties(family=['SimHei', 'Microsoft YaHei', 'PingFang SC',
                                  'Noto Sans CJK SC', 'WenQuanYi Micro Hei',
                                  'sans-serif'])

# Distinct label values, including the noise label -1 when present; one
# colour is sampled from the Spectral colormap for each of them.
unique_labels = set(labels)
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]

# Pair every label with its colour and draw that label's points.
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    # Core samples are drawn with large markers, the remaining members of the
    # same label (non-core samples) with small ones.
    for member_mask, marker_size in ((core_samples_mask, 14),
                                     (~core_samples_mask, 6)):
        xy = X[class_member_mask & member_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=marker_size)

plt.title("聚类结果中簇（组别）的数量：%d，离群点数量 %d(黑色)" % (n_clusters_, n_noise_), fontproperties=font)
plt.show()
 
