import numpy as np
from scipy.optimize import linear_sum_assignment as linear_assignment
from sklearn.metrics.cluster import (
    normalized_mutual_info_score as NMI,
    adjusted_mutual_info_score as AMI,
    adjusted_rand_score as AR,
    silhouette_score as SI,
    calinski_harabasz_score as CH,
)
def clustring_indicators(pred, data=None, labels=None, model_name='cluster', verbose=1):
    """Collect clustering quality metrics for a predicted labelling.

    Args:
        pred: Predicted cluster assignment, passed to every metric.
        data: Optional raw samples. When given, the internal metrics
            ``'si'`` (silhouette score) and ``'ch'`` (Calinski-Harabasz
            score) are computed from ``data`` and ``pred``.
        labels: Optional ground-truth labels. When given, the external
            metrics ``'acc'`` (first element returned by ``cluster_acc``),
            ``'nmi'``, ``'ar'`` and ``'ami'`` are computed.
        model_name: Prefix of the printed summary line.
        verbose: When truthy, print every metric on one line with four
            decimal places.

    Returns:
        dict mapping metric name to its value; empty when neither ``data``
        nor ``labels`` is supplied.
    """
    scores = {}

    # Internal metrics need the raw samples.
    if data is not None:
        scores.update(si=SI(data, pred), ch=CH(data, pred))

    # External metrics need the ground-truth labels.
    if labels is not None:
        scores['acc'] = cluster_acc(pred, labels)[0]
        for name, metric in (('nmi', NMI), ('ar', AR), ('ami', AMI)):
            scores[name] = metric(labels, pred)

    # Optionally print all metrics as a single summary line.
    if verbose:
        summary = ''.join(
            '{}: {:.4f} '.format(name, score) for name, score in scores.items()
        )
        print('{} {}'.format(model_name, summary))

    return scores
##参考论文Unsupervised deep embedding for clustering analysis
def cluster_acc(Y_pred, Y):
assert Y_pred.size == Y.size
D = max(Y_pred.max(), Y.max()) + 1
w = np.zeros((D, D), dtype=np.int64)
for i in range(Y_pred.size):
w[Y_pred[i], Y[i]] += 1
ind = linear_assignment(w.max() - w)
total = 0
for i in range(len(ind[0])):
total += w[ind[0][i], ind[1][i]]
return total * 1.0 / Y_pred.size, w
# Python implementations of the clustering metrics SI, CH, ACC, NMI, AR and AMI.
# First published 2020-11-20 10:45:48.