k-means-clustering
k-means-clustering
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
[4]: data
1
# Label the axes of the current figure.
plt.xlabel('Variable_1')
plt.ylabel('Variable_2')
# Write the matching entry of n next to every point (x[i], y[i]).
# NOTE(review): x, y and n come from a cell that is not visible here;
# presumably n holds one label per data point -- confirm.
for i, txt in enumerate(n):
    plt.annotate(txt, (x[i], y[i]))
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1382:
UserWarning: KMeans is known to have a memory leak on Windows with MKL, when
there are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=1.
2
warnings.warn(
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1382:
UserWarning: KMeans is known to have a memory leak on Windows with MKL, when
there are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1382:
UserWarning: KMeans is known to have a memory leak on Windows with MKL, when
there are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:870:
FutureWarning: The default value of `n_init` will change from 10 to 'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1382:
UserWarning: KMeans is known to have a memory leak on Windows with MKL, when
there are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=1.
warnings.warn(
[61]: individual_clustering_score
[62]: plt.figure(figsize=(10,6))
# Elbow plot: one score per candidate cluster count, counted from k = 1.
# The x-axis is derived from the number of scores instead of a hard-coded
# range, so it stays aligned if a different number of cluster counts is
# evaluated.
cluster_counts = range(1, len(individual_clustering_score) + 1)
plt.plot(cluster_counts, individual_clustering_score)
plt.title("elbow method")
plt.xlabel("Number of clusters")
plt.ylabel("Within cluster sum of squares")
plt.show()
3
[63]: labels = kmeans.predict(data)
[64]: labels
[65]: kmeans.labels_
4
# Translate every cluster label into a plot colour. colmap is looked up with
# label + 1 (presumably its keys start at 1 -- confirm against the cell that
# defines it).
colors1 = [colmap[label + 1] for label in labels]
plt.scatter(x, y, color=colors1, alpha=0.5)
# Draw each centroid in the colour assigned to its cluster index.
for idx, centroid in enumerate(centroids):
    plt.scatter(*centroid, color=colmap[idx + 1])
# Label every data point with its entry from n (0, 1, 2, ..., 18 according
# to the original note).
for i, txt in enumerate(n):
    plt.annotate(txt, (x[i], y[i]))
plt.grid()
[16]: 0.6179376814567372
[86]: print(colors1)
['k', 'k', 'k', 'm', 'm', 'm', 'm', 'g', 'g', 'g', 'g', 'g', 'g', 'b', 'b', 'b',
'b', 'b', 'b']
[ ]: