01/12/2023 21:07 TP5_EX1_KMEANS - Jupyter Notebook
Entrée [2]:
import pandas
#import imageio
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
#from scipy.cluster.hierarchy import dendrogram, linkage
#from sklearn.cluster import KMeans
import numpy as np
Entrée [3]:
Columns = ['NumTimesPrg', 'PlGlcConc', 'BloodP', 'SkinThick', 'TwoHourSerIns
dataframe = pandas.read_csv('pima-indians-diabetes.data.csv')
dataframe.columns=Columns
Entrée [4]: #construction de l'ensemble de données formé par 100 individus et 2 variable
array = dataframe.values
data = array[:100,1:3]
127.0.0.1:8888/notebooks/TP5_EX1_KMEANS.ipynb 1/7
01/12/2023 21:07 TP5_EX1_KMEANS - Jupyter Notebook
Entrée [5]: #normalisation
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data)
scaler.transform(data)
127.0.0.1:8888/notebooks/TP5_EX1_KMEANS.ipynb 2/7
01/12/2023 21:07 TP5_EX1_KMEANS - Jupyter Notebook
Out[5]: array([[0.00219021, 0.00545455],
[0.0047154 , 0.00528926],
[0.00229328, 0.00545455],
[0.00353011, 0.00330579],
[0.002989 , 0.0061157 ],
[0.00200984, 0.00413223],
[0.00296323, 0. ],
[0.00507614, 0.00578512],
[0.0032209 , 0.00793388],
[0.00283439, 0.00760331],
[0.00432889, 0.0061157 ],
[0.00358164, 0.00661157],
[0.00487 , 0.00495868],
[0.00427736, 0.00595041],
[0.00257672, 0. ],
[0.00304053, 0.00694215],
[0.00275709, 0.0061157 ],
[0.00265402, 0.00247934],
[0.00296323, 0.00578512],
[0.00324667, 0.00727273],
[0.00255095, 0.00694215],
[0.00505037, 0.00743802],
[0.0030663 , 0.00661157],
[0.00368471, 0.0077686 ],
[0.0032209 , 0.00578512],
[0.00378778, 0.00628099],
[0.00249942, 0.00545455],
[0.00373625, 0.00677686],
[0.00301476, 0.00760331],
[0.00280863, 0.00619835],
[0.00407122, 0.00628099],
[0.00226752, 0.00479339],
[0.00237058, 0.00760331],
[0.0031436 , 0.00644628],
[0.00265402, 0.00495868],
[0.00355588, 0.00628099],
[0.00262826, 0.00628099],
[0.00231905, 0.00561983],
[0.00286016, 0.00595041],
[0.0046381 , 0.00528926],
[0.00342704, 0.00694215],
[0.00273133, 0.00760331],
[0.00440619, 0.00909091],
[0.00409699, 0.00528926],
[0.0046381 , 0.00545455],
[0.00376201, 0.0046281 ],
[0.00182947, 0.00578512],
[0.00265402, 0.00545455],
[0.00270556, 0. ],
[0.00265402, 0.00661157],
[0.00260249, 0.00413223],
[0.00226752, 0.00545455],
[0.00453503, 0.00743802],
[0.00386508, 0.00545455],
[0.00188101, 0.00413223],
[0.00481847, 0.00561983],
[0.00257672, 0.00727273],
[0.00376201, 0.00677686],
[0.00270556, 0.00528926],
[0.00216445, 0. ],
[0.00342704, 0.00595041],
127.0.0.1:8888/notebooks/TP5_EX1_KMEANS.ipynb 3/7
01/12/2023 21:07 TP5_EX1_KMEANS - Jupyter Notebook
[0.00113376, 0.00512397],
[0.00363318, 0.00479339],
[0.00293746, 0.00545455],
[0.00255095, 0.0061157 ],
[0.00280863, 0.00727273],
[0.00280863, 0.00760331],
[0.00244789, 0.00545455],
[0.00376201, 0.00702479],
[0.00257672, 0.00545455],
[0.00358164, 0.00528926],
[0.00324667, 0.00743802],
[0.00332397, 0.00710744],
[0.00203561, 0.00619835],
[0. , 0.00396694],
[0.00159757, 0.00644628],
[0.00244789, 0.00595041],
[0.00337551, 0. ],
[0.00288593, 0.00545455],
[0.0029117 , 0.00363636],
[0.00190677, 0. ],
[0.00213868, 0.00644628],
[0.00260249, 0.0053719 ],
[0.00353011, 0.00892562],
[0.00283439, 0.0061157 ],
[0.00273133, 0.00595041],
[0.00257672, 0.00561983],
[0.00350434, 0.00578512],
[0.00275709, 0.00561983],
[0.00206138, 0.00454545],
[0.00316937, 0.00661157],
[0.00208714, 0.00644628],
[0.00345281, 0.00595041],
[0.00365895, 0.00677686],
[0.00371048, 0.00595041],
[0.00237058, 0.00512397],
[0.00182947, 0.00396694],
[0.00239635, 0.00413223],
[0.0031436 , 0.00743802],
[0.00420006, 0.00595041]])
127.0.0.1:8888/notebooks/TP5_EX1_KMEANS.ipynb 4/7
01/12/2023 21:07 TP5_EX1_KMEANS - Jupyter Notebook
Entrée [6]:
# Affichage des points initiaux
plt.scatter(data[:,0], data[:,1], c='r')
plt.show()
Entrée [7]: from sklearn.cluster import KMeans
#effectuer la catégorisation en 2 classes avec k-means
kmeans = KMeans(n_clusters=2)
kmeans.fit(data)
y_km = kmeans.fit_predict(data)
#Afficher les poitns aprés la la catégorisation pour 2 classes
plt.scatter(data[y_km ==0,0], data[y_km == 0,1], s=20, c='r')
plt.scatter(data[y_km ==1,0], data[y_km == 1,1], s=20, c='m')
#Conclusion : points abérants ont été inclus dans l'une des deux classes
Out[7]: <matplotlib.collections.PathCollection at 0x9ab0d0>
127.0.0.1:8888/notebooks/TP5_EX1_KMEANS.ipynb 5/7
01/12/2023 21:07 TP5_EX1_KMEANS - Jupyter Notebook
Entrée [8]: #effectuer la catégorisation en 3 classes avec k-means
kmeans = KMeans(n_clusters=3)
y_km = kmeans.fit_predict(data)
#Afficher les poitns aprés la la catégorisation pour 3 classes
plt.scatter(data[y_km ==0,0], data[y_km == 0,1], s=20, c='r')
plt.scatter(data[y_km ==1,0], data[y_km == 1,1], s=20, c='m')
plt.scatter(data[y_km ==2,0], data[y_km == 2,1], s=20, c='y')
#Conclusion : la troisième classe ne contient que les points abérants
Out[8]: <matplotlib.collections.PathCollection at 0xc99508>
Entrée [9]:
#effectuer la catégorisation en 4 classes avec k-means
kmeans = KMeans(n_clusters=4)
kmeans.fit(data)
y_km = kmeans.fit_predict(data)
#Afficher les poitns aprés la la catégorisation pour 4 classes
plt.scatter(data[y_km ==0,0], data[y_km == 0,1], s=20, c='r')
plt.scatter(data[y_km ==1,0], data[y_km == 1,1], s=20, c='m')
plt.scatter(data[y_km ==2,0], data[y_km == 2,1], s=20, c='y')
plt.scatter(data[y_km ==3,0], data[y_km == 3,1], s=20, c='b')
#Conclusion : sensibilité de k-means au bruit: un groupe qui a été divisé e
Out[9]: <matplotlib.collections.PathCollection at 0x15ce6748>
Entrée [ ]:
127.0.0.1:8888/notebooks/TP5_EX1_KMEANS.ipynb 6/7
01/12/2023 21:07 TP5_EX1_KMEANS - Jupyter Notebook
127.0.0.1:8888/notebooks/TP5_EX1_KMEANS.ipynb 7/7