ex3TP1
ex3TP1
November 5, 2024
# Load the diabetes dataset and preview the first and last rows
df = pd.read_csv("Healthcare-Diabetes.csv")
df.head()
df.tail()
1
[7]: Id Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \
2763 2764 2 75 64 24 55 29.7
2764 2765 8 179 72 42 130 32.7
2765 2766 6 85 78 0 0 31.2
2766 2767 0 129 110 46 130 67.1
2767 2768 2 81 72 15 76 30.1
# Schema overview: column dtypes and non-null counts
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2768 entries, 0 to 2767
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Id 2768 non-null int64
1 Pregnancies 2768 non-null int64
2 Glucose 2768 non-null int64
3 BloodPressure 2768 non-null int64
4 SkinThickness 2768 non-null int64
5 Insulin 2768 non-null int64
6 BMI 2768 non-null float64
7 DiabetesPedigreeFunction 2768 non-null float64
8 Age 2768 non-null int64
9 Outcome 2768 non-null int64
dtypes: float64(2), int64(8)
memory usage: 216.4 KB
# Descriptive statistics (count, mean, std, min/max, quartiles) per column
df.describe()
2
mean 80.127890 32.137392 0.471193 33.132225
std 112.301933 8.076127 0.325669 11.777230
min 0.000000 0.000000 0.078000 21.000000
25% 0.000000 27.300000 0.244000 24.000000
50% 37.000000 32.200000 0.375000 29.000000
75% 130.000000 36.625000 0.624000 40.000000
max 846.000000 80.600000 2.420000 81.000000
Outcome
count 2768.000000
mean 0.343931
std 0.475104
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 1.000000
# Count missing values per column (the output below shows all zeros)
df.isnull().sum()
[10]: Id 0
Pregnancies 0
Glucose 0
BloodPressure 0
SkinThickness 0
Insulin 0
BMI 0
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64
# Histograms of every numeric column to inspect the distributions
df.hist(figsize=(15, 15))
3
# Scatter plot of Pregnancies vs Glucose, coloured by the Outcome label
plt.figure()
sns.scatterplot(x='Pregnancies', y='Glucose', data=df,
                hue="Outcome", palette="coolwarm")
4
# Pairwise correlation matrix of the numeric columns
df.corr()
5
BMI 0.393494 0.215926 1.000000
DiabetesPedigreeFunction 0.179830 0.190500 0.129766
Age -0.111895 -0.073458 0.038175
Outcome 0.075603 0.123646 0.280928
# Heatmap of the correlation matrix with annotated coefficients
plt.figure(figsize=(10, 10))
sns.heatmap(df.corr(), annot=True, cmap="YlGnBu")
plt.show()
6
# Normalisation: min-max scale each feature column to [0, 1]
X = df.drop(['Outcome'], axis=1)
# Vectorised equivalent of applying (x - x.min()) / (x.max() - x.min())
# per column. NOTE(review): a constant column would divide by zero — the
# describe() output above shows every feature has max > min, so safe here.
X_norm = (X - X.min()) / (X.max() - X.min())
X_norm.head()
7
BMI DiabetesPedigreeFunction Age
0 0.416873 0.234415 0.483333
1 0.330025 0.116567 0.166667
2 0.289082 0.253629 0.183333
3 0.348635 0.038002 0.000000
4 0.534739 0.943638 0.200000
# Target vector: the binary Outcome column (values 0/1)
Y = df.Outcome
Y
[16]: 0 1
1 0
2 1
3 0
4 1
..
2763 0
2764 1
2765 0
2766 1
2767 0
Name: Outcome, Length: 2768, dtype: int64
# 80/20 train/test split; fixed random_state for reproducibility
x_train, x_test, y_train, y_test = train_test_split(
    X_norm, Y, test_size=0.2, random_state=42)
#Dense(64, activation='relu'),
#Dropout(0.5),
Dense(1, activation='sigmoid') # Couche de sortie linéaire pour la␣
↪classification binaire
Epoch 1/5
222/222 �������������������� 6s 12ms/step -
accuracy: 0.6454 - loss: 0.6578 - val_accuracy: 0.6643 - val_loss: 0.6200
8
Epoch 2/5
222/222 �������������������� 2s 8ms/step -
accuracy: 0.6622 - loss: 0.6111 - val_accuracy: 0.6931 - val_loss: 0.5793
Epoch 3/5
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6993 - loss: 0.5776 - val_accuracy: 0.7274 - val_loss: 0.5511
Epoch 4/5
222/222 �������������������� 1s 5ms/step -
accuracy: 0.7178 - loss: 0.5439 - val_accuracy: 0.7401 - val_loss: 0.5290
Epoch 5/5
222/222 �������������������� 2s 7ms/step -
accuracy: 0.7327 - loss: 0.5335 - val_accuracy: 0.7437 - val_loss: 0.5167
Epoch 1/10
222/222 �������������������� 2s 8ms/step -
accuracy: 0.7596 - loss: 0.5119 - val_accuracy: 0.7653 - val_loss: 0.5010
Epoch 2/10
222/222 �������������������� 2s 10ms/step -
accuracy: 0.7655 - loss: 0.4975 - val_accuracy: 0.7690 - val_loss: 0.4920
Epoch 3/10
222/222 �������������������� 2s 9ms/step -
accuracy: 0.7988 - loss: 0.4638 - val_accuracy: 0.7635 - val_loss: 0.4890
Epoch 4/10
222/222 �������������������� 2s 8ms/step -
accuracy: 0.7766 - loss: 0.4814 - val_accuracy: 0.7653 - val_loss: 0.4807
Epoch 5/10
222/222 �������������������� 2s 8ms/step -
accuracy: 0.8003 - loss: 0.4552 - val_accuracy: 0.7780 - val_loss: 0.4795
Epoch 6/10
222/222 �������������������� 1s 5ms/step -
accuracy: 0.7894 - loss: 0.4643 - val_accuracy: 0.7708 - val_loss: 0.4728
Epoch 7/10
222/222 �������������������� 1s 5ms/step -
accuracy: 0.7857 - loss: 0.4590 - val_accuracy: 0.7690 - val_loss: 0.4733
Epoch 8/10
222/222 �������������������� 1s 3ms/step -
accuracy: 0.7830 - loss: 0.4629 - val_accuracy: 0.7671 - val_loss: 0.4672
Epoch 9/10
222/222 �������������������� 1s 4ms/step -
accuracy: 0.7843 - loss: 0.4461 - val_accuracy: 0.7726 - val_loss: 0.4654
Epoch 10/10
222/222 �������������������� 1s 4ms/step -
accuracy: 0.7980 - loss: 0.4375 - val_accuracy: 0.7726 - val_loss: 0.4642
9
# Train for 15 epochs (mini-batches of 10), validating on the held-out test set
train_model = model.fit(x_train, y_train, epochs=15, batch_size=10,
                        validation_data=(x_test, y_test))
Epoch 1/15
222/222 �������������������� 4s 10ms/step -
accuracy: 0.5684 - loss: 0.6904 - val_accuracy: 0.6606 - val_loss: 0.6560
Epoch 2/15
222/222 �������������������� 1s 6ms/step -
accuracy: 0.5905 - loss: 0.6804 - val_accuracy: 0.6606 - val_loss: 0.6468
Epoch 3/15
222/222 �������������������� 2s 9ms/step -
accuracy: 0.6189 - loss: 0.6707 - val_accuracy: 0.6606 - val_loss: 0.6433
Epoch 4/15
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6450 - loss: 0.6660 - val_accuracy: 0.6625 - val_loss: 0.6406
Epoch 5/15
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6578 - loss: 0.6553 - val_accuracy: 0.6625 - val_loss: 0.6381
Epoch 6/15
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6745 - loss: 0.6425 - val_accuracy: 0.6625 - val_loss: 0.6357
Epoch 7/15
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6579 - loss: 0.6410 - val_accuracy: 0.6625 - val_loss: 0.6327
Epoch 8/15
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6611 - loss: 0.6469 - val_accuracy: 0.6625 - val_loss: 0.6306
Epoch 9/15
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6507 - loss: 0.6564 - val_accuracy: 0.6625 - val_loss: 0.6284
Epoch 10/15
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6521 - loss: 0.6503 - val_accuracy: 0.6625 - val_loss: 0.6265
Epoch 11/15
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6504 - loss: 0.6472 - val_accuracy: 0.6625 - val_loss: 0.6248
Epoch 12/15
222/222 �������������������� 2s 9ms/step -
accuracy: 0.6493 - loss: 0.6382 - val_accuracy: 0.6625 - val_loss: 0.6233
Epoch 13/15
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6600 - loss: 0.6368 - val_accuracy: 0.6625 - val_loss: 0.6219
Epoch 14/15
222/222 �������������������� 2s 9ms/step -
accuracy: 0.6734 - loss: 0.6334 - val_accuracy: 0.6625 - val_loss: 0.6206
Epoch 15/15
222/222 �������������������� 1s 4ms/step -
10
accuracy: 0.6664 - loss: 0.6299 - val_accuracy: 0.6625 - val_loss: 0.6192
Epoch 1/25
222/222 �������������������� 4s 11ms/step -
accuracy: 0.5764 - loss: 0.6762 - val_accuracy: 0.6805 - val_loss: 0.6574
Epoch 2/25
222/222 �������������������� 2s 9ms/step -
accuracy: 0.6529 - loss: 0.6524 - val_accuracy: 0.6643 - val_loss: 0.6463
Epoch 3/25
222/222 �������������������� 2s 8ms/step -
accuracy: 0.6670 - loss: 0.6378 - val_accuracy: 0.6643 - val_loss: 0.6413
Epoch 4/25
222/222 �������������������� 2s 8ms/step -
accuracy: 0.6518 - loss: 0.6407 - val_accuracy: 0.6661 - val_loss: 0.6367
Epoch 5/25
222/222 �������������������� 2s 9ms/step -
accuracy: 0.6779 - loss: 0.6183 - val_accuracy: 0.6661 - val_loss: 0.6327
Epoch 6/25
222/222 �������������������� 1s 6ms/step -
accuracy: 0.6671 - loss: 0.6263 - val_accuracy: 0.6661 - val_loss: 0.6286
Epoch 7/25
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6619 - loss: 0.6285 - val_accuracy: 0.6661 - val_loss: 0.6249
Epoch 8/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6806 - loss: 0.6164 - val_accuracy: 0.6733 - val_loss: 0.6212
Epoch 9/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6773 - loss: 0.6158 - val_accuracy: 0.6805 - val_loss: 0.6177
Epoch 10/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6771 - loss: 0.6109 - val_accuracy: 0.6805 - val_loss: 0.6144
Epoch 11/25
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6809 - loss: 0.6090 - val_accuracy: 0.6841 - val_loss: 0.6113
Epoch 12/25
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6775 - loss: 0.6096 - val_accuracy: 0.6823 - val_loss: 0.6084
Epoch 13/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6732 - loss: 0.6112 - val_accuracy: 0.6859 - val_loss: 0.6054
Epoch 14/25
222/222 �������������������� 1s 5ms/step -
accuracy: 0.6843 - loss: 0.5997 - val_accuracy: 0.6877 - val_loss: 0.6027
11
Epoch 15/25
222/222 �������������������� 1s 4ms/step -
accuracy: 0.7025 - loss: 0.5918 - val_accuracy: 0.6931 - val_loss: 0.6001
Epoch 16/25
222/222 �������������������� 1s 4ms/step -
accuracy: 0.6991 - loss: 0.5897 - val_accuracy: 0.6913 - val_loss: 0.5977
Epoch 17/25
222/222 �������������������� 1s 6ms/step -
accuracy: 0.6812 - loss: 0.5976 - val_accuracy: 0.6877 - val_loss: 0.5954
Epoch 18/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6963 - loss: 0.5865 - val_accuracy: 0.6931 - val_loss: 0.5933
Epoch 19/25
222/222 �������������������� 2s 10ms/step -
accuracy: 0.6807 - loss: 0.5947 - val_accuracy: 0.6931 - val_loss: 0.5912
Epoch 20/25
222/222 �������������������� 2s 7ms/step -
accuracy: 0.6990 - loss: 0.5890 - val_accuracy: 0.6968 - val_loss: 0.5892
Epoch 21/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6920 - loss: 0.5914 - val_accuracy: 0.7022 - val_loss: 0.5873
Epoch 22/25
222/222 �������������������� 1s 4ms/step -
accuracy: 0.7050 - loss: 0.5788 - val_accuracy: 0.7022 - val_loss: 0.5854
Epoch 23/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6870 - loss: 0.5928 - val_accuracy: 0.7022 - val_loss: 0.5836
Epoch 24/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6877 - loss: 0.5791 - val_accuracy: 0.7022 - val_loss: 0.5819
Epoch 25/25
222/222 �������������������� 1s 3ms/step -
accuracy: 0.6906 - loss: 0.5762 - val_accuracy: 0.7040 - val_loss: 0.5803
12
# Axis labels and legend for the loss curve.
# NOTE(review): the plt.plot(...) calls that open this figure are missing
# from this extract — confirm against the original notebook.
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
#interpretation de accuracy
'''
on voit qu'il y a une difference entre train accuracy et validation accuracy
au cours des epochs, la premiere vers 0,68 et la deuxieme vers 0,7 ;
cela signifie que le modele generalise aussi bien aux nouvelles donnees
qu'aux donnees d'entrainement
'''
#interpretation de loss
'''
on remarque que la validation loss est inferieure a la training loss, ce qui
indique que la division des donnees est difficile a modeliser et qu'on a un
underfitting
'''
# Visualise model performance over training (History.history holds the
# per-epoch metrics recorded by model.fit).
# Accuracy curve: train vs validation
plt.plot(train_model.history['accuracy'], label='Train Accuracy')
plt.plot(train_model.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# Loss curve: train vs validation
plt.plot(train_model.history['loss'], label='Train Loss')
plt.plot(train_model.history['val_loss'], label='Validation Loss')
plt.title('Loss over epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
14
15
#interpretation d'accuracy
'''
apres l'ajout des epochs on voit que train accuracy et validation accuracy
augmentent au cours des epochs,
ce qui montre que le modèle apprend efficacement sur les données d'entraînement
tout en restant généralisable aux nouvelles données
'''
#interpretation de loss
'''
apres l'ajout des epochs on voit que train loss et validation loss diminuent
vers 0, cela indique que le modele s'ajuste de mieux en mieux aux donnees
'''
# Print the layer-by-layer architecture and parameter counts.
model.summary()
# The model is quick to train and lightweight, with only 10 trainable parameters
Model: "sequential_6"
16
����������������������������������������������������������������������������
� Layer (type) � Output Shape � Param # �
����������������������������������������������������������������������������
� dense_7 (Dense) � (None, 1) � 10 �
����������������������������������������������������������������������������
[ ]:
17