TP.ipynb - Colab
Imports
# Mount Drive
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import sklearn
print(sklearn.__version__)
1.2.2
# Load in Data
df = pd.read_csv('/content/drive/MyDrive/datasets/creditcard.csv')
df.head()
0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0
1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0
2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0
3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1
4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0
5 rows × 31 columns
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Time 284807 non-null float64
1 V1 284807 non-null float64
2 V2 284807 non-null float64
3 V3 284807 non-null float64
4 V4 284807 non-null float64
5 V5 284807 non-null float64
6 V6 284807 non-null float64
7 V7 284807 non-null float64
8 V8 284807 non-null float64
9 V9 284807 non-null float64
10 V10 284807 non-null float64
https://colab.research.google.com/drive/1ewnFbr_xMraYUy3Hri4N_6QUsbjypW-b#scrollTo=9SK9D3ac-N8U&printMode=true 1/6
1/11/25, 6:16 PM TP4_ISI_KEF_24_25.ipynb - Colab
11 V11 284807 non-null float64
12 V12 284807 non-null float64
13 V13 284807 non-null float64
14 V14 284807 non-null float64
15 V15 284807 non-null float64
16 V16 284807 non-null float64
17 V17 284807 non-null float64
18 V18 284807 non-null float64
19 V19 284807 non-null float64
20 V20 284807 non-null float64
21 V21 284807 non-null float64
22 V22 284807 non-null float64
23 V23 284807 non-null float64
24 V24 284807 non-null float64
25 V25 284807 non-null float64
26 V26 284807 non-null float64
27 V27 284807 non-null float64
28 V28 284807 non-null float64
29 Amount 284807 non-null float64
30 Class 284807 non-null int64
dtypes: float64(30), int64(1)
memory usage: 67.4 MB
1081
df.drop_duplicates(inplace=True)
df.duplicated().sum()
0 283253
1 473
Name: Class, dtype: int64
0 0.998333
1 0.001667
Name: Class, dtype: float64
# Define a function that takes a fitted model and test data, prints a classification report, and shows the confusion matrix and ROC AUC
def evaluate_classification(model, X_test, y_test, cmap='Greens',
normalize='true', classes=None, figsize=(20, 5)):
test_preds = model.predict(X_test)
print(metrics.classification_report(y_test, test_preds, target_names=classes))
https://colab.research.google.com/drive/1ewnFbr_xMraYUy3Hri4N_6QUsbjypW-b#scrollTo=9SK9D3ac-N8U&printMode=true 2/6
1/11/25, 6:16 PM TP4_ISI_KEF_24_25.ipynb - Colab
curve.ax_.grid()
curve.ax_.plot([0,1],[0,1], ls=':')
▸ Pipeline
▸ StandardScaler
▸ LogisticRegression
array([0, 0, 0, 0, 0])
array([[0.99803098, 0.00196902],
[0.99972247, 0.00027753],
[0.99972296, 0.00027704],
[0.99992753, 0.00007247],
[0.9999754 , 0.0000246 ]])
https://colab.research.google.com/drive/1ewnFbr_xMraYUy3Hri4N_6QUsbjypW-b#scrollTo=9SK9D3ac-N8U&printMode=true 3/6
1/11/25, 6:16 PM TP4_ISI_KEF_24_25.ipynb - Colab
# Evaluate the logistic regression pipe using function
evaluate_classification(logreg_pipe, X_test, y_test)
GridSearchCV
logreg2 = LogisticRegression()
▸ GridSearchCV
▸ estimator: Pipeline
▸ StandardScaler
▸ LogisticRegression
▸ GridSearchCV
▸ estimator: Pipeline
▸ StandardScaler
▸ LogisticRegression
https://colab.research.google.com/drive/1ewnFbr_xMraYUy3Hri4N_6QUsbjypW-b#scrollTo=9SK9D3ac-N8U&printMode=true 4/6
1/11/25, 6:16 PM TP4_ISI_KEF_24_25.ipynb - Colab
{'logisticregression__C': 10,
'logisticregression__penalty': 'l1',
'logisticregression__solver': 'liblinear'}
SMOTE
from imblearn.over_sampling import SMOTE
pd.Series(y_train).value_counts()
0 212439
1 355
Name: Class, dtype: int64
0 212439
1 212439
Name: Class, dtype: int64
smote_logreg = LogisticRegression(max_iter=1000)
smote_logreg.fit(X_train_smote, y_train_smote)
▾ LogisticRegression
LogisticRegression(max_iter=1000)
https://colab.research.google.com/drive/1ewnFbr_xMraYUy3Hri4N_6QUsbjypW-b#scrollTo=9SK9D3ac-N8U&printMode=true 5/6
1/11/25, 6:16 PM TP4_ISI_KEF_24_25.ipynb - Colab
https://colab.research.google.com/drive/1ewnFbr_xMraYUy3Hri4N_6QUsbjypW-b#scrollTo=9SK9D3ac-N8U&printMode=true 6/6