Untitled2.ipynb - Colab
import io

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler  # MinMaxScaler is used for feature scaling below
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
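The cell that loads the dataset and produces the outputs below is not visible in this export. A minimal sketch, assuming the heart-disease CSV is uploaded through the Colab file picker (consistent with the import io above); the file name is a placeholder, not taken from the original notebook:

# Assumption: file uploaded via the Colab picker; 'heart.csv' is a placeholder name.
from google.colab import files

uploaded = files.upload()
df = pd.read_csv(io.BytesIO(uploaded['heart.csv']))

print(df.head())           # preview shown below
print(df.info())           # 1190 rows, 12 columns, no missing values
print(df.isnull().sum())   # per-column null counts (all zero)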
   resting ecg  max heart rate  exercise angina  oldpeak  ST slope  target
0            0             172                0      0.0         1       0
1            0             156                0      1.0         2       1
2            1              98                0      0.0         1       0
3            0             108                1      1.5         2       1
4            0             122                0      0.0         1       0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190 entries, 0 to 1189
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   age                  1190 non-null   int64
 1   sex                  1190 non-null   int64
 2   chest pain type      1190 non-null   int64
 3   resting bp s         1190 non-null   int64
 4   cholesterol          1190 non-null   int64
 5   fasting blood sugar  1190 non-null   int64
 6   resting ecg          1190 non-null   int64
 7   max heart rate       1190 non-null   int64
 8   exercise angina      1190 non-null   int64
 9   oldpeak              1190 non-null   float64
 10  ST slope             1190 non-null   int64
 11  target               1190 non-null   int64
dtypes: float64(1), int64(11)
memory usage: 111.7 KB
None
age 0
sex 0
chest pain type 0
resting bp s 0
cholesterol 0
fasting blood sugar 0
resting ecg 0
max heart rate 0
exercise angina 0
oldpeak 0
ST slope 0
target 0
dtype: int64
# Distribution plots for cholesterol, age, and oldpeak
fig, axes = plt.subplots(1, 3, figsize=(24, 8))  # one row of three panels; axes is a 1-D array

# Cholesterol
sns.histplot(df['cholesterol'], kde=True, color='blue', ax=axes[0])
axes[0].set_title("Cholesterol Distribution")
axes[0].set_xlabel("Cholesterol Level")
axes[0].set_ylabel("Frequency")

# Age
sns.histplot(df['age'], kde=True, color='purple', ax=axes[1])
axes[1].set_title("Age Distribution")
axes[1].set_xlabel("Age")
axes[1].set_ylabel("Frequency")

# Oldpeak
sns.histplot(df['oldpeak'], kde=True, color='green', ax=axes[2])
axes[2].set_title("Oldpeak (ST Depression) Distribution")
axes[2].set_xlabel("Oldpeak")
axes[2].set_ylabel("Frequency")

# Adjust layout
plt.tight_layout()
plt.show()
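The cell that defines numerical_columns is not visible in this export. A minimal sketch, assuming every feature column except the target is min-max scaled, which matches the scaled preview below (for example, 'resting ecg' becomes 0.5); the exact column list is an assumption:

# Assumption: scale all feature columns; the original cell defining this list is not shown.
numerical_columns = [col for col in df.columns if col != 'target']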
scaler = MinMaxScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])
print(df.head())
   fasting blood sugar  resting ecg  max heart rate  exercise angina  \
0                  0.0          0.0        0.788732              0.0
1                  0.0          0.0        0.676056              0.0
2                  0.0          0.5        0.267606              0.0
3                  0.0          0.0        0.338028              1.0
4                  0.0          0.0        0.436620              0.0
X = df.drop(columns=['target'])
y = df['target']
# Splitting data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
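The cells that train the models are not visible in this export. A minimal sketch, assuming default-style settings; the prediction variable names (logreg_pred, dt_pred, rf_pred, svm_pred) come from the evaluation code below, while the estimators and their parameters here are assumptions:

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Logistic Regression
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg.fit(X_train, y_train)
logreg_pred = logreg.predict(X_test)

# Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)

# Random Forest (RandomForestClassifier is imported at the top of the notebook)
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# SVM; probability=True exposes predict_proba for the ROC curves further down
svm = SVC(probability=True, random_state=42)
svm.fit(X_train, y_train)
svm_pred = svm.predict(X_test)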
# Evaluate models
# Logistic Regression
print("Logistic Regression Accuracy:", accuracy_score(y_test, logreg_pred))
print("Logistic Regression Confusion Matrix:\n", confusion_matrix(y_test, logreg_pred))
print("Logistic Regression Classification Report:\n", classification_report(y_test, logreg_pred))
# Decision Tree
print("Decision Tree Accuracy:", accuracy_score(y_test, dt_pred))
print("Decision Tree Confusion Matrix:\n", confusion_matrix(y_test, dt_pred))
print("Decision Tree Classification Report:\n", classification_report(y_test, dt_pred))
# Random Forest
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))
print("Random Forest Confusion Matrix:\n", confusion_matrix(y_test, rf_pred))
print("Random Forest Classification Report:\n", classification_report(y_test, rf_pred))
# SVM
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print("SVM Confusion Matrix:\n", confusion_matrix(y_test, svm_pred))
print("SVM Classification Report:\n", classification_report(y_test, svm_pred))
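The cell that computes fpr, tpr, thresholds, and roc_auc is also not shown. A minimal sketch for the Logistic Regression curve, assuming probability scores from predict_proba and the logreg estimator from the training sketch above; the same computation is repeated with each model's scores before its plot:

from sklearn.metrics import roc_curve, auc

# ROC inputs for Logistic Regression: probability of the positive class
logreg_scores = logreg.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, logreg_scores)
roc_auc = auc(fpr, tpr)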
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='blue', label=f'Logistic Regression (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
for i in range(0, len(thresholds), max(1, len(thresholds)//10)):
    plt.annotate(f'{thresholds[i]:.2f}', (fpr[i], tpr[i]), textcoords="offset points", xytext=(5, -10), ha='left', fontsize=8)
plt.title('ROC Curve with Thresholds (Logistic Regression)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
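Before the Decision Tree plot, the curve data would be recomputed from that model's scores (a hedged sketch using the dt estimator and the roc_curve/auc imports from the sketches above):

dt_scores = dt.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, dt_scores)
roc_auc = auc(fpr, tpr)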
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='red', label=f'Decision Tree (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
for i in range(0, len(thresholds), max(1, len(thresholds)//10)):
    plt.annotate(f'{thresholds[i]:.2f}', (fpr[i], tpr[i]), textcoords="offset points", xytext=(5, -10), ha='left', fontsize=8)
plt.title('ROC Curve with Thresholds (Decision Tree)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
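Likewise for the Random Forest plot (a hedged sketch using the rf estimator from the training sketch above):

rf_scores = rf.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, rf_scores)
roc_auc = auc(fpr, tpr)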
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='green', label=f'Random Forest (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
for i in range(0, len(thresholds), max(1, len(thresholds)//10)):
    plt.annotate(f'{thresholds[i]:.2f}', (fpr[i], tpr[i]), textcoords="offset points", xytext=(5, -10), ha='left', fontsize=8)
plt.title('ROC Curve with Thresholds (Random Forest)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
# SVM Evaluation
print("\nSVM Evaluation:")
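The rest of the SVM evaluation cell is cut off in this export. A minimal sketch of an ROC curve for the SVM in the same style as the plots above, assuming the svm estimator was trained with probability=True as in the training sketch:

svm_scores = svm.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, svm_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='orange', label=f'SVM (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.title('ROC Curve (SVM)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()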