Lab4 - Jupyter Notebook

In [12]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  # needed for the confusion-matrix heatmap below
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc

# Load the dataset
df = pd.read_csv('antfile17.csv')

# Assuming the target variable is 'bug' and features are all other columns
X = df.drop('bug', axis=1).values
y = df['bug'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Logistic Regression Functions

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost_function(y, y_pred):
    m = len(y)
    return -np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)) / m

def gradient_descent(X, y, alpha, num_iterations):
    m, n = X.shape
    beta = np.zeros(n)
    for _ in range(num_iterations):
        z = np.dot(X, beta)
        y_pred = sigmoid(z)
        gradient = np.dot(X.T, (y_pred - y)) / m
        beta -= alpha * gradient
    return beta

def predict(X, beta):
    return sigmoid(np.dot(X, beta)) >= 0.5

# Train Logistic Regression Model
alpha = 0.01
num_iterations = 1000
beta = gradient_descent(X_train, y_train, alpha, num_iterations)

# Predict and evaluate
y_train_pred = predict(X_train, beta)
y_test_pred = predict(X_test, beta)

# Accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f'Training Accuracy: {train_accuracy:.2f}')
print(f'Test Accuracy: {test_accuracy:.2f}')

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_test_pred)

# Classification Report
class_report = classification_report(y_test, y_test_pred)
print('Classification Report:')
print(class_report)

# ROC Curve and AUC
y_test_proba = sigmoid(np.dot(X_test, beta))
fpr, tpr, _ = roc_curve(y_test, y_test_proba)
roc_auc = auc(fpr, tpr)
print(f'AUC: {roc_auc:.2f}')

# Plot Confusion Matrix as Heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='pink', xticklabels=['No Bug', 'Bug'], yticklabels=['No Bug', 'Bug'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Plot ROC Curve
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc='lower right')
plt.show()

Training Accuracy: 0.75
Test Accuracy: 0.78
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.80      0.84       136
           1       0.57      0.71      0.63        51

    accuracy                           0.78       187
   macro avg       0.73      0.75      0.74       187
weighted avg       0.80      0.78      0.78       187

AUC: 0.86
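Since the ROC curve summarizes the trade-off across all possible cut-offs, the fixed 0.5 threshold used in predict() is not necessarily the best operating point. The sketch below is not part of the original lab; it reuses y_test_proba and y_test from the cell above and simply picks the threshold that maximizes Youden's J statistic (tpr - fpr).

# Optional sketch: tune the decision threshold using the ROC curve
# (maximize Youden's J = tpr - fpr), then re-evaluate accuracy.
fpr, tpr, thresholds = roc_curve(y_test, y_test_proba)
best_idx = np.argmax(tpr - fpr)
best_threshold = thresholds[best_idx]
print(f'Threshold maximizing Youden\'s J: {best_threshold:.2f}')

y_test_pred_tuned = (y_test_proba >= best_threshold).astype(int)
print(f'Test Accuracy at tuned threshold: {accuracy_score(y_test, y_test_pred_tuned):.2f}')

Whether such tuning is appropriate depends on the relative cost of false positives versus false negatives in the defect-prediction setting.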

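As a final sanity check (again, not part of the original lab), the hand-rolled gradient-descent model can be compared against scikit-learn's LogisticRegression on the same scaled features. Note that scikit-learn fits an intercept and applies L2 regularization by default, while the from-scratch model does neither, so the numbers should be close but not identical. A minimal sketch, assuming the X_train, X_test, y_train, y_test variables defined earlier:

# Optional sketch: compare with scikit-learn's LogisticRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

sk_model = LogisticRegression(max_iter=1000)
sk_model.fit(X_train, y_train)

sk_pred = sk_model.predict(X_test)
sk_proba = sk_model.predict_proba(X_test)[:, 1]

print(f'sklearn Test Accuracy: {accuracy_score(y_test, sk_pred):.2f}')
print(f'sklearn AUC: {roc_auc_score(y_test, sk_proba):.2f}')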