Iris

The document outlines a machine learning assignment using the Iris dataset, detailing the process of data loading, preprocessing, and model training with various classifiers including Naive Bayes, KNN, Logistic Regression, SVM, Decision Tree, and Random Forest. It includes accuracy evaluations and confusion matrices for each model, with KNN achieving the highest accuracy of 1.0. The results are summarized in a DataFrame comparing the performance of each model.


Assignment:- 1

Aim:- To classify the Iris dataset using Naive Bayes, KNN, Logistic Regression, SVM, Decision Tree and Random Forest classifiers, and to compare their accuracies.
Software Required:- Python 3 (Google Colab) with the pandas, NumPy, Matplotlib and scikit-learn libraries.
Procedure/Code:-
from google.colab import files
uploaded = files.upload()

iris.csv(text/csv) - 4550 bytes, last modified: 3/6/2025 - 100% done


Saving iris.csv to iris.csv
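
If the CSV were not at hand, an equivalent DataFrame could be built from scikit-learn's bundled copy of the dataset. This is only an optional alternative and is not used by the rest of the procedure; note that the bundled labels read 'setosa' rather than 'Iris-setosa', and the name iris_df is purely illustrative.

# Optional alternative to uploading iris.csv: load the bundled Iris data from scikit-learn.
from sklearn.datasets import load_iris
import pandas as pd

iris = load_iris()
iris_df = pd.DataFrame(iris.data,
                       columns=['Sepal length', 'Sepal width', 'Petal length', 'Petal width'])
# The bundled target names are 'setosa', 'versicolor', 'virginica' (no 'Iris-' prefix).
iris_df['Class'] = [iris.target_names[t] for t in iris.target]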

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv("iris.csv")
data

data.head()
data.tail()

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   5.1          149 non-null    float64
 1   3.5          149 non-null    float64
 2   1.4          149 non-null    float64
 3   0.2          149 non-null    float64
 4   Iris-setosa  149 non-null    object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB

The first read treated the first data row as the header, which is why info() reports only 149 entries and the odd column names above. Re-reading iris.csv with explicit column names restores all 150 rows.

data=pd.read_csv("iris.csv",names=['Sepal length','Sepal width','Petal length','Petal width','Class'])
data.head()
data.isna()

data.isna().sum()
X=data.drop(['Class'],axis=1)
Y=data['Class']
print(X)

Y.shape
(150,)

from sklearn import preprocessing


le=preprocessing.LabelEncoder()
data.Class=le.fit_transform(data.Class)
X=data.drop(['Class'],axis=1)
Y=data['Class']
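
As a quick sanity check (not part of the original assignment), the fitted encoder can be inspected to see which integer stands for which species; classes_ is a standard LabelEncoder attribute.

# Optional check: LabelEncoder assigns integers in alphabetical order of the class names,
# so classes_[0] is the species encoded as 0, classes_[1] as 1, and so on.
print(le.classes_)                  # expected: ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']
print(le.transform(le.classes_))    # [0 1 2]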
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=.20,random_state=2)

from sklearn.naive_bayes import GaussianNB


naive=GaussianNB()
naive.fit(X_train,Y_train)

predicted_naive=naive.predict(X_test)
print(predicted_naive)

[0 0 2 0 0 2 0 2 2 0 0 0 0 0 1 1 0 1 2 1 2 1 2 1 1 0 0 2 0 2]
import sklearn.metrics as matrics
cm_naive=matrics.confusion_matrix(Y_test,predicted_naive)
print(cm_naive)

[[14  0  0]
 [ 0  7  1]
 [ 0  0  8]]

from sklearn.metrics import ConfusionMatrixDisplay


cmd = ConfusionMatrixDisplay(cm_naive, display_labels=['0', '1', '2'])
cmd.plot()

import sklearn.metrics as matrics


accuracy_naive=matrics.accuracy_score(Y_test,predicted_naive)
print(accuracy_naive)

0.9666666666666667

from sklearn.metrics import classification_report


print(classification_report(Y_test,predicted_naive))
from sklearn.metrics import roc_curve
naive_probs=naive.predict_proba(X_test)
fpr={}
tpr={}
thresh={}
n_class=3
for i in range(n_class):
    fpr[i],tpr[i],thresh[i]=roc_curve(Y_test,naive_probs[:,i],pos_label=i)

plt.plot(fpr[0],tpr[0],linestyle='--',color='green',label='Class 0 Vs Rest')
plt.plot(fpr[1],tpr[1],linestyle='--',color='red',label='Class 1 Vs Rest')
plt.plot(fpr[2],tpr[2],linestyle='--',color='orange',label='Class 2 Vs Rest')
plt.title('Multiclass ROC Curve')
plt.legend(loc='best')
plt.show()
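
The three curves above can be summarised with a single number using scikit-learn's roc_auc_score in one-vs-rest mode. This is an optional addition to the assignment, sketched here with the probabilities already computed for the Naive Bayes model.

# Optional: macro-averaged one-vs-rest AUC for the Naive Bayes probabilities.
from sklearn.metrics import roc_auc_score
auc_naive = roc_auc_score(Y_test, naive_probs, multi_class='ovr')
print(auc_naive)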
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train,Y_train)

predicted_Knn=knn.predict(X_test)

cm_knn=matrics.confusion_matrix(Y_test,predicted_Knn)
cmd=ConfusionMatrixDisplay(cm_knn,display_labels=['0','1','2'])
cmd.plot()
from sklearn.metrics import classification_report
print(classification_report(Y_test,predicted_Knn))

accuracy_knn=matrics.accuracy_score(Y_test,predicted_Knn)
print(accuracy_knn)

1.0
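
The assignment fixes n_neighbors=5. A common follow-up is to check how the accuracy changes with k; the loop below is a minimal sketch of that check (a more careful choice of k would use cross-validation rather than the test set) and is not part of the original procedure.

# Optional: test-set accuracy of KNN for a few illustrative values of k.
for k in [1, 3, 5, 7, 9, 11]:
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, Y_train)
    print(k, matrics.accuracy_score(Y_test, model.predict(X_test)))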

knn_probs=knn.predict_proba(X_test)
fpr={}
tpr={}
thresh={}
n_class=3
for i in range(n_class):
    fpr[i],tpr[i],thresh[i]=roc_curve(Y_test,knn_probs[:,i],pos_label=i)

plt.plot(fpr[0],tpr[0],linestyle='--',color='red',label='Class 0 Vs Rest')
plt.plot(fpr[1],tpr[1],linestyle='--',color='green',label='Class 1 Vs Rest')
plt.plot(fpr[2],tpr[2],linestyle='--',color='orange',label='Class 2 Vs Rest')

plt.title('Multiclass ROC curve')


plt.legend(loc='best')
plt.show()

from sklearn.linear_model import LogisticRegression


lr=LogisticRegression()
lr.fit(X_train,Y_train)

predicted_lr=lr.predict(X_test)

cm_lr=matrics.confusion_matrix(Y_test,predicted_lr)
cmd=ConfusionMatrixDisplay(cm_lr,display_labels=['0','1','2'])
cmd.plot()

from sklearn.metrics import classification_report


print(classification_report(Y_test,predicted_lr))
accuracy_lr=matrics.accuracy_score(Y_test,predicted_lr)
accuracy_lr

0.9666666666666667

lr_probs=lr.predict_proba(X_test)
fpr={}
tpr={}
thresh={}
n_class=3
for i in range(n_class):
    fpr[i],tpr[i],thresh[i]=roc_curve(Y_test,lr_probs[:,i],pos_label=i)

plt.plot(fpr[0],tpr[0],linestyle='--',color='red',label='Class 0 Vs Rest')
plt.plot(fpr[1],tpr[1],linestyle='--',color='orange',label='Class 1 Vs Rest')
plt.plot(fpr[2],tpr[2],linestyle='--',color='green',label='Class 2 Vs Rest')
plt.title('Multiclass ROC Curve')
plt.legend(loc='best')
plt.show()
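
Logistic regression also exposes its fitted coefficients, which relate each feature to each class. Inspecting them is not part of the original assignment; coef_ and intercept_ are standard LogisticRegression attributes, and the table below is only a convenience view.

# Optional: one row of coefficients per class, columns in the feature order of X.
coef_table = pd.DataFrame(lr.coef_, columns=X.columns, index=le.classes_)
print(coef_table)
print(lr.intercept_)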
from sklearn.svm import SVC
svm=SVC(probability=True)
svm.fit(X_train,Y_train)

predicted_svm=svm.predict(X_test)

cm_svm=matrics.confusion_matrix(Y_test,predicted_svm)
cmd=ConfusionMatrixDisplay(cm_svm,display_labels=['0','1','2'])
cmd.plot()
from sklearn.metrics import classification_report
print(classification_report(Y_test,predicted_svm))

accuracy_svm=matrics.accuracy_score(Y_test,predicted_svm)
accuracy_svm

0.9666666666666667

svm_probs=svm.predict_proba(X_test)
fpr={}
tpr={}
thresh={}
n_class=3
for i in range(n_class):
    fpr[i],tpr[i],thresh[i]=roc_curve(Y_test,svm_probs[:,i],pos_label=i)

plt.plot(fpr[0],tpr[0],linestyle='--',color='red',label='Class 0 Vs Rest')
plt.plot(fpr[1],tpr[1],linestyle='--',color='green',label='Class 1 Vs Rest')
plt.plot(fpr[2],tpr[2],linestyle='--',color='orange',label='Class 2 Vs Rest')
plt.title('Multiclass ROC Curve')
plt.legend(loc='best')
plt.show()
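
The SVC above uses its default hyperparameters (RBF kernel, C=1.0). If tuning were desired, a small cross-validated grid search is the usual approach; the sketch below is illustrative and the parameter grid is an assumption, not something specified in the assignment.

# Optional: cross-validated search over a small, illustrative SVC parameter grid.
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
search = GridSearchCV(SVC(probability=True), param_grid, cv=5)
search.fit(X_train, Y_train)
print(search.best_params_, search.best_score_)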

from sklearn.tree import DecisionTreeClassifier


dtree=DecisionTreeClassifier()
dtree.fit(X_train,Y_train)

predicted_dtree=dtree.predict(X_test)
predicted_dtree

array([0, 0, 2, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 1, 2, 1,
2, 1, 1, 0, 0, 2, 0, 2])

cm_dtree=matrics.confusion_matrix(Y_test,predicted_dtree)
cmd=ConfusionMatrixDisplay(cm_dtree,display_labels=['0','1','2'])
cmd.plot()

from sklearn.metrics import classification_report


print(classification_report(Y_test,predicted_dtree))

dtree_probs=dtree.predict_proba(X_test)
fpr={}
tpr={}
thresh={}
n_class=3
for i in range(n_class):
    fpr[i],tpr[i],thresh[i]=roc_curve(Y_test,dtree_probs[:,i],pos_label=i)

plt.plot(fpr[0],tpr[0],linestyle='--',color='red',label='Class 0 Vs Rest')
plt.plot(fpr[1],tpr[1],linestyle='--',color='green',label='Class 1 Vs Rest')
plt.plot(fpr[2],tpr[2],linestyle='--',color='orange',label='Class 2 Vs Rest')
plt.title('Multiclass ROC Curve')
plt.legend(loc='best')
plt.show()

accuracy_dtree=matrics.accuracy_score(Y_test,predicted_dtree)
accuracy_dtree

0.9333333333333333
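
A fitted decision tree can also be drawn, which makes the learned split rules visible. This visualization is an optional addition; plot_tree is a standard scikit-learn helper.

# Optional: draw the fitted tree with the feature names and the encoded class names.
from sklearn.tree import plot_tree
plt.figure(figsize=(12, 8))
plot_tree(dtree, feature_names=list(X.columns), class_names=list(le.classes_), filled=True)
plt.show()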

from sklearn.ensemble import RandomForestClassifier


rforest=RandomForestClassifier()
rforest.fit(X_train,Y_train)
predicted_rforest=rforest.predict(X_test)

cm_rforest=matrics.confusion_matrix(Y_test,predicted_rforest)
cmd=ConfusionMatrixDisplay(cm_rforest,display_labels=['0','1','2'])
cmd.plot()

from sklearn.metrics import classification_report


print(classification_report(Y_test,predicted_rforest))

accuracy_rforest=matrics.accuracy_score(Y_test,predicted_rforest)
accuracy_rforest
0.9666666666666667

rforest_probs=rforest.predict_proba(X_test)
fpr={}
tpr={}
thresh={}
n_class=3
for i in range(n_class):
    fpr[i],tpr[i],thresh[i]=roc_curve(Y_test,rforest_probs[:,i],pos_label=i)

plt.plot(fpr[0],tpr[0],linestyle='--',color='red',label='Class 0 Vs Rest')
plt.plot(fpr[1],tpr[1],linestyle='--',color='green',label='Class 1 Vs Rest')
plt.plot(fpr[2],tpr[2],linestyle='--',color='orange',label='Class 2 Vs Rest')
plt.title('Multiclass ROC Curve')
plt.legend(loc='best')
plt.show()
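
Random forests provide a built-in ranking of the input features. Printing it is not part of the original assignment, but it is a one-liner using the standard feature_importances_ attribute of the fitted model.

# Optional: relative importance of each feature as estimated by the random forest.
importances = pd.Series(rforest.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False))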

results=pd.DataFrame({'Model':['Naive Bayes','KNN','SVM','Logistic Regression','Decision Tree','Random Forest'],
                      'Score':[0.966,1.0,0.966,0.966,0.9333,0.966]})
result_df=results.sort_values(by='Score',ascending=False)
result_df=result_df.set_index('Score')
result_df
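
The scores above are typed in by hand. They can instead be taken straight from the accuracy variables computed in the earlier cells, which avoids transcription errors; a minimal sketch, assuming those variables are still in scope:

# Optional: build the comparison table from the computed accuracies instead of hard-coded values.
results=pd.DataFrame({'Model':['Naive Bayes','KNN','SVM','Logistic Regression','Decision Tree','Random Forest'],
                      'Score':[accuracy_naive,accuracy_knn,accuracy_svm,accuracy_lr,accuracy_dtree,accuracy_rforest]})
print(results.sort_values(by='Score',ascending=False))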
