ml-batch(1)

The document outlines a machine learning implementation using K-Nearest Neighbors (KNN), Support Vector Machine (SVM), and Naïve Bayes classifiers on a dataset related to car purchases. It includes data preprocessing, model training, predictions, and performance evaluation through accuracy scores and confusion matrices. Visualizations of the results for both training and test sets are also provided for each classifier.


K-Nearest Neighbour (KNN)

import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier

dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_car_data.csv")
dataset
Out[]:
     User ID  Gender  Age  AnnualSalary  Purchased
0        385    Male   35         20000          0
1        681    Male   40         43500          0
2        353    Male   49         74000          0
3        895    Male   40        107500          1
4        661    Male   25         79000          0
..       ...     ...  ...           ...        ...
995      863    Male   38         59000          0
996      800  Female   47         23500          0
997      407  Female   28        138500          1
998      299  Female   48        134000          1
999      687  Female   44         73500          0

[1000 rows x 5 columns]

# input
x = dataset.iloc[:, [2,3]].values
x
Out[]:
array([[ 35, 20000],
[ 40, 43500],
[ 49, 74000],
...,
[ 28, 138500],
[ 48, 134000],
[ 44, 73500]])

# output
y = dataset.iloc[:, 4].values
y
Out[]:
array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       ...,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0])

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)

sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

knn = KNeighborsClassifier(n_neighbors=7)

knn.fit(xtrain, ytrain)

Out[]: KNeighborsClassifier(n_neighbors=7)
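The transcript does not say why n_neighbors=7 was chosen. A minimal supplementary sketch (not part of the original lab) of one common way to pick k, assuming the scaled xtrain/ytrain arrays defined above:

from sklearn.model_selection import cross_val_score

# Try odd k values (odd avoids ties in the two-class vote) and report mean 5-fold CV accuracy
for k in range(1, 16, 2):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), xtrain, ytrain, cv=5)
    print(k, round(scores.mean(), 4))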

ypred = knn.predict(xtest)
print(ypred)
[1 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 1 1 1]

knn.score(xtest, ytest)
Out[]: 0.924

print ("\n\nAccuracy : ", accuracy_score(ytest, ypred)*100)

Accuracy : 92.4

cm = confusion_matrix(ytest, ypred)
print ("Confusion Matrix : \n", cm)

Confusion Matrix :
[[142 10]
[ 9 89]]
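Accuracy alone can mask per-class behaviour. As an optional addition (not in the original transcript), scikit-learn's classification_report derives precision, recall, and F1 for each class from the same predictions:

from sklearn.metrics import classification_report

# Per-class precision/recall/F1 for the KNN predictions above
print(classification_report(ytest, ypred, target_names=['Does not buy', 'Buys car']))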

fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()
# Visualising the full dataset (unscaled features)
X_set = x
y_set = y
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.xlim(0, 80)
plt.ylim(0, 180000)
plt.title('KNN Classifier (full dataset)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the test set (note: features are standardised)
X_set = xtest
y_set = ytest
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('KNN Classifier (Test set)')
plt.xlabel('Age (scaled)')
plt.ylabel('Annual Salary (scaled)')
plt.legend()
plt.show()
Support Vector Machine for Binary Classification

import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC

# Importing the dataset
dataset = pd.read_csv('/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_car_data.csv')
dataset

Out[]:
     User ID  Gender  Age  AnnualSalary  Purchased
0        385    Male   35         20000          0
1        681    Male   40         43500          0
2        353    Male   49         74000          0
3        895    Male   40        107500          1
4        661    Male   25         79000          0
..       ...     ...  ...           ...        ...
995      863    Male   38         59000          0
996      800  Female   47         23500          0
997      407  Female   28        138500          1
998      299  Female   48        134000          1
999      687  Female   44         73500          0

[1000 rows x 5 columns]

# input
x = dataset.iloc[:, [2,3]].values
x
Out[]:
array([[ 35, 20000],
[ 40, 43500],
[ 49, 74000],
...,
[ 28, 138500],
[ 48, 134000],
[ 44, 73500]])

# output
y = dataset.iloc[:, 4].values
y
Out[]:
array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       ...,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0])

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)

sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

classifier = SVC(kernel = 'linear', random_state = 0)


classifier.fit(xtrain, ytrain)

Out[]: SVC(kernel='linear', random_state=0)

ypred = classifier.predict(xtest)
print(ypred)
[1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 1 0 1 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0]

classifier.score(xtest, ytest)
Out[]: 0.84

print ("\n\nAccuracy : ", accuracy_score(ytest, ypred)*100)

Accuracy : 84.0

cm = confusion_matrix(ytest, ypred)
print ("Confusion Matrix : \n", cm)
Confusion Matrix :
[[138 14]
[ 26 72]]

fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()

# Visualising the full dataset (unscaled features)
X_set = x
y_set = y
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.xlim(0, 80)
plt.ylim(0, 180000)
plt.title('SVM Classifier (full dataset)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the test set results with the decision boundary
X_set, y_set = xtest, ytest
X1, X2 = nm.meshgrid(nm.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     nm.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('white', 'black'))(i), label = j)
plt.title('SVM (Test set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Using the RBF kernel for SVM
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(xtrain, ytrain)
Out[]: SVC(random_state=0)

ypred = classifier.predict(xtest)
print(ypred)
[1 0 1 0 0 0 1 1 1 0 1 0 0 0 1 1 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 0 1 0 1 1 0 0 0 1 1 0]

classifier.score(xtest, ytest)
Out[]: 0.9

print ("\n\nAccuracy : ", accuracy_score(ytest, ypred)*100)

Accuracy : 90.0

cm = confusion_matrix(ytest, ypred)
print ("Confusion Matrix : \n", cm)
Confusion Matrix :
[[138 14]
[ 11 87]]

fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()

# Visualising the full dataset (unscaled features)
X_set = x
y_set = y
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.xlim(0, 80)
plt.ylim(0, 180000)
plt.title('SVM Classifier (full dataset)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the test set results with the decision boundary
X_set, y_set = xtest, ytest
X1, X2 = nm.meshgrid(nm.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     nm.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('white', 'black'))(i), label = j)
plt.title('SVM with RBF kernel (Test set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
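On this split the linear kernel reaches 84% and the RBF kernel 90%. A supplementary sketch (illustrative parameter grid, not from the original lab) of how kernel and hyperparameters could be searched systematically instead of compared by hand:

from sklearn.model_selection import GridSearchCV

# Example grid; gamma is ignored by the linear kernel, so the grid is slightly redundant but harmless
param_grid = {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 0.1, 1]}
search = GridSearchCV(SVC(random_state=0), param_grid, cv=5)
search.fit(xtrain, ytrain)
print(search.best_params_, search.best_score_)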
Naïve Bayes
import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.naive_bayes import GaussianNB

# Importing the dataset
dataset = pd.read_csv('/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_car_data.csv')

dataset
Out[]:
     User ID  Gender  Age  AnnualSalary  Purchased
0        385    Male   35         20000          0
1        681    Male   40         43500          0
2        353    Male   49         74000          0
3        895    Male   40        107500          1
4        661    Male   25         79000          0
..       ...     ...  ...           ...        ...
995      863    Male   38         59000          0
996      800  Female   47         23500          0
997      407  Female   28        138500          1
998      299  Female   48        134000          1
999      687  Female   44         73500          0

[1000 rows x 5 columns]

# input
x = dataset.iloc[:, [2, 3]].values
x
Out[]:
array([[ 35, 20000],
[ 40, 43500],
[ 49, 74000],
...,
[ 28, 138500],
[ 48, 134000],
[ 44, 73500]])

# Target
y = dataset.iloc[:, 4].values
y
Out[]:
array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       ...,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0])

x2 = dataset.iloc[:, [2]].values
plt.scatter(x2,y)
plt.xlabel("Age")
plt.ylabel("Purchased")
plt.show()

x3 = dataset.iloc[:, [3]].values
plt.scatter(x3,y)
plt.xlabel("Salary")
plt.ylabel("Purchased")
plt.show()
# Splitting the dataset into the Training set and Test set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)

# Feature Scaling
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Fitting Naive Bayes to the Training set


classifier = GaussianNB()
classifier.fit(x_train, y_train)
Out[]: GaussianNB()
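GaussianNB fits one Gaussian per class and feature. As an optional check (not in the original transcript), the fitted estimator exposes the learned class priors and per-class feature means and variances; note that scikit-learn releases before 1.0 name the variance attribute sigma_ rather than var_:

# Class priors P(y) and the per-class Gaussian parameters of each feature
print("class priors:", classifier.class_prior_)
print("per-class feature means:\n", classifier.theta_)
print("per-class feature variances:\n", classifier.var_)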

# Predicting the Test set results


y_pred = classifier.predict(x_test)

print("Predicted values:")
print(y_pred)
Predicted values:
[1 0 1 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 1 1 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0]
acc= accuracy_score(y_test,y_pred)*100
print ("\n\nAccuracy of Naïve Bayes Classifier: ", acc)

Accuracy of Naïve Bayes Classifier: 88.0

# Making the Confusion Matrix


cm = confusion_matrix(y_test, y_pred)
cm
Out[]:
array([[140, 12],
[ 18, 80]])

fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()
# Visualising the training set results with the decision boundary
x_set, y_set = x_train, y_train
X1, X2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max() + 1, step = 0.01),
                     nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('white', 'black')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c = ListedColormap(('purple', 'green'))(i), label = j)
plt.title('Naive Bayes (Training set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the test set results with the decision boundary
x_set, y_set = x_test, y_test
X1, X2 = nm.meshgrid(nm.arange(start = x_set[:, 0].min() - 1, stop = x_set[:, 0].max() + 1, step = 0.01),
                     nm.arange(start = x_set[:, 1].min() - 1, stop = x_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('white', 'black')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c = ListedColormap(('purple', 'green'))(i), label = j)
plt.title('Naive Bayes (Test set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
Demonstration of clustering using hierarchical (agglomerative) algorithms
import numpy as nm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import linkage, dendrogram

import warnings
warnings.filterwarnings('ignore')

dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Logistic_Iris.csv")
dataset

Out[]:
     Sepal Length  Sepal Width  Petal Length  Petal Width         Species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]

# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       ...])
sns.pairplot(dataset)
# Finding the optimum number of clusters with the elbow method
Elbow = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 0)
    kmeans.fit(x)
    Elbow.append(kmeans.inertia_)
# Plotting the results as a line graph to observe 'the elbow'
plt.plot(range(1, 11), Elbow, marker='o')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Within-cluster sum of squares (inertia)')
plt.show()
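The elbow is read off the plot by eye. As an optional numeric cross-check (not part of the original lab), the silhouette score rates each candidate k directly; higher is better:

from sklearn.metrics import silhouette_score

# Mean silhouette coefficient for k = 2..6 on the raw iris features
for k in range(2, 7):
    labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(x)
    print(k, round(silhouette_score(x, labels), 3))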
distance_matrix = linkage(x, method = 'ward', metric = 'euclidean')

# Create a dendrogram
dn = dendrogram(distance_matrix)

# Display the dendrogram
plt.show()
# Cut line at height 13, corresponding to the two-cluster solution used next
dn = dendrogram(distance_matrix)
plt.axhline(y=13, color='r', linestyle='--')
plt.ylim(0, 15)
plt.show()

cluster = AgglomerativeClustering(n_clusters=2, affinity='euclidean', linkage='ward')
cluster.fit_predict(x)
plt.title('Agglomerative Clustering - 2 Clusters')
plt.scatter(x[:,2], x[:,3], c=cluster.labels_)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()

# Cutting the dendrogram lower, at height 8, yields three clusters
dn = dendrogram(distance_matrix)
plt.axhline(y=8, color='r', linestyle='--')
plt.ylim(0, 15)
plt.show()

cluster = AgglomerativeClustering(n_clusters=3, affinity='euclidean', linkage='ward')
cluster.fit_predict(x)
plt.title('Agglomerative Clustering - 3 Clusters')
plt.scatter(x[:,2], x[:,3], c=cluster.labels_)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()

# Cutting still lower, at height 4, yields five clusters
dn = dendrogram(distance_matrix)
plt.axhline(y=4, color='r', linestyle='--')
plt.ylim(0, 15)
plt.show()

cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='ward')
cluster.fit_predict(x)
plt.title('Agglomerative Clustering - 5 Clusters')
plt.scatter(x[:,2], x[:,3], c=cluster.labels_)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()
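Because the iris data carries species labels, a supplementary sanity check (assuming the 'Species' column name shown in the frame above; not part of the original lab) is to cross-tabulate the three-cluster assignment against the true species:

# How well do three agglomerative clusters match the actual species?
labels = AgglomerativeClustering(n_clusters=3, linkage='ward').fit_predict(x)
print(pd.crosstab(dataset['Species'], labels))

With ward linkage the setosa cluster typically separates cleanly, while versicolor and virginica overlap somewhat.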
Build a classifier and compare its performance with an ensemble technique such as random forest

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_Iris.csv")
dataset
Out[]:
     Sepal Length  Sepal Width  Petal Length  Petal Width         Species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]

# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       ...])

# target
y = dataset.iloc[:, 4].values
y
Out[]:
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       ...,
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       ...,
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       ...], dtype=object)

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)

sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

dtree= DecisionTreeClassifier()
dtree.fit(xtrain, ytrain)
Out[]: DecisionTreeClassifier()

y_pred1 = dtree.predict(xtest)
print("Predicted values:")
y_pred1
Predicted values:
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-virginica'], dtype=object)

acc_dtree= accuracy_score(ytest,y_pred1)*100
print ("\n\nAccuracy using Single Decision Tree: ", acc_dtree)

Accuracy using Single Decision Tree: 97.36842105263158

cm = confusion_matrix(ytest, y_pred1)
print ("\n\n Confusion Matrix for Single Decision Tree: \n", cm)

Confusion Matrix for Single Decision Tree:


[[13 0 0]
[ 0 15 1]
[ 0 0 9]]

# Create a Random forest Classifier


RF = RandomForestClassifier(n_estimators = 100)
# Train the model using the training sets
RF.fit(xtrain, ytrain)
Out[]: RandomForestClassifier()
y_pred2 = RF.predict(xtest)
print("Predicted values:")
y_pred2
Predicted values:
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-virginica'], dtype=object)

acc_rf= accuracy_score(ytest,y_pred2)*100
print ("\n\nAccuracy using Random Forest: ", acc_rf)

Accuracy using Random Forest: 97.36842105263158

cm = confusion_matrix(ytest, y_pred2)
print ("\n\n Confusion Matrix for Random Forest Classifier: \n", cm)

Confusion Matrix for Random Forest Classifier:


[[13 0 0]
[ 0 15 1]
[ 0 0 9]]
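Both models reach the same test accuracy here. One extra diagnostic the forest offers (a standard attribute of fitted forests, not shown in the original transcript) is an importance score per feature; the names below follow the dataset's column order:

feature_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
for name, imp in zip(feature_names, RF.feature_importances_):
    print(name, round(imp, 3))

On iris, the petal measurements usually dominate.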

Bagging
import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import model_selection
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
import warnings
warnings.filterwarnings('ignore')

dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_Iris.csv")
dataset
Out[]:
     Sepal Length  Sepal Width  Petal Length  Petal Width         Species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]

# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       ...])

# target
y = dataset.iloc[:, 4].values
y
Out[]:
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       ...,
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       ...,
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       ...], dtype=object)

# Splitting the dataset into the Training set and Test set
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.25, random_state = 0)

Single = GaussianNB()
Single.fit(xtrain, ytrain)
Out[]: GaussianNB()

y_pred = Single.predict(xtest)
print("Predicted values for single Naïve Bayes Classifier:")
y_pred
Predicted values for single Naïve Bayes Classifier:
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-versicolor'], dtype='<U15')

Acc_Single= accuracy_score(ytest,y_pred)*100
print ("\n\nAccuracy using single Naïve Bayes Classifier: ",Acc_Single)

Accuracy using single Naïve Bayes Classifier: 100.0

cm = confusion_matrix(ytest, y_pred)
print ("\n\n Confusion Matrix -using single Naïve Bayes Classifier: \n", cm)

Confusion Matrix -using single Naïve Bayes Classifier:


[[13 0 0]
[ 0 16 0]
[ 0 0 9]]

# initialize the base classifier


base_cls = GaussianNB()

# no. of base classifier


num_class = 100

# bagging classifier
Bag = BaggingClassifier(base_estimator = base_cls, n_estimators = num_class, random_state = 0)
Bag.fit(xtrain, ytrain)
Out[]: BaggingClassifier(base_estimator=GaussianNB(), n_estimators=100, random_state=0)

results = model_selection.cross_val_score(Bag, xtest, ytest, cv = 10)


print("\n\nAccuracy using Bagged Set of Naïve Bayes Classifiers :", results.mean()*100)

Accuracy using Bagged Set of Naïve Bayes Classifiers : 94.16666666666667
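Cross-validating on the held-out test set, as above, reuses test data for model assessment. A sketch of the out-of-bag alternative, assuming the same xtrain/ytrain (base_estimator is the parameter name in older scikit-learn; from 1.2 onward it is estimator):

# Refit with OOB scoring: each sample is scored only by the bags that did not train on it
Bag = BaggingClassifier(base_estimator=GaussianNB(), n_estimators=100, oob_score=True, random_state=0)
Bag.fit(xtrain, ytrain)
print("OOB accuracy:", Bag.oob_score_)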

Boosting
import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import AdaBoostClassifier

import warnings
warnings.filterwarnings('ignore')

dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_Iris.csv")
dataset
Out[]:
     Sepal Length  Sepal Width  Petal Length  Petal Width         Species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]

# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       ...])

# target
y = dataset.iloc[:, 4].values
y
Out[]:
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       ...,
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       ...,
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       ...], dtype=object)

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)

sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

adaboost = AdaBoostClassifier(n_estimators = 50, learning_rate = 0.2)
adaboost.fit(xtrain, ytrain)
Out[]: AdaBoostClassifier(learning_rate=0.2)

adaboost.score(xtest, ytest)
Out[]: 0.8947368421052632

y_pred = adaboost.predict(xtest)
print("Predicted values for AdaBoost Classifier:")
y_pred
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-virginica'], dtype=object)

Acc_adaboost= accuracy_score(ytest,y_pred)*100
print ("\n\nTest Accuracy using AdaBoost Classifier: ", Acc_adaboost)

Test Accuracy using AdaBoost Classifier: 89.47368421052632

cm = confusion_matrix(ytest, y_pred)
print ("\n\n Confusion Matrix for AdaBoost Classifier: \n", cm)

Confusion Matrix for AdaBoost Classifier:


[[13 0 0]
[ 0 15 1]
[ 0 3 6]]

fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1, 2), ticklabels=('Predicted Setosa', 'Predicted Versicolor', 'Predicted Virginica'))
ax.yaxis.set(ticks=(0, 1, 2), ticklabels=('Actual Setosa', 'Actual Versicolor', 'Actual Virginica'))
ax.set_ylim(2.5, -0.5)
for i in range(3):
    for j in range(3):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='white')
plt.show()
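The run above fixes 50 estimators and a 0.2 learning rate. As a supplementary sketch (not in the original lab), AdaBoost's staged_predict makes it cheap to watch test accuracy evolve as boosting rounds are added:

# Test accuracy after every 10th boosting round
for n, stage_pred in enumerate(adaboost.staged_predict(xtest), start=1):
    if n % 10 == 0:
        print(n, round(accuracy_score(ytest, stage_pred), 4))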
