Machine Learning
Machine Learning
M. Ashwin 21BCE5695
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PowerTransformer, StandardScaler
def load_dataset(path="Iris_Data.csv"):
    """Read a CSV file and split it into a feature matrix and a label vector.

    Every column except the last is treated as a feature; the last column
    is treated as the target label.

    Args:
        path: CSV file path (or file-like object) handed to
            ``pandas.read_csv``. Defaults to ``"Iris_Data.csv"`` in the
            current working directory.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays, where ``X`` holds all columns
        but the last and ``y`` holds the last column.
    """
    data = pd.read_csv(path)
    # Slice the frame *before* converting to NumPy so that numeric feature
    # columns are not up-cast to ``object`` by a string label column.
    X = data.iloc[:, :-1].to_numpy()
    y = data.iloc[:, -1].to_numpy()
    return X, y
Load the dataset by calling load_dataset() and split it into training and testing sets
# Load features/labels, then hold out 20% of the rows as the test set.
X, Y = load_dataset()
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    train_size=0.8,
    random_state=1,  # fixed seed so the split is reproducible
    shuffle=True,
)
print('Train', X_train.shape, Y_train.shape)
print('Test', X_test.shape, Y_test.shape)

# Standardize features; the scaler is fitted on the training split only
# and then applied unchanged to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Fit a Gaussian Naive Bayes classifier on the standardized training data.
NB = GaussianNB()
NB.fit(X_train, Y_train)
GaussianNB()
Predicting the values for the testing data
# Predict class labels for the test features using the fitted NB model.
Y_pred = NB.predict(X_test)
Performance evaluation
# Confusion matrix and overall accuracy of the predictions on the test split.
cm = confusion_matrix(y_true=Y_test, y_pred=Y_pred)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print(cm, acc)
[[11 0 0]
[ 0 12 1]
[ 0 0 6]] 0.9666666666666667
# Text report of per-class and averaged metrics for the test predictions.
class_rep = classification_report(y_true=Y_test, y_pred=Y_pred)
print(class_rep)
accuracy 0.97 30
macro avg 0.95 0.97 0.96 30
weighted avg 0.97 0.97 0.97 30
# Tune GaussianNB's ``var_smoothing`` with a cross-validated grid search.
#
# The search is fitted on the TRAINING split only, so the test split stays
# unseen until the final evaluation. The power transform lives inside a
# Pipeline, which means it is re-fitted on each CV training fold and is
# applied automatically whenever ``grid_NB.predict`` is called.
#
# NOTE: the outputs recorded after this cell in the document came from an
# earlier run that fitted the search on the test split; re-run to refresh.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)

# ``<step>__<param>`` addresses a parameter of a named pipeline step.
grid_param = {'nb__var_smoothing': np.logspace(0, -9, 100)}

nb_pipeline = Pipeline([
    ('power', PowerTransformer()),
    ('nb', GaussianNB()),
])
grid_NB = GridSearchCV(
    estimator=nb_pipeline,
    param_grid=grid_param,
    cv=cv,
    verbose=1,
    scoring='accuracy',
)
grid_NB.fit(X_train, Y_train)
GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=5,
random_state=1),
estimator=GaussianNB(),
param_grid={'var_smoothing': array([1.00000000e+00,
8.11130831e-01, 6.57933225e-01, 5.33669923e-01,
4.32876128e-01, 3.51119173e-01, 2.84803587e-01, 2.31012970e-01,
1.87381742e-01, 1.51991108e-01, 1.23284674e-01, 1.00000000e-01,
8.11130831e-02, 6.57933225e-02, 5.3...
1.23284674e-07, 1.00000000e-07, 8.11130831e-08, 6.57933225e-08,
5.33669923e-08, 4.32876128e-08, 3.51119173e-08, 2.84803587e-08,
2.31012970e-08, 1.87381742e-08, 1.51991108e-08, 1.23284674e-08,
1.00000000e-08, 8.11130831e-09, 6.57933225e-09, 5.33669923e-09,
4.32876128e-09, 3.51119173e-09, 2.84803587e-09, 2.31012970e-09,
1.87381742e-09, 1.51991108e-09, 1.23284674e-09, 1.00000000e-
09])},
scoring='accuracy', verbose=1)
# Best mean cross-validated accuracy found by the grid search.
grid_NB.best_score_
0.9666666666666668
# Parameter setting that achieved the best cross-validated score.
grid_NB.best_params_
{'var_smoothing': 1.0}
# Re-evaluate on the test split using the estimator selected by the grid search.
Y_pred = grid_NB.predict(X_test)
cm = confusion_matrix(y_true=Y_test, y_pred=Y_pred)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print(cm, acc)
[[11 0 0]
[ 0 13 0]
[ 0 1 5]] 0.9666666666666667
# Text report of per-class and averaged metrics for the tuned model's predictions.
cr = classification_report(y_true=Y_test, y_pred=Y_pred)
print(cr)
accuracy 0.97 30
macro avg 0.98 0.94 0.96 30
weighted avg 0.97 0.97 0.97 30