Practical - 5 - 52
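The mount message below comes from the standard Colab drive mount, whose cell is omitted in this extract; a minimal sketch:

from google.colab import drive
drive.mount('/content/drive')   #---prompts for authorization, then prints the message below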
Mounted at /content/drive
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
df=pd.read_csv('...')  #---dataset path not shown in the original
X=df.drop('TARGET CLASS',axis=1)
y=df['TARGET CLASS']
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=100)
----Arguments----
KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',   #---'uniform', 'distance', or a callable
    leaf_size=30,
    metric='minkowski',
    n_jobs=None,
)
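The pred used in the prints below is never defined in this extract; a minimal sketch of the missing cell, assuming the default k=5 classifier was fit on the unscaled split:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report

knn=KNeighborsClassifier(n_neighbors=5)   #---assumed default k; the original does not show this cell
knn.fit(X_train,y_train)
pred=knn.predict(X_test)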
print(confusion_matrix(y_test,pred))
print(classification_report(y_test,pred))
[[98 12]
[ 8 82]]
              precision    recall  f1-score   support
        (remaining report rows truncated in the original)
KNN using Standard Scaler
#---Here we do not know what the features represent, so how should the data points be grouped?
#---If some features take much larger values than others, feature scaling is required; otherwise those features dominate
#---the distance between points and outweigh everything else (a small numeric illustration follows)
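A small illustration, not from the original, of how an unscaled large-magnitude feature dominates Euclidean distance; the feature values are made up:

import numpy as np
from sklearn.preprocessing import StandardScaler

pts=np.array([[50000., 25.],
              [51000., 60.],
              [50100., 26.]])

#---Unscaled: the first column dominates the distance
print(np.linalg.norm(pts[0]-pts[1]))   #---~1000.6; the 35-unit gap in the second feature barely registers
print(np.linalg.norm(pts[0]-pts[2]))   #---~100.0

#---After standardization both features contribute on comparable terms
scaled=StandardScaler().fit_transform(pts)
print(np.linalg.norm(scaled[0]-scaled[1]))
print(np.linalg.norm(scaled[0]-scaled[2]))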
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
X=df.drop('TARGET CLASS',axis=1)
y=df['TARGET CLASS']
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=100)
scaler=StandardScaler()
scaler.fit(X_train)  #---fit on the training features only; the target column was already dropped, as we never scale the labels
StandardScaler()
scaled_features_X_train=scaler.transform(X_train)
scaled_features_X_test=scaler.transform(X_test)
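The array printed below is the k=1 prediction on the scaled test split (step 4 below refers to it as pred_1); a sketch of the omitted cell:

knn=KNeighborsClassifier(n_neighbors=1)
knn.fit(scaled_features_X_train,y_train)
pred_1=knn.predict(scaled_features_X_test)
pred_1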
array([0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1,
1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0,
0, 1])
4) Find the Classification Report for KNN with k=1 using scaled data
print(confusion_matrix(y_test,pred_1))
print(classification_report(y_test,pred_1))
#---Here you can see that the number of misclassifications in the scaled dataset (17) is fewer than in the unscaled dataset (20).
error_rate=[]
for i in range(1,40):
    knn=KNeighborsClassifier(n_neighbors=i)
    knn.fit(scaled_features_X_train,y_train)
    pred_i=knn.predict(scaled_features_X_test)
    error_rate.append(np.mean(pred_i != y_test))  #---mean of the mismatches between predictions and actual labels, i.e. the error rate
print(error_rate)
[0.085, 0.09, 0.09, 0.08, 0.09, 0.075, 0.09, 0.075, 0.095, 0.075, 0.095, 0.075, 0.08, 0.08, 0.075, 0.085, 0.085, 0.085, 0.08, 0.085
import matplotlib.pyplot as plt
plt.figure(figsize=(10,6))
plt.plot(range(1,40),error_rate,color='blue',linestyle='--',marker='o')
plt.title('Error Rate vs K value (1 to 40)')
plt.xlabel('K value')
plt.ylabel('Error rate')
plt.grid()
plt.show()
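Not in the original: the best k can also be read directly off the error list rather than eyeballed from the plot; a minimal sketch:

best_k=int(np.argmin(error_rate))+1   #---+1 because the loop above starts at k=1
print(best_k, error_rate[best_k-1])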
knn=KNeighborsClassifier(n_neighbors=32)
knn.fit(scaled_features_X_train,y_train)
pred_32=knn.predict(scaled_features_X_test)
#---Compare the confusion matrix for k=1 and for k=32: the larger k classifies better (14 misclassifications vs 17)
print(confusion_matrix(y_test,pred_32))
[[99 11]
[ 3 87]]