# Q 2
library("dplyr")
library("tidyr")
library("ggplot2")
library("ROCR")
library("rpart")
library("rpart.plot")
library("caret")
library("randomForest")
library("tidyverse")
library("tm")
library("SnowballC")
library("softImpute")
library("glmnet")
library("Hmisc")
library("dummies")
library('tinytex')
library('GGally')
library('gplots')
library('FNN')
library("dplyr")
library("tidyr")
library("caTools")
library("ggpubr")
library("reshape")
# ---- Load the data and build the 60/40 train/validation split ----
# The former `rm(list = ls())` call was dropped: a script should not wipe the
# caller's workspace. Restart the R session if a clean state is needed.
bank <- read.csv("UniversalBank.csv")

# Education is converted to a factor before dummy.data.frame() is applied.
bank$Education <- as.factor(bank$Education)

# Drop ZIP.Code and ID, then pass the remaining columns through
# dummy.data.frame().
bank_dummy <- dummy.data.frame(select(bank, -c(ZIP.Code, ID)))
bank_dummy$Personal.Loan <- as.factor(bank_dummy$Personal.Loan)

# NOTE(review): as.integer() truncates any fractional part of CCAvg.
# Confirm this is intended before relying on the distances computed from it.
bank_dummy$CCAvg <- as.integer(bank_dummy$CCAvg)

# Fixed seed so the random partition is reproducible.
set.seed(1)

# TODO: need to look at hints
# Sample 60% of the row names for training; the rest form the validation set.
train.index <- sample(row.names(bank_dummy), 0.6 * dim(bank_dummy)[1])
test.index <- setdiff(row.names(bank_dummy), train.index)
train.df <- bank_dummy[train.index, ]
valid.df <- bank_dummy[test.index, ]
# ---- Validation accuracy for k = 1..14 ----
# Preallocate the results table; the loop below fills it by row index, so it
# must exist before the first assignment.
accuracy.df <- data.frame(k = seq_len(14), accuracy = NA_real_)

for (i in seq_len(nrow(accuracy.df))) {
  # Column 10 is used as the class label (presumably Personal.Loan -- verify
  # against the column order produced by dummy.data.frame()).
  knn.2 <- knn(
    train = train.df[, -10],
    test = valid.df[, -10],
    cl = train.df[, 10],
    k = i,
    prob = TRUE
  )
  # Store the overall accuracy on the validation set for this k.
  accuracy.df[i, 2] <- confusionMatrix(knn.2, valid.df[, 10])$overall[["Accuracy"]]
}

accuracy.df
# Part (c): classify the validation rows with k = 3 and show the resulting
# confusion matrix. Column 10 is used as the class label.
knn.3 <- knn(
  train = train.df[-10],
  test = valid.df[-10],
  cl = train.df[[10]],
  k = 3,
  prob = TRUE
)
confusionMatrix(data = knn.3, reference = valid.df[[10]])
#part d
### Part (e): 50/30/20 train/validation/test split
# Rebuild the modelling frame from `bank` (Education must already be a factor
# at this point, as set earlier in the script).
bank_dummy <- dummy.data.frame(select(bank, -c(ZIP.Code, ID)))
bank_dummy$Personal.Loan <- as.factor(bank_dummy$Personal.Loan)

# NOTE(review): as.integer() truncates any fractional part of CCAvg.
bank_dummy$CCAvg <- as.integer(bank_dummy$CCAvg)

# TODO: need to look at hints
# 50% of the rows go to training.
set.seed(1)
train.index <- sample(rownames(bank_dummy), 0.5 * dim(bank_dummy)[1])

# 30% of all rows (drawn from those not used for training) go to validation.
set.seed(1)
valid.index <- sample(
  setdiff(rownames(bank_dummy), train.index),
  0.3 * dim(bank_dummy)[1]
)

# Whatever is left over is the test set.
test.index <- setdiff(rownames(bank_dummy), union(train.index, valid.index))

# Materialise the three partitions. Without this step train.df / valid.df
# would still hold the earlier 60/40 split and test.df would not exist.
train.df <- bank_dummy[train.index, ]
valid.df <- bank_dummy[valid.index, ]
test.df <- bank_dummy[test.index, ]

# Classify each partition using the training rows as the reference set.
# NOTE(review): k = 3 mirrors the model fitted in part (c); confirm this is
# the k that should be used here.
trainknn <- knn(
  train = train.df[, -10],
  test = train.df[, -10],
  cl = train.df[, 10],
  k = 3,
  prob = TRUE
)
validknn <- knn(
  train = train.df[, -10],
  test = valid.df[, -10],
  cl = train.df[, 10],
  k = 3,
  prob = TRUE
)
testknn <- knn(
  train = train.df[, -10],
  test = test.df[, -10],
  cl = train.df[, 10],
  k = 3,
  prob = TRUE
)

# Compare performance across the three partitions.
confusionMatrix(testknn, test.df[, 10])
confusionMatrix(validknn, valid.df[, 10])
confusionMatrix(trainknn, train.df[, 10])