Tutorial 8
Tutorial 8
DSA1101
Introduction to Data Science
October 26, 2018
# Training set: comma-separated file with a header row.
banktrain <- read.table("bank-sample.csv", header = TRUE, sep = ",")

# Columns to drop; the same set is removed from the training and test data
# so that both data frames keep the same remaining columns.
drops <- c("balance", "day", "campaign", "pdays", "previous", "month")
banktrain <- banktrain[, !(names(banktrain) %in% drops)]

# Testing set: read the same way and drop the same columns.
banktest <- read.table("bank-sample-test.csv", header = TRUE, sep = ",")
banktest <- banktest[, !(names(banktest) %in% drops)]
1
(b) Build the naïve Bayes classifier based on the training dataset, and
perform prediction for the test dataset.
library(e1071)

# Build the naive Bayes classifier: `subscribed` is the response and every
# other column of `banktrain` is used as a predictor.
nb_model <- naiveBayes(subscribed ~ ., data = banktrain)

# Predict on the testing set. With type = "raw", e1071 returns the class
# probabilities for each row rather than the predicted class label.
nb_prediction <- predict(
  nb_model,
  # Remove column "subscribed"; this relies on it being the last column
  # of `banktest`.
  banktest[, -ncol(banktest)],
  type = "raw"
)
(c) Plot the ROC curve for the naïve Bayes classifier.
library(ROCR)

# Score each test row by its predicted probability for the class "yes".
score <- nb_prediction[, "yes"]

# Logical vector: TRUE where the observed label in the test set is "yes".
actual_class <- banktest$subscribed == "yes"
pred <- prediction(score, actual_class)

# ROC curve: true positive rate plotted against false positive rate.
perf <- performance(pred, "tpr", "fpr")
plot(
  perf,
  lwd = 2,
  xlab = "False Positive Rate (FPR)",
  ylab = "True Positive Rate (TPR)"
)
# Dotted grey diagonal (intercept 0, slope 1) as a visual reference line.
abline(a = 0, b = 1, col = "gray50", lty = 3)