Regression Splines
Regression Splines
200
100
50
wage
300
library(ISLR)
attach(Wage)
library(splines)
# Cubic regression spline of wage on age with knots fixed at ages 25, 40
# and 60, plotted over the raw data with +/- 2 standard-error bands.
# `age` and `wage` are the Wage columns made visible by attach(Wage).
agelims <- range(age)
# Prediction grid from the smallest to the largest observed age
# (seq() with two arguments steps by 1).
age.grid <- seq(agelims[1], agelims[2])
fit <- lm(wage ~ bs(age, knots = c(25, 40, 60)), data = Wage) # prespecified knots
# `se.fit` is spelled out in full (argument and result element) instead of
# relying on partial matching of `se`, and TRUE replaces the reassignable T.
pred <- predict(fit, newdata = list(age = age.grid), se.fit = TRUE)
plot(age, wage, col = " gray ")
lines(age.grid, pred$fit, col = "blue", lwd = 2)
# Dashed curves: fitted value plus/minus two standard errors.
lines(age.grid, pred$fit + 2 * pred$se.fit, lty = "dashed")
lines(age.grid, pred$fit - 2 * pred$se.fit, lty = "dashed")
20
30
40
50
age
60
70
80
200
50
100
wage
300
# Overlay a second spline fit (red) on the cubic regression spline (blue).
# NOTE(review): the original section used `fit2` without defining it first,
# so running the script top to bottom failed with "object 'fit2' not found".
# A natural spline with 4 degrees of freedom is assumed here -- confirm that
# this is the model the slide intended.
fit2 <- lm(wage ~ ns(age, df = 4), data = Wage)
# `se.fit = TRUE` written out in full rather than the partial match `se=T`.
pred2 <- predict(fit2, newdata = list(age = age.grid), se.fit = TRUE)
plot(age, wage, col = " gray ")
lines(age.grid, pred$fit, col = "blue", lwd = 2)
lines(age.grid, pred2$fit, col = "red", lwd = 2)
20
30
40
50
60
70
age
Smoothing Spline
# Two smoothing splines of wage on age:
#   fit  -- smoothness fixed by requesting 16 degrees of freedom
#   fit2 -- smoothness chosen by cross-validation
# TRUE is used instead of T, which is an ordinary variable and can be
# reassigned.
fit <- smooth.spline(age, wage, df = 16)
fit2 <- smooth.spline(age, wage, cv = TRUE)
## Warning in smooth.spline(age, wage, cv = T): cross-validation with non-unique 'x' values seems doubtful
2
80
# Show the degrees of freedom stored on the cross-validated smoothing spline.
fit2[["df"]]
## [1] 6.794596
# Scatter of the raw data with both smoothing-spline fits drawn on top.
plot(
  age, wage,
  xlim = agelims,
  cex = 0.5,
  col = "darkgrey",
  main = "Smoothing Spline"
)
# Red: fit with 16 df; blue: cross-validated fit.
lines(fit, col = "red ", lwd = 2)
lines(fit2, col = " blue", lwd = 2)
legend(
  "topright",
  legend = c("16 DF", "6.8 DF"),
  col = c("red", "blue"),
  lty = 1,
  lwd = 2,
  cex = 0.8
)
Smoothing Spline
200
50
100
wage
300
16 DF
6.8 DF
20
30
40
50
60
70
80
age
Local Regression
# Local regression of wage on age at two spans, plotted over the raw data.
fit <- loess(wage ~ age, span = 0.2, data = Wage)
fit2 <- loess(wage ~ age, span = 0.5, data = Wage)
plot(
  age, wage,
  xlim = agelims,
  cex = 0.5,
  col = "darkgrey",
  main = "Local Regression"
)
# Evaluate each fit on the age grid; red is span 0.2, blue is span 0.5.
lines(
  age.grid,
  predict(fit, newdata = data.frame(age = age.grid)),
  col = "red", lwd = 2
)
lines(
  age.grid,
  predict(fit2, newdata = data.frame(age = age.grid)),
  col = "blue", lwd = 2
)
legend(
  "topright",
  legend = c("Span = 0.2", "Span = 0.5"),
  col = c("red", "blue"),
  lty = 1,
  lwd = 2,
  cex = 0.8
)
Local Regression
200
50
100
wage
300
Span = 0.2
Span = 0.5
20
30
40
50
60
70
80
age
Reference:
James, Gareth, et al. An Introduction to Statistical Learning. New
York: Springer, 2013.