## ——————————————
##
## ST4060 / ST6015 / ST6040
## R script – Fri 5 Nov 2021 lecture
## Eric Wolsztynski
##
## Nonparametric regression / smoothing
##
## ——————————————
rm(list=ls())
# ————————————————————
# Nonparametric regression (Section 4)
# Local polynomial regression
f_value = 1
plot(Prestige$income, Prestige$prestige, xlab=”Average income”, ylab=”Prestige”)
lines(lowess(Prestige$income, Prestige$prestige, f=as.numeric(f_value), iter=0),
lwd=2, col=’blue’)
?lowess
?loess
attach(Prestige)
mod.lo = loess(prestige~income+education, span=.5, degree=2)
summary(mod.lo)
#———————————————————————-
### Finding/modelling nonlinear patterns
# Example dataset: airquality
dat = na.omit(airquality)
x = dat$Temp
y = dat$Ozone
par(font=2, font.lab=2, font.axis=2) # use bold fonts in figure
plot(x, y, pch=20, xlab=”Temperature (degrees F)”, ylab=”Ozone (ppb)”,
xlim=c(55,105))
#### Pick a nonlinear model and fit it using nls()
nlso = nls(y~a*exp(b*x), start=list(a=1, b=0.01))
summary(nlso); coef(nlso)
points(x, fitted(nlso), pch=20, col=4)
# how to predict from this fit:
newx = seq(95,100,by=1)
pred.nls = predict(nlso, newdata=list(x=newx))
points(newx, pred.nls, pch=15, col=2)
#### Pick a nonlinear model and fit it using optim()
# First, write a function that evaluates the criterion:
criterion <- function(param,x,y){
# The function has to be defined this way
# (with param as its 1st argument)
# if we are going to apply optim() to it... cf. ?optim
a = param[1]
b = param[2]
return( mean( (y - a*exp(b*x))^2 ) )
}
# Then, apply optim() to it:
oo = optim(par=list(a=1, b=0.01), fn=criterion, x=x, y=y)
# now reconstruct fitted values from optim() output:
a.hat = oo$par[1]
b.hat = oo$par[2]
y.hat = a.hat * exp(b.hat*x)
points(x, y.hat, pch=21, col=2, cex=1.2)
legend("topleft", legend=c("data","NLS fit","OPTIM fit"),
col=c(1,4,2), pch=c(20,20,21), bty='n')
#----------------------------------------------------------------------
#### Finding/modelling patterns non-parametrically
#### Using local polynomial regression
lo = loess(y~x)
names(lo)
# check output values are in the same order as input values:
summary(c(lo$x - x)) # ok, they are
summary(c(lo$y - y)) # ok, they are
plot(x, y, pch=20, col=1,
xlab="Temperature (degrees F)", ylab="Ozone (ppb)",
xlim=c(55,105))
points(x, fitted(nlso), pch=20, col=4)
points(x, lo$fitted, col='navy', pch=15)
#
# ... or...
low = lowess(x, y)
names(low)
# check output values are in the same order as input values:
summary(c(low$x - x)) # NO, THEY ARE NOT
summary(c(low$x - sort(x))) # coz lowess() sorted the input values!
lines(sort(x), low$y, col='red', lwd=3)
legend("topleft", legend=c("data","NLS fit","loess fit","lowess fit"),
lty=c(0,0,0,1), col=c('grey','blue','navy','red'), pch=c(20,20,15,-1), lwd=3,
bty='n')
#
# NB: to work out their MSE's:
sqrt( mean( (y-fitted(nlso))^2 ) )
sqrt( mean( (y-fitted(lo))^2 ) )
sqrt( mean( (y[order(x)]-low$y)^2 ) )
# how to predict from the regression polynomial:
# ?predict.loess # exists...
# ?predict.lowess # does NOT exist! (we'd have to use approx() to interpolate)
newx = seq(95,100,by=1)
# abline(v=newx, col=1, lty=3)
pred.lo = predict(lo, newdata=data.frame(x=newx))
points(newx, pred.lo, pch=15, col=3, cex=2)
pred.low = approx(low$x, low$y, xout=newx, rule=2)$y
points(newx, pred.low, pch='x', col='navy', cex=2)
# extrapolating the loess output:
interp.lo = approx(x=lo$x, y=lo$fitted, xout=newx, rule=2)$y
points(newx, interp.lo, pch=20, col='orange', cex=1.5)
#### Using regression splines
library(splines) # contains function bs()
#
# we can compute a B-spline:
KN = quantile(x, c(0.15,0.40, 0.60, 0.70, 0.80, 0.90))
BM = bs(x, knots=KN)
B.spline = lm(y~BM)
#
# or a P-spline:
P.spline = smooth.spline(x, y, cv=TRUE) # just ignore that warning message 🙂
#
# check it out:
plot(x, y, pch=20, col=8, xlab="Temperature (degrees F)", ylab="Ozone (ppb)",
xlim=c(55,105))
points(x, fitted(nlso), pch=20, col=4)
points(x, fitted(B.spline), pch=20, col=2)
points(x, fitted(P.spline), pch=15, col='navy')
legend("topleft", legend=c("data","NLS fit","B-spline","P-spline"),
col=c(8,4,2,'navy'), pch=c(20,20,20,15), bty='n')
# how to predict from the P-spline:
pred.pspline = predict(P.spline, x=newx)
points(pred.pspline, col=3, pch=14)