##
## ST4060 / ST6015 / ST6040
## Tutorial exercises – Section 1
## Eric Wolsztynski
##
# ————————————————————
# Question 1.1
# generate a random sample from Exp(0.5):
set.seed(4060)
N = 1000
x = rexp(N, rate=.5)
hist(x, col=’pink’, freq=FALSE)
# add theoretic pdf:
grid = seq(0, 15, length=1000)
lines(grid, dexp(grid, rate=0.5), lwd=2, col=1)
# add Kernel Density Estimate
# (= sample distribution estimate):
f.hat = density(x)
lines(f.hat, col=’navy’, lwd=3)
# more info on KDE:
?density
# ————————————————————
# Question 1.2
# Recall: to simulate N data points from a given model,
# say the Normal N(10, 3^2):
N = 100
x = rnorm(N, mean=10, sd=3)
hist(x)
# what the Student-t pdf looks like:
u = seq(-10, 10, length=1000)
plot(u, dnorm(u), lwd=3, t=’l’, col=2)
lines(u, dt(u, df=3), lwd=3)
# WRONG methods:
set.seed(4060)
x1 = rnorm(N)
x2 = rt(N, df=3)
#
# somewhat WRONG:
x = c(x1[1:95], x2[1:5])
x = sample(x, replace=FALSE)
#
# very WRONG:
eps = .95
y = eps*x1 + (1-eps)*x2
#
par(mfrow=c(2,1))
hist(x)
hist(y)
# Method 1:
set.seed(4060)
x1 = rnorm(N)
x2 = rt(N, df=3)
# use a mixing rule:
u = runif(N, 0, 1) # check out mean(u
x[contamination.locations] = x2[contamination.locations]
# Method 2:
set.seed(4060)
u = runif(N, 0, 1) # mixing parameter
x = numeric(N)
for(i in 1:N){
if(u[i] > eps){
# then replace these values with contamination
x[i] = rt(1, df=3)
} else {
x[i] = rnorm(1)
}
}
hist(x)
# Now generate 3 samples:
set.seed(4060)
eps1 = .95
eps2 = .85
eps3 = .55
u1 = runif(N, 0, 1) # mixing parameter
u2 = runif(N, 0, 1) # mixing parameter
u3 = runif(N, 0, 1) # mixing parameter
x1 = rnorm(N)
x2 = rnorm(N)
x3 = rnorm(N)
for(i in 1:N){
if(u1[i] > eps1){ # replace these values with contamination
x1[i] = rt(1, df=3)
}
if(u2[i] > eps2){
x2[i] = rt(1, df=3)
}
if(u3[i] > eps3){
x3[i] = rt(1, df=3)
}
}
par(mfrow=c(3,1))
hist(x1)
hist(x2)
hist(x3)
# ————————————————————
# Tutorial question 1.3:
N = 1000 # sample size
#
# Ex: N(2,1)
set.seed(4060)
x = rnorm(N, mean=2, sd=1)
fx = density(x)
hist(x, col=’pink’, freq=FALSE)
# outline of KDE for x:
lines(fx, col=1, lwd=2)
# outline of true pdf of x:
u = seq(-1,5, length=1000)
lines(u ,dnorm(u, mean=2, sd=1), col=’navy’, lwd=2)
#
# Ex: t(3)
set.seed(4060)
x = rt(N, df=3)
fx = density(x)
hist(x, col=’pink’, freq=FALSE, ylim=c(0,.5))
# outline of KDE for x:
lines(fx, col=1, lwd=2)
# outline of true pdf of x:
u = seq(-15,15, length=1000)
lines(u ,dt(u, df=3), col=’navy’, lwd=2)
#
# Ex: Exp(2)
set.seed(4060)
x = rexp(N, rate=2)
fx = density(x)
hist(x, col=’pink’, freq=FALSE, ylim=c(0,1.5))
# outline of KDE for x:
lines(fx, col=1, lwd=2)
# outline of true pdf of x:
u = seq(0,5, length=1000)
lines(u ,dexp(u,rate=2), col=’navy’, lwd=2)
# ————————————————————
# Question 1.4
# install.packages(‘KernSmooth’)
library(KernSmooth)
# check out package vignette:
# https://cran.r-project.org/web/packages/KernSmooth/KernSmooth.pdf
?bkde2D
head(faithful)
plot(faithful, pch=20, cex=2)
# (a)
bw = apply(faithful, 2, sd)/2
fx = bkde2D(faithful, bandwidth=bw)
# names(fx)
contour(fx$x1, fx$x2, fx$fhat)
plot(faithful, pch=20, cex=2, xlim=c(0,7))
contour(fx$x1, fx$x2, fx$fhat, add=TRUE)
persp(fx$x1, fx$x2, fx$fhat, shade=.15, theta=-30, phi=15)
# (b)
# ?locpoly
lp.fit = locpoly(faithful$eruptions, faithful$waiting, bandwidth=1)
plot(faithful, pch=20)
lines(lp.fit, lwd=3, col=3)
# ————————————————————
# Question 1.5
library(MASS)
# (a) generate the sample
N = 1000
set.seed(4060)
Mu = c(1,4)
Sigma = c(2,1)
X1 = rnorm(N, mean=Mu[1], sd=Sigma[1])
X2 = rnorm(N, mean=Mu[2], sd=Sigma[2])
X = cbind(X1,X2)
plot(X, pch=20)
# (b) 2D KDE from MASS::kde2d
bw = c(.5, .5) * 4
f.kde2d = kde2d(X1, X2, h=bw)
plot(X, pch=20)
contour(f.kde2d, add=TRUE, col=’tomato’, lwd=2)
# (c) 2D KDE from Kernel product estimator
f1 = density(X1, bw=bw[1],
from=min(f.kde2d$x), to=max(f.kde2d$x), n=length(f.kde2d$x))
f2 = density(X2, bw=bw[2],
from=min(f.kde2d$y), to=max(f.kde2d$y), n=length(f.kde2d$y))
fhat = outer(f1$y, f2$y)
#
gx1 = seq(from=min(f.kde2d$x), to=max(f.kde2d$x), length=length(f.kde2d$x))
gx2 = seq(from=min(f.kde2d$y), to=max(f.kde2d$y), length=length(f.kde2d$y))
plot(X, pch=20)
contour(f.kde2d, add=TRUE, col=’tomato’, lwd=2)
contour(gx1, gx2, fhat, add=TRUE, col=’navy’, lwd=2)
par(mfrow=c(2,2))
persp(f.kde2d, shade=.15, theta=30, phi=15)
persp(f.kde2d, shade=.15, theta=-60, phi=15)
persp(gx1, gx2, fhat, shade=.15, theta=30, phi=15)
persp(gx1, gx2, fhat, shade=.15, theta=-60, phi=15)