2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r
#####################################################################################
##
##
##
##
##
#####################################################################################
##
R code for ‘Histograms’ lectures ##
##
STAT221 S2 2020 Term 4 ##
##
set.seed(1)
x = rnorm(100)
# display 4 subgraphs in one (2 rows by 2 columns)
par(mfrow = c(2, 2))
hist(x, main = “Frequency Histogram”)
hist(x, freq = F, main= “Density Histogram”)
hist(x, prob = T, breaks=5, main = “not enough classes”)
hist(x, prob = T, nclass = 30, main = “too many classes”)
# remember to change back to the default layout afterwards (1 row by 1 column)
par(mfrow = c(1,1))
#####################################################################################
par(mfrow = c(1, 2))
H = hist(x)
H$counts = H$counts / sum(H$counts)
plot(H, freq=TRUE, ylab=”Relative Frequency”,
main=”Relative Frequency Histogram”)
par(mfrow = c(1, 1))
#####################################################################################
set.seed(15)
x = rnorm(100)
H = hist(x)
H # the name of the object we want to look at
#####################################################################################
set.seed(1)
x = rnorm(100)
hist(x, freq=F)
x.to.plot = seq(-5,5,0.01)
y.to.plot = dnorm(x.to.plot, mean=0, sd=1)
lines(x.to.plot, y.to.plot)
#####################################################################################
hist(x, freq=F)
x.to.plot = seq(-5, 5, 0.4)
y.to.plot = dnorm(x.to.plot, mean=0, sd=1)
lines(x.to.plot, y.to.plot)
#####################################################################################
hist(x, freq=F)
curve(dnorm(x), from = -2.5, to = 2.5, add = TRUE)
#####################################################################################
set.seed(1)
x = rgamma(100, shape = 3, scale = 8)
https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r 1/6
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r
#####################################################################################
par(mfrow = c(2, 2))
hist(x, main = “”, breaks = 3)
hist(x, main = “”, breaks = 4)
hist(x, main = “”, breaks = 8 )
hist(x, main = “”, breaks = 20)
par(mfrow = c(1,1))
#####################################################################################
set.seed(1)
x = rgamma(30, shape = 3, scale = 8)
par(mfrow = c(2, 2))
hist(x, main = “”, breaks = c(0, 10, 20, 30, 40, 50, 60, 70, 80))
hist(x, main = “”, breaks = c(-8, 2, 12, 22, 32, 42, 52, 62, 72, 82))
hist(x, main = “”, breaks = c(-6, 4, 14, 24, 34, 44, 54, 64, 74, 84))
hist(x, main = “”, breaks = c(-4, 6, 16, 26, 36, 46, 56, 66, 76, 86))
par(mfrow = c(1,1))
#####################################################################################
## START OF LECTURE 2
#####################################################################################
set.seed(1)
x = rnorm(100)
hist(x, freq=F)
min(x)
max(x)
#####################################################################################
(max(x)-min(x))/ceiling(1+log2(100))
#####################################################################################
set.seed(1)
x = rnorm(100)
h = (max(x) – min(x)) / ceiling(1 + log2(100)) # Sturges Rule
breakpoints = seq(min(x), max(x) + h, h)
hist(x, breaks=breakpoints, freq=F)
#####################################################################################
set.seed(1)
x = rnorm(100)
h = (max(x) – min(x)) / ceiling(1 + log2(100)) # Sturges Rule
breakpoints = seq(min(x), max(x) + h, h)
par(mfrow=c(1,2))
hist(x, freq=F, main=”Prettified”)
hist(x, breaks=breakpoints, freq=F, main=”Using Sturges rule”)
par(mfrow=c(1,1))
https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r 2/6
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r
#####################################################################################
# so we were able to use H$breaks
H$breaks
# and H$density
H$density
# to do this calculation.
#####################################################################################
sum(H$density * (H$breaks[2] – H$breaks[1]))
#####################################################################################
set.seed(1)
x = rnorm(100)
H = hist(x, freq=F)
x.to.find = 0.6
min(which(H$breaks >= x.to.find))
H$breaks[7:8]
H$density[7]
#####################################################################################
library(MASS)
data(survey)
breakpoints=c(16,17,18,19,20,21,25,30,45,65,75)
H=hist(survey$Age,breaks=breakpoints) # defaults to density for unequal bins
#####################################################################################
sum(H$density * diff(H$breaks))
#####################################################################################
## START OF LECTURE 3
#####################################################################################
v = 10 v
v <- 11 v
12 -> v v
#####################################################################################
methods(summary)
#####################################################################################
set.seed(1)
x = rnorm(100)
H = hist(x, breaks=5, freq=F, main=”Frequency polygon”, xlim=c(-4,4))
lines(H$mid, H$density, lwd=2, col=”blue”)
#####################################################################################
set.seed(1)
x = rnorm(100)
https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r 3/6
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r
H = hist(x, breaks=seq(-5,5), freq=F, main=”Extra Breaks Added”)
lines(H$mid, H$density, lwd=2, col=”blue”)
#####################################################################################
par(mfrow = c(1,2))
set.seed(1)
x = rnorm(100)
H = hist(x, breaks=seq(-5,5, 0.5), freq=F, main=”Extra Breaks Added”)
lines(H$mid, H$density, lwd=2, col=”blue”)
H = hist(x,breaks=5,freq=F,main=”Original plot”,xlim=c(-4,4))
lines(H$mid, H$density, lwd=2, col=”blue”)
par(mfrow = c(1,1))
#####################################################################################
library(ash)
x <- rnorm(100)
ab <- c(-5,5)
bins <- bin1(x, ab, 50)
f <- ash1(bins, m=5)
f
hist(x, freq=F, xlim=c(-3,3))
lines(f$x, f$y)
# data
# bin interval
# bin x into 50 bins over ab
# where m is a smoothing parameter
#####################################################################################
set.seed(1)
n=10
x=rnorm(n)
h=0.25
plot(x, rep(0,n), ylim=c(0,1), xlim=c(-2.5, 2.5),xlab="x", ylab="Density")
x.to.plot=seq(-5, 5, 0.01)
for (i in 1:n) {
lines(x.to.plot, dnorm(x.to.plot, x[i],h)/n, col="red")
lines(rep(x[i], 2), c(0, dnorm(x[i], x[i], h))/n, lty=2)
}
temp <- function(x.to.plot){mean(dnorm(x.to.plot, x, h))}
lines(x.to.plot, sapply(x.to.plot, temp), col="blue")
#####################################################################################
set.seed(2)
n=10
x=sort(rnorm(n))
allx=c(-100,x,100)
y=(1:n)/n
ally=c(0,y,1)
plot(allx,ally,xlim=c(-2.5,2.5),ylim=c(-0.05,1.05),xlab="x",ylab="cdf F(x)",type="s")
points(x,y,pch=19)
points(x,c(0,y[-n]))
rug(x)
#####################################################################################
https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r 4/6
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r
library(evmix)
xx = seq(-2, 2, 0.01)
plot(xx, kdgaussian(xx), type = "l", col = "black",ylim = c(0, 1.4),xlab="u",ylab="k(u)")
lines(xx, kduniform(xx), col = "grey")
lines(xx, kdtriangular(xx), col = "blue")
lines(xx, kdepanechnikov(xx), col = "darkgreen")
lines(xx, kdbiweight(xx), col = "red")
lines(xx, kdtriweight(xx), col = "purple")
lines(xx, kdtricube(xx), col = "orange")
lines(xx, kdparzen(xx), col = "salmon")
lines(xx, kdcosine(xx), col = "cyan")
lines(xx, kdoptcosine(xx), col = "goldenrod")
legend("topright", c("Gaussian", "uniform", "triangular", "Epanechnikov",
"biweight", "triweight", "tricube", "Parzen", "cosine", "optcosine"), lty = 1,
col = c("black", "grey", "blue", "darkgreen", "red", "purple", "orange",
"salmon", "cyan", "goldenrod"))
#####################################################################################
library(evmix)
set.seed(1)
n = 20
x = rnorm(n)
h = 0.25
x.to.plot = seq(-4,4,0.001)
y.to.plot = dkden(x.to.plot, x, h, kernel="rectangular")
par(mfrow=c(1,2))
# kernel density estimator
plot(x.to.plot, y.to.plot, type="l",xlab="x",ylab="Density", ylim=c(0,0.75),
main = "KDE of x")
rug(x) # adds a tickmark on x axis where each datapoint is located
# density histogram
hist(x, freq=F, xlim=c(-4,4), ylim=c(0,0.75))
par(mfrow=c(1,1))
#####################################################################################
library(evmix)
set.seed(1)
n = 20
x = rnorm(n)
h = 0.3
x.to.plot = seq(-4, 4, 0.001)
plot(x.to.plot, dkden(x.to.plot,x,h, kernel="triangular"), col="blue",type="l",
ylim=c(0,0.8),xlab="x",ylab="Density")
lines(x.to.plot, dkden(x.to.plot,x,h, kernel="epanechnikov"), col="darkgreen")
lines(x.to.plot, dkden(x.to.plot,x,h, kernel="biweight"), col="red")
lines(x.to.plot, dkden(x.to.plot,x,h, kernel="triweight"), col="purple")
lines(x.to.plot, dkden(x.to.plot,x,h, kernel="tricube"), col="orange")
lines(x.to.plot, dkden(x.to.plot,x,h, kernel="parzen"), col="salmon")
lines(x.to.plot, dkden(x.to.plot,x,h, kernel="cosine"), col="cyan")
lines(x.to.plot, dkden(x.to.plot,x,h, kernel="optcosine"), col="goldenrod")
rug(x)
legend("topright", c("triangular", "Epanechnikov",
"biweight", "triweight", "tricube", "Parzen", "cosine", "optcosine"),
lty = 1, col = c("blue", "darkgreen", "red", "purple", "orange",
https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r 5/6
2020/9/29 https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r
"salmon", "cyan", "goldenrod"))
#####################################################################################
library(evmix)
set.seed(1)
n=20
x=rnorm(n)
x.to.plot=seq(-4, 4, 0.0001)
par(mfrow=c(2, 2))
plot(x.to.plot, dkden(x.to.plot,x,0.05), main="Bandwidth h=0.05",
type="l", ylim=c(0,1.1),xlab="x",ylab="Density")
plot(x.to.plot, dkden(x.to.plot,x,0.1), main="Bandwidth h=0.1",
type="l", ylim=c(0,1.1),xlab="x",ylab="Density")
plot(x.to.plot, dkden(x.to.plot,x,0.3), main="Bandwidth h=0.3",
type="l", ylim=c(0,1.1),xlab="x",ylab="Density")
plot(x.to.plot, dkden(x.to.plot,x,1), main="Bandwidth h=1",
type="l", ylim=c(0,1.1),xlab="x",ylab="Density")
par(mfrow=c(1, 1))
#####################################################################################
https://learn.canterbury.ac.nz/pluginfile.php/3318991/mod_resource/content/1/Histograms.r 6/6