Bayesian methods in ecology and evolution¶
https://bitbucket.org/mfumagal/statistical_inference
solutions for activities¶
day 1a: Bayesian thinking¶
In [ ]:
# function to calculate log-like of data D={0,1} given N={0,1}
calcLogLikeNessie <- function(D) {
# initialise
logLikeNessie <- rep(0,2)
# for each data point
for (d in D) if (d==1) logLikeNessie <- logLikeNessie + c(log(0.05), log(0.99)) else logLikeNessie <- logLikeNessie + c(log(0.95), log(0.01))
logLikeNessie
}
In [ ]:
loglike <- calcLogLikeNessie(c(1,0,1))
cat("LogLike:", loglike)
plot(x=c(0,1), y=loglike, xlab="N (random variable)", ylab="log-likelihood", type="b")
cat("\nMLE:", which.max(loglike)-1)
cat("\nLR:", diff(loglike))
In [ ]:
diff(calcLogLikeNessie(rep(1,3)))
day 1b: Bayesian concepts¶
In [ ]:
### sharper prior
## mu=2 and tau=0.5
# prior
mu <- 2
tau <- 0.5
x <- seq(-4,10,0.01)
plot(x=x, dnorm(x=x, mean=mu, sd=tau), ylim=c(0,1),
type="l", lty=1, ylab="Density", xlab=expression(theta), main="")
legend(x="topleft", legend=c(expression(pi(theta)),
expression(f(y~"|"~theta)), expression(p(theta~"|"~y))), lty=1:3) # prior
# likelihood
y <- 6
sigma <- 1
points(x=x, y=dnorm(x=y, mean=x, sd=sigma), type="l", lty=2) # likelihood
# posterior
B <- sigma^2/(sigma^2+tau^2)
postMean <- B*mu + (1-B)*y
postVar <- B*tau^2
points(x=x, y=dnorm(x=x, mean=postMean, sd=sqrt(postVar)), type="l", lty=3)
print(x[which.max(dnorm(x=x, mean=postMean, sd=sqrt(postVar)))])
print(B)
In [ ]:
### wider prior
## mu=2 and tau=2
# prior
mu <- 2
tau <- 2
x <- seq(-4,10,0.01)
plot(x=x, dnorm(x=x, mean=mu, sd=tau), ylim=c(0,0.5),
type="l", lty=1, ylab="Density", xlab=expression(theta), main="")
legend(x="topleft", legend=c(expression(pi(theta)),
expression(f(y~"|"~theta)), expression(p(theta~"|"~y))), lty=1:3) # prior
# likelihood
y <- 6
sigma <- 1
points(x=x, y=dnorm(x=y, mean=x, sd=sigma), type="l", lty=2) # likelihood
# posterior
B <- sigma^2/(sigma^2+tau^2)
postMean <- B*mu + (1-B)*y
postVar <- B*tau^2
points(x=x, y=dnorm(x=x, mean=postMean, sd=sqrt(postVar)), type="l", lty=3)
print(x[which.max(dnorm(x=x, mean=postMean, sd=sqrt(postVar)))])
print(B)
day 2a: prior distributions¶
The posterior is a Gamma distribution $G(1+4,1/(1+1/3)$ with a prior $G(1,3)$.
In [ ]:
## Gamma posterior
alpha <- 1
beta <- 3
theta <- seq(0, 20, 0.1)
prior <- dgamma(x=theta, shape=alpha, scale=beta)
y <- 4
posterior <- dgamma(x=theta, shape=y+alpha, scale=1/(1+1/beta))
plot(x=theta, y=posterior, xlab=expression(theta), ylab="Density", type="l")
lines(theta, prior, lty=3)
postdraw <- rgamma(n=1e5, shape=y+alpha, scale=1/(1+1/beta))
histdraw <- hist(postdraw, breaks=20, plot=F)
lines(histdraw, lty=3, col="grey", freq=F)
day 2b: Bayesian inference¶
In [ ]:
## confidence intervals
theta <- seq(0, 10, 0.05)
x <- rnorm(n=1e5, mean=2, sd=1)
a <- 1-0.90
# equal-tailed interval
eqi <- quantile(x, probs=c( a/2, 1-(a/2) ) )
hist(x, xlab=expression(theta), ylab="Posterior density")
abline(v=eqi, lty=2)
day 3: Bayesian computation¶
In [ ]:
source("Data/functions.R")
# function `L_times_pi(x)` will return L(x)*pi(x)
# g is a uniform envelope function
x <- seq(0,1,0.01)
# M is the set value
M <- 25000
# initiliase vector that contains accepted values
thetas <- c()
# we want N samples
N <- 1e4
while (length(thetas) < N) {
theta_j <- runif(1, 0, 1)
g_theta_j <- dunif(theta_j, 0, 1)
U <- runif(1, 0, 1)
if (M*g_theta_j*U < L_times_pi(theta_j)) thetas <- c(thetas, theta_j)
}
# plot the posterior
hist(thetas)
In [ ]:
# check with a qq-plot assuming a known posterior beta(3,10)
rawDensity <- rbeta(N, 3, 10)
qqplot(rawDensity, thetas)
abline(0,1)
day 4a: approximate Bayesian computation¶
In [ ]:
source("Data/abc.R")
# Rejection algorithm with proportions of simulations to accept
N <- 1e4
Y <- c(91.34, 89.21, 88.98)
th <- 0.05
# function to simulate is called "simulateWaterTemp"
# distance function
rho <- function(x,y) sum(sqrt((x-y)^2))/length(y)
thetas <- distances <- c()
for (i in 1:N) {
# 1. draw from prior (continuous, bounded, Normal)
theta <- 0
while (theta<80 | theta>110) {
theta <- rnorm(1, mean=90, sd=sqrt(20))
}
thetas <- c(thetas, theta)
# 2. simulate observations
ysim <- simulateWaterTemp(theta)
# 3. calculate and retain distances
distances <- c(distances, rho(ysim,Y))
}
# prior / parameter
hist(thetas)
quantile(thetas)
# relationship between parameter and distance
head(cbind(thetas,distances))
# distances
hist(distances)
accepted <- which(rank(distances, ties.method="random")/length(distances)<=th)
range(distances[accepted])
abline(v=max(distances[accepted]), lty=2)
# plot prior/posterior
par(mfrow=c(2,1))
hist(thetas, xlim=c(80,110), main="Prior")
hist(thetas[accepted], xlim=c(80,110), main="Posterior")
quantile(thetas[accepted])
# expected value
mean(thetas[accepted])
# 95% HPD
library(coda)
HPDinterval(as.mcmc(thetas[accepted]), prob=0.95)
day 5: Bayesian networks¶
$P(d^0, g^2, s^0) = P(g^2 | d^0) P(s^0 | d^0) P(d^0)$
In [ ]:
# probability of $P(d^0, g^2, s^0) = P(g^2 | d^0) P(s^0 | d^0) P(d^0)$
0.34 * 0.95 * 0.70
In [ ]:
# confidence $P(d^1, g^2, s^1)$
(0.3/0.7)*(0.17/0.34)*(0.80/0.05)
In [ ]: