程序代写代做 Bayesian algorithm Bayesian network 


Bayesian methods in ecology and evolution¶
https://bitbucket.org/mfumagal/statistical_inference
solutions for activities¶

day 1a: Bayesian thinking¶
In [ ]:
# function to calculate log-like of data D={0,1} given N={0,1}
calcLogLikeNessie <- function(D) { # initialise logLikeNessie <- rep(0,2) # for each data point for (d in D) if (d==1) logLikeNessie <- logLikeNessie + c(log(0.05), log(0.99)) else logLikeNessie <- logLikeNessie + c(log(0.95), log(0.01)) logLikeNessie } In [ ]: loglike <- calcLogLikeNessie(c(1,0,1)) cat("LogLike:", loglike) plot(x=c(0,1), y=loglike, xlab="N (random variable)", ylab="log-likelihood", type="b") cat("\nMLE:", which.max(loglike)-1) cat("\nLR:", diff(loglike)) In [ ]: diff(calcLogLikeNessie(rep(1,3))) day 1b: Bayesian concepts¶ In [ ]: ### sharper prior ## mu=2 and tau=0.5 # prior mu <- 2 tau <- 0.5 x <- seq(-4,10,0.01) plot(x=x, dnorm(x=x, mean=mu, sd=tau), ylim=c(0,1), type="l", lty=1, ylab="Density", xlab=expression(theta), main="") legend(x="topleft", legend=c(expression(pi(theta)), expression(f(y~"|"~theta)), expression(p(theta~"|"~y))), lty=1:3) # prior # likelihood y <- 6 sigma <- 1 points(x=x, y=dnorm(x=y, mean=x, sd=sigma), type="l", lty=2) # likelihood # posterior B <- sigma^2/(sigma^2+tau^2) postMean <- B*mu + (1-B)*y postVar <- B*tau^2 points(x=x, y=dnorm(x=x, mean=postMean, sd=sqrt(postVar)), type="l", lty=3) print(x[which.max(dnorm(x=x, mean=postMean, sd=sqrt(postVar)))]) print(B) In [ ]: ### wider prior ## mu=2 and tau=2 # prior mu <- 2 tau <- 2 x <- seq(-4,10,0.01) plot(x=x, dnorm(x=x, mean=mu, sd=tau), ylim=c(0,0.5), type="l", lty=1, ylab="Density", xlab=expression(theta), main="") legend(x="topleft", legend=c(expression(pi(theta)), expression(f(y~"|"~theta)), expression(p(theta~"|"~y))), lty=1:3) # prior # likelihood y <- 6 sigma <- 1 points(x=x, y=dnorm(x=y, mean=x, sd=sigma), type="l", lty=2) # likelihood # posterior B <- sigma^2/(sigma^2+tau^2) postMean <- B*mu + (1-B)*y postVar <- B*tau^2 points(x=x, y=dnorm(x=x, mean=postMean, sd=sqrt(postVar)), type="l", lty=3) print(x[which.max(dnorm(x=x, mean=postMean, sd=sqrt(postVar)))]) print(B) day 2a: prior distributions¶ The posterior is a Gamma distribution $G(1+4,1/(1+1/3)$ with a prior $G(1,3)$. In [ ]: ## Gamma posterior alpha <- 1 beta <- 3 theta <- seq(0, 20, 0.1) prior <- dgamma(x=theta, shape=alpha, scale=beta) y <- 4 posterior <- dgamma(x=theta, shape=y+alpha, scale=1/(1+1/beta)) plot(x=theta, y=posterior, xlab=expression(theta), ylab="Density", type="l") lines(theta, prior, lty=3) postdraw <- rgamma(n=1e5, shape=y+alpha, scale=1/(1+1/beta)) histdraw <- hist(postdraw, breaks=20, plot=F) lines(histdraw, lty=3, col="grey", freq=F) day 2b: Bayesian inference¶ In [ ]: ## confidence intervals theta <- seq(0, 10, 0.05) x <- rnorm(n=1e5, mean=2, sd=1) a <- 1-0.90 # equal-tailed interval eqi <- quantile(x, probs=c( a/2, 1-(a/2) ) ) hist(x, xlab=expression(theta), ylab="Posterior density") abline(v=eqi, lty=2) day 3: Bayesian computation¶ In [ ]: source("Data/functions.R") # function `L_times_pi(x)` will return L(x)*pi(x) # g is a uniform envelope function x <- seq(0,1,0.01) # M is the set value M <- 25000 # initiliase vector that contains accepted values thetas <- c() # we want N samples N <- 1e4 while (length(thetas) < N) { theta_j <- runif(1, 0, 1) g_theta_j <- dunif(theta_j, 0, 1) U <- runif(1, 0, 1) if (M*g_theta_j*U < L_times_pi(theta_j)) thetas <- c(thetas, theta_j) } # plot the posterior hist(thetas) In [ ]: # check with a qq-plot assuming a known posterior beta(3,10) rawDensity <- rbeta(N, 3, 10) qqplot(rawDensity, thetas) abline(0,1) day 4a: approximate Bayesian computation¶ In [ ]: source("Data/abc.R") # Rejection algorithm with proportions of simulations to accept N <- 1e4 Y <- c(91.34, 89.21, 88.98) th <- 0.05 # function to simulate is called "simulateWaterTemp" # distance function rho <- function(x,y) sum(sqrt((x-y)^2))/length(y) thetas <- distances <- c() for (i in 1:N) { # 1. draw from prior (continuous, bounded, Normal) theta <- 0 while (theta<80 | theta>110) {
theta <- rnorm(1, mean=90, sd=sqrt(20)) } thetas <- c(thetas, theta) # 2. simulate observations ysim <- simulateWaterTemp(theta) # 3. calculate and retain distances distances <- c(distances, rho(ysim,Y)) } # prior / parameter hist(thetas) quantile(thetas) # relationship between parameter and distance head(cbind(thetas,distances)) # distances hist(distances) accepted <- which(rank(distances, ties.method="random")/length(distances)<=th) range(distances[accepted]) abline(v=max(distances[accepted]), lty=2) # plot prior/posterior par(mfrow=c(2,1)) hist(thetas, xlim=c(80,110), main="Prior") hist(thetas[accepted], xlim=c(80,110), main="Posterior") quantile(thetas[accepted]) # expected value mean(thetas[accepted]) # 95% HPD library(coda) HPDinterval(as.mcmc(thetas[accepted]), prob=0.95) day 5: Bayesian networks¶ $P(d^0, g^2, s^0) = P(g^2 | d^0) P(s^0 | d^0) P(d^0)$ In [ ]: # probability of $P(d^0, g^2, s^0) = P(g^2 | d^0) P(s^0 | d^0) P(d^0)$ 0.34 * 0.95 * 0.70 In [ ]: # confidence $P(d^1, g^2, s^1)$ (0.3/0.7)*(0.17/0.34)*(0.80/0.05) In [ ]: