# Discussed in class on Tue Feb 2
# Bioassay example from Section 3.7 of Gelman et al
# Key in data
rm(list=ls()); set.seed(20210202);
x <- c(-.86, -.30, -.05, .73)
n <- c(5, 5, 5, 5)
y <- c(0, 1, 3, 5)

S <- 1000  # Number of simulations from posterior

# Need to identify the subset of the (alpha, beta) plane where most of the
# posterior probability is concentrated
m1 <- glm(cbind(y, n-y) ~ x, family=binomial(link="logit"))
summary(m1)

# Take point estimate plus or minus 5 standard errors
.85 + c(-5,5) * 1.02
7.75 + c(-5,5) * 4.87

# Start with alpha in [-5,7] and beta in [-20,35]
alpha <- seq(-5, 7, by=.05)
beta <- seq(-20, 35, by=.25)

# Now compute the posterior over the grid
I <- length(alpha); J <- length(beta); I; J;
log.post <- matrix(NA, I, J)
library(boot)  # for inv.logit()
for(i in 1:I) { for(j in 1:J) {
  theta <- inv.logit(alpha[i] + beta[j]*x)
  log.post[i,j] <- sum(dbinom(y, n, theta, log=T))
}}
rm(i,j,theta)

# Gelman et al suggest subtracting off the max before exponentiating
maxie <- max(log.post); maxie;
log.post <- log.post - maxie; rm(maxie);
post <- exp(log.post); rm(log.post);

# Now draw the contour plot
contours <- c(.001, .01, seq(.05, .95, .10))
contour(alpha, beta, post, levels=contours, drawlabels=F)

# Seems that we missed the mark
# After some trial and error, landed on alpha in [-4,8], beta in [-5,45]
alpha <- seq(-4, 8, .05)
beta <- seq(-5, 45, .20)
I <- length(alpha); J <- length(beta); I; J;
log.post <- matrix(NA, I, J)
for(i in 1:I) { for(j in 1:J) {
  theta <- inv.logit(alpha[i] + beta[j]*x)
  log.post[i,j] <- sum(dbinom(y, n, theta, log=T))
}}
rm(i,j,theta)
maxie <- max(log.post); maxie;
log.post <- log.post - maxie; rm(maxie);
post <- exp(log.post); rm(log.post);
contour(alpha, beta, post, levels=contours, drawlabels=F)

# Random sample from the posterior distribution
delta <- (alpha[2] - alpha[1]) / 2
epsilon <- (beta[2] - beta[1]) / 2
post.a <- apply(post, 1, sum)  # marginal posterior for alpha
alpha.sim <- rep(NA, S); beta.sim <- rep(NA, S);
for(s in 1:S) {
  i <- sample(I, 1, prob=post.a)    # draw alpha from its marginal
  j <- sample(J, 1, prob=post[i,])  # draw beta given that alpha
  alpha.sim[s] <- alpha[i] + runif(1, -delta, delta)    # jitter within grid cell
  beta.sim[s]  <- beta[j]  + runif(1, -epsilon, epsilon)
}
plot(alpha.sim, beta.sim)  # Scatterplot!

# Put contour plot and scatterplot on the same display
op <- par(mfrow=c(1,2))
contour(alpha, beta, post, levels=contours, drawlabels=F,
        xlab="alpha", ylab="beta")
plot(alpha.sim, beta.sim, xlim=range(alpha), ylim=range(beta),
     xlab="alpha", ylab="beta", pch=19, cex=.75)
par(op)

# Or overlay the random sample on the contour plot
contour(alpha, beta, post, levels=contours, drawlabels=F,
        xlab="alpha", ylab="beta")
points(alpha.sim, beta.sim, pch=19, cex=.75)

# LD50 = dose at which prob of death is .50
# Solve inv.logit(alpha + beta*x) = .50
# Get alpha + beta*x = 0; ie, x = -alpha/beta
# This assumes the dose is harmful, ie, that beta > 0
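
# Quick sanity check of that algebra using the glm point estimates from m1
# (a plug-in sketch; ld50.hat is just a scratch name, and the fully
# Bayesian version follows below)
ld50.hat <- -coef(m1)[1] / coef(m1)[2]
ld50.hat
inv.logit(coef(m1)[1] + coef(m1)[2] * ld50.hat)  # should come out to exactly .50
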
# The posterior probability that the drug is harmful is estimated to be
mean(beta.sim > 0) # estimate of P(beta > 0 | y)
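
# Rough Monte Carlo standard error for that proportion (a binomial sketch
# based on the S independent draws; p.hat is just a scratch name)
p.hat <- mean(beta.sim > 0)
sqrt(p.hat * (1 - p.hat) / S)
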
# And given that it is, our posterior for LD50 is
sel <- beta.sim > 0
LD50.sim <- -alpha.sim[sel] / beta.sim[sel]
LD50.PI <- quantile(LD50.sim, c(.025, .975))
LD50.PI  # 95% posterior probability interval
hist(LD50.sim, freq=F, right=F, breaks=40);
lines(density(LD50.sim));
abline(v=LD50.PI, lty=2);

# For LD50 in terms of raw dose, exponentiate out of log scale
exp(LD50.PI)

# A prediction problem:
# Suppose 10 more animals are given the third dose considered,
# which was x = -.05
# Find the posterior predictive distribution for the number of
# deaths among those 10 animals
theta.sim <- inv.logit(alpha.sim + beta.sim*x[3])
ytilde.sim <- rbinom(S, size=10, prob=theta.sim)
PPD <- table(ytilde.sim) / S

op <- par(mfrow=c(1,2))
hist(theta.sim, freq=F, right=F, breaks=seq(0, 1, .05),
     xlab="theta3", ylab="p(theta3|y)", main="");
lines(density(theta.sim));
plot(PPD, type="p", xlab="y.tilde", ylab="p(ytilde|y)", main="", pch=19);
lines(PPD);
par(op)

# What do these plots represent, exactly?
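
# A couple of numeric summaries of the posterior predictive draws, as a
# quick companion to the plots above
mean(ytilde.sim)                     # posterior predictive mean number of deaths
quantile(ytilde.sim, c(.025, .975))  # central 95% posterior predictive interval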