# Fitting finite mixture models in Stan
rm(list=ls()); set.seed(20210316);
glucose <- scan("~/Bayes/Data/glucose.txt")
hist(glucose, freq=F, right=F)
lines(density(glucose))
y <- glucose; rm(glucose);
n <- length(y); n;
library(rstan)
stan_model <- "
data{
  int<lower=1> H;       // number of mixture components
  int<lower=1> n;       // sample size
  real y[n];            // observations
}
parameters{
  simplex[H] pi;        // mixture weights
  ordered[H] mu;        // component means, ordered to discourage label switching
  real<lower=0> sigma;  // common standard deviation
}
transformed parameters{
  real<lower=0> sigma2;
  sigma2 = sigma^2;
}
model{
  mu ~ normal(120, 100);
  sigma2 ~ inv_gamma(.01, .01);
  target += log(2 * sigma);  // Jacobian for the change of variables: the prior
                             // is on sigma2 = sigma^2, a transform of sigma
  for(i in 1:n){
    vector[H] lps = log(pi);
    for(h in 1:H)
      lps[h] += normal_lpdf(y[i] | mu[h], sigma);
    target += log_sum_exp(lps);
  }
}
"
H <- 3;
fit <- stan(model_code=stan_model,
data=c("H", "n", "y"), iter=10000, chains=2)
round(summary(fit)$summary[1:7,4:8], 2)
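# Quick convergence check before using the draws (a minimal sketch; rstan's
# check_hmc_diagnostics(fit) reports divergences and other NUTS warnings):
traceplot(fit, pars=c("mu", "sigma"))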
sims <- extract(fit)
names(sims)
# This is what the homework problem asks you to do in part (b):
# Assess the fit of the mixture model with parameters set to
# their posterior means
yvals <- seq(25, 225, 1)
pi.hat <- apply(sims$pi, 2, mean)
mu.hat <- apply(sims$mu, 2, mean)
sigma.hat <- mean(sims$sigma)
g.hat <- rep(NA, length(yvals))
for(j in 1:length(yvals))
{
g.hat[j] <- sum(pi.hat * dnorm(yvals[j], mu.hat, sigma.hat))
}
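# Equivalently, without the explicit loop (g.hat2 is just an illustrative name):
g.hat2 <- sapply(yvals, function(v) sum(pi.hat * dnorm(v, mu.hat, sigma.hat)))
all.equal(g.hat, g.hat2)   # should be TRUE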
hist(y, freq=F, breaks=30, xlim=c(25,225))
lines(yvals, g.hat, col="red", lwd=2)
lines(density(y), col="blue", lty=2, lwd=2)
legend("topright", inset=.05, lty=1:2, col=c("red", "blue"), lwd=2,
legend=c("Mixture of 3 normals", "Kernel density estimate"))
# Part (c) of the homework problem calls for posterior predictive
# checks against the sample mean, variance, skewness and kurtosis
S <- length(sims$sigma); n <- length(y);
y.rep <- matrix(NA, S, n);
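# For each posterior draw s, simulate a replicated data set of size n: first
# draw component labels z from pi^(s), then y.rep given those labels.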
for(s in 1:S)
{
z <- sample(H, n, replace=T, prob=sims$pi[s,])
y.rep[s,] <- rnorm(n, mean=sims$mu[s,z], sd=sims$sigma[s])
}
# My test statistics are mean, variance, skewness, kurtosis
library(moments)
T1 <- apply(y.rep, 1, mean)
T2 <- apply(y.rep, 1, var)
T3 <- apply(y.rep, 1, skewness)
T4 <- apply(y.rep, 1, kurtosis)
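# Note: moments::kurtosis() returns raw (Pearson) kurtosis, so a normal
# distribution has kurtosis 3, not excess kurtosis 0.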
op <- par(mfrow=c(2,2))
p.val <- round(mean(T1 > mean(y)), 3);
hist(T1, freq=F, breaks=40, xlab="", ylab="",
     main=paste("PPD of mean, p-value =", p.val));
lines(density(T1), col="red");
abline(v=mean(y), lty=2, col="blue")
p.val <- round(mean(T2 > var(y)), 3);
hist(T2, freq=F, breaks=40, xlab="", ylab="",
     main=paste("PPD of variance, p-value =", p.val));
lines(density(T2), col="red");
abline(v=var(y), lty=2, col="blue")
p.val <- round(mean(T3 > skewness(y)), 3);
hist(T3, freq=F, breaks=40, xlab="", ylab="",
     main=paste("PPD of skewness, p-value =", p.val));
lines(density(T3), col="red");
abline(v=skewness(y), lty=2, col="blue")
p.val <- round(mean(T4 > kurtosis(y)), 3);
hist(T4, freq=F, breaks=40, xlab="", ylab="",
     main=paste("PPD of kurtosis, p-value =", p.val));
lines(density(T4), col="red");
abline(v=kurtosis(y), lty=2, col="blue")
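par(op)   # restore the plotting layout saved above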
# Setting parameter values to their posterior means is not the
# best idea for assessing model fit. Better to compare the
# observed data to simulations from the posterior predictive
# distribution.
# We can compare the distributions in side-by-side plots, like
# we did for the location-scale mixture of two normals
y.tilde <- rep(NA, S)
for(s in 1:S)
{
  z <- sample(H, 1, prob=sims$pi[s,])
  y.tilde[s] <- rnorm(1, mean=sims$mu[s,z], sd=sims$sigma[s])
}
par(mfrow=c(1,2))
minny <- min(c(y, y.tilde)); maxie <- max(c(y, y.tilde));
hist(y, freq=F, right=F, breaks=20, main="Empirical distribution",
     xlim=c(minny, maxie), ylim=c(0, .016), xlab="Glucose (n=532)")
lines(density(y))
hist(y.tilde, freq=F, right=F, breaks=20, main="Posterior predictive dist",
     xlim=c(minny, maxie), ylim=c(0, .016), xlab="Location only mixture (H=3)")
lines(density(y.tilde))