library(rstan) # observe startup messages
## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores()) # Use all local cores (does this work for all r packages?)
rstan_options(auto_write = TRUE) # saves stan to hard drive
Sys.setenv(TZ = "America/Toronto") # this deals with a time zone bug issue that was causes an error when running the model
This is an example in Section 5.5 of Gelman et al (2003), which studied coaching effects from eight schools. For simplicity, we call this example “eight schools.”
We start by writing a Stan program for the model in a text file. If you are using RStudio version 1.2.x or greater, click on File -> New File -> Stan File . Otherwise, open your favorite text editor. Either way, paste in the following and save your work to a file called schools.stan in R’s working directory (which can be seen by executing getwd()) You can also embed plots, for example:
// saved as schools.stan
data {
int<lower=0> J; // number of schools
real y[J]; // estimated treatment effects
real<lower=0> sigma[J]; // standard error of effect estimates
}
parameters {
real mu; // population treatment effect
real<lower=0> tau; // standard deviation in treatment effects
vector[J] eta; // unscaled deviation from mu by school
}
transformed parameters {
vector[J] theta = mu + tau * eta; // school treatment effects
}
model {
target += normal_lpdf(eta | 0, 1); // prior log-density
target += normal_lpdf(y | theta, sigma); // log-likelihood
}
Be sure that your Stan programs ends in a blank line without any characters including spaces and comments.
In this Stan program, we let theta be a transformation of mu, eta, and tau instead of declaring theta in the parameters block, which allows the sampler will run more efficiently .
We can prepare the data (which typically is a named list) in R with:
rm(list = ls())
schools_dat <- list(J = 9,
y = c(5.41, 5, 6.29, 10.95, 9.53, 7.28, 6.59, 2.72, 6.94),
sigma = c(0.33, 0.25, 0.2, 0.12, 0.19, 0.23, 0.24, 0.22, 0.17)) # treatment effects for each school
condition_dat <- list(J = 4,
y = c(7.41, 6.01, 7.21, 7.33),
sigma = c(0.18, 0.18, 0.13, 0.18)) # treatment effects for each school
#WHY DO WE USE THE SE? SHOULDN"T SIGMA BE THE SD (its it because the SE takes into account sample size?)
And we can get a fit with the following R command. Note that the argument to file = should point to where the file is on your file system unless you have put it in the working directory of R in which case the below will work.
RESEARCH QUESTIONS: Does the postest vary between schools?
fit <- stan(file = 'schools.stan', data = schools_dat)
## Warning in .local(object, ...): some chains had errors; consider specifying
## chains = 1 to debug
## here are whatever error messages were returned
## [[1]]
## Stan model 'schools' does not contain samples.
## Warning: There were 1 divergent transitions after warmup. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
print(fit)
## Inference for Stan model: schools.
## 3 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=3000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu 6.79 0.05 0.97 4.96 6.17 6.73 7.42 8.85 421 1.01
## tau 2.90 0.05 0.84 1.73 2.29 2.74 3.33 4.98 301 1.01
## eta[1] -0.50 0.02 0.36 -1.29 -0.71 -0.49 -0.26 0.15 442 1.01
## eta[2] -0.65 0.02 0.37 -1.44 -0.87 -0.64 -0.40 0.03 425 1.01
## eta[3] -0.18 0.02 0.34 -0.86 -0.40 -0.17 0.05 0.46 428 1.01
## eta[4] 1.54 0.03 0.54 0.56 1.16 1.51 1.92 2.62 349 1.00
## eta[5] 1.01 0.02 0.43 0.17 0.72 1.01 1.30 1.88 373 1.00
## eta[6] 0.18 0.02 0.34 -0.53 -0.06 0.20 0.41 0.85 422 1.00
## eta[7] -0.07 0.02 0.33 -0.75 -0.30 -0.05 0.15 0.57 436 1.00
## eta[8] -1.49 0.03 0.50 -2.59 -1.79 -1.46 -1.16 -0.55 358 1.01
## eta[9] 0.06 0.02 0.33 -0.62 -0.17 0.07 0.28 0.68 417 1.00
## theta[1] 5.43 0.01 0.33 4.79 5.21 5.44 5.66 6.08 2918 1.00
## theta[2] 5.02 0.00 0.25 4.53 4.85 5.01 5.18 5.50 3103 1.00
## theta[3] 6.30 0.00 0.20 5.90 6.16 6.30 6.44 6.69 3851 1.00
## theta[4] 10.94 0.00 0.12 10.70 10.86 10.94 11.02 11.18 2630 1.00
## theta[5] 9.51 0.00 0.19 9.15 9.39 9.51 9.64 9.87 4086 1.00
## theta[6] 7.27 0.00 0.23 6.81 7.11 7.27 7.44 7.73 3771 1.00
## theta[7] 6.59 0.00 0.25 6.10 6.42 6.58 6.75 7.08 3716 1.00
## theta[8] 2.75 0.00 0.22 2.32 2.60 2.75 2.90 3.18 4118 1.00
## theta[9] 6.93 0.00 0.17 6.61 6.82 6.94 7.04 7.25 3993 1.00
## lp__ -9.89 0.14 3.11 -16.74 -11.82 -9.53 -7.67 -4.77 469 1.00
##
## Samples were drawn using NUTS(diag_e) at Sat Sep 25 06:39:27 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(fit, pars = c("mu",
"tau",
"eta[1]", # differences between the overall mean and the cluster mean
"eta[2]",
"eta[3]",
"eta[4]",
"eta[5]",
"eta[6]",
"eta[7]",
"eta[8]",
"eta[9]"
))
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
yi ~ N(theta1i, sigmai) theta ~ N(mu, tau)
pairs(fit, pars = c("mu", # means
"tau", # between cluster variance
"lp__" #?
))
RESEARCH QUESTIONS: Does the posttest vary between Conditions?
fit2 <- stan(file = 'schools.stan', data = condition_dat)
## Warning in .local(object, ...): some chains had errors; consider specifying
## chains = 1 to debug
## here are whatever error messages were returned
## [[1]]
## Stan model 'schools' does not contain samples.
##
## [[2]]
## Stan model 'schools' does not contain samples.
##
## [[3]]
## Stan model 'schools' does not contain samples.
## Warning: There were 3 divergent transitions after warmup. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
plot(fit2, pars = c("mu",
"tau",
"eta[1]",
"eta[2]",
"eta[3]",
"eta[4]"
))
## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)
pairs(fit, pars = c("mu", # mean
"tau", # between cluster variance
"lp__")) # some version of the posteior
la <- extract(fit, permuted = TRUE) # return a list of arrays
mu <- la$mu
### return an array of three dimensions: iterations, chains, parameters
a <- extract(fit, permuted = FALSE)
### use S3 functions on stanfit objects
a2 <- as.array(fit)
m <- as.matrix(fit)
d <- as.data.frame(fit)