## ---- include = FALSE----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, echo = FALSE--------------------------------------------------
library(bggum)

## ----read_in_raw_data-----------------------------------------------------
roberts_court <- read.csv("roberts_court.csv", stringsAsFactors = FALSE)
head(roberts_court)

## ----reshape_data----------------------------------------------------------
## Recode the votes to be integers in $\{0, \ldots, K_j - 1\}$
roberts_court$vote <- ifelse(roberts_court$vote %in% c(2, 7), 0, 1)
## Reshape the data
library(dplyr)
library(tidyr)
responses <- roberts_court %>%
    select(-caseId, -term) %>%
    spread(lexisCite, vote)
## I like to keep the respondents' names as the rownames
rownames(responses) <- responses$justiceName
## Now we just eliminate the respondent ID column and turn it into a matrix
responses <- as.matrix(responses[ , -1])
## Eliminate unanimous items
unanimous <- apply(responses, 2, function(x) length(unique(na.omit(x))) == 1)
responses <- responses[ , !unanimous]
## Here are responses to a few items:
responses[ , c(1, floor(ncol(responses) / 2), ncol(responses))]

## ----tune_proposals, eval = FALSE-------------------------------------------
# ## Load the package
# library(bggum)
# ## Set the seed for reproducibility
# set.seed(123)
# ## Tune the proposal densities
# proposal_sds <- tune_proposals(responses, tune_iterations = 5000)
# set.seed(456)
# ## Tune the temperature schedule
# temps <- tune_temperatures(responses, n_temps = 6, proposal_sds = proposal_sds)

## ----load_hypers, echo = FALSE-----------------------------------------------
## (These are the results when I ran the above unevaluated code locally)
temps <- c(1, 0.933, 0.870, 0.812, 0.758, 0.706)

## ----check_temps---------------------------------------------------------------
round(temps, 2)

## ----sample_posterior, eval = FALSE---------------------------------------------
# ## We need the parallel package
# library(parallel)
# ## We'll set up the cluster with two cores (for two chains)
# cl <- makeCluster(2, type = "FORK", outfile = "bggum-log.txt")
# ## Deal with reproducibility
# clusterSetRNGStream(cl = cl, iseed = 789)
# ## Produce the chains
# chains <- parLapplyLB(cl = cl, X = 1:2, fun = function(x) {
#     ggumMC3(data = responses,
#             sample_iterations = 50000,
#             burn_iterations = 5000,
#             proposal_sds = proposal_sds,
#             temps = temps)
# })
# ## Stop the cluster
# stopCluster(cl)

## ----post_process, eval = FALSE--------------------------------------------------
# constraint <- which(rownames(responses) == "RBGinsburg")
# processed_chains <- lapply(chains, post_process, constraint = constraint,
#                            expected_sign = "-")

## ----roberts_trace_plots_setup1, eval = FALSE-------------------------------------
# roberts <- which(rownames(responses) == "JGRoberts")
# trace_colors <- c("#0072b280", "#d55e0080")
# iters <- nrow(chains[[1]])
# idx <- seq(floor(iters / 1000), iters, floor(iters / 1000))
# chain1_raw <- chains[[1]][idx, roberts]
# chain2_raw <- chains[[2]][idx, roberts]
# chain1_pp <- processed_chains[[1]][idx, roberts]
# chain2_pp <- processed_chains[[2]][idx, roberts]

## ----roberts_trace_plots_setup2, echo = FALSE---------------------------------------
roberts <- which(rownames(responses) == "JGRoberts")
trace_colors <- c("#0072b280", "#d55e0080")
roberts_draws <- read.csv("roberts_draws.csv", stringsAsFactors = FALSE)
iters <- nrow(roberts_draws) * 50
idx <- 1:nrow(roberts_draws)
chain1_raw <- roberts_draws[idx, "chain1_raw"]
chain2_raw <- roberts_draws[idx, "chain2_raw"]
chain1_pp <- roberts_draws[idx, "chain1_pp"]
chain2_pp <- roberts_draws[idx, "chain2_pp"]
idx <- seq(floor(iters / 1000), iters, floor(iters / 1000))
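## The chunk below is not part of the original vignette code; it is a minimal
## sketch of how a file like "roberts_draws.csv" (read in above) could have
## been written out after running the unevaluated roberts_trace_plots_setup1
## chunk, so the thinned draws can be reused without storing the full chains.
## The chunk name is made up; the column names match those read in above.
## ----save_roberts_draws_sketch, eval = FALSE----------------------------------------
# ## Collect the thinned raw and post-processed draws for Chief Justice Roberts
# roberts_draws <- data.frame(chain1_raw = chain1_raw,
#                             chain2_raw = chain2_raw,
#                             chain1_pp  = chain1_pp,
#                             chain2_pp  = chain2_pp)
# ## Save them so the trace plots can be reproduced cheaply later
# write.csv(roberts_draws, file = "roberts_draws.csv", row.names = FALSE)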
## ----roberts_trace_plots, results = 'asis', fig.dim = c(6, 4)------------------------
ylims <- range(c(chain1_raw, chain2_raw, chain1_pp, chain2_pp))
opar <- par(mar = c(3, 3, 3, 1) + 0.1)
plot(idx, chain1_raw, type = "l", col = trace_colors[1], ylim = ylims,
     xlab = "", ylab = "", xaxt = "n", yaxt = "n",
     main = "Raw Samples")
lines(idx, chain2_raw, col = trace_colors[2])
axis(side = 1, tick = FALSE, line = -0.75)
axis(side = 2, tick = FALSE, line = -0.75)
title(xlab = "Iteration", ylab = expression(theta[Roberts]), line = 1.5)
plot(idx, chain1_pp, type = "l", col = trace_colors[1], ylim = ylims,
     xlab = "", ylab = "", xaxt = "n", yaxt = "n",
     main = "Processed Samples")
lines(idx, chain2_pp, col = trace_colors[2])
axis(side = 1, tick = FALSE, line = -0.75)
axis(side = 2, tick = FALSE, line = -0.75)
title(xlab = "Iteration", ylab = expression(theta[Roberts]), line = 1.5)
par(opar)

## ----convergence1, eval = FALSE--------------------------------------------------------
# library(coda)
# ## Look at the Gelman-Rubin potential scale reduction factor:
# convergence_stats <- gelman.diag(processed_chains)
# ## See what estimates are:
# summary(convergence_stats$psrf[ , 1])

## ----convergence2, echo = FALSE----------------------------------------------------------
convergence_stats <- read.csv("convergence.csv")
summary(convergence_stats[ , 1])

## ----summary, eval = FALSE-----------------------------------------------------------------
# posterior_summary <- summary(processed_chains)

## ----load_summary, echo = FALSE-------------------------------------------------------------
posterior_summary <- readRDS("posterior_summary.rds")

## ----display_summary--------------------------------------------------------------------------
str(posterior_summary$estimates, max.level = 1)
str(posterior_summary$sds, max.level = 1)
head(posterior_summary$statistics)

## ----plot_thetas, results = 'asis', fig.dim = c(6, 6)------------------------------------------
theta <- posterior_summary$estimates$theta
names(theta) <- rownames(responses)
n <- length(theta)
ordering <- order(theta)
theta_sorted <- sort(theta)
opar <- par(mar = c(3, 6, 1, 1) + 0.1)
plot(theta_sorted, factor(names(theta_sorted), levels = names(theta_sorted)),
     pch = 19, xlim = c(-3, 2.5),
     xaxt = "n", yaxt = "n", xlab = "", ylab = "")
axis(side = 1, tick = FALSE, line = -0.75)
axis(side = 2, tick = FALSE, line = -0.75, las = 1,
     labels = names(theta_sorted), at = 1:n)
title(xlab = expression(theta), line = 1.5)
segments(x0 = posterior_summary$statistics[ordering, 1], y0 = 1:n,
         x1 = posterior_summary$statistics[ordering, 4], y1 = 1:n)
par(opar)

## ----plot_items, results = 'asis', fig.dim = c(6, 4)--------------------------------------------
alpha <- posterior_summary$estimates$alpha
delta <- posterior_summary$estimates$delta
tau <- posterior_summary$estimates$tau
phillip_morris <- which(colnames(responses) == "2007 U.S. LEXIS 1332")
rucho <- which(colnames(responses) == "2019 U.S. LEXIS 4401")
icc(alpha[phillip_morris], delta[phillip_morris], tau[[phillip_morris]],
    main_title = "Philip Morris USA, Inc. v. Williams",
    plot_responses = TRUE, thetas = theta,
    responses = responses[ , phillip_morris])
icc(alpha[rucho], delta[rucho], tau[[rucho]],
    main_title = "Rucho v. Common Cause",
    plot_responses = TRUE, thetas = theta,
    responses = responses[ , rucho])
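## The chunk below is not part of the original vignette code; it is a minimal
## sketch, assuming the `processed_chains` list from the post_process step is
## available, of a complementary convergence check: coda's effectiveSize()
## reports roughly how many independent draws each parameter's chains contain,
## alongside the Gelman-Rubin PSRF computed above. The chunk name is made up.
## ----effective_sample_size_sketch, eval = FALSE-------------------------------------------------
# library(coda)
# ## Combine the two processed chains into an mcmc.list
# chain_list <- mcmc.list(lapply(processed_chains, mcmc))
# ## Effective sample sizes, summed across the two chains, for every parameter
# ess <- effectiveSize(chain_list)
# ## Distribution of effective sample sizes across parameters
# summary(ess)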