Inference for Categorical Data
Click here to open the slides.
Here is the R script from class.
library(tidyverse)
# Sample proportion used in class; 0.657 presumably is 69 / 105 (the Duke
# "Yes" share) rounded to three decimals -- confirm against the slides.
p_hat <- 0.657
# Total number of respondents: 105 in the Duke group plus 680 in the American group
n <- 105 + 680
# Rebuild the raw survey responses from the published counts, one row per
# respondent: Duke = 69 Yes / 36 No, American = 454 Yes / 226 No.
df <- data.frame(
  Group = rep(c("Duke", "American"), times = c(69 + 36, 454 + 226)),
  Response = rep(c("Yes", "No", "Yes", "No"), times = c(69, 36, 454, 226))
)
table(df$Group, df$Response) # Verify the counts are the same

# Bootstrap the difference in "Yes" proportions (Duke - American).
n_boot <- 1000
n_rows <- nrow(df)
boots <- numeric(n_boot) # preallocate instead of growing inside the loop
for (i in seq_len(n_boot)) {
  # Resample respondents with replacement, keeping the original sample size
  resample <- df[sample(n_rows, size = n_rows, replace = TRUE), ]
  # Row-wise proportions: share of each response within each group
  props <- prop.table(table(resample$Group, resample$Response), margin = 1)
  # Select cells by name rather than by position so the sign of the
  # difference does not depend on how table() happens to order the levels
  boots[i] <- props["Duke", "Yes"] - props["American", "Yes"]
}
# Centre and spread of the bootstrap estimates (the spread is the bootstrap
# standard error)
mean(boots)
sd(boots)
# 95% confidence interval via the normal approximation:
# estimate -/+ 1.96 standard errors, returned as c(lower, upper)
mean(boots) + c(-1, 1) * 1.96 * sd(boots)
# Histogram of the bootstrap distribution
hist(boots)