In: Statistics and Probability
You know that the definition of the variance is Var[X] = 1 n − 1 Z ∞ −∞ (Xi − µ) 2 dFX(x), so if we have a plug-in estimator S 2 = 1 n − 1 Xn i=1 (xi − X¯) 2 and we are interested in SES2 = p Var[S2] we can use the bootstrap to obtain it. Write a function in R called bootVarSE that computes a bootstrap estimate of the standard error of the sample variance estimator. Test your function using 100 standard random normal draws (that is, rnorm(100)) as data input.
###########################################
## Boostrap for the Normal distribution
## And compare with known distribution
###########################################
set.seed(1)
# X1, ..., Xn ~ N(mu, sigma^2)
# bar(X) ~ N(mu, sigma^2/n)
# Let's say distribution of bar(x) is not known
###########################################
## Nonparametric bootstrap
###########################################
# First small n and B
mu <- 5
sig2 <- 3
n <- 10
my.samp <- rnorm(n, mean = mu, sd = sqrt(sig2))
barx <- mean(my.samp)
var.x <- var(my.samp)
B <- 10
boot.np <- numeric(length = B)
boot.p <- numeric(length = B)
for(b in 1:B)
{
boot.samp.np <- sample(my.samp, replace = TRUE) # NP Bootstramp
samples
boot.np[b] <- mean(boot.samp.np) # NP Bootstrap estimator
boot.samp.p <- rnorm(n, mean = barx, sd = sqrt(var.x))
boot.p[b] <- mean(boot.samp.p)
}
# 95% Boostratp confidence interval
quantile(boot.np, probs = c(.025, .975))
quantile(boot.p, probs = c(.025, .975))
# CI from known distribution
c( barx - qt(.975, df = n-1)*sqrt(var.x/n), barx + qt(.975, df =
n-1)*sqrt(var.x/n) )
# CIs are close, but not great
###########################################
# Now n is same B is big
mu <- 5
sig2 <- 3
n <- 10
B <- 1e3
boot.np <- numeric(length = B)
boot.p <- numeric(length = B)
for(b in 1:B)
{
boot.samp.np <- sample(my.samp, replace = TRUE) # NP Bootstramp
samples
boot.np[b] <- mean(boot.samp.np) # NP Bootstrap estimator
boot.samp.p <- rnorm(n, mean = barx, sd = sqrt(var.x))
boot.p[b] <- mean(boot.samp.p)
}
# 95% Boostratp confidence interval
quantile(boot.np, probs = c(.025, .975))
quantile(boot.p, probs = c(.025, .975))
# CI from known distribution
c( barx - qt(.975, df = n-1)*sqrt(var.x/n), barx + qt(.975, df =
n-1)*sqrt(var.x/n) )
# CIs are closer!
###########################################
# Now n is big and B is big
mu <- 5
sig2 <- 3
n <- 100
my.samp <- rnorm(n, mean = mu, sd = sqrt(sig2))
barx <- mean(my.samp)
var.x <- var(my.samp)
B <- 1e3
boot.np <- numeric(length = B)
boot.p <- numeric(length = B)
for(b in 1:B)
{
boot.samp.np <- sample(my.samp, replace = TRUE) # NP Bootstramp
samples
boot.np[b] <- mean(boot.samp.np) # NP Bootstrap estimator
boot.samp.p <- rnorm(n, mean = barx, sd = sqrt(var.x))
boot.p[b] <- mean(boot.samp.p)
}
# 95% Boostratp confidence interval
quantile(boot.np, probs = c(.025, .975))
quantile(boot.p, probs = c(.025, .975))
# CI from known distribution
c( barx - qt(.975, df = n-1)*sqrt(var.x/n), barx + qt(.975, df =
n-1)*sqrt(var.x/n) )
# CIs are very close now!