In: Statistics and Probability
The `R` package \texttt{moments} gives us two very useful functions; \texttt{skewness} and \texttt{kurtosis}. If data is truly normal, it should have a skewness value of 0 and a kurtosis value of 3. Write an R function that conducts a normality test as follows: it takes as input a data set, calculates a bootstrap confidence interval for the skewness, calculates a bootstrap confidence interval for the kurtosis, then sees if 0 is in the skewness interval and 3 is in the kurtosis interval. If so, your routine prints that the data is normally distributed, otherwise your routine should print that the data is not normally distributed. Test your routine on random data from normal (rnorm), uniform (runif), and exponential (rexp) , with sample sizes of n=10,30,70 and 100.
An example code fragment is below:
```{r}
library(moments)
mynormtest = function(x){
#find bootstrap CI for skewness of x
# find bootstrap CI for kurtosis of x
cat("Some otuput goes here\n")
}
library(moments)
mynormtest = function(data_vector){
columns = 300 # no. of bootstrap samples
rows = length(data_vector) # no. of observation in our data
boot_samples_matrix = matrix(nrow = rows)
for (i in 1:columns){
boot_samples_matrix = cbind(boot_samples_matrix,
sample(data_vector, size = rows, replace = TRUE))
}
boot_samples_matrix = boot_samples_matrix[,2:101]
skew_list = apply(boot_samples_matrix,2,skewness)
mean_skew = mean(skew_list)
sd_skew = sd(skew_list)
n = length(skew_list)
# applying 95% confidence interval
skew_ci_95 = c(mean_skew-qnorm(0.975)*sd_skew/sqrt(n),
mean_skew+qnorm(0.975)*sd_skew/sqrt(n))
kurtosis_list = apply(boot_samples_matrix,2,kurtosis)
mean_kurtosis = mean(kurtosis_list)
sd_kurtosis = sd(kurtosis_list)
n = length(kurtosis_list)
kurtosis_ci_95 = c(mean_kurtosis-qnorm(0.975)*sd_kurtosis/sqrt(n),
mean_kurtosis+qnorm(0.975)*sd_kurtosis/sqrt(n))
bool_skew = skew_ci_95[1] <= 0 & 0 <= skew_ci_95[2]
bool_kurtosis = kurtosis_ci_95[1] <= 3 & 3 <=
kurtosis_ci_95[2]
if(bool_skew & bool_kurtosis){
print("The data is Normally distributed")
}else{
print("The data is NOT Normally distributed")
}
}
#......... checking on random data
#1) Random Normal data
x1 = rnorm(10)
x2 = rnorm(30)
x3 = rnorm(70)
x4 = rnorm(100)
mynormtest(x1) # calling our function
mynormtest(x2)
mynormtest(x3)
mynormtest(x4)
#2) Random Uniform data
y1 = runif(10)
y2 = runif(30)
y3 = runif(70)
y4 = runif(100)
mynormtest(y1)
mynormtest(y2)
mynormtest(y3)
mynormtest(y4)
#3) Random exponential
z1 = rexp(10)
z2 = rexp(30)
z3 = rexp(70)
z4 = rexp(100)
mynormtest(z1)
mynormtest(z2)
mynormtest(z3)
mynormtest(z4)
mynormtest(y1)
mynormtest(y2)
mynormtest(y3)
mynormtest(y4)