Question

In: Advanced Math

Write your own R-function for the linear regression using qr() function. You must not use solve()...

Write your own R-function for the linear regression using qr() function. You must not use solve() function. The input of your function would be a design matrix X and a response vector y and the output must contain

- least sqaure estimates and the corresponding stnadard errors.

- residuals and MSE

- fitted values

- ANOVA table

Solutions

Expert Solution

##Importing Data set
library(readxl)
project_data <- read.table("https://www.kaggle.com/kumarajarshi/life-expectancy-who.txt",header=T)
View(project_data)
attach(project_data)

##assign column names
y<- project_data$`Life expectancy`
x1<-project_data$Status
x2<-project_data$`Adult Mortality`
x3<-project_data$`infant deaths`
x4<-project_data$`Hepatitis B`
x5<-project_data$Measles
x6<-project_data$BMI
x7<-project_data$`under-five deaths`
x8<-project_data$Polio
x9<-project_data$Diphtheria
x10<-project_data$`HIV/AIDS`
x11<-project_data$GDP
x12<-project_data$Population
x13<-project_data$`thinness 1-19 years`
x14<-project_data$`thinness 5-9 years`
x15<-project_data$`Income composition of resources`
x16<-project_data$Schooling

##Plotting boxplot and density
for(c in project_data[,2:17])
{
boxplot(c)
plot(density(c))
polygon(density(c),col = "green", border = "red")
}

##Skewness of data
library(e1071)
for(c in project_data[,2:17])
{
print(skewness(c))
}

##Transformation for removing skewness
x3.1<- x3^(1/6)
x5.1<- x5^(1/5)
x7.1<- x7^(1/4)
x12.1<- log10(x12)

##Print Skewness values for transformed variables
print(skewness(x3.1))
print(skewness(x5.1))
print(skewness(x7.1))
print(skewness(x12.1))

##Plotting boxplot and density for transformed variables
boxplot(x3.1)
plot(density(x3.1))
polygon(density(x3.1), col = "green", border = "red")

boxplot(x5.1)
plot(density(x5.1))
polygon(density(x5.1), col = "green", border = "red")

boxplot(x7.1)
plot(density(x7.1))
polygon(density(x7.1), col = "green", border = "red")

boxplot(x12.1)
plot(density(x12.1))
polygon(density(x12.1), col = "green", border = "red")

View(project_data)

##Import project_data again with transformed variables
library(readxl)
project_data <- read_excel("PROJECT/project_data.xlsx", sheet = "Sheet3")
View(project_data)

##assign column names once again with transformed variables
y<- project_data$`Life expectancy`
x1<-project_data$Status
x2<-project_data$`Adult Mortality`
x3<-project_data$`infant deaths`
x4<-project_data$`Hepatitis B`
x5<-project_data$Measles
x6<-project_data$BMI
x7<-project_data$`under-five deaths`
x8<-project_data$Polio
x9<-project_data$Diphtheria
x10<-project_data$`HIV/AIDS`
x11<-project_data$GDP
x12<-project_data$Population
x13<-project_data$`thinness 1-19 years`
x14<-project_data$`thinness 5-9 years`
x15<-project_data$`Income composition of resources`
x16<-project_data$Schooling

## fitting model
fitt<-lm(y~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+x16)

## forming x matrix
x <- cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16)
frame <- data.frame(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16)

## installing corrplot package for dispersion matrix
installed.packages("corrplot")
library(corrplot)

## checking correlation between variables and response variable
corrplot(cor(cbind(y,frame)),method="number")

## installing car package for VIF
install.packages("car")
library(car)

## checking VIF
vif(fitt)
summary(fitt)
## we observe that adj R sq= 0.885 and Multiple R-squared: 0.8996

## installing package for variance decomposition
install.packages("perturb")
library(perturb)

## checking variance decomposition
colldiag(fitt)

##??16,4(=0.906), ??16,9(=0.933) are large
##the regressor variable with higher VIF value (i.e. x9 ) is removed and fit model excluding x9
fitt1<-lm(y~x1+x2+x3+x4+x5+x6+x7+x8+x10+x11+x12+x13+x14+x15+x16)
summary(fitt1)

## we observe that adj R sq= 0.8854 and Multiple R-squared: 0.899
## stage 2 checking vif
vif(fitt1)

## Checking vdm at stage 3
colldiag(fitt1)

##??15,3(=0.970), ??15,7(=0.960) are large
##the regressor variable with higher VIF value (i.e. x3 ) is removed and fit model excluding x9
fitt2<-lm(y~x1+x2+x4+x5+x6+x7+x8+x10+x11+x12+x13+x14+x15+x16)
summary(fitt2)

## we observe that adj R sq= 0.8847 and Multiple R-squared: 0.8975
## stage 2 checking vif
vif(fitt2)

## Checking vdm at stage 3
colldiag(fitt2)

##***??14,(), ??14,() are large
##the regressor variable with higher VIF value (i.e. x13 ) is removed and fit model excluding x9
fitt3<-lm(y~x1+x2+x4+x5+x6+x7+x8+x10+x11+x12+x14+x15+x16)
summary(fitt3)

## we observe that adj R sq= 0.8854 and Multiple R-squared: 0.8972
## stage 2 checking vif
vif(fitt3)
## we observed that vif are not less than 5 so no dropping of variable now.

## now checking for heteroscedasticity
plot(fitt3)
res <- residuals(fitt3)
ff <- fitted(fitt3)
cor(res,ff)
plot(res,ff)
##errors are random since correlation b/w res and ff is approximately zero

## cheecking normality
shapiro.test(res)
## p value(=0.2962) is not less than 0.05 hence normal

## Lack Of fit
install.packages("alr3")
library(alr3)
pureErrorAnova

## Variable Selection..... Akaike Information criteria
install.packages("MASS")
library(MASS)
step(fitt3,direction="both")

install.packages("AICcmodavg")
library(AICcmodavg)
AICc(fitt3)
## AICc=634.9806

## prediction
y_hat<- predict(fitt3)
y_hat
cor(y,y_hat)
plot(y,y_hat)
plot(y_hat)
plot(y)
plot(y,y_hat)

plot(y,y_hat, type="l", col="green", lwd=5, xlab="y", ylab="y_hat", main="graph between y and y_hat")
lines(y, y_hat, col="red", lwd=2)
lines(y, y_hat, type="b", col="red", lwd=2, pch=19)

#checking normality
options(scipen = 999)
ress<-residuals(fitt3)
ks.test(ress,"pnorm")

## Q-Q Plot
qqnorm(ress)
qqline(ress)

f<- lm(y~ x2+x4+x10+x15)
summary(f)
options(scipen = 999)
r<-residuals(f)
ks.test(r,"pnorm")
hist(r,127)
hist(r)

Colby Messinger answered 10 months ago

Next > < Previous

Question

Write your own R-function for the linear regression using qr() function. You must not use solve()...

Solutions

Expert Solution

Related Solutions

Write your own routine to solve a system of n linear equations and n unknowns using...

Write a function to solve a system of linear equations of the form Ax= b using...

Write a MATLAB function function = pivGauss(.....) to solve linear equations using Gaussian Elimination with Partial...

3. Solve the following linear programming problem. You must use the dual. First write down the...

Solve the following linear programming problem. You must use the dual. First write down the dual...

Solve the below questions using your own words PLEASE!! Make sure to write by your own...

PLEASE NO PLAGIARISM AND MUST BE IN YOUR OWN WORDS You must write a minimum of...

PLEASE NO PLAGIARISM AND MUST BE IN YOUR OWN WORDS You must write a minimum of...

pl use R code to do that and show me the program Use a linear regression...

Explain how you can solve the following problems using the QR factorization. (a) Find the vector...