Question

In: Advanced Math

Write your own R-function for the linear regression using qr() function. You must not use solve()...

Write your own R-function for the linear regression using qr() function. You must not use solve() function. The input of your function would be a design matrix X and a response vector y and the output must contain

- least sqaure estimates and the corresponding stnadard errors.

- residuals and MSE

- fitted values

- ANOVA table

Solutions

Expert Solution

##Importing Data set
library(readxl)
project_data <- read.table("https://www.kaggle.com/kumarajarshi/life-expectancy-who.txt",header=T)
View(project_data)
attach(project_data)

##assign column names
y<- project_data$`Life expectancy`
x1<-project_data$Status
x2<-project_data$`Adult Mortality`
x3<-project_data$`infant deaths`
x4<-project_data$`Hepatitis B`
x5<-project_data$Measles
x6<-project_data$BMI
x7<-project_data$`under-five deaths`
x8<-project_data$Polio
x9<-project_data$Diphtheria
x10<-project_data$`HIV/AIDS`
x11<-project_data$GDP
x12<-project_data$Population
x13<-project_data$`thinness 1-19 years`
x14<-project_data$`thinness 5-9 years`
x15<-project_data$`Income composition of resources`
x16<-project_data$Schooling

##Plotting boxplot and density
for(c in project_data[,2:17])
{
boxplot(c)
plot(density(c))
polygon(density(c),col = "green", border = "red")
}

##Skewness of data
library(e1071)
for(c in project_data[,2:17])
{
print(skewness(c))
}

##Transformation for removing skewness
x3.1<- x3^(1/6)
x5.1<- x5^(1/5)
x7.1<- x7^(1/4)
x12.1<- log10(x12)

##Print Skewness values for transformed variables
print(skewness(x3.1))
print(skewness(x5.1))
print(skewness(x7.1))
print(skewness(x12.1))

##Plotting boxplot and density for transformed variables
boxplot(x3.1)
plot(density(x3.1))
polygon(density(x3.1), col = "green", border = "red")

boxplot(x5.1)
plot(density(x5.1))
polygon(density(x5.1), col = "green", border = "red")

boxplot(x7.1)
plot(density(x7.1))
polygon(density(x7.1), col = "green", border = "red")

boxplot(x12.1)
plot(density(x12.1))
polygon(density(x12.1), col = "green", border = "red")

View(project_data)

##Import project_data again with transformed variables
library(readxl)
project_data <- read_excel("PROJECT/project_data.xlsx", sheet = "Sheet3")
View(project_data)

##assign column names once again with transformed variables
y<- project_data$`Life expectancy`
x1<-project_data$Status
x2<-project_data$`Adult Mortality`
x3<-project_data$`infant deaths`
x4<-project_data$`Hepatitis B`
x5<-project_data$Measles
x6<-project_data$BMI
x7<-project_data$`under-five deaths`
x8<-project_data$Polio
x9<-project_data$Diphtheria
x10<-project_data$`HIV/AIDS`
x11<-project_data$GDP
x12<-project_data$Population
x13<-project_data$`thinness 1-19 years`
x14<-project_data$`thinness 5-9 years`
x15<-project_data$`Income composition of resources`
x16<-project_data$Schooling

## fitting model
fitt<-lm(y~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+x16)

## forming x matrix
x <- cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16)
frame <- data.frame(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16)

## installing corrplot package for dispersion matrix
installed.packages("corrplot")
library(corrplot)

## checking correlation between variables and response variable
corrplot(cor(cbind(y,frame)),method="number")

## installing car package for VIF
install.packages("car")
library(car)

## checking VIF
vif(fitt)
summary(fitt)
## we observe that adj R sq= 0.885 and Multiple R-squared: 0.8996

## installing package for variance decomposition
install.packages("perturb")
library(perturb)

## checking variance decomposition
colldiag(fitt)

##??16,4(=0.906), ??16,9(=0.933) are large
##the regressor variable with higher VIF value (i.e. x9 ) is removed and fit model excluding x9
fitt1<-lm(y~x1+x2+x3+x4+x5+x6+x7+x8+x10+x11+x12+x13+x14+x15+x16)
summary(fitt1)

## we observe that adj R sq= 0.8854 and Multiple R-squared: 0.899
## stage 2 checking vif
vif(fitt1)

## Checking vdm at stage 3
colldiag(fitt1)

##??15,3(=0.970), ??15,7(=0.960) are large
##the regressor variable with higher VIF value (i.e. x3 ) is removed and fit model excluding x9
fitt2<-lm(y~x1+x2+x4+x5+x6+x7+x8+x10+x11+x12+x13+x14+x15+x16)
summary(fitt2)

## we observe that adj R sq= 0.8847 and Multiple R-squared: 0.8975
## stage 2 checking vif
vif(fitt2)

## Checking vdm at stage 3
colldiag(fitt2)

##***??14,(), ??14,() are large
##the regressor variable with higher VIF value (i.e. x13 ) is removed and fit model excluding x9
fitt3<-lm(y~x1+x2+x4+x5+x6+x7+x8+x10+x11+x12+x14+x15+x16)
summary(fitt3)

## we observe that adj R sq= 0.8854 and Multiple R-squared: 0.8972
## stage 2 checking vif
vif(fitt3)
## we observed that vif are not less than 5 so no dropping of variable now.

## now checking for heteroscedasticity
plot(fitt3)
res <- residuals(fitt3)
ff <- fitted(fitt3)
cor(res,ff)
plot(res,ff)
##errors are random since correlation b/w res and ff is approximately zero

## cheecking normality
shapiro.test(res)
## p value(=0.2962) is not less than 0.05 hence normal

## Lack Of fit
install.packages("alr3")
library(alr3)
pureErrorAnova

## Variable Selection..... Akaike Information criteria
install.packages("MASS")
library(MASS)
step(fitt3,direction="both")

install.packages("AICcmodavg")
library(AICcmodavg)
AICc(fitt3)
## AICc=634.9806

## prediction
y_hat<- predict(fitt3)
y_hat
cor(y,y_hat)
plot(y,y_hat)
plot(y_hat)
plot(y)
plot(y,y_hat)

plot(y,y_hat, type="l", col="green", lwd=5, xlab="y", ylab="y_hat", main="graph between y and y_hat")
lines(y, y_hat, col="red", lwd=2)
lines(y, y_hat, type="b", col="red", lwd=2, pch=19)

#checking normality
options(scipen = 999)
ress<-residuals(fitt3)
ks.test(ress,"pnorm")

## Q-Q Plot
qqnorm(ress)
qqline(ress)

f<- lm(y~ x2+x4+x10+x15)
summary(f)
options(scipen = 999)
r<-residuals(f)
ks.test(r,"pnorm")
hist(r,127)
hist(r)


Related Solutions

Write your own routine to solve a system of n linear equations and n unknowns using...
Write your own routine to solve a system of n linear equations and n unknowns using the LU decomposition. Input should take an augmented matrix and the number of equations n. Output should be the augmented matrix. L and U matrices and the solution. Also compare your results to those of Matlab’s built in LU decomposition. Use your code to solve the systems given as (a) and (b) below: a) 3a − 2b + c = −3, a − 4b...
Write a function to solve a system of linear equations of the form Ax= b using...
Write a function to solve a system of linear equations of the form Ax= b using the iterative Gauss-Seidel method. You are free to use any basic MATLAB operation to implement the algorithm (i.e. you may use any combination of loops, indexing, math, etc.), but avoid “built-in” solution methods — you would not be allowed to use the GaussSeidel function if such a function existed. The function must also test for a number of possible issues. If an issue is...
Write a MATLAB function function = pivGauss(.....) to solve linear equations using Gaussian Elimination with Partial...
Write a MATLAB function function = pivGauss(.....) to solve linear equations using Gaussian Elimination with Partial Pivoting. You'll need to employ Nested Loops. Thank you !
3. Solve the following linear programming problem. You must use the dual. First write down the...
3. Solve the following linear programming problem. You must use the dual. First write down the dual maximization LP problem, solve that, then state the solution to the original minimization problem. (a) Minimize w = 4y1 + 5y2 + 7y3 Subject to: y1 + y2 + y3 ≥ 18 2y1 + y2 + 2y3 ≥ 20 y1 + 2y2 + 3y3 ≥ 25 y1, y2, y3 ≥ 0 (b) Making use of shadow costs, if the 2nd original constraint changed...
Solve the following linear programming problem. You must use the dual. First write down the dual...
Solve the following linear programming problem. You must use the dual. First write down the dual maximization LP problem, solve that, then state the solution to the original minimization problem. (a) Minimize w = 4y1 + 5y2 + 7y3 Subject to: y1 + y2 + y3 ≥ 18 2y1 + y2 + 2y3 ≥ 20 y1 + 2y2 + 3y3 ≥ 25 y1, y2, y3 ≥ 0 (b) Making use of shadow costs, if the 2nd original constraint changed to...
Solve the below questions using your own words PLEASE!! Make sure to write by your own...
Solve the below questions using your own words PLEASE!! Make sure to write by your own words or paraphrase 1. What is the difference between Windows and Linux server 2. Give some advantages and disadvantages Windows and Linux Operating System
pl use R code to do that and show me the program Use a linear regression...
pl use R code to do that and show me the program Use a linear regression of Y~log(X) using the labtestdata.csv data to predict Y (2dp) when x = 250 on the unlogged scale? Calculate the F-value (1 dp) from an ANOVA on the regression of Y~log(X) using the data contained in labtestdata.csv abtestdata.csv y x 1.018746 1 1.508895 2 0.727282 3 1.787127 4 2.903983 5 3.181554 6 1.737834 7 2.715766 8 1.570552 9 3.046107 10 4.499675 11 4.240688 12...
PLEASE NO PLAGIARISM AND MUST BE IN YOUR OWN WORDS You must write a minimum of...
PLEASE NO PLAGIARISM AND MUST BE IN YOUR OWN WORDS You must write a minimum of two paragraphs and every paragraph should have at least four complete sentences. What is risk management? What is Vulnerability assessment? Thanks!!
PLEASE NO PLAGIARISM AND MUST BE IN YOUR OWN WORDS You must write a minimum of...
PLEASE NO PLAGIARISM AND MUST BE IN YOUR OWN WORDS You must write a minimum of two paragraphs and every paragraph should have at least four complete sentences. What is the difference between security and safety? What is the relationship between risk management and vulnerability assessment? Thank!!
a. Using the following R codes to fit the linear regression model for VitC on HeadWt,...
a. Using the following R codes to fit the linear regression model for VitC on HeadWt, and obtain its summary. Paste the R output in your homework. cabbages_data <- read.csv("http://users.stat.umn.edu/~wuxxx725/data/cabbages_data.csv") cabbages_reg <- lm(VitC ~ HeadWt, data = cabbages_data) summary(cabbages_reg) b. State and interpret the value of r 2 from the model summary output in part a). c. Calculate the correlation r between HeadWt and VitC, and state the strength and the direction of the correlation. d. State the estimated regression...
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT