In: Computer Science
Assume a linear model and add zero-mean Gaussian noise to generate a sample. Divide the sample into two halves: a training set and a validation set.
Fit a linear regression on the training half and compute its error on the validation set. Do the same for polynomials of degrees 2 and 3.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Synthetic sample from a known linear model: y = 2*x + e, with e ~ N(0, 1).
# A locally seeded generator keeps the noise reproducible from run to run
# without touching NumPy's global random state.
rng = np.random.default_rng(0)
x = np.arange(10)
# Draw the noise 1-D, with the same shape as x. A (1, 10) draw would
# broadcast y into a 2-D row vector instead of a plain 1-D sample.
noise = rng.standard_normal(x.shape)
y = 2 * x + noise
# Scatter the noisy responses against x as a visual check of the sample.
plt.scatter(x=x, y=y)
from sklearn.model_selection import train_test_split
# Two hold-out splits of the same sample, each holding out 30% for testing;
# they differ only in random_state (100 vs 1).
# NOTE(review): the exercise text asks for a training *half*; test_size=0.3
# holds out 30% instead -- confirm which is intended.
x_col = x.reshape(-1, 1)  # x as a single-column (n, 1) array
y_col = y.reshape(-1, 1)  # y as a single-column (n, 1) array
X_train, X_test, y_train, y_test = train_test_split(
    x_col, y_col, test_size=0.3, random_state=100
)
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    x_col, y_col, test_size=0.3, random_state=1
)
from sklearn.linear_model import LinearRegression
# Fit one linear regression per training split, then predict the matching
# held-out rows. fit() returns the estimator itself, so construction and
# fitting are chained.
model1 = LinearRegression().fit(X_train, y_train)
model2 = LinearRegression().fit(X_train2, y_train2)
predictions1 = model1.predict(X_test)
predictions2 = model2.predict(X_test2)
from sklearn.metrics import mean_squared_error
# Root-mean-squared error of each linear model on its own held-out split
# (the square root of the mean squared error).
model1_error = mean_squared_error(y_test, predictions1) ** 0.5
model2_error = mean_squared_error(y_test2, predictions2) ** 0.5
for label, rmse in (
    ("model1 error=", model1_error),
    ("model2 error=", model2_error),
):
    print(label, rmse)
## Polynomial regression (degrees 2 and 3)
from sklearn.preprocessing import PolynomialFeatures
# Degree-2 fit: expand x into polynomial features, split with the same
# test_size and random_state as the model1 split, then fit a linear
# regression on the expanded training rows and predict the held-out rows.
poly_2 = PolynomialFeatures(degree=2)
X1 = poly_2.fit_transform(x.reshape(-1, 1))
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1,
    y.reshape(-1, 1),
    test_size=0.3,
    random_state=100,
)
model3 = LinearRegression().fit(X1_train, y1_train)
predictions3 = model3.predict(X1_test)
# Degree-3 fit: expand x into polynomial features, split with the same
# test_size and random_state as the model1 split, then fit a linear
# regression on the expanded training rows and predict the held-out rows.
poly_3 = PolynomialFeatures(degree=3)
X2 = poly_3.fit_transform(x.reshape(-1, 1))
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2,
    y.reshape(-1, 1),
    test_size=0.3,
    random_state=100,
)
model4 = LinearRegression().fit(X2_train, y2_train)
predictions4 = model4.predict(X2_test)
# Root-mean-squared error of the degree-2 and degree-3 fits on their
# held-out rows (the square root of the mean squared error).
model3_error = mean_squared_error(y1_test, predictions3) ** 0.5
model4_error = mean_squared_error(y2_test, predictions4) ** 0.5
for label, rmse in (
    ("model3 error=", model3_error),
    ("model4 error=", model4_error),
):
    print(label, rmse)