In: Computer Science
Machine Learning
Solve the following using Python in a Jupyter notebook.
1. Linear Regression
Dataset used: Diabetes from sklearn
You are asked to solve a regression problem on the Diabetes
dataset. Please review the Diabetes dataset before writing the
program, in order to decide which attributes will be used in the
regression process.
Please use cross-validation to obtain the most reliable evaluation
of the model.
The tasks are:
• Perform linear regression using the OLS (Ordinary Least Squares)
method (sklearn.linear_model.LinearRegression).
• Also perform linear regression with additional regularization
(Lasso).
• Also perform linear regression trained with the gradient descent
algorithm (sklearn.linear_model.SGDRegressor).
• scikit-learn offers several other regression methods that can also
be tried on the Diabetes problem.
# Load the scikit-learn Diabetes dataset into a DataFrame and build the
# train/test split that the model sections below share.
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

data = load_diabetes()
# One column per feature, followed by a final 'target' column.
data1 = pd.DataFrame(
    data=np.c_[data['data'], data['target']],
    columns=data['feature_names'] + ['target'],
)
# head is a method; without the call parentheses the bound-method object is
# printed rather than the first rows of the frame.
print(data1.head())

y = data1['target']
# drop() returns a new frame, so X holds only the features while data1 is
# left intact (the previous alias + in-place drop silently altered data1).
X = data1.drop(columns=['target'])

# A fixed seed makes the split, and every score computed from it, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
#LinearRegression
# Ordinary least squares: report the hold-out MSE on the existing split and
# the k-fold cross-validated MSE requested by the assignment.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_score

reg = LinearRegression().fit(X_train, y_train)
pred1 = reg.predict(X_train)
pred2 = reg.predict(X_test)
# mean_squared_error is documented as (y_true, y_pred); MSE is symmetric, so
# the numbers are unchanged, but the call now matches the API.
train_mse = mean_squared_error(y_train, pred1)
test_mse = mean_squared_error(y_test, pred2)
print('train mean_squared_error :', train_mse)
print('test mean_squared_error :', test_mse)

# Cross-validation over the full data set: a single random split gives a
# noisy estimate, so average the MSE over 5 shuffled folds.
cv = KFold(n_splits=5, shuffle=True, random_state=0)
# The scorer returns negated MSE (higher is better), hence the sign flip.
cv_mse = -cross_val_score(
    LinearRegression(), X, y, scoring='neg_mean_squared_error', cv=cv
)
print('cross-validated mean_squared_error : %.3f (+/- %.3f)'
      % (cv_mse.mean(), cv_mse.std()))
#Lasso
# L1-regularized linear regression: report the hold-out MSE on the existing
# split and the k-fold cross-validated MSE requested by the assignment.
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_score

# NOTE(review): the default regularization strength is kept as in the
# original; it may be worth tuning alpha (e.g. with LassoCV) - TODO confirm.
reg = linear_model.Lasso()
reg.fit(X_train, y_train)
pred1 = reg.predict(X_train)
pred2 = reg.predict(X_test)
# mean_squared_error is documented as (y_true, y_pred); MSE is symmetric, so
# the numbers are unchanged, but the call now matches the API.
train_mse = mean_squared_error(y_train, pred1)
test_mse = mean_squared_error(y_test, pred2)
print('train mean_squared_error :', train_mse)
print('test mean_squared_error :', test_mse)

# Cross-validation over the full data set: a single random split gives a
# noisy estimate, so average the MSE over 5 shuffled folds.
cv = KFold(n_splits=5, shuffle=True, random_state=0)
# The scorer returns negated MSE (higher is better), hence the sign flip.
cv_mse = -cross_val_score(
    linear_model.Lasso(), X, y, scoring='neg_mean_squared_error', cv=cv
)
print('cross-validated mean_squared_error : %.3f (+/- %.3f)'
      % (cv_mse.mean(), cv_mse.std()))
#SGDRegressor
# Linear regression trained by stochastic gradient descent: report the
# hold-out MSE on the existing split and the k-fold cross-validated MSE
# requested by the assignment.
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The scikit-learn SGD guide notes that SGD is sensitive to feature scaling
# and recommends standardizing the inputs, so the regressor is wrapped in a
# pipeline that scales first. Putting the scaler inside the pipeline means it
# is fitted on training data only (also within each CV fold).
# random_state fixes the sample shuffling so repeated runs give the same fit.
reg = make_pipeline(StandardScaler(), linear_model.SGDRegressor(random_state=0))
reg.fit(X_train, y_train)
pred1 = reg.predict(X_train)
pred2 = reg.predict(X_test)
# mean_squared_error is documented as (y_true, y_pred); MSE is symmetric, so
# the call order does not change the value, but it now matches the API.
train_mse = mean_squared_error(y_train, pred1)
test_mse = mean_squared_error(y_test, pred2)
print('train mean_squared_error :', train_mse)
print('test mean_squared_error :', test_mse)

# Cross-validation over the full data set: a single random split gives a
# noisy estimate, so average the MSE over 5 shuffled folds.
cv = KFold(n_splits=5, shuffle=True, random_state=0)
# cross_val_score fits clones of the estimator, so the fitted reg above is
# not modified. The scorer returns negated MSE, hence the sign flip.
cv_mse = -cross_val_score(
    reg, X, y, scoring='neg_mean_squared_error', cv=cv
)
print('cross-validated mean_squared_error : %.3f (+/- %.3f)'
      % (cv_mse.mean(), cv_mse.std()))
#Bayesian Ridge
# Bayesian ridge regression (an additional scikit-learn regressor): report
# the hold-out MSE on the existing split and the k-fold cross-validated MSE
# requested by the assignment.
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_score

reg = linear_model.BayesianRidge()
reg.fit(X_train, y_train)
pred1 = reg.predict(X_train)
pred2 = reg.predict(X_test)
# mean_squared_error is documented as (y_true, y_pred); MSE is symmetric, so
# the numbers are unchanged, but the call now matches the API.
train_mse = mean_squared_error(y_train, pred1)
test_mse = mean_squared_error(y_test, pred2)
print('train mean_squared_error :', train_mse)
print('test mean_squared_error :', test_mse)

# Cross-validation over the full data set: a single random split gives a
# noisy estimate, so average the MSE over 5 shuffled folds.
cv = KFold(n_splits=5, shuffle=True, random_state=0)
# The scorer returns negated MSE (higher is better), hence the sign flip.
cv_mse = -cross_val_score(
    linear_model.BayesianRidge(), X, y, scoring='neg_mean_squared_error', cv=cv
)
print('cross-validated mean_squared_error : %.3f (+/- %.3f)'
      % (cv_mse.mean(), cv_mse.std()))