School of Computer Science and Engineering
CSE3045- Mathematical Modelling for Data Science
Semester: Fall 2022-23
Slot: L21+L22
LAB Activity-3
Faculty: Dr. Arup Ghosh
1) Implement the Stochastic Gradient Descent algorithm for linear
regression using Python and test it on some randomly generated
datasets.
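The walkthrough below uses the Boston Housing data as its test bed. To test on a
randomly generated dataset instead, as the task statement asks, one option is
sklearn's make_regression (a minimal sketch; the sample and feature counts here
are arbitrary choices):

from sklearn.datasets import make_regression
# Random regression problem: 500 samples, 13 features, Gaussian noise
X_rand, y_rand = make_regression(n_samples=500, n_features=13,
                                 noise=10.0, random_state=42)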
Code
import warnings
warnings.filterwarnings("ignore")  # silence the load_boston deprecation warning
from sklearn.datasets import load_boston
from sklearn import preprocessing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prettytable import PrettyTable
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
print("DONE")
# Linear regression on the Boston Housing data
boston = load_boston()
boston_data = pd.DataFrame(boston.data, columns=boston.feature_names)
X = boston.data
Y = boston.target
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)
# standardizing data
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test=scaler.transform(x_test)
# Add the price column back so each sampled mini-batch carries its target
train_data=pd.DataFrame(x_train)
train_data['price']=y_train
train_data.head(3)
# Hand-implemented mini-batch SGD for linear regression
def GradientDescentRegressor(train_data, learning_rate=0.001, n_itr=1000, k=10):
    w_cur = np.zeros(shape=(1, train_data.shape[1] - 1))
    b_cur = 0
    cur_itr = 1
    while cur_itr <= n_itr:
        w_old = w_cur
        b_old = b_cur
        w_temp = np.zeros(shape=(1, train_data.shape[1] - 1))
        b_temp = 0
        temp = train_data.sample(k)  # random mini-batch of k rows
        y = np.array(temp['price'])
        x = np.array(temp.drop('price', axis=1))
        # gradient of the mean squared error over the mini-batch:
        # dw = (-2/k) * sum_i x_i (y_i - (w.x_i + b)), db = (-2/k) * sum_i (y_i - (w.x_i + b))
        for i in range(k):
            w_temp += x[i] * (y[i] - (np.dot(w_old, x[i]) + b_old)) * (-2 / k)
            b_temp += (y[i] - (np.dot(w_old, x[i]) + b_old)) * (-2 / k)
        w_cur = w_old - learning_rate * w_temp
        b_cur = b_old - learning_rate * b_temp
        if (w_old == w_cur).all():  # stop early once the weights no longer change
            break
        cur_itr += 1
    return w_cur, b_cur
def predict(x, w, b):
    y_pred = []
    for i in range(len(x)):
        # np.asscalar was removed in recent NumPy; .item() is the replacement
        y = (np.dot(w, x[i]) + b).item()
        y_pred.append(y)
    return np.array(y_pred)
def plot_(y_actual, y_pred):
    # scatter plot of actual vs. predicted target values
    plt.scatter(y_actual, y_pred)
    plt.grid()
    plt.title('Scatter plot between actual y and predicted y')
    plt.xlabel('Actual y')
    plt.ylabel('Predicted y')
    plt.show()
w,b = GradientDescentRegressor(train_data,learning_rate=0.001,n_itr=1000)
y_pred_sgd=predict(x_test,w,b)
plot_(y_test,y_pred_sgd)
print('Mean Squared Error :',mean_squared_error(y_test, y_pred_sgd))
OUTPUT:
learning_rate=0.001, n_itr=1000
Mean Squared Error : 37.646704089438025
Increasing the learning rate to 0.01 lowers the test error noticeably:
learning_rate=0.01, n_itr=1000
Mean Squared Error : 23.285739328791426
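The two runs above can be reproduced in one pass by sweeping over learning rates
(a minimal sketch, reusing GradientDescentRegressor and predict from above; the
rate grid is an arbitrary choice):

# Sketch: compare test MSE across a few learning rates
for lr in [0.001, 0.01, 0.1]:
    w_lr, b_lr = GradientDescentRegressor(train_data, learning_rate=lr, n_itr=1000)
    print('learning_rate =', lr, '-> MSE =',
          mean_squared_error(y_test, predict(x_test, w_lr, b_lr)))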
Implementing Stochastic Gradient Descent using the sklearn library:
# sklearn SGDRegressor on the same standardized data
n_iter = 100
clf_ = SGDRegressor(max_iter=n_iter)
clf_.fit(x_train, y_train)
y_pred_sksgd = clf_.predict(x_test)
plt.scatter(y_test, y_pred_sksgd)
plt.grid()
plt.xlabel('Actual y')
plt.ylabel('Predicted y')
plt.title('Scatter plot between actual y and predicted y')
plt.show()
print('Mean Squared Error :', mean_squared_error(y_test, y_pred_sksgd))
# weight vector learned by the sklearn SGDRegressor
sklearn_w = clf_.coef_
sklearn_w
Comparing Both Methods:
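One way to line the two models up (a minimal sketch, using the PrettyTable
import from the top of the file; w, sklearn_w, and the predictions come from
the runs above):

# Sketch: per-feature weights from both models, plus their test MSEs
table = PrettyTable(['Feature', 'Custom SGD weight', 'sklearn SGD weight'])
for name, w_c, w_s in zip(load_boston().feature_names, w.flatten(), sklearn_w):
    table.add_row([name, round(w_c, 4), round(w_s, 4)])
print(table)
print('Custom SGD MSE :', mean_squared_error(y_test, y_pred_sgd))
print('sklearn SGD MSE:', mean_squared_error(y_test, y_pred_sksgd))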