from google.colab import drive
drive.mount('/content/drive')   # mount Google Drive so the CSV below is reachable
filepath = '/content/drive/MyDrive/data.csv'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data=pd.read_csv(filepath)
# split off features (all columns but the last) and the target (last column)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
def cost_function(X, y, w):
    # mean squared error cost: J = (1/2m) * sum((Xw - y)^2)
    hypothesis = np.dot(X, w.T)
    J = (1 / (2 * len(y))) * np.sum((hypothesis - y) ** 2)
    return J
def batch_gradient_descent(X, y, w, alpha, iters):
    # update the weights with the gradient computed over the full training set
    cost_history = np.zeros(iters)
    for i in range(iters):
        hypothesis = np.dot(X, w.T)
        w = w - (alpha / len(y)) * np.dot(hypothesis - y, X)
        cost_history[i] = cost_function(X, y, w)
    return w, cost_history
def stochastic_gradient_descent(X, y, w, alpha, iters):
    # update the weights using one randomly chosen sample per iteration
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y))   # any index, including the last sample
        ind_x = X[rand_index:rand_index + 1]
        ind_y = y[rand_index:rand_index + 1]
        w = w - alpha * np.dot(np.dot(ind_x, w.T) - ind_y, ind_x)
        cost_history[i] = cost_function(ind_x, ind_y, w)
    return w, cost_history
def MB_gradient_descent(X, y, w, alpha, iters, batch_size):
    # update the weights using a random contiguous mini-batch per iteration
    cost_history = np.zeros(iters)
    for i in range(iters):
        rand_index = np.random.randint(len(y) - batch_size + 1)
        ind_x = X[rand_index:rand_index + batch_size]
        ind_y = y[rand_index:rand_index + batch_size]
        w = w - (alpha / batch_size) * ind_x.T.dot(ind_x.dot(w) - ind_y)
        cost_history[i] = cost_function(ind_x, ind_y, w)
    return w, cost_history
def mean_abs_error(Ypred, Yact):
    # mean absolute error
    return np.mean(np.abs(Yact - Ypred))
def mean_square_error(Ypred, Yact):
    # mean squared error
    return np.mean((Yact - Ypred) ** 2)
def r_squared(y_true, y_pred):
    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)
    return 1 - (ss_residual / ss_total)
def correcoff(Ypred, Yact):
    # Pearson correlation coefficient between predicted and actual values
    ypm = np.mean(Ypred)   # mean of predicted values
    yam = np.mean(Yact)    # mean of actual values
    numerator = np.sum((Yact - yam) * (Ypred - ypm))
    d1 = np.sqrt(np.sum((Yact - yam) ** 2))
    d2 = np.sqrt(np.sum((Ypred - ypm) ** 2))
    return numerator / (d1 * d2)
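# Optional cross-check, not part of the original pipeline: the hand-written
# metrics above can be compared against library equivalents on a tiny toy set.
# This assumes scikit-learn is available (it is preinstalled on Colab); the
# toy arrays below are made up purely for illustration.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

_y_true = np.array([3.0, 5.0, 2.5, 7.0])
_y_pred = np.array([2.5, 5.0, 3.0, 8.0])
print(mean_abs_error(_y_pred, _y_true), mean_absolute_error(_y_true, _y_pred))
print(mean_square_error(_y_pred, _y_true), mean_squared_error(_y_true, _y_pred))
print(r_squared(_y_true, _y_pred), r2_score(_y_true, _y_pred))
print(correcoff(_y_pred, _y_true), np.corrcoef(_y_true, _y_pred)[0, 1])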
X = data.iloc[:, :-1]   # all rows, all columns except the last
y = data.iloc[:, -1]    # target column
X = np.asarray(X)
y = np.asarray(y)
m = X.shape[0]

# min-max scale features and target to [0, 1]
xmin = np.min(X, axis=0)
xmax = np.max(X, axis=0)
X_normalized = (X - xmin) / (xmax - xmin)
ymin = np.min(y, axis=0)
ymax = np.max(y, axis=0)
y_normalized = (y - ymin) / (ymax - ymin)

# prepend a column of ones for the bias (intercept) term
m = X_normalized.shape[0]
pp = np.ones([m, 1])
X_normalized = np.append(pp, X_normalized, axis=1)
train_percentage = 0.70
train_size = int(len(X) * train_percentage)
# split the data into training and testing
X_train = X_normalized[:train_size]
y_train = y_normalized[:train_size]
X_test = X_normalized[train_size:]
y_test = y_normalized[train_size:]
print("Training Features Shape:", X_train.shape)
print("Testing Features Shape:", X_test.shape)
print("Training Target Shape:", y_train.shape)
print("Testing Target Shape:", y_test.shape)
Training Features Shape: (56, 9)
Testing Features Shape: (25, 9)
Training Target Shape: (56,)
Testing Target Shape: (25,)
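# Optional baseline, not in the original flow: a closed-form least-squares fit
# on the same training split gives weights that the gradient-descent variants
# below should roughly converge toward. np.linalg.lstsq is standard NumPy.
w_closed, *_ = np.linalg.lstsq(X_train, y_train, rcond=None)
print("Closed-form weights:", w_closed)
print("Closed-form training cost:", cost_function(X_train, y_train, w_closed))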
w= np.zeros((X_normalized.shape[1])) ###weight initialization
w.shape
(9,)
alpha=0.4 ##learning rate
iters=1500 ###iterations
batch_w,J_his = batch_gradient_descent(X_train,y_train,w,alpha,iters)
plt.plot(range(iters),J_his)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('BGD Cost vs Iterations')
plt.show()
bgd=batch_w
print("WEIGHT VECTOR",bgd)
y_pred_bgd=X_test.dot(bgd.T)
WEIGHT VECTOR [0.21794334 0.10606424 0.0478828  0.03935448 0.03229864 0.03136558 0.11796494 0.06546174 0.03341064]
yp_unnorm=(y_pred_bgd*(ymax-ymin))+ymin
y_test_unnorm=(y_test*(ymax-ymin))+ymin
a=mean_abs_error(yp_unnorm,y_test_unnorm)
print("MEAN ABSOLUTE ERROR",a)
MEAN ABSOLUTE ERROR 1.1326579129012353
b=mean_square_error(yp_unnorm,y_test_unnorm)
print("MEAN SQUARE ERROR:",b)
MEAN SQUARE ERROR: 1.819265635873902
c=correcoff(yp_unnorm,y_test_unnorm)
print("CORRELATION COEFF:",c)
CORRELATION COEFF: 0.2602600569650748
d=r_squared(y_test_unnorm, yp_unnorm)   # y_true first, then y_pred
print("R2:",d)
R2: -226.79600202581813
plt.figure(figsize=(8, 6))
plt.plot(y_test_unnorm, label='Actual Output', color='blue', marker='o')
plt.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Output Value')
plt.title('Predicted vs Actual Output (Line Plot)')
plt.legend()
plt.show()
alpha=0.4
iters=5000 ###iterations
w_n_l2, J_sgd_l2 = stochastic_gradient_descent(X_train, y_train, w, alpha, iters)
plt.plot(range(iters),J_sgd_l2)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('SGD Cost vs Iterations')
plt.show()
sgd=w_n_l2
print("WEIGHT VECTOR",sgd)
y_pred_sgd=X_test.dot(sgd.T)
yp_unnorm=(y_pred_sgd*(ymax-ymin))+ymin
y_test_unnorm=(y_test*(ymax-ymin))+ymin
a=mean_abs_error(yp_unnorm,y_test_unnorm)
print("MEAN ABSOLUTE ERROR",a)
b=mean_square_error(yp_unnorm,y_test_unnorm)
print("MEAN SQUARE ERROR:",b)
c=correcoff(y_test_unnorm, yp_unnorm)
print("CORRELATION COEFF:",c)
d=r_squared(y_test_unnorm, yp_unnorm)
print("R2:",d)
plt.figure(figsize=(8, 6))
plt.plot(y_test_unnorm, label='Actual Output', color='blue', marker='o')
plt.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Output Value')
plt.title('Predicted vs Actual Output (Line Plot)')
plt.legend()
plt.show()
WEIGHT VECTOR [0.21794334 0.10606424 0.0478828  0.03935448 0.03229864 0.03136558 0.11796494 0.06546174 0.03341064]
MEAN ABSOLUTE ERROR 0.9617008469068318
MEAN SQUARE ERROR: 1.6948874101095208
CORRELATION COEFF: 0.7968621371120339
R2: -6.284048634553031
alpha=0.4
iters=2000 ###iterations
batch_size=15
mb_w_l1, J_mb_l1 = MB_gradient_descent(X_train, y_train, w, alpha, iters, batch_size)
plt.plot(range(iters),J_mb_l1)
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.title('MBGD Cost vs Iterations')
plt.show()
mbgd=mb_w_l1
print("WEIGHT VECTOR",mbgd)
y_pred_mbgd=X_test.dot(mbgd.T)
yp_unnorm=(y_pred_mbgd*(ymax-ymin))+ymin
y_test_unnorm=(y_test*(ymax-ymin))+ymin
a=mean_abs_error(yp_unnorm,y_test_unnorm)
print("MEAN ABSOLUTE ERROR",a)
b=mean_square_error(yp_unnorm,y_test_unnorm)
print("MEAN SQUARE ERROR:",b)
c=correcoff(yp_unnorm,y_test_unnorm)
print("CORRELATION COEFF:",c)
d=r_squared(y_test_unnorm, yp_unnorm)
print("R2:",d)
plt.figure(figsize=(8, 6))
plt.plot(y_test_unnorm, label='Actual Output', color='blue', marker='o')
plt.plot(yp_unnorm, label='Predicted Output', color='red', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('Output Value')
plt.title('Predicted vs Actual Output')
plt.legend()
plt.show()
WEIGHT VECTOR [0.21794334 0.10606424 0.0478828  0.03935448 0.03229864 0.03136558 0.11796494 0.06546174 0.03341064]
MEAN ABSOLUTE ERROR 1.0860545963179458
MEAN SQUARE ERROR: 1.7247709330182672
CORRELATION COEFF: 0.39688976236696366
R2: -293.6095175999243