Regression_model
July 15, 2022
[ ]: from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
[ ]: from IPython.display import Image
# get the image
Image("drive/My Drive/ML Lab/ML Lab 2022-2023/LR_equation.jpeg", width=250,␣
,→height=200)
[ ]: import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)
    # mean of x and y vector
    m_x = np.mean(x)
    m_y = np.mean(y)
    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y*x) - n*m_y*m_x
    SS_xx = np.sum(x*x) - n*m_x*m_x
    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1*m_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)
    # predicted response vector
    y_pred = b[0] + b[1]*x
    print(y_pred)
    # plotting the regression line
    plt.plot(x, y_pred, marker='^', color="g")
    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    # function to show plot
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([2, 1, 5, 3, 6, 4, 9, 10, 11, 8])
    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    # plotting regression line
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
Estimated coefficients:
b_0 = 1.4000000000000004
b_1 = 1.0
[ 1.4 2.4 3.4 4.4 5.4 6.4 7.4 8.4 9.4 10.4]
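As a quick cross-check (my addition, not part of the original lab), the closed-form coefficients above can be compared with numpy's built-in least-squares polynomial fit:
[ ]: import numpy as np
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([2, 1, 5, 3, 6, 4, 9, 10, 11, 8])
# np.polyfit returns coefficients highest degree first: [slope, intercept]
b_1, b_0 = np.polyfit(x, y, deg=1)
print(b_0, b_1)   # approximately 1.4 and 1.0, matching estimate_coef(x, y)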
https://realpython.com/linear-regression-in-python/
R-squared is a goodness-of-fit measure for linear regression models. This statistic indicates the percentage of the variance in the dependent variable that the independent variables explain collectively. R-squared values range from 0 to 1 and are commonly stated as percentages from 0% to 100%. An R-squared of 1 (100%) means that all movement of the dependent variable is completely explained by movement in the independent variable(s).
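To make the definition concrete, here is a minimal sketch (assuming the x and y arrays used above; the y_hat line hard-codes the coefficients estimated earlier) that computes R-squared as 1 - SS_res / SS_tot:
[ ]: import numpy as np
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([2, 1, 5, 3, 6, 4, 9, 10, 11, 8])
y_hat = 1.4 + 1.0 * x                    # fitted line from the coefficients above
ss_res = np.sum((y - y_hat) ** 2)        # residual sum of squares
ss_tot = np.sum((y - np.mean(y)) ** 2)   # total sum of squares
print(1 - ss_res / ss_tot)               # ~0.7576, matching lr_model.score(x, y) below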
[ ]: help(plt.plot)
[ ]: from sklearn.linear_model import LinearRegression
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).reshape((-1, 1))
y = np.array([2, 1, 5, 3, 6, 4, 9, 10, 11, 8])
print(x.shape)
lr_model = LinearRegression().fit(x, y)
print("Coefficient of determination, i.e. the r-squared value =", lr_model.score(x, y))
print("Linear regression coefficient associated with the predictor variable =", lr_model.coef_)
print("Linear regression intercept =", lr_model.intercept_)
(10, 1)
Coefficient of determination, i.e. the r-squared value = 0.7575757575757576
Linear regression coefficient associated with the predictor variable = [1.]
Linear regression intercept = 1.4000000000000004
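Note that scikit-learn expects the feature matrix to be two-dimensional, which is why x is reshaped to (10, 1) above; fitting on a 1-D array raises a ValueError. A minimal illustration (my addition):
[ ]: x_1d = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# LinearRegression().fit(x_1d, y) would raise ValueError: Expected 2D array
x_2d = x_1d.reshape((-1, 1))   # -1 infers the row count; one column = one feature
print(x_2d.shape)              # (10, 1)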
[ ]: z=np.array([5, 15, 25, 35, 45, 55]).reshape((-1, 1))
print(z.shape)
(6, 1)
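z is defined above but never used in the lab; as an illustrative extension (my addition, using the standard predict method), the fitted simple model can produce responses for these new inputs:
[ ]: y_new = lr_model.predict(z)   # b_0 + b_1 * z for each row
print(y_new)                       # expected: [ 6.4 16.4 26.4 36.4 46.4 56.4]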
1 Multiple Linear Regression
The example below fits data generated from the model y = 1 * x_0 + 2 * x_1 + 3.
[ ]: import numpy as np
from sklearn.linear_model import LinearRegression
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# y = 1 * x_0 + 2 * x_1 + 3
y = np.dot(X, np.array([1, 2])) + 3
reg = LinearRegression().fit(X, y)
print(reg.score(X, y))
print(reg.coef_)
print(reg.intercept_)
reg.predict(np.array([[3, 5]]))
1.0
[1. 2.]
3.0000000000000018
[ ]: array([16.])
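As a sanity check on these coefficients (a sketch of my own, not from the lab), the same least-squares solution can be recovered directly in numpy by appending a column of ones for the intercept:
[ ]: import numpy as np
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3
X_aug = np.column_stack([np.ones(len(X)), X])   # ones column estimates the intercept
theta, *_ = np.linalg.lstsq(X_aug, y, rcond=None)
print(theta)   # [intercept, coef_0, coef_1] -> approximately [3., 1., 2.]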
[ ]: help(LinearRegression.score)
Help on function score in module sklearn.base:
score(self, X, y, sample_weight=None)
Returns the coefficient of determination R^2 of the prediction.
The coefficient R^2 is defined as (1 - u/v), where u is the residual
sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
sum of squares ((y_true - y_true.mean()) ** 2).sum().
The best possible score is 1.0 and it can be negative (because the
model can be arbitrarily worse). A constant model that always
predicts the expected value of y, disregarding the input features,
would get a R^2 score of 0.0.
Parameters
----------
X : array-like, shape = (n_samples, n_features)
Test samples.
y : array-like, shape = (n_samples) or (n_samples, n_outputs)
True values for X.
sample_weight : array-like, shape = [n_samples], optional
Sample weights.
Returns
-------
score : float
R^2 of self.predict(X) wrt. y.
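To illustrate the docstring's remark that a constant model scores 0.0 and a worse model can score negative, here is a small sketch of my own using the 1 - u/v formula:
[ ]: import numpy as np
y_true = np.array([2, 1, 5, 3, 6, 4, 9, 10, 11, 8], dtype=float)
def r2(y_true, y_pred):
    u = np.sum((y_true - y_pred) ** 2)          # residual sum of squares
    v = np.sum((y_true - y_true.mean()) ** 2)   # total sum of squares
    return 1 - u / v
print(r2(y_true, np.full_like(y_true, y_true.mean())))  # constant model -> 0.0
print(r2(y_true, np.zeros_like(y_true)))                # worse than constant -> negative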
[ ]: pwd
[ ]: '/content'
[ ]: from IPython.display import Image
# get the image
Image("drive/My Drive/ML Lab/ML Lab 2022-2023/rsquared.png", width=250,␣
,→height=200)
[ ]: from IPython.display import Image
# get the image
Image("drive/My Drive/ML Lab/ML Lab 2022-2023/polynomial_reg.png", width=650,␣
,→height=150)
2 Polynomial Regression
[ ]: # Step 1: Import packages
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Step 2a: Provide data
x = [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
y = [4, 5, 20, 14, 32, 22, 38, 43]
x, y = np.array(x), np.array(y)
# Step 2b: Transform input data
x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)
# Step 3: Create a model and fit it
model = LinearRegression().fit(x_, y)
# Step 4: Get results
r_sq = model.score(x_, y)
intercept, coefficients = model.intercept_, model.coef_
# Step 5: Predict
y_pred = model.predict(x_)
[ ]: print('coefficient of determination:', r_sq)
print('intercept:', intercept)
print('coefficients:', coefficients, sep='\n')
print('predicted response:', y_pred, sep='\n')
coefficient of determination: 0.9453701449127822
intercept: 0.843055645239474
coefficients:
[ 2.44828275 0.16160353 -0.15259677 0.47928683 -0.4641851 ]
predicted response:
[ 0.54047408 11.36340283 16.07809622 15.79139 29.73858619 23.50834636
39.05631386 41.92339046]
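The five coefficients above line up with the five transformed features. As a quick check (my addition), PolynomialFeatures with degree=2 and include_bias=False expands each row [x1, x2] into [x1, x2, x1^2, x1*x2, x2^2]:
[ ]: from sklearn.preprocessing import PolynomialFeatures
import numpy as np
sample = np.array([[5, 1]])
print(PolynomialFeatures(degree=2, include_bias=False).fit_transform(sample))
# [[ 5.  1. 25.  5.  1.]]  -> columns are x1, x2, x1^2, x1*x2, x2^2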