import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
# Generate synthetic regression dataset
X, y = make_regression(n_samples=100, n_features=1, noise=15,
                       random_state=42)
X = X.flatten()
y = y.flatten()
# Add bias (intercept) term to X
X_b = np.c_[np.ones(X.shape[0]), X] # Add x0 = 1
# Cost function
def compute_cost(X, y, theta):
    m = len(y)
    predictions = X.dot(theta)
    cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2)
    return cost
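# Quick sanity check (an illustrative addition, not part of the original
# listing): the cost for an all-zeros theta gives a baseline that the fitted
# model should beat by a wide margin.
print("Baseline cost with theta = [0, 0]:", compute_cost(X_b, y, np.zeros(2)))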
# Gradient Descent algorithm
def gradient_descent(X, y, theta, learning_rate, iterations):
    m = len(y)
    cost_history = []
    for i in range(iterations):
        # Gradient of the cost: (1/m) * X^T (X theta - y)
        gradients = (1 / m) * X.T.dot(X.dot(theta) - y)
        theta = theta - learning_rate * gradients
        cost = compute_cost(X, y, theta)
        cost_history.append(cost)
    return theta, cost_history
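# Side note (an assumption-labeled addition, not in the original listing): for
# this quadratic cost, gradient descent converges whenever the learning rate is
# below 2 / L, where L is the largest eigenvalue of (1/m) * X_b.T @ X_b.
L_max = np.linalg.eigvalsh(X_b.T @ X_b / len(y)).max()
print("Largest curvature L:", L_max, "-> learning rates below", 2 / L_max, "converge")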
# Initial values (theta starts at a random point, so printed results vary run to run)
theta_init = np.random.randn(2)
learning_rate = 0.1
iterations = 50
# Run gradient descent
theta_final, cost_history = gradient_descent(X_b, y, theta_init,
                                             learning_rate, iterations)
# Make predictions
y_pred = X_b.dot(theta_final)
# Print final results
print("Final Theta Values:", theta_final)
print("Final Cost:", cost_history[-1])
# Plot data and regression line
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label="Data", alpha=0.6)
plt.plot(X, y_pred, color='red', label="Regression Line")
plt.title("Linear Regression using Gradient Descent")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.grid(True)
plt.show()
# Plot cost over iterations
plt.figure(figsize=(10, 6))
plt.plot(range(iterations), cost_history, marker='o', linestyle='-', color='b')
plt.title("Cost vs. Iterations")
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.grid(True)
plt.show()
Output:
Final Theta Values: [ 1.50615837 45.10580632]
Final Cost: 88.01409174506877