# 5/31/25, 12:53 PM Copy of ML_assignment 1ipynb - Colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# --- Baseline: ordinary least-squares regression on California Housing ---

# Load the dataset as a DataFrame; 'MedHouseVal' is the regression target
# and every other column is used as a feature.
housing = fetch_california_housing(as_frame=True)
df = housing.frame
X = df.drop('MedHouseVal', axis=1)
y = df['MedHouseVal']

# Split BEFORE scaling so the scaler's mean/std are estimated from the
# training rows only; fitting the scaler on the full dataset leaks test-set
# statistics into preprocessing. The same test_size/random_state are used,
# so the train/test row partition is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Scale features: fit on the training split, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Retained so any code that still refers to X_scaled keeps working; it is
# now the full feature matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)

# Train Linear Regression and predict on the held-out split.
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Evaluate on the test split.
print("Linear Regression Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2:", r2_score(y_test, y_pred))
print("-"*30)

# Plot only a random subsample of test points to keep the scatter readable.
# A seeded generator makes the figure reproducible between runs.
n_points = 50
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
# y_test is a pandas Series here, so select by position with .iloc;
# y_pred is a NumPy array and is indexed directly.
y_test_sample = y_test.iloc[indices]
y_pred_sample = y_pred[indices]

# Actual vs. predicted scatter with the y = x reference line (dashed red)
# spanning the range of the sampled actual values.
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
diag = [y_test_sample.min(), y_test_sample.max()]
plt.plot(diag, diag, 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Linear Regression: Actual vs Predicted")
plt.show()
# https://colab.research.google.com/drive/1hvjOBqq5IVjp8wvjbFDadWB6tDWzGhxh#printMode=true 1/5
# 5/31/25, 12:53 PM Copy of ML_assignment 1ipynb - Colab
# Recorded output of the Linear Regression cell (from the original notebook run):
# Linear Regression Performance:
# MAE: 0.5332001304956565
# MSE: 0.555891598695244
# R2: 0.5757877060324511
# ------------------------------
from sklearn.linear_model import Ridge

# --- Ridge (L2-regularised) regression ---
# Reuses X_train / X_test / y_train / y_test produced by the earlier
# preprocessing and train-test split cell.
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

# Evaluate on the test split.
print("Ridge Regression Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2:", r2_score(y_test, y_pred))
print("-"*30)

# Plot only a random subsample of test points to keep the scatter readable.
# A seeded generator makes the figure reproducible between runs.
n_points = 100
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
# Positional selection: use .iloc when y_test is a pandas object, plain
# indexing otherwise (e.g. a NumPy array).
y_test_sample = y_test.iloc[indices] if hasattr(y_test, "iloc") else y_test[indices]
y_pred_sample = y_pred[indices]

# Actual vs. predicted scatter with the y = x reference line (dashed red)
# spanning the range of the sampled actual values.
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
diag = [y_test_sample.min(), y_test_sample.max()]
plt.plot(diag, diag, 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Ridge Regression: Actual vs Predicted")
plt.show()
# https://colab.research.google.com/drive/1hvjOBqq5IVjp8wvjbFDadWB6tDWzGhxh#printMode=true 2/5
# 5/31/25, 12:53 PM Copy of ML_assignment 1ipynb - Colab
# Recorded output of the Ridge Regression cell (from the original notebook run):
# Ridge Regression Performance:
# MAE: 0.5331933646313113
# MSE: 0.5558512007367514
# R2: 0.575818534544132
# ------------------------------
from sklearn.linear_model import Lasso

# --- Lasso (L1-regularised) regression ---
# Reuses X_train / X_test / y_train / y_test produced by the earlier
# preprocessing and train-test split cell.
lasso = Lasso(alpha=0.01)
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)

# Evaluate on the test split.
print("Lasso Regression Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2:", r2_score(y_test, y_pred))
print("-"*30)

# Plot only a random subsample of test points to keep the scatter readable.
# A seeded generator makes the figure reproducible between runs.
n_points = 100
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
# Positional selection: use .iloc when y_test is a pandas object, plain
# indexing otherwise (e.g. a NumPy array).
y_test_sample = y_test.iloc[indices] if hasattr(y_test, "iloc") else y_test[indices]
y_pred_sample = y_pred[indices]

# Actual vs. predicted scatter with the y = x reference line (dashed red)
# spanning the range of the sampled actual values.
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
diag = [y_test_sample.min(), y_test_sample.max()]
plt.plot(diag, diag, 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Lasso Regression: Actual vs Predicted")
plt.show()
# https://colab.research.google.com/drive/1hvjOBqq5IVjp8wvjbFDadWB6tDWzGhxh#printMode=true 3/5
# 5/31/25, 12:53 PM Copy of ML_assignment 1ipynb - Colab
# Recorded output of the Lasso Regression cell (from the original notebook run):
# Lasso Regression Performance:
# MAE: 0.535523256745153
# MSE: 0.5479327795506
# R2: 0.581861244352776
# ------------------------------
from sklearn.preprocessing import PolynomialFeatures

# --- Polynomial regression (degree 2) ---
# Reuses X_train / X_test / y_train / y_test produced by the earlier
# preprocessing and train-test split cell.
# The feature expansion is fit on the training split and then applied
# unchanged to the test split.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Ordinary least squares on the expanded features.
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
y_pred = poly_reg.predict(X_test_poly)

# Evaluate on the test split.
print("Polynomial Regression (degree 2) Performance:")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2:", r2_score(y_test, y_pred))
print("-"*30)

# Plot only a random subsample of test points to keep the scatter readable.
# A seeded generator makes the figure reproducible between runs.
n_points = 100
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
# Positional selection: use .iloc when y_test is a pandas object, plain
# indexing otherwise (e.g. a NumPy array).
y_test_sample = y_test.iloc[indices] if hasattr(y_test, "iloc") else y_test[indices]
y_pred_sample = y_pred[indices]

# Actual vs. predicted scatter with the y = x reference line (dashed red)
# spanning the range of the sampled actual values.
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
diag = [y_test_sample.min(), y_test_sample.max()]
plt.plot(diag, diag, 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Polynomial Regression (degree 2): Actual vs Predicted")
plt.show()
# https://colab.research.google.com/drive/1hvjOBqq5IVjp8wvjbFDadWB6tDWzGhxh#printMode=true 4/5
# 5/31/25, 12:53 PM Copy of ML_assignment 1ipynb - Colab
# Recorded output of the Polynomial Regression cell (from the original notebook run):
# Polynomial Regression (degree 2) Performance:
# MAE: 0.46700093346965893
# MSE: 0.4643015238301214
# R2: 0.6456819729261911
# ------------------------------
# https://colab.research.google.com/drive/1hvjOBqq5IVjp8wvjbFDadWB6tDWzGhxh#printMode=true 5/5