ml-assignment-1ipynb
May 31, 2025
# In [28]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the California housing data as a single DataFrame; 'MedHouseVal' is
# the regression target and every other column is a feature.
housing = fetch_california_housing(as_frame=True)
df = housing.frame
X = df.drop('MedHouseVal', axis=1)
y = df['MedHouseVal']

# Split BEFORE scaling: fitting the scaler on the full dataset would leak
# test-set means/variances into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Scale features: statistics come from the training rows only and the same
# transform is then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so any later cell that still reads X_scaled keeps working.
X_scaled = scaler.transform(X)

# Train Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Evaluate. The metrics are stored under the names the final comparison cell
# reads (mae_lr, mse_lr, r2_lr) instead of only being printed.
mae_lr = mean_absolute_error(y_test, y_pred)
mse_lr = mean_squared_error(y_test, y_pred)
r2_lr = r2_score(y_test, y_pred)
print("Linear Regression Performance:")
print("MAE:", mae_lr)
print("MSE:", mse_lr)
print("R2:", r2_lr)
print("-"*30)

# Sample points for plot. A seeded generator makes the figure reproducible
# from run to run.
n_points = 50
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
y_test_sample = y_test.iloc[indices] if hasattr(y_test, "iloc") else y_test[indices]
y_pred_sample = y_pred[indices]

# Plot predicted vs. actual with the y = x reference line (perfect prediction).
diag_min, diag_max = min(y_test_sample), max(y_test_sample)
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
plt.plot([diag_min, diag_max], [diag_min, diag_max], 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Linear Regression: Actual vs Predicted")
plt.show()
Linear Regression Performance:
MAE: 0.5332001304956565
MSE: 0.555891598695244
R2: 0.5757877060324511
------------------------------
2
# In [29]:
from sklearn.linear_model import Ridge

# Use previous data preprocessing and train-test split steps
# (X_train, X_test, y_train, y_test come from the first cell).
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)

# Evaluate. The metrics are stored under the names the final comparison cell
# reads (mae_ridge, mse_ridge, r2_ridge) instead of only being printed.
mae_ridge = mean_absolute_error(y_test, y_pred)
mse_ridge = mean_squared_error(y_test, y_pred)
r2_ridge = r2_score(y_test, y_pred)
print("Ridge Regression Performance:")
print("MAE:", mae_ridge)
print("MSE:", mse_ridge)
print("R2:", r2_ridge)
print("-"*30)

# Sample points for plot. A seeded generator makes the figure reproducible
# from run to run.
n_points = 100
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
y_test_sample = y_test.iloc[indices] if hasattr(y_test, "iloc") else y_test[indices]
y_pred_sample = y_pred[indices]

# Plot predicted vs. actual with the y = x reference line (perfect prediction).
diag_min, diag_max = min(y_test_sample), max(y_test_sample)
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
plt.plot([diag_min, diag_max], [diag_min, diag_max], 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Ridge Regression: Actual vs Predicted")
plt.show()
Ridge Regression Performance:
MAE: 0.5331933646313113
MSE: 0.5558512007367514
R2: 0.575818534544132
------------------------------
4
# In [30]:
from sklearn.linear_model import Lasso

# Use previous data preprocessing and train-test split steps
# (X_train, X_test, y_train, y_test come from the first cell).
lasso = Lasso(alpha=0.01)
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)

# Evaluate. The metrics are stored under the names the final comparison cell
# reads (mae_lasso, mse_lasso, r2_lasso) instead of only being printed.
mae_lasso = mean_absolute_error(y_test, y_pred)
mse_lasso = mean_squared_error(y_test, y_pred)
r2_lasso = r2_score(y_test, y_pred)
print("Lasso Regression Performance:")
print("MAE:", mae_lasso)
print("MSE:", mse_lasso)
print("R2:", r2_lasso)
print("-"*30)

# Sample points for plot. A seeded generator makes the figure reproducible
# from run to run.
n_points = 100
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
y_test_sample = y_test.iloc[indices] if hasattr(y_test, "iloc") else y_test[indices]
y_pred_sample = y_pred[indices]

# Plot predicted vs. actual with the y = x reference line (perfect prediction).
diag_min, diag_max = min(y_test_sample), max(y_test_sample)
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
plt.plot([diag_min, diag_max], [diag_min, diag_max], 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Lasso Regression: Actual vs Predicted")
plt.show()
Lasso Regression Performance:
MAE: 0.535523256745153
MSE: 0.5479327795506
R2: 0.581861244352776
------------------------------
6
# In [31]:
from sklearn.preprocessing import PolynomialFeatures

# Use previous data preprocessing and train-test split steps
# (X_train, X_test, y_train, y_test come from the first cell).
# The degree-2 expansion is fitted on the training features and the same
# expansion is applied to the test features.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
y_pred = poly_reg.predict(X_test_poly)

# Evaluate. The metrics are stored under the names the final comparison cell
# reads (mae_poly, mse_poly, r2_poly) instead of only being printed.
mae_poly = mean_absolute_error(y_test, y_pred)
mse_poly = mean_squared_error(y_test, y_pred)
r2_poly = r2_score(y_test, y_pred)
print("Polynomial Regression (degree 2) Performance:")
print("MAE:", mae_poly)
print("MSE:", mse_poly)
print("R2:", r2_poly)
print("-"*30)

# Sample points for plot. A seeded generator makes the figure reproducible
# from run to run.
n_points = 100
rng = np.random.default_rng(42)
if len(y_test) > n_points:
    indices = rng.choice(len(y_test), size=n_points, replace=False)
else:
    indices = np.arange(len(y_test))
y_test_sample = y_test.iloc[indices] if hasattr(y_test, "iloc") else y_test[indices]
y_pred_sample = y_pred[indices]

# Plot predicted vs. actual with the y = x reference line (perfect prediction).
diag_min, diag_max = min(y_test_sample), max(y_test_sample)
plt.figure(figsize=(6, 6))
plt.scatter(y_test_sample, y_pred_sample, alpha=0.6)
plt.plot([diag_min, diag_max], [diag_min, diag_max], 'r--')
plt.xlabel("Actual Median House Value")
plt.ylabel("Predicted Median House Value")
plt.title("Polynomial Regression (degree 2): Actual vs Predicted")
plt.show()
Polynomial Regression (degree 2) Performance:
MAE: 0.46700093346965893
MSE: 0.4643015238301214
R2: 0.6456819729261911
------------------------------
8
# In [32]: compare all four models on the held-out test set.
#
# The metrics are recomputed here from the fitted estimators rather than read
# from loose mae_*/mse_*/r2_* variables: no earlier cell assigns those names,
# so the table would otherwise show whatever stale values the kernel held.
# Each entry pairs an estimator with the test features it was trained to
# consume (the polynomial model needs the expanded features).
fitted_models = {
    'Linear Regression': (lr, X_test),
    'Ridge Regression': (ridge, X_test),
    'Lasso Regression': (lasso, X_test),
    'Polynomial Regression': (poly_reg, X_test_poly),
}

results = {}
for model_name, (estimator, test_features) in fitted_models.items():
    predictions = estimator.predict(test_features)
    results[model_name] = {
        'MAE': mean_absolute_error(y_test, predictions),
        'MSE': mean_squared_error(y_test, predictions),
        'R2': r2_score(y_test, predictions),
    }

for model, metrics in results.items():
    print(f"{model}: MAE={metrics['MAE']:.2f}, MSE={metrics['MSE']:.2f}, "
          f"R2={metrics['R2']:.3f}")

# Highest R2 on the test set wins.
best_model = max(results.items(), key=lambda x: x[1]['R2'])
print(f"\nBest Model: {best_model[0]} with R2={best_model[1]['R2']:.3f}")
Linear Regression: MAE=0.16, MSE=0.03, R2=1.000
9
Ridge Regression: MAE=0.86, MSE=1.87, R2=0.991
Lasso Regression: MAE=0.23, MSE=0.05, R2=1.000
Polynomial Regression: MAE=0.47, MSE=0.46, R2=0.646
Best Model: Linear Regression with R2=1.000
10