# Homework 3 - main file
# COMP.4220 Machine Learning
import itertools, functools
import numpy as np
import matplotlib.pyplot as plt
from regression import LinearRegression, RidgeRegression
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge as skRidge
from sklearn.linear_model import LinearRegression as skLinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
def train_test_split(X, t, test_size=0.2, random_state=None):
    """Split data into training and testing sets using only NumPy."""
    if random_state is not None:
        np.random.seed(random_state)
    # ---- Part (d) ---- #
    # 1. Shuffle the data by permuting the row indices
    indices = np.random.permutation(len(X))
    X = X[indices]
    t = t[indices]
    # 2. Split the shuffled data at the train/test boundary
    split_index = int(len(X) * (1 - test_size))
    X_train = X[:split_index]
    X_test = X[split_index:]
    t_train = t[:split_index]
    t_test = t[split_index:]
    return X_train, X_test, t_train, t_test
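# Example (sketch): the California housing data has 20640 rows, so with
# test_size=0.2 the split above yields 16512 training and 4128 test rows.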
def standardscalar(x: np.ndarray):
    """Standardize each column to zero mean and unit variance."""
    # ---- Part (b) ---- #
    return (x - np.mean(x, axis=0)) / np.std(x, axis=0)
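# Example (sketch): standardscalar(np.array([[1., 2.], [3., 6.]])) returns
# [[-1., -1.], [1., 1.]] -- every column ends up with mean 0 and std 1.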
class PolynomialFeature(object):
    """Generate polynomial features up to a given degree, with a bias column."""
    def __init__(self, degree=2):
        assert isinstance(degree, int)
        self.degree = degree
    def transform(self, x):
        if x.ndim == 1:
            x = x[:, None]
        x_t = x.transpose()
        # Start with the bias (all-ones) feature
        features = [np.ones(len(x))]
        # Add every monomial of each degree up to self.degree
        for degree in range(1, self.degree + 1):
            for items in itertools.combinations_with_replacement(x_t, degree):
                features.append(functools.reduce(lambda x, y: x * y, items))
        return np.asarray(features).transpose()
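# Example (sketch): with degree=2, a row [a, b] is transformed into the
# feature columns [1, a, b, a*a, a*b, b*b].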
def main():
# ---- Part (a) ---- #
housing = fetch_california_housing()
X = housing.data
t = housing.target
print(X.shape, t.shape)
print(housing.feature_names[:6])
print(housing.DESCR)
    # ---- Part (b) ---- #
    # Standardize with our NumPy implementation
    X_scaled = standardscalar(X)
    # Standardize the data using sklearn's StandardScaler
    scaler = StandardScaler()
    X_sklearn_scaled = scaler.fit_transform(X)
    print(np.allclose(X_scaled, X_sklearn_scaled))  # Should be True
    print(np.allclose(np.mean(X_scaled, axis=0),
                      np.mean(X_sklearn_scaled, axis=0)))  # Should be True
    print(np.allclose(np.std(X_scaled, axis=0),
                      np.std(X_sklearn_scaled, axis=0)))  # Should be True
    # ---- Part (c) ---- #
    X_train, X_test, t_train, t_test = train_test_split(X, t, test_size=0.2,
                                                        random_state=42)
    # Fit the scaler on the training split only, then reuse the same statistics
    # for the test split so no test information leaks into training
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Our NumPy standardization matches sklearn's on the training split
    print(np.max(np.abs(standardscalar(X_train) - X_train_scaled)))
    X_train, X_test = X_train_scaled, X_test_scaled
    # ---- Part (d) ---- #
    # Note: this local class shadows the RidgeRegression imported from regression
    class RidgeRegression:
        def __init__(self, lambda_):
            self.lambda_ = lambda_
        def fit(self, X, t):
            # Add the regularization term (lambda * I) to X^T X before inverting
            I = np.identity(X.shape[1])
            self.w = np.linalg.inv(X.T @ X + self.lambda_ * I) @ X.T @ t
        def predict(self, X):
            return X @ self.w
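    # The fit above minimizes ||t - Xw||^2 + lambda * ||w||^2; the minimizer is
    # w = (X^T X + lambda * I)^(-1) X^T t, and lambda = 0 recovers ordinary
    # least squares.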
    # ---- Part (k) ---- #
    linreg = LinearRegression()
    linreg.fit(X_train, t_train)
    # Predictions and evaluation for Linear Regression
    t_pred_linreg = linreg.predict(X_test)
    rmse_linreg = np.sqrt(mean_squared_error(t_test, t_pred_linreg))
    r2_linreg = r2_score(t_test, t_pred_linreg)
    print(f'Linear Regression RMSE: {rmse_linreg}')
    print(f'Linear Regression R²: {r2_linreg}')
    # Fit a baseline Ridge model (lambda = 1.0) and compare
    ridge = RidgeRegression(lambda_=1.0)
    ridge.fit(X_train, t_train)
    t_pred = ridge.predict(X_test)
    rmse_ridge = np.sqrt(mean_squared_error(t_test, t_pred))
    r2_ridge = r2_score(t_test, t_pred)
    print(f'Ridge Regression (lambda=1.0) RMSE: {rmse_ridge}')
    print(f'Ridge Regression (lambda=1.0) R²: {r2_ridge}')
    # Comparing the results
    print("\nComparison of Linear Regression and Ridge Regression (lambda=1.0):")
    print(f"Linear Regression RMSE: {rmse_linreg}, R²: {r2_linreg}")
    print(f"Ridge Regression (lambda=1.0) RMSE: {rmse_ridge}, R²: {r2_ridge}")
    # ---- Part (g, h) ---- #
    # (g) Plot true vs predicted targets for both models
    plt.figure(figsize=(12, 6))
    # Plot for Ridge Regression
    plt.subplot(1, 2, 1)
    plt.scatter(t_test, t_pred, color='blue')
    plt.plot([t_test.min(), t_test.max()], [t_test.min(), t_test.max()],
             color='red')
    plt.xlabel('True value')
    plt.ylabel('Predicted value')
    plt.title('Ridge Regression: True vs Predicted')
    # Plot for Linear Regression
    plt.subplot(1, 2, 2)
    plt.scatter(t_test, t_pred_linreg, color='green')
    plt.plot([t_test.min(), t_test.max()], [t_test.min(), t_test.max()],
             color='red')
    plt.xlabel('True value')
    plt.ylabel('Predicted value')
    plt.title('Linear Regression: True vs Predicted')
    plt.show()
# (h) Try different values of regularization lambda for Ridge regression
ridge_2 = RidgeRegression(lambda_=10.0)
ridge_2.fit(X_train, t_train)
t_pred_ridge_2 = ridge_2.predict(X_test)
# Evaluate new Ridge regression
rmse_ridge_2 = np.sqrt(mean_squared_error(t_test, t_pred_ridge_2))
r2_ridge_2 = r2_score(t_test, t_pred_ridge_2)
print(f'Ridge Regression (lambda=10) RMSE: {rmse_ridge_2}')
print(f'Ridge Regression (lambda=10) R²: {r2_ridge_2}')
# ---- Part (i) ---- #
# (i) Ridge Regression for different lambda values
lambdas = [0.1, 1.0, 10.0, 100.0]
ridge_models = []
rmse_values = []
r2_values = []
for lambda_val in lambdas:
ridge_model = RidgeRegression(lambda_=lambda_val)
ridge_model.fit(X_train, t_train)
t_pred_ridge = ridge_model.predict(X_test)
rmse_ridge = np.sqrt(mean_squared_error(t_test, t_pred_ridge))
r2_ridge = r2_score(t_test, t_pred_ridge)
ridge_models.append(ridge_model)
rmse_values.append(rmse_ridge)
r2_values.append(r2_ridge)
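    # Summarize the sweep (a small convenience print over the lists built above)
    for lam, rmse, r2 in zip(lambdas, rmse_values, r2_values):
        print(f'lambda={lam}: RMSE={rmse:.4f}, R²={r2:.4f}')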
    # Compare against scikit-learn's implementations
    lr_sk = skLinearRegression()
    lr_sk.fit(X_train, t_train)
    y_lr_sk = lr_sk.predict(X_test)
    print('Sklearn Linear Regression results')
    print(f'RMSE: {np.sqrt(mean_squared_error(t_test, y_lr_sk))}')
    print(f'R2: {r2_score(t_test, y_lr_sk)}')
    rr_sk = skRidge(alpha=1.0)
    rr_sk.fit(X_train, t_train)
    y_rr_sk = rr_sk.predict(X_test)
    print('Sklearn Ridge Regression results')
    print(f'RMSE: {np.sqrt(mean_squared_error(t_test, y_rr_sk))}')
    print(f'R2: {r2_score(t_test, y_rr_sk)}')
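    # Note: sklearn's results may differ from the closed-form class above --
    # LinearRegression and Ridge fit an unpenalized intercept by default,
    # while our RidgeRegression has no intercept and penalizes every weight.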
# ---- Part (j) ---- #
plt.figure(figsize=(12, 6))
# Plot RMSE
plt.subplot(1, 2, 1)
plt.plot(lambdas, rmse_values, marker='o', linestyle='-', color='b')
plt.xscale('log')
plt.title('RMSE for Ridge Regression with different lambda values')
plt.xlabel('Lambda')
plt.ylabel('RMSE')
# Plot R²
plt.subplot(1, 2, 2)
plt.plot(lambdas, r2_values, marker='o', linestyle='-', color='r')
plt.xscale('log')
plt.title('R² for Ridge Regression with different lambda values')
plt.xlabel('Lambda')
plt.ylabel('R²')
plt.show()
    # Plot the results: true vs predicted for all four models
    plt.figure(figsize=(12, 6))
    models = [
        ('Linear Regression', t_pred_linreg),
        ('Ridge Regression (lambda=1.0)', t_pred),
        ('Sklearn Linear Regression', y_lr_sk),
        ('Sklearn Ridge Regression (alpha=1.0)', y_rr_sk),
    ]
    for i, (name, pred) in enumerate(models, start=1):
        plt.subplot(2, 2, i)
        # use scatter and plot to show the results
        plt.scatter(t_test, pred, s=5)
        plt.plot([t_test.min(), t_test.max()], [t_test.min(), t_test.max()],
                 color='red')
        plt.xlabel('True median house value')
        plt.ylabel('Predicted median house value')
        plt.title(name)
    plt.tight_layout()
    plt.show()
if __name__ == '__main__':
main()