# Imports, grouped together ahead of any executable statement:
#   fetch_california_housing - loader for the dataset
#   LinearRegression         - the linear regression model we will use
#   mean_squared_error       - metric used to validate the model
#   train_test_split         - splits the dataset into training/testing sets
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the dataset once; `my_data` is a second name bound to the same object.
california = fetch_california_housing()
my_data = california

# Print the description text that ships with the dataset.
print(california.DESCR)
# Split the data into two groups: 80% as the training set and 20% as the
# test set.
# (train_x, train_y) is the 80% training set, used to train the model,
# where x represents the data and y represents the answers (i.e., the actual
# house prices).
# (test_x, test_y) is the 20% test set, used to validate the model's
# predictive ability.
# random_state: setting a fixed random_state ensures that the results are
# reproducible.
# shuffle: if shuffle is set to True, the data is mixed up randomly before
# it is split into training and testing sets.
# NOTE: every wrapped comment line carries its own leading '#'; a bare
# continuation line would be parsed as code and raise a SyntaxError.
train_x, test_x, train_y, test_y = train_test_split(
    my_data.data,
    my_data.target,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)
# Create an instance of the imported linear regression model and store it in
# the variable my_model.
my_model = LinearRegression()
# Train the model using the training set (train_x, train_y).
my_model.fit(train_x, train_y)
# Generate predicted values for the test features and store them in pred.
pred = my_model.predict(test_x)
# Use the evaluation metric MSE (mean squared error) to assess the model's
# actual error. The true answers are passed first and the predictions second,
# matching the metric's documented (y_true, y_pred) parameter order.
score = mean_squared_error(test_y, pred)
# A lower MSE score is better, indicating that the predicted values are close
# to the actual answers.
print(" MSE: ", score)
# Read the fitted parameters off the trained model: one coefficient per
# feature, plus the intercept term.
coefficients, intercept = my_model.coef_, my_model.intercept_

# Show the coefficients and the intercept, one per line.
print(coefficients, intercept, sep='\n')

# Build the equation text: pair each feature name with its coefficient as
# "<coefficient>*<feature>", join the terms with " + ", then append the
# intercept as the final term.
equation = ' + '.join(
    f'{weight}*{name}'
    for name, weight in zip(california.feature_names, coefficients)
)
equation = f'y = {equation} + {intercept}'

# Heading line followed by the equation itself.
print("Regression Equation:", equation, sep='\n')