multiple-regression
September 21, 2023
[3]: import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
# Load the Iris dataset (seaborn returns it as a pandas DataFrame)
iris = sns.load_dataset("iris")

# Select the features and the target variable.
# The target column must NOT appear among the predictors: if 'sepal_length'
# is also a feature, the model simply copies it and R-squared is trivially 1.0
# (target leakage), which says nothing about how well the other measurements
# predict sepal length.
target_col = 'sepal_length'
feature_cols = ['sepal_width', 'petal_length', 'petal_width']
X = iris[feature_cols]
y = iris[target_col]  # Predict sepal length based on the other features

# Split the dataset into the Training set and Test set
# (fixed random_state keeps the split reproducible across runs)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fitting Multiple Linear Regression to the Training set
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Calculate the R-squared score on held-out data
score = r2_score(y_test, y_pred)

# Create a scatter plot to visualize the predictions vs. actual values
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, color='blue')
plt.title(
    'Multiple Linear Regression: Actual vs. Predicted Sepal Length '
    f'(R-squared={score:.2f})'
)
plt.xlabel('Actual Sepal Length')
plt.ylabel('Predicted Sepal Length')
plt.grid(True)

# Add a diagonal line (y = x) for reference: points on this line are
# predicted perfectly, so vertical distance from it is the prediction error.
lo, hi = y_test.min(), y_test.max()
plt.plot([lo, hi], [lo, hi], linestyle='--', color='red')

# Show the plot
plt.show()
[4]: import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load the California housing dataset
california_housing = datasets.fetch_california_housing()
data = california_housing.data
target = california_housing.target

# Select a subset of features (you can modify this as needed).
# Columns are looked up by NAME instead of by hard-coded position: the
# previous index pair [0, 5] picked 'MedInc' and 'AveOccup' (average
# occupancy), while the plot labelled the second axis as average rooms.
# 'AveRooms' is column 2 in this dataset.
feature_names = list(california_housing.feature_names)
selected_features = ['MedInc', 'AveRooms']
X = data[:, [feature_names.index(name) for name in selected_features]]

# Split the data into training and testing sets
# (fixed random_state keeps the split reproducible across runs)
X_train, X_test, y_train, y_test = train_test_split(
    X, target, test_size=0.2, random_state=0
)

# Fit a multiple linear regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Calculate model performance metrics on held-out data
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Create a 3D scatter plot for visualization
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Plot actual values in blue
ax.scatter(X_test[:, 0], X_test[:, 1], y_test, color='blue', label='Actual')

# Plot predicted values in red
ax.scatter(X_test[:, 0], X_test[:, 1], y_pred, color='red', label='Predicted')

# Set labels for the axes (order matches selected_features above)
ax.set_xlabel('Median Income')
ax.set_ylabel('Average Rooms')
ax.set_zlabel('House Value')

# Add a legend
ax.legend(loc='upper right')

# Add a title and metrics to the plot
plt.title(f'Multiple Linear Regression\nMSE: {mse:.2f}, R^2: {r2:.2f}')

# Show the 3D plot
plt.show()
3
[5]: import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load the Diabetes dataset and pull out the feature matrix and target vector
diabetes = datasets.load_diabetes()
data, target = diabetes.data, diabetes.target

# Keep two predictors: column 0 ('Age') and column 2 ('BMI').
# Column 1 is skipped, so these are not the first two columns of the matrix.
X = data[:, [0, 2]]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target,
    test_size=0.2,
    random_state=0,
)

# Fit ordinary least squares on the training rows, then predict the held-out rows
regressor = LinearRegression()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# Held-out error metrics shown in the figure title
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# 3D scatter: the two predictors on x/y, the response on z
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Actual responses in blue, model predictions in red, at the same (x, y) positions
age_test = X_test[:, 0]
bmi_test = X_test[:, 1]
ax.scatter(age_test, bmi_test, y_test, color='blue', label='Actual')
ax.scatter(age_test, bmi_test, y_pred, color='red', label='Predicted')

# Axis labels, legend and a title carrying the metrics
ax.set_xlabel('Age')
ax.set_ylabel('BMI')
ax.set_zlabel('Diabetes Progression')
ax.legend(loc='upper right')
plt.title(f'Multiple Linear Regression\nMSE: {mse:.2f}, R^2: {r2:.2f}')

# Render the figure
plt.show()
5
[6]: import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load a dataset with at least three features (e.g., 'Diabetes' dataset)
diabetes = datasets.load_diabetes()
data = diabetes.data
target = diabetes.target

# Select three features (you can modify this as needed).
# Columns 0, 2 and 3 are 'Age', 'BMI' and 'BP'; column 1 is skipped, so
# these are not the first three columns of the matrix.
X = data[:, [0, 2, 3]]

# Split the data into training and testing sets
# (fixed random_state keeps the split reproducible across runs)
X_train, X_test, y_train, y_test = train_test_split(
    X, target, test_size=0.2, random_state=0
)

# Fit a multiple linear regression model on all three features
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = regressor.predict(X_test)

# Calculate model performance metrics on held-out data
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Create a 3D scatter plot for visualization
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Plot actual values in blue
ax.scatter(X_test[:, 0], X_test[:, 1], y_test, color='blue', label='Actual')

# Plot predicted values in red
ax.scatter(X_test[:, 0], X_test[:, 1], y_pred, color='red', label='Predicted')

# Set labels for the axes.
# The z coordinate of every point is the response (y_test / y_pred), not the
# blood-pressure feature, so the axis is labelled with the response.
ax.set_xlabel('Age')
ax.set_ylabel('BMI')
ax.set_zlabel('Diabetes Progression')

# Add a legend
ax.legend(loc='upper right')

# Add a title and metrics to the plot
plt.title(f'Multiple Linear Regression\nMSE: {mse:.2f}, R^2: {r2:.2f}')

# Create a grid of points over the observed Age/BMI range
x_range = np.linspace(X_test[:, 0].min(), X_test[:, 0].max(), 10)
y_range = np.linspace(X_test[:, 1].min(), X_test[:, 1].max(), 10)
x_plane, y_plane = np.meshgrid(x_range, y_range)

# The model has three coefficients, but only two predictors fit on the plot
# axes. Draw the slice of the fitted hyperplane with BP held at its test-set
# mean; leaving the coef_[2] term out would draw a surface that is not the
# model's prediction.
bp_fixed = X_test[:, 2].mean()
z_plane = (
    regressor.coef_[0] * x_plane
    + regressor.coef_[1] * y_plane
    + regressor.coef_[2] * bp_fixed
    + regressor.intercept_
)

# Plot the regression surface (slice at mean BP)
ax.plot_surface(x_plane, y_plane, z_plane, alpha=0.5, cmap='viridis')

# Show the 3D plot
plt.show()
7
[ ]: