Develop a program to compute the correlation matrix to understand the
relationships between pairs of features. Visualize the correlation matrix using
a heatmap to identify which variables have strong positive or negative
correlations. Create a pair plot to visualize pairwise relationships between
features. Use the California Housing dataset.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing
from scipy.stats import zscore
# Load the California Housing dataset into a DataFrame with one column per
# feature, named after data.feature_names.
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names)
# Add target variable
df['MedHouseVal'] = data.target

# Quick overview of the data. df.info() prints its report itself, but
# describe() and shape only return values; as bare expressions their results
# are silently discarded when this runs as a script, so print them explicitly.
df.info()
print(df.describe())
print(df.shape)
# Compute the correlation matrix: the pairwise correlation of every column
# of df (features and target) using pandas' default method.
correlation_matrix = df.corr()
# A single print call emits the heading and the matrix on separate lines.
print("\nCorrelation Matrix:", correlation_matrix, sep="\n")
# Visualize the correlation matrix using a heatmap.
# Correlation coefficients lie in [-1, 1], so the colour scale is pinned to
# that range and centred on 0. The neutral colour of the diverging 'coolwarm'
# map then always means "no correlation", with red for positive and blue for
# negative values, instead of depending on the min/max present in the data.
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,       # write each coefficient inside its cell
    fmt='.2f',        # annotations rounded to two decimal places
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    center=0,
    linewidths=0.5,   # thin separator lines between cells
)
plt.title("Heatmap of Feature Correlations")
plt.show()
# Create a pair plot to visualize pairwise relationships.
# Only a 500-row random sample is plotted to reduce computation time. The
# fixed random_state makes the sample, and therefore the figure, identical
# on every run; without it each run would plot a different subset.
sns.pairplot(df.sample(n=500, random_state=42))
# y=1.02 lifts the figure-level title just above the top of the plot grid.
plt.suptitle("Pairwise Relationships Between Features", y=1.02)
plt.show()