Problem 1: Implementing a Simple SOM
Task: Implement a basic SOM from scratch and train it on a simple dataset (e.g., 100 random 2D points in the unit square).
import numpy as np
import matplotlib.pyplot as plt
# Training data: 100 random points drawn uniformly from the unit square.
data = np.random.rand(100, 2)

# Hyper-parameters for the map.
grid_size = (10, 10)   # rows x cols of the SOM lattice
learning_rate = 0.5    # base step size for weight updates
epochs = 100           # number of passes over the dataset

# Each lattice node holds a 2-D codebook vector, initialised at random.
som_weights = np.random.rand(grid_size[0], grid_size[1], 2)
# Training loop: on-line SOM learning.  For each sample, find the
# best-matching unit (BMU) and pull the BMU and its lattice neighbours
# towards the sample.  The Gaussian neighbourhood radius SHRINKS over
# epochs — the original formula widened it, which prevents the map from
# ever fine-tuning — and the Gaussian uses the squared lattice distance.
_rows = np.arange(grid_size[0])[:, None]   # lattice row coordinates
_cols = np.arange(grid_size[1])[None, :]   # lattice column coordinates
for epoch in range(epochs):
    # Linearly decaying radius, kept above 0 to avoid division by zero.
    sigma = max(grid_size) / 2.0 * (1.0 - epoch / epochs) + 0.1
    for sample in data:
        # BMU = node whose codebook vector is closest to the sample.
        distances = np.linalg.norm(som_weights - sample, axis=2)
        bmu_index = np.unravel_index(np.argmin(distances), grid_size)
        # Squared lattice distance of every node to the BMU, then a single
        # broadcast update of the whole grid (replaces the Python double loop).
        dist_sq = (_rows - bmu_index[0]) ** 2 + (_cols - bmu_index[1]) ** 2
        influence = np.exp(-dist_sq / (2.0 * sigma ** 2))
        som_weights += learning_rate * influence[:, :, None] * (sample - som_weights)
# Visualize the trained SOM: the data cloud plus the learned codebook vectors.
plt.scatter(data[:, 0], data[:, 1], label="Data")
# Flatten the (rows, cols, 2) weight grid into a flat list of 2-D points so
# one scatter call draws every neuron, instead of one call per lattice node.
flat_weights = som_weights.reshape(-1, 2)
plt.scatter(flat_weights[:, 0], flat_weights[:, 1], c='red', s=20)
plt.title("Self-Organizing Map")
plt.legend()
plt.show()
Problem 2: Clustering with SOM
Task: Use SOM to cluster a dataset and visualize the clusters.
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler

# Synthetic clustering dataset: four Gaussian blobs in 2-D, rescaled so
# every feature lies in [0, 1] and distances are comparable.
data, labels = make_blobs(n_samples=300, centers=4, cluster_std=0.6, random_state=42)
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

# A small 5x5 lattice is enough to separate four blobs.
grid_size = (5, 5)
som_weights = np.random.rand(grid_size[0], grid_size[1], 2)

# Training hyper-parameters.
epochs = 100
learning_rate = 0.3
# Train the 5x5 map with on-line SOM updates.  The Gaussian neighbourhood
# radius shrinks over epochs (the original formula widened it, which stops
# the map from fine-tuning), the Gaussian uses the squared lattice
# distance, and the per-sample neighbour update is vectorised over the
# whole lattice instead of a Python double loop.
_rows = np.arange(grid_size[0])[:, None]
_cols = np.arange(grid_size[1])[None, :]
for epoch in range(epochs):
    # Linearly decaying radius, kept above 0 to avoid division by zero.
    sigma = max(grid_size) / 2.0 * (1.0 - epoch / epochs) + 0.1
    for sample in data:
        distances = np.linalg.norm(som_weights - sample, axis=2)
        bmu_index = np.unravel_index(np.argmin(distances), grid_size)
        dist_sq = (_rows - bmu_index[0]) ** 2 + (_cols - bmu_index[1]) ** 2
        influence = np.exp(-dist_sq / (2.0 * sigma ** 2))
        som_weights += learning_rate * influence[:, :, None] * (sample - som_weights)
# Assign each data point to the SOM node (cluster) whose codebook vector
# is nearest, i.e. its BMU on the trained map.
clusters = []
for sample in data:
    distances = np.linalg.norm(som_weights - sample, axis=2)
    clusters.append(np.unravel_index(np.argmin(distances), grid_size))
clusters = np.array(clusters)

# Colour each point by the flattened index of its BMU so points mapped to
# the same neuron share a colour.
cluster_ids = clusters[:, 0] + clusters[:, 1] * grid_size[0]
plt.scatter(data[:, 0], data[:, 1], c=cluster_ids, cmap='tab10', s=20)
plt.title("SOM Clustering")
plt.show()
Problem 3: SOM for Dimensionality Reduction
Task: Use SOM to reduce the dimensionality of a dataset.
from sklearn.datasets import load_digits

# High-dimensional dataset: 8x8 handwritten-digit images, 64 features
# per sample, rescaled into [0, 1].
digits = load_digits()
data = digits.data
data = MinMaxScaler().fit_transform(data)

# A larger 15x15 lattice gives the 10 digit classes room to spread out.
grid_size = (15, 15)
som_weights = np.random.rand(grid_size[0], grid_size[1], data.shape[1])

# Shorter schedule: each 64-D update is costlier than in 2-D.
epochs = 50
learning_rate = 0.2
# Train the SOM on the 64-dimensional digit vectors.  The Gaussian
# neighbourhood radius shrinks over epochs (the original formula widened
# it, which prevents fine-tuning), the Gaussian uses the squared lattice
# distance, and the neighbour update is one vectorised broadcast over the
# whole lattice instead of a Python double loop per sample.
_rows = np.arange(grid_size[0])[:, None]
_cols = np.arange(grid_size[1])[None, :]
for epoch in range(epochs):
    # Linearly decaying radius, kept above 0 to avoid division by zero.
    sigma = max(grid_size) / 2.0 * (1.0 - epoch / epochs) + 0.1
    for sample in data:
        distances = np.linalg.norm(som_weights - sample, axis=2)
        bmu_index = np.unravel_index(np.argmin(distances), grid_size)
        dist_sq = (_rows - bmu_index[0]) ** 2 + (_cols - bmu_index[1]) ** 2
        influence = np.exp(-dist_sq / (2.0 * sigma ** 2))
        som_weights += learning_rate * influence[:, :, None] * (sample - som_weights)
# Project each 64-D sample onto the 2-D lattice: its reduced
# representation is simply the (row, col) coordinate of its BMU.
reduced_data = []
for sample in data:
    distances = np.linalg.norm(som_weights - sample, axis=2)
    reduced_data.append(np.unravel_index(np.argmin(distances), grid_size))
reduced_data = np.array(reduced_data)

# Plot every sample at its BMU coordinate, coloured by true digit label.
plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=digits.target, cmap='tab10', s=15)
plt.colorbar()
plt.title("Dimensionality Reduction with SOM")
plt.show()
Problem 4: Visualizing SOM as a Heatmap
Task: Visualize the neuron activation frequencies in a trained SOM.
# Count how many samples each neuron "wins" (is the BMU for).
activation_map = np.zeros(grid_size)
for sample in data:
    distances = np.linalg.norm(som_weights - sample, axis=2)
    winner = np.unravel_index(np.argmin(distances), grid_size)
    activation_map[winner] += 1

# Hot colours mark frequently-winning neurons; dark cells are rarely used.
plt.imshow(activation_map, cmap='hot', interpolation='nearest')
plt.colorbar(label="Activation Frequency")
plt.title("SOM Neuron Activation Map")
plt.show()