# Clustering Codes
# Hierarchical Clustering ----
# Hierarchical clustering of `data`, followed by a dendrogram and a scatter
# plot coloured by cluster.
# NOTE(review): `data` is not defined in this section; it is assumed to be a
# numeric data frame with columns `x` and `y` created earlier -- confirm.

# Compute the pairwise distance matrix (Euclidean is the default method).
distance_matrix <- dist(data, method = "euclidean")
print(distance_matrix)

# Hierarchical clustering using "complete" / "single" / "average" linkage.
hclust_complete <- hclust(distance_matrix, method = "complete")
hclust_single <- hclust(distance_matrix, method = "single")
hclust_average <- hclust(distance_matrix, method = "average")

# Plot the dendrogram of the complete-linkage solution.
plot(
  hclust_complete,
  main = "Hierarchical Clustering - Complete Linkage",
  xlab = "Data Points",
  ylab = "Height"
)

# Cut the dendrogram into 3 clusters; the result is one integer label per
# observation.
clusters <- cutree(hclust_complete, k = 3)
print(clusters)

# Visualize the clusters on a scatter plot, using the cluster label as the
# colour index so the legend colours match the points.
plot(
  data$x, data$y,
  col = clusters,
  main = "Clusters (k = 3)",
  xlab = "X",
  ylab = "Y",
  pch = 19
)
legend(
  "topright",
  legend = unique(clusters),
  col = unique(clusters),
  pch = 19
)
# K-Means Clustering ----
# K-means clustering of `data` with k = 3, plus an elbow plot for k = 1..10.
# NOTE(review): `data` is not defined in this section; it is assumed to be a
# numeric data frame with columns `x` and `y` created earlier -- confirm.

# Ensure reproducibility: fixing the seed "locks in" the randomness so others
# (or you later) can reproduce the exact results.
set.seed(123)

# nstart = 25 runs 25 random initialisations and keeps the best solution.
kmeans_result <- kmeans(data, centers = 3, nstart = 25)
print(kmeans_result)

# Cluster centers
print(kmeans_result$centers)

# Cluster assignments
print(kmeans_result$cluster)

# Scatter plot with clusters
plot(
  data$x, data$y,
  col = kmeans_result$cluster,
  main = "K-Means Clustering Results",
  xlab = "X",
  ylab = "Y",
  pch = 19
)

# Add cluster centers.
# NOTE(review): this takes the first two columns of the centers matrix and so
# presumes they correspond to `x` and `y` -- confirm against `data`.
points(
  kmeans_result$centers[, 1], kmeans_result$centers[, 2],
  col = 1:3,
  pch = 8,
  cex = 2
)

# Evaluate cluster sizes
table(kmeans_result$cluster)

# Within-cluster sum of squares
cat(
  "Total Within-Cluster Sum of Squares: ",
  kmeans_result$tot.withinss, "\n"
)

# Compute total within-cluster sum of squares for different numbers of
# clusters (k = 1..10); `wss` is preallocated and filled by index.
wss <- numeric(10)
for (k in seq_along(wss)) {
  kmeans_temp <- kmeans(data, centers = k, nstart = 25)
  wss[k] <- kmeans_temp$tot.withinss
}

# Plot the Elbow Method
plot(
  seq_along(wss), wss,
  type = "b",
  pch = 19,
  frame.plot = FALSE,
  main = "Elbow Method for Optimal k",
  xlab = "Number of Clusters (k)",
  ylab = "Total Within-Cluster Sum of Squares"
)
# DBSCAN Clustering ----
# Density-based clustering of `data` with the dbscan package.
# NOTE(review): `data` is not defined in this section; it is assumed to be a
# numeric data set created earlier -- confirm.

# Install dbscan only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("dbscan", quietly = TRUE)) {
  install.packages("dbscan")
}
library(dbscan)

# Apply DBSCAN with epsilon (eps) and minimum points (minPts)
dbscan_result <- dbscan(data, eps = 0.8, minPts = 5)
print(dbscan_result)
# Spectral Clustering ----
# Spectral clustering of the `spirals` example data set into two clusters.
# Both specc() and the `spirals` data set are provided by kernlab, so the
# package must be attached before either is used.
library(kernlab)

# Load the example data set; naming the package makes the source explicit.
data("spirals", package = "kernlab")

# Partition the points into two clusters.
sc <- specc(spirals, centers = 2)

# Plot the points, using the cluster assignment for both colour and symbol.
plot(spirals, col = sc, pch = sc)