# Experiment 7
# Implementation of the k-means clustering algorithm
# importing the packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the mall-customer CSV and keep the two feature columns used for clustering.
csv_path = '/content/K-Mean_Mall_Customers.csv'
dataset = pd.read_csv(csv_path)
# Columns at positions 3 and 4 become the 2-D feature matrix; they are later
# plotted as 'Annual Income (k$)' and 'Spending Score (1-100)'.
X = dataset.iloc[:, [3, 4]].values
# Finding the optimal number of clusters using the elbow method
from sklearn.cluster import KMeans

# WCSS value (taken from the fitted model's inertia_) for each candidate
# number of clusters.
wcss = []
# Candidate cluster counts 1..10, shared by the fitting loop and the plot so
# the x-axis always lines up with the collected WCSS values.
cluster_range = range(1, 11)
# Fit one model per candidate cluster count and record its WCSS.
# The loop body must be indented; without it the script does not parse.
for i in cluster_range:
    kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
# Plot WCSS against the number of clusters; the "elbow" of this curve is read
# off visually to choose the cluster count.
plt.plot(cluster_range, wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
# Fit the final k-means model on X and get one cluster label per row.
# NOTE(review): 5 clusters is presumably the elbow read off the plot above - confirm.
kmeans = KMeans(
    n_clusters=5,
    init='k-means++',
    random_state=42,
)
y_kmeans = kmeans.fit_predict(X)
# Visualizing the Clusters
# Cluster 1 shows the customers with average income and average spending, so we can categorize these
# customers as average.
# Cluster 2 shows the customers with a high income but low spending, so we can categorize them as
# careful.
#Cluster3 shows the low income and also low spending so they can be categorized as sensible.
# Cluster 4 shows the customers with low income but very high spending, so they can be categorized as
# careless.
# Cluster 5 shows the customers with high income and high spending, so they can be categorized as
# target; these customers can be the most profitable customers for the mall owner.
# We can now compare the values of y_kmeans with our original dataset.
# Draw each cluster's points in its own colour; the position in this list is
# the cluster label found in y_kmeans.
cluster_styles = [
    ('red', 'Cluster 1'),
    ('black', 'Cluster 2'),
    ('green', 'Cluster 3'),
    ('cyan', 'Cluster 4'),
    ('magenta', 'Cluster 5'),
]
for cluster_id, (color, label) in enumerate(cluster_styles):
    # Rows of X assigned to this cluster: column 0 on the x-axis, column 1 on the y-axis.
    members = X[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=color, label=label)

# Overlay the fitted cluster centres as larger yellow markers.
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=300, c='yellow', label='Centroids')

plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()