"""Cluster the rows of ``nata.xlsx`` with K-Means and visualise the result.

Steps:
1. Load the workbook and select four feature columns by position.
2. Fit K-Means for 1..10 clusters, print and plot the within-cluster sum
   of squares (WCSS) so the "elbow" can be read off the chart.
3. Fit the final 4-cluster model, attach the labels to the data as a
   ``Cluster`` column and save it to ``nata_clusters.xlsx``.
4. Project the four features onto two principal components and plot the
   clusters in that 2-D space.
"""
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# --- Configuration ---------------------------------------------------------
INPUT_PATH = 'nata.xlsx'
OUTPUT_PATH = "nata_clusters.xlsx"
FEATURE_COLUMNS = [10, 11, 12, 13]  # positional (0-based) column indices
MAX_ELBOW_CLUSTERS = 10             # elbow curve is computed for k = 1..10
FINAL_N_CLUSTERS = 4                # k used for the final model
RANDOM_STATE = 42                   # fixed seed so runs are repeatable

# --- Load data -------------------------------------------------------------
dataset = pd.read_excel(INPUT_PATH)
# NOTE(review): the features are clustered unscaled; if these columns are on
# very different scales, standardising them first may be appropriate —
# confirm against the data.
X = dataset.iloc[:, FEATURE_COLUMNS].values

# --- Elbow method: WCSS for each candidate number of clusters --------------
# NOTE(review): n_init is not set, so the number of restarts follows the
# default of the installed scikit-learn release; pin it explicitly if the
# results must be identical across library versions.
cluster_range = range(1, MAX_ELBOW_CLUSTERS + 1)
wcss = []
for i in cluster_range:
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=RANDOM_STATE)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
    print(f"WCSS for {i} clusters: {kmeans.inertia_}")

plt.plot(cluster_range, wcss)
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.title('Elbow Method')
plt.show()

# --- Final model: label every row and export -------------------------------
kmeans = KMeans(
    n_clusters=FINAL_N_CLUSTERS, init="k-means++", random_state=RANDOM_STATE
)
y_kmeans = kmeans.fit_predict(X)
dataset['Cluster'] = y_kmeans
dataset.to_excel(OUTPUT_PATH, index=False)

# Optional pair plot of selected columns coloured by cluster (requires
# seaborn; left disabled):
# import seaborn as sns
# sns.pairplot(dataset[['Income','Recency','Cluster']], hue='Cluster', diag_kind='kde')
# plt.show()

# --- Visualisation: reduce the 4 feature dimensions to 2 with PCA ----------
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_kmeans, cmap='rainbow', alpha=0.7)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Clusters in PCA Space')
plt.show()