0% found this document useful (0 votes)
26 views · 2 pages

Elbow Method

Uploaded by

Prateek Verma
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
26 views · 2 pages

Elbow Method

Uploaded by

Prateek Verma
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

## K-means clustering of an Age / Income($) dataset, using the elbow method
## to pick the number of clusters.
##
## The script is written notebook-style: bare expressions such as `df` or
## `sse` only display a value when run interactively (Jupyter / REPL) and are
## no-ops when the file is executed as a plain script.

## to create dataframe and import libraries
from sklearn.cluster import KMeans

import pandas as pd
from matplotlib import pyplot as plt

## to read csv file
# NOTE(review): the CSV file name was lost when this script was extracted
# (it appeared as a "[Link]" placeholder); 'income.csv' is assumed -- confirm
# against the actual Kaggle dataset directory.
df = pd.read_csv('/kaggle/input/income-dataset-for-k-means/income.csv')
df

## to check first 5 rows
# Display only. `df` is deliberately NOT rebound to the head: truncating the
# frame to 5 rows would make the elbow loop below fail, because KMeans needs
# at least as many samples as clusters and k goes up to 9.
df.head()

## to check the basic statistics of the data
df.describe()
# NOTE(review): the next two attribute names were lost in extraction; shape
# and dtypes are assumed as the usual follow-up inspections -- confirm.
df.shape
df.dtypes

## to plot scatter plot between age and income
# A fresh figure per plot keeps the three charts from being drawn on top of
# each other when the file runs as one script instead of separate cells.
plt.figure()
plt.scatter(df.Age, df['Income($)'])
plt.xlabel('Age')
plt.ylabel('Income($)')

## to use elbow method to find number of clusters (sse = sum of squared error)
sse = []
k_rng = range(1, 10)
for k in k_rng:
    km = KMeans(n_clusters=k)
    km.fit(df[['Age', 'Income($)']])
    # inertia_ is the fitted model's sum of squared distances of samples to
    # their closest cluster center, i.e. the SSE for this k.
    sse.append(km.inertia_)

## print sse
sse

## plot elbow graph
plt.figure()
plt.xlabel('K')
plt.ylabel('Sum of squared error')
plt.plot(k_rng, sse)

## to identify the number of clusters
# 3 is the cluster count chosen from the elbow graph above.
km = KMeans(n_clusters=3)
y_predicted = km.fit_predict(df[['Age', 'Income($)']])
y_predicted

## print the predicted cluster number for each datapoint
df['cluster'] = y_predicted
df.head()

## to check the cluster centers
km.cluster_centers_

## to plot the different datapoints as per their assigned clusters
df1 = df[df.cluster == 0]
df2 = df[df.cluster == 1]
df3 = df[df.cluster == 2]

plt.figure()
plt.scatter(df1.Age, df1['Income($)'], color='green')
plt.scatter(df2.Age, df2['Income($)'], color='red')
plt.scatter(df3.Age, df3['Income($)'], color='black')
# Column 0 / column 1 of cluster_centers_ follow the feature order used for
# fitting: Age, then Income($).
plt.scatter(
    km.cluster_centers_[:, 0],
    km.cluster_centers_[:, 1],
    color='purple',
    marker='*',
    label='centroid',
)
plt.xlabel('Age')
plt.ylabel('Income ($)')
plt.legend()

# Render the figures when executed as a script; harmless in a notebook.
plt.show()

You might also like