KNN For Classification
Name : Snehal Kotkar    Div : A    Roll No. : 46

Practical No. : 2

Problem Statement : Build a machine learning model using the k-Nearest Neighbors
algorithm to predict whether the patients in the "Pima Indians Diabetes Dataset"
have diabetes or not.
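Before the scikit-learn version used below, here is a minimal sketch (not part of the
original practical) of the decision rule k-NN applies: measure the distance from a query
point to every training sample and take a majority vote among the k nearest labels. The
function name knn_predict_one is only illustrative.

import numpy as np

def knn_predict_one(x, X_train, y_train, k=5):
    # Euclidean distance from the query point to every training sample
    dists = np.linalg.norm(X_train - x, axis=1)
    # indices of the k closest training samples
    nearest = np.argsort(dists)[:k]
    # majority vote among their labels (0 = no diabetes, 1 = diabetes)
    return np.bincount(y_train[nearest]).argmax()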

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')

from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly
remount, call drive.mount("/content/drive", force_remount=True).

df = pd.read_csv('/content/drive/MyDrive/ML /diabetes.csv')  # CSV filename assumed; not legible in the original
df.head()

{"summary":"{\n \"name\": \"df\",\n \"rows\": 768,\n \"fields\": [\


n {\n \"column\": \"Pregnancies\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n
\"max\": 17,\n \"num_unique_values\": 17,\n \"samples\":
[\n 6,\n 1,\n 3\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Glucose\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 31,\n
\"min\": 0,\n \"max\": 199,\n \"num_unique_values\":
136,\n \"samples\": [\n 151,\n 101,\n
112\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BloodPressure\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 19,\n \"min\": 0,\n
\"max\": 122,\n \"num_unique_values\": 47,\n
\"samples\": [\n 86,\n 46,\n 85\
n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"SkinThickness\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 15,\n \"min\": 0,\n
\"max\": 99,\n \"num_unique_values\": 51,\n \"samples\":
[\n 7,\n 12,\n 48\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Insulin\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 115,\n
\"min\": 0,\n \"max\": 846,\n \"num_unique_values\":
186,\n \"samples\": [\n 52,\n 41,\n
183\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BMI\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 7.884160320375446,\n \"min\": 0.0,\n \"max\":
67.1,\n \"num_unique_values\": 248,\n \"samples\": [\n
19.9,\n 31.0,\n 38.1\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"DiabetesPedigreeFunction\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0.3313285950127749,\n \"min\": 0.078,\n \"max\": 2.42,\n
\"num_unique_values\": 517,\n \"samples\": [\n 1.731,\
n 0.426,\n 0.138\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Age\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 11,\n \"min\": 21,\n
\"max\": 81,\n \"num_unique_values\": 52,\n \"samples\":
[\n 60,\n 47,\n 72\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Outcome\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 0,\n
\"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n
\"samples\": [\n 0,\n 1\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"df"}

df.shape

(768, 9)

df.isnull().sum()

Pregnancies 0
Glucose 0
BloodPressure 0
SkinThickness 0
Insulin 0
BMI 0
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64
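The isnull() check reports no missing values, but in the Pima dataset a value of 0 in
several measurement columns commonly stands for "not recorded". As an optional check
(not part of the original practical; the column list is an assumption), the zeros can be
counted directly:

#Optional check: count zeros that likely represent missing measurements
zero_as_missing = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
print((df[zero_as_missing] == 0).sum())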

X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y)
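Here stratify=y keeps the proportion of diabetic and non-diabetic patients roughly the
same in the train and test splits. A quick optional check (not in the original
practical) confirms this:

#Class balance in each split (fractions of Outcome = 0 and 1)
print(np.bincount(y_train) / len(y_train))
print(np.bincount(y_test) / len(y_test))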

#import KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

#Setup arrays to store training and test accuracies
neighbors = np.arange(1, 15)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

for i, k in enumerate(neighbors):
    #Setup a knn classifier with k neighbors
    knn = KNeighborsClassifier(n_neighbors=k)

    #Fit the model
    knn.fit(X_train, y_train)

    #Compute accuracy on the training set
    train_accuracy[i] = knn.score(X_train, y_train)

    #Compute accuracy on the test set
    test_accuracy[i] = knn.score(X_test, y_test)

#Generate plot
plt.title('k-NN Varying number of neighbors')
plt.plot(neighbors, test_accuracy, label='Testing Accuracy')
plt.plot(neighbors, train_accuracy, label='Training accuracy')
plt.legend()
plt.xlabel('Number of neighbors')
plt.ylabel('Accuracy')
plt.show()
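The curves above show how training and testing accuracy trade off as k grows. If one
wanted to pick k programmatically rather than by reading the plot, an optional sketch
(not in the original practical) is to take the k with the highest test accuracy; the
next cell fixes k = 4.

#Optional: pick the k with the highest test accuracy from the arrays above
best_k = neighbors[np.argmax(test_accuracy)]
print('Best k by test accuracy:', best_k)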
#Setup a knn classifier with k neighbors
knn = KNeighborsClassifier(n_neighbors=4)

#Fit the model
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=4)

#Get accuracy. Note: In the case of classification algorithms, the score method
#represents accuracy.
knn.score(X_test, y_test)

0.7291666666666666
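k-NN is distance based, so features with large ranges (Insulin runs up to 846) dominate
features with small ranges unless the data is standardised. As an optional experiment
that is not part of the original practical, the same classifier can be wrapped in a
scaling pipeline; whether it improves on the 0.729 above would need to be checked by
running it:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

#Optional experiment: standardise features before the distance computation
scaled_knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=4))
scaled_knn.fit(X_train, y_train)
print(scaled_knn.score(X_test, y_test))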

#let us get the predictions using the classifier we had fit above
y_pred = knn.predict(X_test)

y_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
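Accuracy alone hides how the errors split between missed diabetic patients and false
alarms. An optional follow-up, not part of the original practical, is to inspect the
confusion matrix and per-class precision and recall for these predictions:

from sklearn.metrics import confusion_matrix, classification_report

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))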
