Program 4
Develop a program to load the Iris dataset. Implement the k-Nearest
Neighbors (k-NN) algorithm for classifying flowers based on their
features. Split the dataset into training and testing sets and evaluate
the model using metrics like accuracy and F1-score. Test it for different
values of k (e.g., k = 1, 3, 5) and evaluate the accuracy. Extend the k-NN
algorithm to assign weights based on the distance of neighbors (e.g.,
weight = 1/d²). Compare the performance of weighted k-NN and regular k-NN
on a synthetic or real-world dataset.
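For intuition, here is a small standalone sketch (toy distances and labels, not taken from the Iris data) of how the 1/d² weighting settles a vote. A plain majority vote over the same three neighbors would pick 'B' instead, since 'B' appears twice.

# Toy illustration (not part of the program): how 1/d^2 weighting decides a vote.
distances = [0.5, 2.0, 2.0]
labels = ['A', 'B', 'B']
weights = [1 / d**2 for d in distances]      # [4.0, 0.25, 0.25]
votes = {}
for label, w in zip(labels, weights):
    votes[label] = votes.get(label, 0) + w   # {'A': 4.0, 'B': 0.5}
print(max(votes, key=votes.get))             # 'A' -- the single close neighbor wins

The full program for the assignment follows.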
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from collections import Counter
def euclidean_distance(x1, x2):
    # Euclidean distance between two feature vectors.
    return np.sqrt(np.sum((x1 - x2) ** 2))

class KNN:
    def __init__(self, k=3, weighted=False):
        self.k = k
        self.weighted = weighted

    def fit(self, X_train, y_train):
        # k-NN is a lazy learner: "fitting" just stores the training data.
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        return np.array([self._predict(x) for x in X_test])

    def _predict(self, x):
        # Distances from the query point to every training sample.
        distances = [euclidean_distance(x, x_train) for x_train in self.X_train]
        # Indices and labels of the k nearest neighbors.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        if self.weighted:
            # Weight each neighbor by 1/d^2; the small constant avoids division by zero.
            weights = [1 / (distances[i] ** 2 + 1e-5) for i in k_indices]
            class_weights = {}
            for label, weight in zip(k_nearest_labels, weights):
                class_weights[label] = class_weights.get(label, 0) + weight
            return max(class_weights, key=class_weights.get)
        else:
            # Plain majority vote among the k nearest neighbors.
            most_common = Counter(k_nearest_labels).most_common(1)
            return most_common[0][0]
# Load Iris dataset
data = load_iris()
X, y = data.data, data.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
# Test unweighted k-NN for different values of k
k_values = [1, 3, 5]
for k in k_values:
    knn = KNN(k=k, weighted=False)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"k={k}, Accuracy: {accuracy:.4f}, F1-score: {f1:.4f}")
# Compare weighted vs unweighted k-NN (k=3) on the same split
knn_weighted = KNN(k=3, weighted=True)
knn_weighted.fit(X_train, y_train)
y_pred_weighted = knn_weighted.predict(X_test)
accuracy_weighted = accuracy_score(y_test, y_pred_weighted)
f1_weighted = f1_score(y_test, y_pred_weighted, average='weighted')
print(f"Weighted k-NN (k=3), Accuracy: {accuracy_weighted:.4f}, "
      f"F1-score: {f1_weighted:.4f}")
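The assignment also allows a synthetic dataset for the weighted-vs-unweighted comparison. A minimal sketch using scikit-learn's make_classification is given below; the sample size, feature counts, and random seeds are illustrative choices, not values prescribed by the assignment.

from sklearn.datasets import make_classification

# Synthetic 3-class dataset (illustrative parameter choices)
X_syn, y_syn = make_classification(n_samples=500, n_features=6, n_informative=4,
                                   n_classes=3, random_state=0)
Xs_train, Xs_test, ys_train, ys_test = train_test_split(
    X_syn, y_syn, test_size=0.2, random_state=42)

# Run the same KNN class with and without distance weighting
for weighted in (False, True):
    model = KNN(k=3, weighted=weighted)
    model.fit(Xs_train, ys_train)
    ys_pred = model.predict(Xs_test)
    label = "Weighted" if weighted else "Unweighted"
    print(f"{label} k-NN (k=3, synthetic), "
          f"Accuracy: {accuracy_score(ys_test, ys_pred):.4f}, "
          f"F1-score: {f1_score(ys_test, ys_pred, average='weighted'):.4f}")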