import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
# Load the dataset from the CSV file in the current working directory.
data = pd.read_csv('diabetes.csv')

# Columns in which a value of 0 is treated as "missing" rather than as a real
# measurement. Each such zero is replaced by the column mean of the non-zero
# entries.
zero_not_accepted = ['Glucose', 'BloodPressure', 'SkinThickness', 'BMI', 'Insulin']
for col in zero_not_accepted:
    # Turn zeros into NaN first so they are excluded from the mean below.
    # `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.x.
    data[col] = data[col].replace(0, np.nan)
    # The mean is truncated to an int before it is used as the fill value.
    mean = int(data[col].mean(skipna=True))
    data[col] = data[col].fillna(mean)
# NOTE(review): the fill value is computed over the whole dataset, i.e. before
# the train/test split further down, so test rows influence the imputation.
# Confirm whether that is acceptable for this analysis.
# Features are the first eight columns; the target is the column at position 8.
X = data.iloc[:, :8]
y = data.iloc[:, 8]
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.2
)
# Standardise the features. The scaler is fitted on the training split only
# and then applied, unchanged, to both the training and the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# K-nearest-neighbours classifier: 11 neighbours, Euclidean distance.
classifier = KNeighborsClassifier(metric='euclidean', p=2, n_neighbors=11)
# Train on the scaled training split.
classifier.fit(X_train, y_train)
# Predict labels for the scaled test split.
y_pred = classifier.predict(X_test)
# Evaluating the model: compute every metric up front, then print them in a
# fixed order (accuracy, confusion matrix, full report, macro and weighted
# averages).
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# Text form of the report for display.
class_report = classification_report(y_test, y_pred)
# Dict form of the same report, used to pull out the aggregate rows.
report_dict = classification_report(y_test, y_pred, output_dict=True)
macro_avg = report_dict['macro avg']
weighted_avg = report_dict['weighted avg']

print(f"Accuracy: {accuracy}")
# Each heading is followed by its value on the next line.
print("Confusion Matrix:", conf_matrix, sep="\n")
print("Classification Report:", class_report, sep="\n")
print("Macro Average:", macro_avg, sep="\n")
print("Weighted Average:", weighted_avg, sep="\n")