LAB - 5 (CB.EN.
U4ECE22115)
Experiment 1: Performance decision tree (DT) and Naive Bayes (NB) in IRIS dataset and note the accuracy using both classifiers 70-30% training-test split.
In [42]: import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Load IRIS dataset
iris = pd.read_csv(r"C:\Users\Daejuswaram Gopinath\Downloads\Iris_Dataset.csv")
X = iris.iloc[:,0:4]
y = iris.Species
# Split data into training and testing sets (70-30 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Decision Tree Classifier
dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(X_train, y_train)
dt_pred = dt_classifier.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)
# Naive Bayes Classifier
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
nb_pred = nb_classifier.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)
print("Decision Tree Accuracy:", dt_accuracy)
print("Naive Bayes Accuracy:", nb_accuracy)
Decision Tree Accuracy: 1.0
Naive Bayes Accuracy: 1.0
In [43]: iris.head()
Out[43]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 5.1 3.5 1.4 0.2 Iris-setosa
1 2 4.9 3.0 1.4 0.2 Iris-setosa
2 3 4.7 3.2 1.3 0.2 Iris-setosa
3 4 4.6 3.1 1.5 0.2 Iris-setosa
4 5 5.0 3.6 1.4 0.2 Iris-setosa
Experiment 2: Compare the performance of both classifiers (DT and NB) using 10-fold, leave one out, and 10-fold stratified cross-validation.
In [44]: from sklearn.model_selection import cross_val_score, LeaveOneOut, StratifiedKFold
# Cross-validation methods
cv_methods = ['10-fold', 'Leave One Out', 'Stratified 10-fold']
classifiers = [DecisionTreeClassifier(), GaussianNB()]
for clf in classifiers:
for cv_method in cv_methods:
if cv_method == '10-fold':
scores = cross_val_score(clf, X, y, cv=10)
elif cv_method == 'Leave One Out':
loo = LeaveOneOut()
scores = cross_val_score(clf, X, y, cv=loo)
else:
skf = StratifiedKFold(n_splits=10)
scores = cross_val_score(clf, X, y, cv=skf)
print(f"{cv_method} Cross-validation Accuracy for {type(clf).__name__}: {scores.mean()}")
10-fold Cross-validation Accuracy for DecisionTreeClassifier: 0.9666666666666668
Leave One Out Cross-validation Accuracy for DecisionTreeClassifier: 0.9933333333333333
Stratified 10-fold Cross-validation Accuracy for DecisionTreeClassifier: 0.9333333333333332
10-fold Cross-validation Accuracy for GaussianNB: 0.9866666666666667
Leave One Out Cross-validation Accuracy for GaussianNB: 0.9866666666666667
Stratified 10-fold Cross-validation Accuracy for GaussianNB: 0.9866666666666667
In [45]: from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd
# Assuming 'iris' is a DataFrame version of the Iris dataset
# And 'species' is the column with categorical data to be encoded
# Label Encoding
le = LabelEncoder()
iris['species_encoded'] = le.fit_transform(iris['Species'])
# One Hot Encoding
ohe = OneHotEncoder()
species_ohe = ohe.fit_transform(iris[['Species']]).toarray()
species_ohe_df = pd.DataFrame(species_ohe, columns=[f"species_{i}" for i in range(species_ohe.shape[1])])
iris = pd.concat([iris, species_ohe_df], axis=1)
In [46]: label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
# Support Vector Machine Classifier without Standardization
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train_encoded)
y_pred_no_standardization = svm_classifier.predict(X_test)
accuracy_no_standardization = accuracy_score(y_test_encoded, y_pred_no_standardization)
# Support Vector Machine Classifier with Standardization
scaler = StandardScaler()
X_train_standardized = scaler.fit_transform(X_train)
X_test_standardized = scaler.transform(X_test)
svm_classifier_standardized = SVC(kernel='linear')
svm_classifier_standardized.fit(X_train_standardized, y_train_encoded)
y_pred_standardization = svm_classifier_standardized.predict(X_test_standardized)
accuracy_standardization = accuracy_score(y_test_encoded, y_pred_standardization)
# Support Vector Machine Classifier without Normalization
svm_classifier_no_normalization = SVC(kernel='linear')
svm_classifier_no_normalization.fit(X_train, y_train_encoded)
y_pred_no_normalization = svm_classifier_no_normalization.predict(X_test)
accuracy_no_normalization = accuracy_score(y_test_encoded, y_pred_no_normalization)
# Support Vector Machine Classifier with Normalization
normalizer = MinMaxScaler()
X_train_normalized = normalizer.fit_transform(X_train)
X_test_normalized = normalizer.transform(X_test)
svm_classifier_normalized = SVC(kernel='linear')
svm_classifier_normalized.fit(X_train_normalized, y_train_encoded)
y_pred_normalization = svm_classifier_normalized.predict(X_test_normalized)
accuracy_normalization = accuracy_score(y_test_encoded, y_pred_normalization)
# Print accuracies
print("Accuracy without Standardization:", accuracy_no_standardization)
print("Accuracy with Standardization:", accuracy_standardization)
print("Accuracy without Normalization:", accuracy_no_normalization)
print("Accuracy with Normalization:", accuracy_normalization)
Accuracy without Standardization: 1.0
Accuracy with Standardization: 1.0
Accuracy without Normalization: 1.0
Accuracy with Normalization: 1.0
In [47]: iris.head()
Out[47]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species species_encoded species_0 species_1 species_2
0 1 5.1 3.5 1.4 0.2 Iris-setosa 0 1.0 0.0 0.0
1 2 4.9 3.0 1.4 0.2 Iris-setosa 0 1.0 0.0 0.0
2 3 4.7 3.2 1.3 0.2 Iris-setosa 0 1.0 0.0 0.0
3 4 4.6 3.1 1.5 0.2 Iris-setosa 0 1.0 0.0 0.0
4 5 5.0 3.6 1.4 0.2 Iris-setosa 0 1.0 0.0 0.0
Experiment 5: Perform colour classification for the above dataset using SVM, k-NN, DT, NB classifier.
Type - 1 (Considering specific colors)
In [39]: import numpy as np
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Define the directory containing the image data
DATADIR = r'C:\Users\Daejuswaram Gopinath\Downloads\ColorClassification'
# Define the categories or labels
CATEGORIES = ['Black', 'Blue', 'Brown', 'Green','Violet', 'White']
# Function to load and preprocess images
def load_images_and_labels():
images = []
labels = []
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path,img))
img_array = cv2.resize(img_array, (100, 100)) # Resize images to a fixed size
images.append(img_array.flatten())
labels.append(class_num)
return images, labels
# Load and preprocess images
images, labels = load_images_and_labels()
# Convert to numpy arrays
X = np.array(images)
y = np.array(labels)
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
# Initialize classifiers
svm_classifier = SVC(kernel='linear')
knn_classifier = KNeighborsClassifier(n_neighbors=5)
dt_classifier = DecisionTreeClassifier()
nb_classifier = GaussianNB()
# Train classifiers
svm_classifier.fit(X_train, y_train)
knn_classifier.fit(X_train, y_train)
dt_classifier.fit(X_train, y_train)
nb_classifier.fit(X_train, y_train)
# Predictions
y_pred_svm = svm_classifier.predict(X_test)
y_pred_knn = knn_classifier.predict(X_test)
y_pred_dt = dt_classifier.predict(X_test)
y_pred_nb = nb_classifier.predict(X_test)
# Calculate accuracies
accuracy_svm = accuracy_score(y_test, y_pred_svm)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_nb = accuracy_score(y_test, y_pred_nb)
# Print accuracies
print("SVM Accuracy:", accuracy_svm)
print("k-NN Accuracy:", accuracy_knn)
print("Decision Tree Accuracy:", accuracy_dt)
print("Naive Bayes Accuracy:", accuracy_nb)
SVM Accuracy: 0.8076923076923077
k-NN Accuracy: 0.5384615384615384
Decision Tree Accuracy: 0.5384615384615384
Naive Bayes Accuracy: 0.8076923076923077
Type - 2 (Considering all colors)
In [40]: import numpy as np
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Define the directory containing the image data
DATADIR = r'C:\Users\Daejuswaram Gopinath\Downloads\ColorClassification'
# Define the categories or labels
CATEGORIES = ['orange','Violet','red','Blue','Green','Black','Brown','White']
# Function to load and preprocess images
def load_images_and_labels():
images = []
labels = []
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path,img))
img_array = cv2.resize(img_array, (100, 100)) # Resize images to a fixed size
images.append(img_array.flatten())
labels.append(class_num)
return images, labels
# Load and preprocess images
images, labels = load_images_and_labels()
# Convert to numpy arrays
X = np.array(images)
y = np.array(labels)
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
# Initialize classifiers
svm_classifier = SVC(kernel='linear')
knn_classifier = KNeighborsClassifier(n_neighbors=5)
dt_classifier = DecisionTreeClassifier()
nb_classifier = GaussianNB()
# Train classifiers
svm_classifier.fit(X_train, y_train)
knn_classifier.fit(X_train, y_train)
dt_classifier.fit(X_train, y_train)
nb_classifier.fit(X_train, y_train)
# Predictions
y_pred_svm = svm_classifier.predict(X_test)
y_pred_knn = knn_classifier.predict(X_test)
y_pred_dt = dt_classifier.predict(X_test)
y_pred_nb = nb_classifier.predict(X_test)
# Calculate accuracies
accuracy_svm = accuracy_score(y_test, y_pred_svm)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_nb = accuracy_score(y_test, y_pred_nb)
# Print accuracies
print("SVM Accuracy:", accuracy_svm)
print("k-NN Accuracy:", accuracy_knn)
print("Decision Tree Accuracy:", accuracy_dt)
print("Naive Bayes Accuracy:", accuracy_nb)
SVM Accuracy: 0.6363636363636364
k-NN Accuracy: 0.5454545454545454
Decision Tree Accuracy: 0.6666666666666666
Naive Bayes Accuracy: 0.5757575757575758
In [ ]: