EX.NO: 1
LINEAR REGRESSION
DATE:
AIM
To explore the linear regression model on the USA Housing dataset and the UCI Pima Indians Diabetes dataset.
ALGORITHM
STEP 1: Start the program
STEP 2: Download the required datasets (e.g., the housing dataset) from Kaggle.
STEP 3: Read the data from the downloaded dataset.
STEP 4: Fit the linear and logistic regression models on the given dataset.
STEP 5: Display the output.
STEP 6: Stop the program.
PROGRAM
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
%matplotlib inline
from matplotlib.ticker import FormatStrFormatter
import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('C:/Users/diabetes.csv')
df.head()
df.shape
df.dtypes
df['Outcome']=df['Outcome'].astype('bool')
fig,axes = plt.subplots(nrows=3,ncols=2,dpi=120,figsize = (8,6))
plot00=sns.countplot(x='Pregnancies',data=df,ax=axes[0][0],color='green')
axes[0][0].set_title('Count',fontdict={'fontsize':8})
axes[0][0].set_xlabel('Month of Preg.',fontdict={'fontsize':7})
axes[0][0].set_ylabel('Count',fontdict={'fontsize':7})
plt.tight_layout()
plot01=sns.countplot(x='Pregnancies',data=df,hue='Outcome',ax=axes[0][1])
axes[0][1].set_title('Diab. VS Non-Diab.',fontdict={'fontsize':8})
axes[0][1].set_xlabel('Month of Preg.',fontdict={'fontsize':7})
axes[0][1].set_ylabel('Count',fontdict={'fontsize':7})
plot01.axes.legend(loc=1)
plt.setp(axes[0][1].get_legend().get_texts(), fontsize='6')
plt.setp(axes[0][1].get_legend().get_title(), fontsize='6')
plt.tight_layout()
plot10 = sns.distplot(df['Pregnancies'],ax=axes[1][0])
axes[1][0].set_title('Pregnancies Distribution',fontdict={'fontsize':8})
axes[1][0].set_xlabel('Pregnancy Class',fontdict={'fontsize':7})
axes[1][0].set_ylabel('Freq/Dist',fontdict={'fontsize':7})
plt.tight_layout()
plot11=df[df['Outcome']==False]['Pregnancies'].plot.hist(ax=axes[1][1],label='Non-Diab.')
plot11_2=df[df['Outcome']==True]['Pregnancies'].plot.hist(ax=axes[1][1],label='Diab.')
axes[1][1].set_title('Diab. VS Non-Diab.',fontdict={'fontsize':8})
axes[1][1].set_xlabel('Pregnancy Class',fontdict={'fontsize':7})
axes[1][1].set_ylabel('Freq/Dist',fontdict={'fontsize':7})
plot11.axes.legend(loc=1)
plt.setp(axes[1][1].get_legend().get_texts(), fontsize='6') # for legend text
plt.setp(axes[1][1].get_legend().get_title(), fontsize='6') # for legend title
plt.tight_layout()
plot20 = sns.boxplot(df['Pregnancies'],ax=axes[2][0],orient='v')
axes[2][0].set_title('Pregnancies',fontdict={'fontsize':8})
axes[2][0].set_xlabel('Pregnancy',fontdict={'fontsize':7})
axes[2][0].set_ylabel('Five Point Summary',fontdict={'fontsize':7})
plt.tight_layout()
plot21=sns.boxplot(x='Outcome',y='Pregnancies',data=df,ax=axes[2][1])
axes[2][1].set_title('Diab. VS Non-Diab.',fontdict={'fontsize':8})
axes[2][1].set_xlabel('Pregnancy',fontdict={'fontsize':7})
axes[2][1].set_ylabel('Five Point Summary',fontdict={'fontsize':7})
plt.xticks(ticks=[0,1],labels=['Non-Diab.','Diab.'],fontsize=7)
plt.tight_layout()
plt.show()
OUTPUT
## Blood Pressure variable
fig,axes = plt.subplots(nrows=2,ncols=2,dpi=120,figsize = (8,6))
plot00=sns.distplot(df['BloodPressure'],ax=axes[0][0],color='green')
axes[0][0].yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
axes[0][0].set_title('Distribution of BP',fontdict={'fontsize':8})
axes[0][0].set_xlabel('BP Class',fontdict={'fontsize':7})
axes[0][0].set_ylabel('Count/Dist.',fontdict={'fontsize':7})
plt.tight_layout()
plot01=sns.distplot(df[df['Outcome']==False]['BloodPressure'],ax=axes[0][1],color='green',label='Non-Diab.')
sns.distplot(df[df.Outcome==True]['BloodPressure'],ax=axes[0][1],color='red',label='Diab.')
axes[0][1].set_title('Distribution of BP',fontdict={'fontsize':8})
axes[0][1].set_xlabel('BP Class',fontdict={'fontsize':7})
axes[0][1].set_ylabel('Count/Dist.',fontdict={'fontsize':7})
axes[0][1].yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
plot01.axes.legend(loc=1)
plt.setp(axes[0][1].get_legend().get_texts(), fontsize='6')
plt.setp(axes[0][1].get_legend().get_title(), fontsize='6')
plt.tight_layout()
plot10=sns.boxplot(df['BloodPressure'],ax=axes[1][0],orient='v')
axes[1][0].set_title('Numerical Summary',fontdict={'fontsize':8})
axes[1][0].set_xlabel('BP',fontdict={'fontsize':7})
axes[1][0].set_ylabel(r'Five Point Summary(BP)',fontdict={'fontsize':7})
plt.tight_layout()
plot11=sns.boxplot(x='Outcome',y='BloodPressure',data=df,ax=axes[1][1])
axes[1][1].set_title(r'Numerical Summary (Outcome)',fontdict={'fontsize':8})
axes[1][1].set_ylabel(r'Five Point Summary(BP)',fontdict={'fontsize':7})
plt.xticks(ticks=[0,1],labels=['Non-Diab.','Diab.'],fontsize=7)
axes[1][1].set_xlabel('Category',fontdict={'fontsize':7})
plt.tight_layout()
plt.show()
fig,axes = plt.subplots(nrows=1,ncols=2,dpi=120,figsize = (8,4))
plot0=sns.distplot(df[df['BloodPressure']!=0]['BloodPressure'],ax=axes[0],color='green')
axes[0].yaxis.set_major_formatter(FormatStrFormatter('%.3f'))
axes[0].set_title('Distribution of BP',fontdict={'fontsize':8})
axes[0].set_xlabel('BP Class',fontdict={'fontsize':7})
axes[0].set_ylabel('Count/Dist.',fontdict={'fontsize':7})
plt.tight_layout()
plot1=sns.boxplot(df[df['BloodPressure']!=0]['BloodPressure'],ax=axes[1],orient='v')
axes[1].set_title('Numerical Summary',fontdict={'fontsize':8})
axes[1].set_xlabel('BloodPressure',fontdict={'fontsize':7})
axes[1].set_ylabel(r'Five Point Summary(BP)',fontdict={'fontsize':7})
plt.tight_layout()
OUTPUT
LINEAR REGRESSION MODELLING ON HOUSING DATASET
# Data manipulation libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
USAhousing = pd.read_csv('USA_Housing.csv')
USAhousing.head()
USAhousing.info()
USAhousing.describe()
USAhousing.columns
sns.pairplot(USAhousing)
sns.distplot(USAhousing['Price'])
sns.heatmap(USAhousing.corr())
X = USAhousing[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
                'Avg. Area Number of Bedrooms', 'Area Population']]
y = USAhousing['Price']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=101)
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit(X_train, y_train)
# print the intercept
print(lm.intercept_)
coeff_df = pd.DataFrame(lm.coef_, X.columns, columns=['Coefficient'])
coeff_df
predictions = lm.predict(X_test)
plt.scatter(y_test, predictions)
sns.distplot((y_test - predictions), bins=50)
from sklearn import metrics
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', metrics.mean_squared_error(y_test, predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
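Besides MAE, MSE, and RMSE, the coefficient of determination (R²) is often reported for regression models. A minimal sketch, assuming the metrics module and the predictions variable from the program above are still in scope:
# R^2 is the fraction of variance in y_test explained by the model
# (1.0 is a perfect fit; 0.0 is no better than predicting the mean)
print('R^2:', metrics.r2_score(y_test, predictions))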
OUTPUT
RESULT
Thus the exploration of the linear regression model and bivariate analysis on the USA Housing dataset was executed successfully.
EX.NO: 2
BINARY CLASSIFICATION MODEL
DATE:
AIM
To implement a binary classification model and evaluate it with different classification metrics to determine the model's effectiveness.
ALGORITHM:
STEP 1: Import the necessary libraries and load the data
STEP 2: Split the data into features (X) and the target variable (y)
STEP 3: Split the data into training and testing sets.
STEP 4: Train a binary classification model (e.g., logistic regression) on the training set.
STEP 5: Make predictions on the test set using the trained model.
STEP 6: Calculate and evaluate the model's performance with the default classification
threshold (0.5) using various classification metrics such as accuracy, precision, recall, F1-
score, and ROC-AUC score.
STEP 7: Experiment with different classification thresholds (e.g., 0.3, 0.7, etc.).
STEP 8: Analyze and compare the model's performance metrics for different thresholds to
determine the most effective threshold based on your problem requirements.
STEP 9: Choose the threshold that achieves the desired balance between precision and recall
or any other relevant metric for your specific problem.
STEP 10: Implement the chosen threshold in your binary classification model for future
predictions.
PROGRAM
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Load the data from exercise 1
data = pd.read_csv('house_data.csv')
# Separate the features (X) and the target variable (y)
X = data.drop('above_price_threshold', axis=1)
y = data['above_price_threshold']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)
# Make predictions on the test set
y_pred = model.predict(X_test)
# Evaluate the model's performance with default threshold (0.5)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
probabilities = model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, probabilities)
print("Performance with default threshold (0.5):")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)
print("ROC-AUC Score:", roc_auc)
# Modify the classification threshold
threshold = 0.3
y_pred_threshold = (probabilities >= threshold).astype(int)
# Evaluate the model's performance with modified threshold (0.3)
accuracy_threshold = accuracy_score(y_test, y_pred_threshold)
precision_threshold = precision_score(y_test, y_pred_threshold)
recall_threshold = recall_score(y_test, y_pred_threshold)
f1_threshold = f1_score(y_test, y_pred_threshold)
roc_auc_threshold = roc_auc_score(y_test, probabilities)
print("\nPerformance with modified threshold (0.3):")
print("Accuracy:", accuracy_threshold)
print("Precision:", precision_threshold)
print("Recall:", recall_threshold)
print("F1-Score:", f1_threshold)
print("ROC-AUC Score:", roc_auc_threshold)
OUTPUT
Accuracy: 0.85
Precision: 0.75
Recall: 0.9
F1-Score: 0.82
ROC-AUC Score: 0.92
RESULT
Thus the Binary Classification model with different classification metrics was
executed and verified successfully.
EX.NO: 3
CLASSIFICATION WITH NEAREST NEIGHBORS (KNN CLASSIFIER)
DATE:
AIM
To implement the nearest neighbours algorithm using the KNN classifier.
ALGORITHM
STEP 1: Import the necessary libraries:
pandas to load and manipulate the dataset.
TfidfVectorizer from sklearn.feature_extraction.text to convert the text data into
numerical features using TF-IDF vectorization.
KNeighborsClassifier from sklearn.neighbors to create the KNN classifier.
train_test_split from sklearn.model_selection to split the data into training and test
sets.
accuracy_score from sklearn.metrics to calculate the accuracy of the classifier.
STEP 2: Load and preprocess the dataset:
Use pd.read_csv to load the fake news detection dataset into a pandas DataFrame
named data.
Split the DataFrame into features (X) and labels (y).
STEP 3: Convert text data to numerical features:
Initialize a TfidfVectorizer object named vectorizer.
Use vectorizer.fit_transform to convert the text data (X) into numerical features
using TF-IDF vectorization. This step transforms the text data into a matrix of TF-
IDF features.
STEP 4: Split the data into training and test sets:
Use train_test_split to split the TF-IDF features (X) and labels (y) into training and
test sets, with a specified test size.
STEP 5: Create and train the KNN classifier:
Create a KNN classifier object, knn, with the desired number of neighbors (k).
Train the KNN classifier on the training data (X_train, y_train) using the fit
method.
STEP 6: Make predictions and calculate accuracy:
Use the trained KNN classifier to make predictions (y_pred) on the test set (X_test).
Calculate the accuracy of the classifier by comparing the predicted labels with the
true labels using the accuracy_score function.
STEP 7: Print the accuracy:
Output the calculated accuracy to evaluate the performance of the KNN classifier
on the fake news detection dataset.
PROGRAM
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the Fake News Detection dataset
data = pd.read_csv('fake_news_dataset.csv')
# Split the dataset into features and labels
X = data['text']
y = data['label']
# Convert the text data into numerical features using TF-IDF vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(X)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create a KNN classifier
k=5
knn = KNeighborsClassifier(n_neighbors=k)
# Train the classifier
knn.fit(X_train, y_train)
# Make predictions on the test set
y_pred = knn.predict(X_test)
# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
import pandas as pd
from pandas import DataFrame
from sklearn.datasets import load_iris
# sklearn.datasets includes common example datasets
# Load the iris dataset
iris_obj = load_iris()
# Dataset preview
iris_obj.data
iris = DataFrame(iris_obj.data, columns=iris_obj.feature_names,
                 index=pd.Index([i for i in range(iris_obj.data.shape[0])])).join(
      DataFrame(iris_obj.target, columns=pd.Index(["species"]),
                index=pd.Index([i for i in range(iris_obj.target.shape[0])])))
iris  # prints the iris data
# Summary commands
iris_obj.feature_names
iris.count()
iris.mean()
iris.median()
iris.var()
iris.std()
iris.max()
iris.min()
iris.describe()
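The iris commands above only summarize the data; to tie them back to this exercise's aim, the same KNN classifier can be fit on the iris features. A minimal sketch, assuming the iris DataFrame built above and the imports from the earlier program:
# Split the iris features/labels and fit a KNN classifier on them
X_iris = iris.drop('species', axis=1)
y_iris = iris['species']
Xi_train, Xi_test, yi_train, yi_test = train_test_split(
    X_iris, y_iris, test_size=0.2, random_state=42)
knn_iris = KNeighborsClassifier(n_neighbors=5)
knn_iris.fit(Xi_train, yi_train)
print("Iris accuracy:", accuracy_score(yi_test, knn_iris.predict(Xi_test)))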
OUTPUT
RESULT
Thus the implementation of the nearest neighbours (KNN) classifier was executed
and verified successfully.
EX.NO: 4
VALIDATION SET & TEST SET
DATE:
AIM
To implement a validation set and test set with a different regression model.
ALGORITHM
STEP 1: Split the dataset:
Take the original dataset and randomly split it into three subsets: a training set, a
validation set, and a test set.
Decide on the proportions of the splits (e.g., 70% training, 15% validation, 15%
test) based on the size of your dataset and the desired evaluation strategy.
STEP 2: Train the classifier:
Use the training set to train the classifier or model of your choice. This involves
fitting the model to the training data and adjusting its parameters or weights.
STEP 3:
Evaluate on the validation set:
Apply the trained model to the validation set and obtain predictions for each
instance in the validation set.
Compare the predicted labels with the true labels in the validation set to measure
the model's performance.
Calculate evaluation metrics such as accuracy, precision, recall, F1 score, or any
other appropriate metrics based on the problem you are solving.
Adjust the model's parameters or hyperparameters if necessary and repeat steps 2
and 3 until you achieve satisfactory performance on the validation set.
STEP 4: Final evaluation on the test set:
Once you have selected the best model based on its performance on the validation
set, apply this model to the test set.
Obtain predictions for each instance in the test set using the selected model.
Compare the predicted labels with the true labels in the test set to evaluate the final
performance of the model.
Calculate evaluation metrics on the test set to assess the generalization capability
of the model.
STEP 5: Analyze and interpret the results:
Examine the evaluation metrics obtained on the validation and test sets to assess
the performance of the classifier.
Compare the results to make informed decisions about the model's suitability for
the given task.
Consider other factors such as computational efficiency, interpretability, and
domain-specific requirements.
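The three-way split in STEP 1 can be obtained with two successive calls to train_test_split; a minimal sketch for a 70/15/15 split (X and y here stand for the features and target of whatever dataset is used):
from sklearn.model_selection import train_test_split
# First hold out 30% of the data, then split that portion in half
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=21)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=21)
# Result: 70% training, 15% validation, 15% test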
PROGRAM
# Data manipulation libraries
import numpy as np
import pandas as pd
### scikit-learn modules needed for Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
# For plotting
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(color_codes=True)
import warnings
warnings.filterwarnings('ignore')
df=pd.read_csv('C:/Users/diabetes.csv')
df.head()
df.tail()
df.isnull().sum()
df.describe(include='all')
df.corr()
sns.heatmap(df.corr(), annot=True)
plt.show()
df.hist()
plt.show()
sns.countplot(x=df['Outcome'])
scaler=StandardScaler()
cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
        'BMI', 'DiabetesPedigreeFunction', 'Age']
df[cols] = scaler.fit_transform(df[cols])
df_new = df
# Train & Test split
x_train, x_test, y_train, y_test = train_test_split(
    df_new[cols], df_new['Outcome'], test_size=0.20, random_state=21)
print('Shape of Training Xs:{}'.format(x_train.shape))
print('Shape of Test Xs:{}'.format(x_test.shape))
print('Shape of Training y:{}'.format(y_train.shape))
print('Shape of Test y:{}'.format(y_test.shape))
Shape of Training Xs:(614, 8)
Shape of Test Xs:(154, 8)
Shape of Training y:(614,)
Shape of Test y:(154,)
# Build Model
model = LogisticRegression()
model.fit(x_train, y_train)
y_predicted = model.predict(x_test)
score = model.score(x_test, y_test)
print(score)
0.7337662337662337
#Confusion Matrix
# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, y_predicted)
np.set_printoptions(precision=2)
cnf_matrix
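Beyond the raw confusion matrix, the per-class precision, recall, and F1 scores mentioned in STEP 3 can be printed; a sketch using scikit-learn's classification_report on the same test predictions:
from sklearn.metrics import classification_report
# Per-class precision, recall, F1, and support for the test predictions
print(classification_report(y_test, y_predicted))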
OUTPUT
RESULT
Thus the validation set and test set using the dataset were executed
successfully.
EX.NO: 5
K-MEANS ALGORITHM
DATE:
AIM
To implement K-means algorithm.
ALGORITHM
STEP 1: Initialize the number of clusters k and the maximum number of iterations max_iters.
STEP 2: Randomly initialize k centroids from the data points.
STEP 3: Repeat the following until convergence or until reaching the maximum number of iterations:
Assign each data point to the nearest centroid by calculating the Euclidean distance.
Update the centroids based on the mean of the data points assigned to each cluster.
Check for convergence by comparing the current centroids with the previous centroids. If they are equal, terminate the loop.
STEP 4: Return the cluster labels assigned to each data point and the final centroids.
PROGRAM
import numpy as np

def initialize_centroids(X, k):
    """Randomly initialize k centroids from the data points."""
    indices = np.random.choice(range(len(X)), size=k, replace=False)
    centroids = X[indices]
    return centroids

def assign_clusters(X, centroids):
    """Assign each data point to the nearest centroid."""
    distances = np.sqrt(((X - centroids[:, np.newaxis])**2).sum(axis=2))
    cluster_labels = np.argmin(distances, axis=0)
    return cluster_labels

def update_centroids(X, cluster_labels, k):
    """Update the centroids based on the mean of the data points in each cluster."""
    centroids = np.array([X[cluster_labels == i].mean(axis=0) for i in range(k)])
    return centroids

def k_means(X, k, max_iters=100):
    """Perform k-means clustering on the data points."""
    centroids = initialize_centroids(X, k)
    for _ in range(max_iters):
        prev_centroids = centroids.copy()
        # Assign data points to clusters
        cluster_labels = assign_clusters(X, centroids)
        # Update centroids
        centroids = update_centroids(X, cluster_labels, k)
        # Check for convergence
        if np.allclose(centroids, prev_centroids):
            break
    return cluster_labels, centroids

# Example usage
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
k = 2
cluster_labels, centroids = k_means(X, k)
print("Cluster Labels:", cluster_labels)
print("Centroids:", centroids)
OUTPUT
Cluster Labels: [0 0 0 1 1 1]
Centroids: [[1. 2.]
[4. 2.]]
RESULT
Thus the implementation of K-means clustering algorithm was executed
and verified successfully.
EX.NO: 6
NAÏVE BAYES CLASSIFIER
DATE:
AIM
To implement the naïve Bayes classifier.
ALGORITHM
STEP 1: Initialize class variables:
classes: An array to store the unique class labels.
class_priors: A dictionary to store the prior probabilities of each class.
feature_probs: A nested dictionary to store the conditional probabilities of
each feature value given the class.
STEP 2: Fit the classifier:
Accept the training data X and corresponding class labels y as inputs.
Determine the unique class labels and store them in the classes array.
Calculate the prior probabilities for each class by dividing the count of
samples in each class by the total number of samples.
For each feature in the dataset:
Determine the unique feature values and store them in a variable.
For each class label:
Create a nested dictionary entry for the feature and class if it doesn't
already exist.
Calculate the conditional probability of each feature value given the class
by dividing the count of samples with the specific feature value and class
by the count of samples in that class.
STEP 3: Predict the class labels:
Accept the test data X_test as input.
Initialize an empty array to store the predicted class labels.
For each sample in X_test:
Initialize an empty array to store the posterior probabilities for each class.
For each class in classes:
Initialize the posterior probability as the corresponding prior probability.
For each feature value in the sample:
If the feature and class combination exists in feature_probs and the
feature value exists in the conditional probabilities:
Multiply the posterior probability by the conditional probability of the
feature value given the class.
Otherwise, set the posterior probability to 0 and break the loop.
Append the posterior probability to the array of posterior probabilities.
Append the class label with the maximum posterior probability to the
predicted labels array.
STEP 4: Return the predicted class labels.
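In symbols, the prediction step evaluates, for each class c, the unnormalized posterior under the naive independence assumption:

P(c | x) ∝ P(c) × Π_j P(x_j | c)

where P(c) is the class prior and P(x_j | c) is the conditional probability of feature value x_j given class c; the class with the largest product is returned.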
PROGRAM
import numpy as np

class NaiveBayesClassifier:
    def __init__(self):
        self.classes = None
        self.class_priors = None
        self.feature_probs = None

    def fit(self, X, y):
        self.classes = np.unique(y)
        self.class_priors = np.zeros(len(self.classes))
        self.feature_probs = {}
        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.class_priors[i] = len(X_c) / len(X)
            for j in range(X.shape[1]):
                feature_values = np.unique(X[:, j])
                self.feature_probs[(j, c)] = {}
                for value in feature_values:
                    count = len(X_c[X_c[:, j] == value])
                    self.feature_probs[(j, c)][value] = count / len(X_c)

    def predict(self, X):
        predictions = []
        for x in X:
            posterior_probs = []
            for i, c in enumerate(self.classes):
                posterior_prob = self.class_priors[i]
                for j, value in enumerate(x):
                    if (j, c) in self.feature_probs and value in self.feature_probs[(j, c)]:
                        posterior_prob *= self.feature_probs[(j, c)][value]
                    else:
                        posterior_prob = 0
                        break
                posterior_probs.append(posterior_prob)
            predictions.append(self.classes[np.argmax(posterior_probs)])
        return np.array(predictions)
# Sample dataset
X_train = np.array([[1, 0], [1, 1], [0, 1], [0, 0]])
y_train = np.array([1, 1, 0, 0])
X_test = np.array([[1, 1], [0, 1]])
# Create and train the Naive Bayes classifier
classifier = NaiveBayesClassifier()
classifier.fit(X_train, y_train)
# Make predictions
predictions = classifier.predict(X_test)
print("Predictions:", predictions)
OUTPUT
Predictions: [1 0]
RESULT
Thus the implementation of the naïve Bayes classifier was executed and
verified successfully.