# 03/09/2025, 08:04 question2ml.ipynb - Colab
'''3. Data preparation
Download the "Spambase Data Set" from the UCI Machine Learning Repository
([Link]). This dataset contains email
messages, where the goal is to predict whether a message is spam or not based on several
input features.
Implementation:
Implement Bernoulli Naive Bayes, Multinomial Naive Bayes, and Gaussian Naive Bayes classifiers using the scikit-learn library.
Results:
Report the following performance metrics for each classifier:
Accuracy, Precision, Recall, F1 score, Confusion matrix'''
# Spambase experiment: compare Bernoulli, Multinomial and Gaussian Naive Bayes
# using out-of-fold predictions from 10-fold cross-validation.
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB

# NOTE(review): the file name was lost in the notebook export (it read
# "[Link]"); "spambase.data" is the name used by the UCI archive -- confirm
# it matches the file stored on Drive.
df = pd.read_csv("/content/drive/MyDrive/MLlabfiles/spambase.data", header=None)
X = df.iloc[:, :-1]  # Features: every column except the last
y = df.iloc[:, -1]  # Labels: the last column

# Create the models with default hyperparameters.
bnb = BernoulliNB()
mnb = MultinomialNB()
gnb = GaussianNB()

# Generate out-of-fold predictions. Each model is cross-validated exactly
# once; the earlier version ran every cross-validation twice for no benefit.
y_pred_bnb = cross_val_predict(bnb, X, y, cv=10)
y_pred_mnb = cross_val_predict(mnb, X, y, cv=10)
y_pred_gnb = cross_val_predict(gnb, X, y, cv=10)

predictions = {
    "BernoulliNB": y_pred_bnb,
    "MultinomialNB": y_pred_mnb,
    "GaussianNB": y_pred_gnb,
}

# One row of scalar metrics per classifier.
results = {
    name: {
        "accuracy": accuracy_score(y, y_pred),
        "precision": precision_score(y, y_pred),
        "recall": recall_score(y, y_pred),
        "f1": f1_score(y, y_pred),
    }
    for name, y_pred in predictions.items()
}
df_results = pd.DataFrame(results).T  # Transpose to get models as rows
print(df_results)

# The task statement also asks for the confusion matrix of each classifier.
confusion_matrices = {
    name: confusion_matrix(y, y_pred) for name, y_pred in predictions.items()
}
for name, matrix in confusion_matrices.items():
    print(f"\n{name} confusion matrix (rows = true label, columns = predicted):")
    print(matrix)
# Recorded output of print(df_results) from the notebook run:
#
#                accuracy  precision    recall        f1
# BernoulliNB    0.883938   0.881336  0.815223  0.846991
# MultinomialNB  0.786351   0.732363  0.721456  0.726869
# GaussianNB     0.821778   0.700444  0.956977  0.808858
#
# --- New section ---
'''4. Download the "LLM - Detect AI generated text dataset". This dataset contains both AI-generated and human-written essays.
Dataset link: [Link]
The dataset contains more than 28,000 essays, some written by students and some AI-generated.
Features:
text: contains the essay text.
generated: the target label. 0 - Human Written Essay, 1 - AI Generated Essay.
Implementation:
Implement Bernoulli Naive Bayes, Multinomial Naive Bayes, and Gaussian
Naive Bayes classifiers using the scikit-learn library in Python.
Use 10-fold cross-validation to evaluate the performance of each classifier on the dataset.
You should use the default hyperparameters for each classifier.'''
# AI-vs-human essay experiment: TF-IDF features fed to Bernoulli, Multinomial
# and Gaussian Naive Bayes, scored on out-of-fold predictions from 10-fold
# cross-validation.
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB

df = pd.read_csv("/content/drive/MyDrive/MLlabfiles/Training_Essay_Data.csv")
X_text = df['text']  # Just the text column
y = df['generated']  # Target labels: 0 = human written, 1 = AI generated

# NOTE(review): the vectorizer is fitted on the whole corpus before the
# cross-validation split, so vocabulary/IDF statistics include the held-out
# folds. Wrapping the vectorizer and classifier in a Pipeline would avoid
# that; kept as-is here so the reported numbers stay comparable.
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)  # You can tune this
X_tfidf = tfidf.fit_transform(X_text)

# Models with default hyperparameters, as required by the task.
bnb = BernoulliNB()
mnb = MultinomialNB()
gnb = GaussianNB()

y_pred_bnb = cross_val_predict(bnb, X_tfidf, y, cv=10)
y_pred_mnb = cross_val_predict(mnb, X_tfidf, y, cv=10)
# GaussianNB is given a dense array rather than the sparse TF-IDF matrix.
y_pred_gnb = cross_val_predict(gnb, X_tfidf.toarray(), y, cv=10)

predictions = {
    "BernoulliNB": y_pred_bnb,
    "MultinomialNB": y_pred_mnb,
    "GaussianNB": y_pred_gnb,
}

# One row of scalar metrics per classifier.
results = {
    name: {
        "accuracy": accuracy_score(y, y_pred),
        "precision": precision_score(y, y_pred),
        "recall": recall_score(y, y_pred),
        "f1": f1_score(y, y_pred),
    }
    for name, y_pred in predictions.items()
}
df_results = pd.DataFrame(results).T  # Transpose to get models as rows
print(df_results)
# Recorded output of print(df_results) from the notebook run:
#
#                accuracy  precision    recall        f1
# BernoulliNB    0.951484   0.946536  0.931082  0.938745
# MultinomialNB  0.924104   0.901850  0.908825  0.905324
# GaussianNB     0.928495   0.877977  0.953424  0.914147