Week-1
# Week-1
# Naive Bayes (NB)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
df = pd.read_csv("sample_data/pima_indian.csv")
feature_col_names = ['num_preg', 'glucose_conc', 'diastolic_bp', 'thickness', 'insulin', 'bmi',
                     'diab_pred', 'age']
predicted_class_names = ['diabetes']
X = df[feature_col_names].values # these are factors for the prediction
y = df[predicted_class_names].values # this is what we want to predict
# Splitting the dataset into train and test data
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.33)
print('\nTotal number of training samples:', ytrain.shape)
print('\nTotal number of test samples:', ytest.shape)
# Training Naive Bayes (NB) classifier on training data.
clf = GaussianNB().fit(xtrain,ytrain.ravel())
predicted = clf.predict(xtest)
predictTestData = clf.predict([[6,148,72,35,0,33.6,0.627,50]])  # predict for a single hand-crafted record
#printing Confusion matrix, accuracy, Precision and Recall
print('\n Confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
print('\nAccuracy of the classifier:', metrics.accuracy_score(ytest, predicted))
print('\nPrecision:', metrics.precision_score(ytest, predicted))
print('\nRecall:', metrics.recall_score(ytest, predicted))
print("Predicted value for the individual test record:", predictTestData)
Week-2
#Week-2
import pandas
from sklearn import tree
import pydotplus
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import matplotlib.image as pltimg
df = pandas.read_csv("/content/sample_data/playtennis.csv")
d = {'Sunny': 0, 'Overcast': 1, 'Rain': 2}
df['Outlook'] = df['Outlook'].map(d)
d = {'Hot': 0, 'Mild': 1,'Cool': 2}
df['Temperature'] = df['Temperature'].map(d)
d = {'High': 0, 'Normal': 1}
df['Humidity'] = df['Humidity'].map(d)
d = {'Weak': 0, 'Strong': 1}
df['Wind'] = df['Wind'].map(d)
d = {'No': 0, 'Yes': 1}
df['playtennis'] = df['playtennis'].map(d)
features = ['Outlook', 'Temperature', 'Humidity', 'Wind']
X = df[features]
y = df['playtennis']
dtree = DecisionTreeClassifier(criterion="entropy")
dtree = dtree.fit(X, y)
data = tree.export_graphviz(dtree, out_file=None, feature_names=features)
graph = pydotplus.graph_from_dot_data(data)
graph.write_png('mydecisiontree.png')
img=pltimg.imread('mydecisiontree.png')
imgplot = plt.imshow(img)
plt.show()
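If Graphviz/pydotplus is unavailable, scikit-learn's built-in tree.plot_tree can draw the same fitted tree; a minimal sketch, assuming the dtree and features defined above:
# Optional: render the tree without Graphviz
plt.figure(figsize=(10, 6))
tree.plot_tree(dtree, feature_names=features, class_names=['No', 'Yes'], filled=True)
plt.show()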
Week-3(a)
# Week-3a
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Define the column names for the dataset
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
# Read dataset to pandas dataframe
dataset = pd.read_csv("/content/sample_data/iris.csv", names=names, header=0)
# Split the dataset into features (X) and labels (y)
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
# Display the first few rows of the features
print(X.head())
# Split the data into training and testing sets
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.10)
# Train the k-NN classifier
classifier = KNeighborsClassifier(n_neighbors=5).fit(Xtrain, ytrain)
# Predict the labels of the test set
ypred = classifier.predict(Xtest)
# Display the results
i = 0
print("\n-------------------------------------------------------------------------")
print('%-25s %-25s %-25s' % ('Original Label', 'Predicted Label', 'Correct/Wrong'))
print("-------------------------------------------------------------------------")
for label in ytest:
    print('%-25s %-25s' % (label, ypred[i]), end="")
    if label == ypred[i]:
        print(' %-25s' % 'Correct')
    else:
        print(' %-25s' % 'Wrong')
    i = i + 1
print("-------------------------------------------------------------------------")
print("\nConfusion Matrix:\n", metrics.confusion_matrix(ytest, ypred))
print ("-------------------------------------------------------------------------")
print("\nClassification Report:\n", metrics.classification_report(ytest, ypred))
print ("-------------------------------------------------------------------------")
print('Accuracy of the classifier is %0.2f' % metrics.accuracy_score(ytest, ypred))
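To see how the choice of k affects the result, the classifier can be refit over a small range; a sketch assuming the splits and imports above:
# Optional: test-set accuracy for a few values of k
for k in [1, 3, 5, 7, 9]:
    knn = KNeighborsClassifier(n_neighbors=k).fit(Xtrain, ytrain)
    print('k = %d, accuracy = %0.2f' % (k, metrics.accuracy_score(ytest, knn.predict(Xtest))))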
Week-3(b)
#Week-3b
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
data_set = pd.read_csv('/content/sample_data/Salary_Data.csv')
x = data_set.iloc[:, :-1].values   # years of experience
y = data_set.iloc[:, 1].values     # salary
# Splitting the dataset into training and test set.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1/3, random_state=0)
#Fitting the Simple Linear Regression model to the training dataset
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train, y_train)
# Prediction of test and training set results
y_pred = regressor.predict(x_test)
x_pred = regressor.predict(x_train)
#visualizing the Training set results:
mtp.scatter(x_train, y_train, color="green")
mtp.plot(x_train, x_pred, color="red")
mtp.title("Salary vs Experience (Training Dataset)")
mtp.xlabel("Years of Experience")
mtp.ylabel("Salary(In Rupees)")
mtp.show()
# Visualizing the Test set results
mtp.scatter(x_test, y_test, color="blue")
mtp.plot(x_train, x_pred, color="red")
mtp.title("Salary vs Experience (Test Dataset)")
mtp.xlabel("Years of Experience")
mtp.ylabel("Salary(In Rupees)")
mtp.show()
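The fitted line's slope, intercept, and test-set R² can also be inspected directly; a short sketch assuming the regressor and splits above:
# Optional: inspect the learned line and its test-set fit
print('Slope (salary increase per year of experience):', regressor.coef_[0])
print('Intercept (base salary):', regressor.intercept_)
print('R^2 on the test set: %0.3f' % regressor.score(x_test, y_test))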
Week-4(a)
#Week-4a
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.inspection import DecisionBoundaryDisplay
# import some data to play with
iris = datasets.load_iris()
# Take the first two features. We could avoid this by using a two-dim dataset
X = iris.data[:, :2]
y = iris.target
# We create an instance of SVM and fit our data. We do not scale the
# data since we want to plot the support vectors.
C = 1.0  # SVM regularization parameter
models = (
    svm.SVC(kernel="linear", C=C),
    svm.LinearSVC(C=C, max_iter=10000),
    svm.SVC(kernel="rbf", gamma=0.7, C=C),
    svm.SVC(kernel="poly", degree=3, gamma="auto", C=C),
)
models = (clf.fit(X, y) for clf in models)
# title for the plots
titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 3) kernel",
)
# Set-up 2x2 grid for plotting.
fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)
X0, X1 = X[:, 0], X[:, 1]
for clf, title, ax in zip(models, titles, sub.flatten()):
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict",
        cmap=plt.cm.coolwarm,
        alpha=0.8,
        ax=ax,
        xlabel=iris.feature_names[0],
        ylabel=iris.feature_names[1],
    )
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)
plt.show()
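Each model's training accuracy can also be compared; since models above is a generator that the plotting loop consumes, this sketch refits fresh copies (assuming X, y, C, and svm from above):
# Optional: training accuracy of two of the kernels, refit from scratch
for name, clf in [("linear SVC", svm.SVC(kernel="linear", C=C)),
                  ("RBF SVC", svm.SVC(kernel="rbf", gamma=0.7, C=C))]:
    clf.fit(X, y)
    print(name, "training accuracy: %0.2f" % clf.score(X, y))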
Week-4(b)
#Week-4b
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
# Importing the classification report and confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
%matplotlib inline
# Loading the Iris dataset (assuming seaborn's bundled copy) and checking it
iris = sns.load_dataset('iris')
iris.head()
# Creating a pairplot to visualize the similarities and especially difference between the species
sns.pairplot(data=iris, hue='species', palette='Set2')
# Separating the independent variables from dependent variables
x = iris.iloc[:, :-1]
y = iris.iloc[:, 4]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30)
from sklearn.svm import SVC
model = SVC()
model.fit(x_train, y_train)
pred = model.predict(x_test)
print(confusion_matrix(y_test,pred))
print(classification_report(y_test, pred))
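SVC() defaults to the RBF kernel, so other kernels can be tried on the same split; a minimal sketch assuming the x_train/x_test/y_train/y_test variables above:
# Optional: compare kernels on the same train/test split
from sklearn.metrics import accuracy_score
for kernel in ['linear', 'poly', 'rbf']:
    m = SVC(kernel=kernel).fit(x_train, y_train)
    print(kernel, 'accuracy: %0.2f' % accuracy_score(y_test, m.predict(x_test)))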
Week-5
#Week-5
#EM algorithm and K-Means Algorithm
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import sklearn.metrics as sm
import pandas as pd
import numpy as np
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
model = KMeans(n_clusters=3)  # three clusters, one per Iris species
model.fit(X)
plt.figure(figsize=(14,7))
colormap = np.array(['red', 'lime', 'black'])
# Plot the Original Classifications
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
# Plot the Models Classifications
plt.subplot(1, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of K-Means:', sm.accuracy_score(y, model.labels_))
print('The Confusion matrix of K-Means:\n', sm.confusion_matrix(y, model.labels_))
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns = X.columns)
#xs.sample(5)
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs)
#y_cluster_gmm
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('EM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of EM:', sm.accuracy_score(y, y_gmm))
print('The Confusion matrix of EM:\n', sm.confusion_matrix(y, y_gmm))
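One caveat: K-Means and GMM assign arbitrary cluster ids, so the raw accuracy scores above can look low even for a good clustering. A sketch (assuming y, y_gmm, np, and sm from above) that remaps each cluster to its majority true class before scoring:
# Optional: remap arbitrary cluster ids to majority true labels before scoring
def remap_labels(true_labels, cluster_labels):
    mapped = np.zeros_like(cluster_labels)
    for c in np.unique(cluster_labels):
        mask = cluster_labels == c
        mapped[mask] = np.bincount(true_labels[mask]).argmax()
    return mapped
y_true = y.Targets.values  # flatten the one-column DataFrame to a 1-D array
print('EM accuracy after label remapping: %0.2f' % sm.accuracy_score(y_true, remap_labels(y_true, y_gmm)))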