#Defining Dataset
import pandas as pd
import numpy as np
# Load the iris CSV from its fixed local Windows path into a DataFrame.
df = pd.read_csv(filepath_or_buffer=r'E:\dataset\iris\iris.csv')
# Peek at the last five rows; as a bare expression the result is only
# displayed in an interactive session (notebook/REPL), not in a script.
df.tail(n=5)
# Feature columns (weather, temp) and the target column (play).
# The three lists are parallel: index i of each describes observation i.
weather = [
    'Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast',
    'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Rainy',
]
temp = [
    'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
    'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
]
play = [
    'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
    'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
]
Encoding Features
First, you need to convert these string labels into numbers, for example 'Overcast', 'Rainy',
and 'Sunny' to 0, 1, and 2. This is known as label encoding. Scikit-learn provides the LabelEncoder
class for encoding labels with values between 0 and one less than the number of discrete classes.
# Import LabelEncoder
from sklearn import preprocessing

# Create a single encoder instance. Every fit_transform() call refits it on
# the column passed in, so `le` only remembers the most recently fitted column.
le = preprocessing.LabelEncoder()

# Convert the weather strings into integer codes. The input is the `weather`
# list; the result keeps the spelling `wheather_encoded` because later code
# in this file refers to it by that name.
wheather_encoded = le.fit_transform(weather)
print(wheather_encoded)
[2 2 0 1 1 1 0 2 2 1 2 0 0 1]
Similarly, you can also encode temp and play columns.
# Convert the remaining string columns into integer codes, reusing the same
# encoder (it is refitted on each column in turn).
temp_encoded = le.fit_transform(temp)
label = le.fit_transform(play)

# Show the encoded arrays. The second line prints `label` (the encoded
# target), not the raw `play` strings.
print("Temp:", temp_encoded)
print("Play:", label)
Temp: [1 1 1 2 0 0 0 2 0 2 2 2 1 2]
Play: [0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Now combine both features (weather and temp) into a single variable that holds one pair of encoded values per observation.
# Combine weather and temp into a single two-column feature matrix with one
# row per observation: column 0 is the encoded weather, column 1 the encoded
# temp. The row count comes from the inputs instead of being hard-coded.
# Cast to float so the dtype matches a matrix allocated with np.zeros().
features = np.column_stack((wheather_encoded, temp_encoded)).astype(float)
print(features)
[(2, 1), (2, 1), (0, 1), (1, 2), (1, 0), (1, 0), (0, 0), (2, 2), (2, 0), (1, 2), (2, 2), (0, 2), (0, 1), (1, 2)]
Generating Model
Generate a model using a naive Bayes classifier in the following steps:
Create a naive Bayes classifier
Fit the classifier to the dataset
Perform prediction
# Import the Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB

# Build the Gaussian classifier and train it on the encoded features and
# target. fit() returns the estimator itself, so the two steps chain.
model = GaussianNB().fit(features, label)

# Predict the class of a single observation; its two values must follow the
# same column order as `features`.
predicted = model.predict([[0, 2]])
print(predicted)