INDEX
Sr. No.  Particular  Signature
1. To implement basic Python libraries of machine learning like NumPy, pandas, SciPy, scikit-learn, Matplotlib, etc.
2. To implement Principal Component Analysis.
3. To implement the FIND-S algorithm.
4. To analyse the tested data using the K-Means Clustering algorithm.
5. To implement the k-nearest neighbor algorithm.
6. To implement linear regression.
7. To implement logistic regression.
8. To implement the Naïve Bayes Algorithm.
9. To implement the Decision Tree Algorithm.
10. To implement the Support Vector Machine Algorithm.
PROGRAM :- 1
AIM: To implement basic Python libraries of machine learning like NumPy, pandas, SciPy, scikit-learn, Matplotlib, etc.
1.
import numpy as nup
# Then, create two arrays of rank 2
K = nup.array([[2, 4], [6, 8]])
R = nup.array([[1, 3], [5, 7]])
# Then, create two arrays of rank 1
P = nup.array([10, 12])
S = nup.array([9, 11])
# Then, we will print the inner product of the vectors
print("Inner product of vectors: ", nup.dot(P, S), "\n")
# Then, we will print the matrix-and-vector product
print("Matrix and Vector product: ", nup.dot(K, P), "\n")
# Now, we will print the matrix-and-matrix product
print("Matrix and matrix product: ", nup.dot(K, R))
OUTPUT:-
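Expected values, computed by hand from the arrays above (console spacing may differ slightly):
Inner product of vectors:  222
Matrix and Vector product:  [ 68 156]
Matrix and matrix product:  [[22 34]
 [46 74]]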
2.
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
x = np.arange(5, 20)
y = np.exp(x/3.0)
f = interpolate.interp1d(x, y)
x1 = np.arange(6, 12)
y1 = f(x1)  # use the interpolation function returned by `interp1d`
plt.plot(x, y, 'o', x1, y1, '--')
plt.show()
OUTPUT:-
3.
from sklearn import datasets as ds
from sklearn import metrics as mt
from sklearn.tree import DecisionTreeClassifier as dtc
# load the iris datasets
dataset_1 = ds.load_iris()
# fit a CART model to the data
model_1 = dtc()
model_1.fit(dataset_1.data, dataset_1.target)
print(model_1)
# make predictions
expected_1 = dataset_1.target
predicted_1 = model_1.predict(dataset_1.data)
# summarize the fit of the model
print (mt.classification_report(expected_1, predicted_1))
print(mt.confusion_matrix(expected_1, predicted_1))
OUTPUT:-
4.
import pandas as pad
data_1 = {"Countries": ["Bhutan", "Cape Verde", "Chad", "Estonia", "Guinea", "Kenya", "Libya
", "Mexico"],
"capital": ["Thimphu", "Praia", "N'Djamena", "Tallinn", "Conakry", "Nairobi", "Tripoli", "
Mexico City"],
"Currency": ["Ngultrum", "Cape Verdean escudo", "CFA Franc", "Estonia Kroon; Euro", "G
uinean franc", "Kenya shilling", "Libyan dinar", "Mexican peso"],
"population": [20.4, 143.5, 12.52, 135.7, 52.98, 76.21, 34.28, 54.32] }
data_1_table = pad.DataFrame(data_1)
print(data_1_table)
OUTPUT:-
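Expected output (deterministic given the dictionary above; pandas column spacing may differ):
    Countries    capital             Currency  population
0      Bhutan    Thimphu             Ngultrum       20.40
1  Cape Verde      Praia  Cape Verdean escudo      143.50
2        Chad  N'Djamena            CFA Franc       12.52
...and so on for the remaining five rows.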
5. Matplotlib
import matplotlib.pyplot as plot
import numpy as nup
# Prepare the data
K = nup.linspace(2, 4, 8)
R = nup.linspace(5, 7, 9)
Q = nup.linspace(0, 1, 3)
# Plot the data
plot.plot(K, K, label = 'K')
plot.plot(R, R, label = 'R')
plot.plot(Q, Q, label = 'Q')
# Add a legend
plot.legend()
# Show the plot
plot.show()
OUTPUT:-
PROGRAM :- 2
AIM: To implement Principal Component Analysis.
import numpy as nmp
import matplotlib.pyplot as mpltl
import pandas as pnd
DS = pnd.read_csv('Wine.csv')
# Now, we will distribute the dataset into two components "X" and "Y"
X = DS.iloc[: , 0:13].values
Y = DS.iloc[: , 13].values
from sklearn.model_selection import train_test_split as tts
X_train, X_test, Y_train, Y_test = tts(X, Y, test_size = 0.2, random_state = 0)
from sklearn.preprocessing import StandardScaler as SS
SC = SS()
X_train = SC.fit_transform(X_train)
X_test = SC.transform(X_test)
from sklearn.decomposition import PCA
PCa = PCA(n_components = 2)  # two components are needed for the 2-D decision-boundary plot below
X_train = PCa.fit_transform(X_train)
X_test = PCa.transform(X_test)
explained_variance = PCa.explained_variance_ratio_
from sklearn.linear_model import LogisticRegression as LR
classifier_1 = LR (random_state = 0)
classifier_1.fit(X_train, Y_train)
Y_pred = classifier_1.predict(X_test)
from sklearn.metrics import confusion_matrix as CM
c_m = CM (Y_test, Y_pred)
from matplotlib.colors import ListedColormap as LCM
X_set, Y_set = X_train, Y_train
X_1, X_2 = nmp.meshgrid(nmp.arange(start = X_set[:, 0].min() - 1,
stop = X_set[: , 0].max() + 1, step = 0.01),
nmp.arange(start = X_set[: , 1].min() - 1,
stop = X_set[: , 1].max() + 1, step = 0.01))
mpltl.contourf(X_1, X_2, classifier_1.predict(nmp.array([X_1.ravel(),
X_2.ravel()]).T).reshape(X_1.shape), alpha = 0.75,
cmap = LCM (('yellow', 'grey', 'green')))
mpltl.xlim (X_1.min(), X_1.max())
mpltl.ylim (X_2.min(), X_2.max())
for s, t in enumerate(nmp.unique(Y_set)):
    mpltl.scatter(X_set[Y_set == t, 0], X_set[Y_set == t, 1],
                  c = LCM(('red', 'green', 'blue'))(s), label = t)
mpltl.title('Logistic Regression (Training set)')
mpltl.xlabel ('PC_1') # for X_label
mpltl.ylabel ('PC_2') # for Y_label
mpltl.legend() # for showing legend
# show scatter plot
mpltl.show()
OUTPUT:-
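For reference, PCA projects the standardized features onto the directions of maximum variance: the principal components are the leading eigenvectors of the sample covariance matrix of the (centered) data, and explained_variance_ratio_ reports the fraction of total variance each retained component captures:
$$\Sigma = \frac{1}{n-1} X^\top X, \qquad \Sigma v_k = \lambda_k v_k, \qquad \text{ratio of } PC_k = \frac{\lambda_k}{\sum_j \lambda_j}$$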
PROGRAM :- 3
AIM: To implement FIND-S algorithm.
import pandas as pd
import numpy as np
#to read the data in the csv file
data = pd.read_csv("data.csv")
print(data,"n")
#making an array of all the attributes
d = np.array(data)[:,:-1]
print("n The attributes are: ",d)
#segragating the target that has positive and negative examples
target = np.array(data)[:,-1]
print("n The target is: ",target)
#training function to implement find-s algorithm
def train(c, t):
    for i, val in enumerate(t):
        if val == "Yes":
            specific_hypothesis = c[i].copy()
            break
    for i, val in enumerate(c):
        if t[i] == "Yes":
            for x in range(len(specific_hypothesis)):
                if val[x] != specific_hypothesis[x]:
                    specific_hypothesis[x] = '?'
                else:
                    pass
    return specific_hypothesis
#obtaining the final hypothesis
print("n The final hypothesis is:",train(d,target))
OUTPUT:-
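The data.csv file is not included in the listing. A minimal hypothetical example in the style of the classic EnjoySport dataset (an assumption, not the original file) would be:
sky,airtemp,humidity,wind,water,forecast,enjoysport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes
On this file, FIND-S generalizes the first positive example attribute by attribute and returns ['Sunny' 'Warm' '?' 'Strong' '?' '?'].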
PROGRAM :- 4
AIM: To analyse the tested data using the K-Means Clustering algorithm.
# importing required tools
import numpy as np
import cv2
from matplotlib import pyplot as plt
# creating two test data
X = np.random.randint(10,35,(25,2))
Y = np.random.randint(55,70,(25,2))
Z = np.vstack((X,Y))
Z = Z.reshape((50,2))
# convert to np.float32
Z = np.float32(Z)
plt.xlabel('Test Data')
plt.ylabel('Z samples')
plt.hist(Z,256,[0,256])
plt.show()
X = np.random.randint(10,45,(25,2))
Y = np.random.randint(55,70,(25,2))
Z = np.vstack((X,Y))
# convert to np.float32
Z = np.float32(Z)
# define criteria and apply kmeans()
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret,label,center = cv2.kmeans(Z,2,None,criteria,10,cv2.KMEANS_RANDOM_CENTERS)
# Now separate the data
A = Z[label.ravel()==0]
B = Z[label.ravel()==1]
# Plot the data
plt.scatter(A[:,0],A[:,1])
plt.scatter(B[:,0],B[:,1],c = 'r')
plt.scatter(center[:,0],center[:,1],s = 80,c = 'y', marker = 's')
plt.xlabel('Test Data')
plt.ylabel('Z samples')
plt.show()
OUTPUT:-
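For reference, cv2.kmeans minimizes the within-cluster sum of squared distances (the compactness value it returns as ret):
$$\min_{\{C_k\},\,\{\mu_k\}} \; \sum_{k=1}^{K} \sum_{x_i \in C_k} \lVert x_i - \mu_k \rVert^2$$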
PROGRAM :- 5
AIM: To implement k-nearest neighbor algorithm.
# Import necessary modules
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt
irisData = load_iris()
X = irisData.data
y = irisData.target
# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42)
neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))
# Loop over K values
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    # Compute training and test data accuracy
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)
# Generate plot
plt.plot(neighbors, test_accuracy, label = 'Testing dataset Accuracy')
plt.plot(neighbors, train_accuracy, label = 'Training dataset Accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()
OUTPUT:-
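A small optional follow-up (not part of the original listing) that reports which k scored best on this test split, using the arrays the program already built:
best_k = neighbors[np.argmax(test_accuracy)]
print("Best k on this test split:", best_k)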
PROGRAM :- 6
AIM: To implement linear regression.
import numpy as nmp
import matplotlib.pyplot as mtplt
def estimate_coeff(p, q):
    # Here, we will count the total number of points or observations
    n1 = nmp.size(p)
    # Now, we will calculate the means of the p and q vectors
    m_p = nmp.mean(p)
    m_q = nmp.mean(q)
    # Here, we will calculate the cross-deviation and the deviation about p
    SS_pq = nmp.sum(q * p) - n1 * m_q * m_p
    SS_pp = nmp.sum(p * p) - n1 * m_p * m_p
    # Here, we will calculate the regression coefficients
    b_1 = SS_pq / SS_pp
    b_0 = m_q - b_1 * m_p
    return (b_0, b_1)
def plot_regression_line(p, q, b):
    # Now, we will plot the actual points or observations as a scatter plot
    mtplt.scatter(p, q, color = "m", marker = "o", s = 30)
    # Here, we will calculate the predicted response vector
    q_pred = b[0] + b[1] * p
    # Here, we will plot the regression line
    mtplt.plot(p, q_pred, color = "g")
    # Here, we will put the labels
    mtplt.xlabel('p')
    mtplt.ylabel('q')
    # Here, we will display the plot
    mtplt.show()
def main():
    # entering the observation points or data
    p = nmp.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
    q = nmp.array([11, 13, 12, 15, 17, 18, 18, 19, 20, 22])
    # now, we will estimate the coefficients
    b = estimate_coeff(p, q)
    print("Estimated coefficients are :\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    # Now, we will plot the regression line
    plot_regression_line(p, q, b)

if __name__ == "__main__":
    main()
OUTPUT:-
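For reference, these are the ordinary least-squares estimates, and for the data above they work out by hand to:
$$b_1 = \frac{SS_{pq}}{SS_{pp}} = \frac{96.5}{82.5} \approx 1.1697, \qquad b_0 = \bar q - b_1 \bar p = 16.5 - 1.1697 \times 14.5 \approx -0.4606$$
so the script should print b_0 ≈ -0.4606 and b_1 ≈ 1.1697.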
PROGRAM :- 7
AIM: To implement logistic regression.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
dataset = pd.read_csv("User_Data.csv")
# columns 2 and 3 (Age and EstimatedSalary, per the plot labels below) are the features;
# column 4 (the purchase decision) is the label
x = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values
# Splitting the dataset to train and test
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.25,
                                                random_state = 0)
sc_x = StandardScaler()
xtrain = sc_x.fit_transform(xtrain)
xtest = sc_x.transform(xtest)
print(xtrain[0:10, :])
OUTPUT:-
[[ 0.58164944 -0.88670699]
[-0.60673761 1.46173768]
[-0.01254409 -0.5677824 ]
[-0.60673761 1.89663484]
[ 1.37390747 -1.40858358]
[ 1.47293972 0.99784738]
[ 0.08648817 -0.79972756]
[-0.01254409 -0.24885782]
[-0.21060859 -0.5677824 ]
[-0.21060859 -0.19087153]]
# Train the model
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(xtrain, ytrain)
# prediction
y_pred = classifier.predict(xtest)
# Test the performance of our model
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(ytest, y_pred)
print ("Confusion Matrix : \n", cm)
# Accuracy
from sklearn.metrics import accuracy_score
print ("Accuracy : ", accuracy_score(ytest, y_pred))
OUTPUT 2 :-
Confusion Matrix :
[[65 3]
[8 24]]
Out of 100 test samples:
True Positives + True Negatives = 65 + 24 = 89
False Positives + False Negatives = 3 + 8 = 11
Performance measure – Accuracy: 0.89
# Visualizing the performance of our model
from matplotlib.colors import ListedColormap
X_set, y_set = xtest, ytest
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1,
stop = X_set[:, 0].max() + 1, step = 0.01),
np.arange(start = X_set[:, 1].min() - 1,
stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(
np.array([X1.ravel(), X2.ravel()]).T).reshape(
X1.shape), alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Classifier (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
OUTPUT:-
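For reference, logistic regression models the class probability with the sigmoid function:
$$P(y = 1 \mid x) = \frac{1}{1 + e^{-(\beta_0 + \beta_1 x_1 + \beta_2 x_2)}}$$
and predicts class 1 when this probability exceeds 0.5, which is why the decision boundary in the plot is linear in the two standardized features.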
PROGRAM :- 8
AIM: To implement Naïve Bayes Algorithm.
# Importing library
import math
import random
import csv
# the categorical class names are changed to numeric data
# eg: yes and no encoded to 1 and 0
def encode_class(mydata):
    classes = []
    for i in range(len(mydata)):
        if mydata[i][-1] not in classes:
            classes.append(mydata[i][-1])
    for i in range(len(classes)):
        for j in range(len(mydata)):
            if mydata[j][-1] == classes[i]:
                mydata[j][-1] = i
    return mydata
# Splitting the data
def splitting(mydata, ratio):
    train_num = int(len(mydata) * ratio)
    train = []
    # initially the test set will have all the dataset
    test = list(mydata)
    while len(train) < train_num:
        # index generated randomly from range 0
        # to length of testset
        index = random.randrange(len(test))
        # from testset, pop data rows and put them in train
        train.append(test.pop(index))
    return train, test
# Group the data rows under each class yes or
# no in dictionary eg: dict[yes] and dict[no]
def groupUnderClass(mydata):
    dict = {}
    for i in range(len(mydata)):
        if (mydata[i][-1] not in dict):
            dict[mydata[i][-1]] = []
        dict[mydata[i][-1]].append(mydata[i])
    return dict
# Calculating Mean
def mean(numbers):
    return sum(numbers) / float(len(numbers))
# Calculating Standard Deviation
def std_dev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)
def MeanAndStdDev(mydata):
    info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
    # eg: list = [[a, b, c], [m, n, o], [x, y, z]]
    # here mean of 1st attribute = (a + m + x)/3, mean of 2nd attribute = (b + n + y)/3
    # delete the summary of the last column (the class label)
    del info[-1]
    return info
# find Mean and Standard Deviation under each class
def MeanAndStdDevForClass(mydata):
    info = {}
    dict = groupUnderClass(mydata)
    for classValue, instances in dict.items():
        info[classValue] = MeanAndStdDev(instances)
    return info
# Calculate Gaussian Probability Density Function
def calculateGaussianProbability(x, mean, stdev):
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo
# Calculate Class Probabilities
def calculateClassProbabilities(info, test):
    # note: class priors are not multiplied in here, so uniform priors are effectively assumed
    probabilities = {}
    for classValue, classSummaries in info.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, std_dev = classSummaries[i]
            x = test[i]
            probabilities[classValue] *= calculateGaussianProbability(x, mean, std_dev)
    return probabilities
# Make prediction - highest probability is the prediction
def predict(info, test):
    probabilities = calculateClassProbabilities(info, test)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel
# returns predictions for a set of examples
def getPredictions(info, test):
    predictions = []
    for i in range(len(test)):
        result = predict(info, test[i])
        predictions.append(result)
    return predictions
# Accuracy score
def accuracy_rate(test, predictions):
    correct = 0
    for i in range(len(test)):
        if test[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(test))) * 100.0
# driver code
# add the data path in your system
filename = r'E:\user\MACHINE LEARNING\machine learning algos\Naive bayes\filedata.csv'
# load the file and store it in mydata list
mydata = csv.reader(open(filename, "rt"))
mydata = list(mydata)
mydata = encode_class(mydata)
for i in range(len(mydata)):
    mydata[i] = [float(x) for x in mydata[i]]
# split ratio = 0.7
# 70% of data is training data and 30% is test data used for testing
ratio = 0.7
train_data, test_data = splitting(mydata, ratio)
print('Total number of examples are: ', len(mydata))
print('Out of these, training examples are: ', len(train_data))
print("Test examples are: ", len(test_data))
# prepare model
info = MeanAndStdDevForClass(train_data)
# test model
predictions = getPredictions(info, test_data)
accuracy = accuracy_rate(test_data, predictions)
print("Accuracy of your model is: ", accuracy)
OUTPUT:-
Total number of examples are: 200
Out of these, training examples are: 140
Test examples are: 60
Accuracy of your model is: 71.237678
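For reference, each per-attribute likelihood above is the Gaussian density, and the predicted class is the one with the largest product of likelihoods (with the class prior omitted, as noted in the code):
$$P(x \mid \mu, \sigma) = \frac{1}{\sigma\sqrt{2\pi}} \exp\!\left(-\frac{(x-\mu)^2}{2\sigma^2}\right), \qquad \hat{y} = \arg\max_{c} \prod_{i} P(x_i \mid \mu_{c,i}, \sigma_{c,i})$$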
PROGRAM :- 9
AIM: To implement Decision Tree Algorithm.
# Run this program on your local python
# interpreter, provided you have installed
# the required libraries.
# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
# Function importing Dataset
def importdata():
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-' +
        'databases/balance-scale/balance-scale.data',
        sep = ',', header = None)
    # Printing the dataset shape
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)
    # Printing the dataset observations
    print("Dataset: ", balance_data.head())
    return balance_data
# Function to split the dataset
def splitdataset(balance_data):
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]
    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size = 0.3, random_state = 100)
    return X, Y, X_train, X_test, y_train, y_test
# Function to perform training with giniIndex.
def train_using_gini(X_train, X_test, y_train):
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion = "gini",
        random_state = 100, max_depth = 3, min_samples_leaf = 5)
    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini
# Function to perform training with entropy.
def train_using_entropy(X_train, X_test, y_train):
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion = "entropy", random_state = 100,
        max_depth = 3, min_samples_leaf = 5)
    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy
# Function to make predictions
def prediction(X_test, clf_object):
    # Prediction on the test set
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred
# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    print("Confusion Matrix: ",
        confusion_matrix(y_test, y_pred))
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))
# Driver code
def main():
    # Building Phase
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = train_using_entropy(X_train, X_test, y_train)
    # Operational Phase
    print("Results Using Gini Index:")
    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)
    print("Results Using Entropy:")
    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

# Calling main function
if __name__ == "__main__":
    main()
OUTPUT:-
Dataset Length: 625
Dataset Shape: (625, 5)
Dataset: 0 1 2 3 4
0 B 1 1 1 1
1 R 1 1 1 2
2 R 1 1 1 3
3 R 1 1 1 4
4 R 1 1 1 5
Results Using Gini Index:
Predicted values:
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R'
'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix: [[ 0 6 7]
[ 0 67 18]
[ 0 19 71]]
Accuracy : 73.4042553191
Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.73 0.79 0.76 85
R 0.74 0.79 0.76 90
avg/total 0.68 0.73 0.71 188
Results Using Entropy:
Predicted values:
['R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L'
'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L'
'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'L'
'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L'
'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'L' 'R'
'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix: [[ 0 6 7]
[ 0 63 22]
[ 0 20 70]]
Accuracy : 70.7446808511
Report :
precision recall f1-score support
B 0.00 0.00 0.00 13
L 0.71 0.74 0.72 85
R 0.71 0.78 0.74 90
avg / total 0.66 0.71 0.68 188
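For reference, the two split criteria used above measure node impurity as:
$$\text{Gini}(t) = 1 - \sum_{c} p_c^2, \qquad \text{Entropy}(t) = -\sum_{c} p_c \log_2 p_c$$
where p_c is the fraction of samples of class c at node t; at each split, the tree greedily chooses the feature threshold that most reduces impurity.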
PROGRAM :- 10
AIM: To implement Support Vector Machine Algorithm.
Importing the dataset
import pandas as pd
data = pd.read_csv("apples_and_oranges.csv")
Splitting the dataset into training and test samples
from sklearn.model_selection import train_test_split
training_set, test_set = train_test_split(data, test_size = 0.2,
random_state = 1)
Classifying the predictors and target
X_train = training_set.iloc[:,0:2].values
Y_train = training_set.iloc[:,2].values
X_test = test_set.iloc[:,0:2].values
Y_test = test_set.iloc[:,2].values
Initializing Support Vector Machine and fitting the
training data
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state = 1)
classifier.fit(X_train,Y_train)
Predicting the classes for test set
Y_pred = classifier.predict(X_test)
Attaching the predictions to test set for comparing
test_set["Predictions"] = Y_pred
#Comparing the actual classes and predictions
#Calculating the accuracy of the predictions
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test,Y_pred)
accuracy = float(cm.diagonal().sum())/len(Y_test)
print("\nAccuracy Of SVM For The Given Dataset : ", accuracy)
OUTPUT:-
Accuracy Of SVM For The Given Dataset : 0.875
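The apples_and_oranges.csv file is not included; judging from the axis labels below, it holds a weight column (grams), a size column (cm), and a class label. A hypothetical excerpt (an assumption, not the original data) might look like:
Weight,Size,Class
69,4.39,orange
65,4.09,orange
72,5.85,apple
70,5.68,apple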
Visualizing the classifier
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y_train = le.fit_transform(Y_train)
After encoding, fit the encoded data to the SVM
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state = 1)
classifier.fit(X_train,Y_train)
Let’s Visualize!
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
plt.figure(figsize = (7,7))
X_set, y_set = X_train, Y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('black', 'white')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'orange'))(i), label = j)
plt.title('Apples Vs Oranges')
plt.xlabel('Weight In Grams')
plt.ylabel('Size in cm')
plt.legend()
plt.show()
OUTPUT:-
Visualizing the predictions
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
plt.figure(figsize = (7,7))
X_set, y_set = X_test, Y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('black', 'white')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'orange'))(i), label = j)
plt.title('Apples Vs Oranges Predictions')
plt.xlabel('Weight In Grams')
plt.ylabel('Size in cm')
plt.legend()
plt.show()
OUTPUT:-
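For reference, the RBF kernel used by SVC(kernel='rbf') measures similarity between two points as:
$$K(x, x') = \exp\left(-\gamma \lVert x - x' \rVert^2\right)$$
which is what produces the curved decision boundary seen in the plots above.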
A Practical File
On
Machine Learning
Submitted in partial fulfillment of the requirement
for the
Award of Bachelor of Technology Degree
In
Computer Science Engineering
2019-2023
Submitted By:
Ravi Parkash (4811)
Semester 8TH
Under the Guidance of
Mrs. Jyoti Ahlawat
DEPARTMENT OF COMPUTER SCIENCE ENGINEERING
MATU RAM INSTITUTE OF ENGINEERING
& MANAGEMENT, Rohtak, Haryana