ML LAB PROGRAMS
1) Aim: Illustrate and demonstrate the working model and principle of the Find-S algorithm.
Program: For a given set of training data examples stored in a .CSV file, implement and demonstrate the Find-S algorithm to output the most specific hypothesis consistent with the training examples.
import csv

def loadCsv(filename):
    # Read the CSV file; the first row holds the attribute names
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

def print_hypothesis(h):
    print('<', end='')
    for i in range(0, len(h) - 1):   # skip the final label column
        print(h[i], end=',')
    print('>')

def findS():
    dataset, features = loadCsv("/content/enjoysport.csv")  # filename assumed; adjust to your data file
    rows = len(dataset)
    cols = len(dataset[0])
    flag = 0
    for x in range(0, rows):
        t = dataset[x]
        if t[-1] == '1' and flag == 0:
            # First positive example: initialize h to it
            flag = 1
            h = dataset[x]
        elif t[-1] == '1':
            # Generalize h wherever it disagrees with a later positive example
            for y in range(cols):
                if h[y] != t[y]:
                    h[y] = '?'
    print("The maximally specific hypothesis for the given training examples")
    print_hypothesis(h)

findS()
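The program assumes training data in the style of the classic EnjoySport set: a header row, categorical attributes, and a final 0/1 label column. A minimal sample CSV (an assumed layout, matching the traces shown in Program 2) would be:
sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,1
sunny,warm,high,strong,warm,same,1
rainy,cold,high,strong,warm,change,0
sunny,warm,high,strong,cool,change,1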
Output
2) Aim: Demonstrate the working model and principle of the Candidate-Elimination algorithm.
Program: For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.
import numpy as np
import pandas as pd

df = pd.read_csv("/content/enjoysport.csv")  # filename assumed; adjust to your data file
concept = np.array(df.iloc[:, 0:-1])   # attribute columns
target = np.array(df.iloc[:, -1])      # label column

def learn(concept, target):
    specific_h = concept[0].copy()
    print("Most specific", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("General", general_h)
    for i, h in enumerate(concept):
        print("Instance", i + 1, "is", h)
        if target[i] == 1:
            print("Instance is positive")
            # Generalize S and relax G where the positive example disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == 0:
            print("Instance is negative")
            # Specialize G against the negative example
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("Specific boundary", i + 1, specific_h)
        print("General boundary", i + 1, general_h)
        print("\n")
    return specific_h, general_h

specific_h, general_h = learn(concept, target)
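Only rows of general_h that constrain at least one attribute carry information; an optional post-processing step (not in the original listing) that prints just those:
# Keep only general hypotheses that restrict at least one attribute
final_general = [g for g in general_h if any(v != '?' for v in g)]
print("Final general hypotheses:", final_general)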
OUTPUT
Most specific ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
General [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Instance 1 is ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
Instance is positive
Specific boundary 1 ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
General boundary 1 [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Instance 2 is ['sunny' 'warm' 'high' 'strong' 'warm' 'same']
Instance is positive
Specific boundary 2 ['sunny' 'warm' '?' 'strong' 'warm' 'same']
General boundary 2 [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Instance 3 is ['rainy' 'cold' 'high' 'strong' 'warm' 'change']
Instance is negative
Specific boundary 3 ['sunny' 'warm' '?' 'strong' 'warm' 'same']
General boundary 3 [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]
Instance 4 is ['sunny' 'warm' 'high' 'strong' 'cold' 'change']
Instance is positive
Specific boundary 4 ['sunny' 'warm' '?' 'strong' '?' '?']
General boundary 4 [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
3) Aim: To construct a decision tree using training data sets under the supervised learning concept.
Program: Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.
# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn import tree

df = pd.read_csv("/content/Iris.csv")  # Kaggle-style iris file with an Id column
df.head()
df.drop('Id', axis=1, inplace=True)    # the Id column carries no signal
df.head()

le = LabelEncoder()
df['Species'] = le.fit_transform(df['Species'])  # encode class names as 0, 1, 2
df['Species'].unique()

X = df.iloc[:, :4]
y = df.iloc[:, 4:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

clf = DecisionTreeClassifier(criterion='entropy', splitter='best', max_leaf_nodes=3)  # entropy gives ID3-style splits
clf.fit(X_train, y_train.values.ravel())
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
confusion_matrix(y_test, y_pred)

fn = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']  # column names of the dataset
cn = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']  # names of classes to be classified
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(2, 2), dpi=200)
tree.plot_tree(clf,
               feature_names=fn,
               class_names=cn,
               filled=True);

# Classify a new sample
species_check = clf.predict([[4.7, 3.2, 1.3, 0.2]])[0]
print(cn[species_check])
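Optionally, the fitted tree can also be printed as plain-text rules (a sketch using sklearn.tree.export_text, available in recent scikit-learn versions):
from sklearn.tree import export_text
print(export_text(clf, feature_names=fn))  # text view of the learned decision rules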
Output
4) Aim: To understand the working principle of an Artificial Neural Network with the feed-forward and feed-backward principle.
Program: Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.
import numpy as np

# Input dataset
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
y = np.array(([92], [86], [89]), dtype=float)        # one output [expected % in exams]

# Normalize the input features
X = X / np.amax(X, axis=0)  # scale each feature by its column maximum
y = y / 100                  # normalize output to the range [0, 1]

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of sigmoid (expects sigmoid outputs as input)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000              # training iterations
lr = 0.1                  # learning rate
inputlayer_neurons = 2    # number of features in the dataset
hiddenlayer_neurons = 3   # number of neurons in the hidden layer
output_neurons = 1        # number of neurons in the output layer

# Weight and bias initialization
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))  # weights: input -> hidden
bh = np.random.uniform(size=(1, hiddenlayer_neurons))                   # bias for hidden layer
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))    # weights: hidden -> output
bout = np.random.uniform(size=(1, output_neurons))                      # bias for output layer

# Training the neural network
for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output                               # error at output
    outgrad = derivatives_sigmoid(output)         # gradient of output
    d_output = EO * outgrad                       # delta at output
    EH = d_output.dot(wout.T)                     # error propagated to hidden layer
    hiddengrad = derivatives_sigmoid(hlayer_act)  # gradient of hidden layer
    d_hiddenlayer = EH * hiddengrad               # delta at hidden layer

    # Update weights and biases
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

# Print results
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" + str(output))
Output
5) Aim: Demonstrate a text classifier using the naive Bayes classifier algorithm.
Program: Write a program to implement the naive Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.metrics import classification_report

# A 9-column numeric CSV with the class label in the last column
# (e.g., the Pima Indians Diabetes data; filename assumed, adjust to your file)
df = pd.read_csv('/content/diabetes.csv', header=None)
X = df.drop([8], axis=1)  # features: columns 0-7
Y = df[8]                 # label: column 8

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
gn = GaussianNB()
gn.fit(x_train, y_train)
y_pred = gn.predict(x_test)

print(len(x_test))
print(len(x_train))
print(metrics.accuracy_score(y_test, y_pred) * 100)
print(classification_report(y_test, y_pred))
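A confusion matrix is a natural companion to the accuracy figure; a minimal optional addition:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))  # rows = actual classes, columns = predicted classes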
Output
6) Aim: Implement and demonstrate a classification algorithm using the Support Vector Machine (SVM) algorithm.
Program: Implement and demonstrate the working of the SVM algorithm for classification.
import matplotlib.pyplot as plt
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import seaborn as sb
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Dataset
df = pd.read_csv(r"/content/Iris.csv")  # Kaggle-style iris file with an Id column
df.drop('Id', axis=1, inplace=True)
df.head()
df.info()

le = LabelEncoder()
df['Species'] = le.fit_transform(df['Species'])  # alphabetical encoding: setosa=0, versicolor=1, virginica=2
df['Species'].unique()

# Model
X = df.iloc[:, :4]
y = df.iloc[:, 4:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)
svm = SVC(kernel="rbf", gamma=0.5, C=1.0)
svm.fit(X_train, y_train.values.ravel())
y_prediction = svm.predict(X_test)

# target_names must follow the LabelEncoder's alphabetical order
class_names = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
print(classification_report(y_test, y_prediction, target_names=class_names))

# Decision boundary on the first two features only
x = df.iloc[:, :2]
svm2 = SVC(kernel="rbf", gamma=0.5, C=1.0)
svm2.fit(x, y.values.ravel())
DecisionBoundaryDisplay.from_estimator(
    svm2,
    x,
    response_method="predict",
    cmap=plt.cm.Paired,  # colormap assumed; the original listing's choice was garbled
    alpha=0.8,
)
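To overlay the training points on the boundary (an optional addition, not part of the original listing):
plt.scatter(x.iloc[:, 0], x.iloc[:, 1], c=y.values.ravel(), s=20, edgecolors="k")
plt.show()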
OUTPUT
<class '[Link]'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 SepalLengthCm 150 non-null float64
1 SepalWidthCm 150 non-null float64
2 PetalLengthCm 150 non-null float64
3 PetalWidthCm 150 non-null float64
4 Species 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
                 precision    recall  f1-score   support
Iris-setosa           1.00      1.00      1.00        18
Iris-versicolor       0.91      1.00      0.95        10
Iris-virginica        1.00      0.94      0.97        17
accuracy                                  0.98        45
macro avg             0.97      0.98      0.97        45
weighted avg          0.98      0.98      0.98        45
7) Aim: Implement and demonstrate the working model of the k-means clustering algorithm with the Expectation-Maximization concept.
Program: Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using the k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You can add Python ML library classes/API in the program.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import sklearn.metrics as sm
import pandas as pd
import numpy as np

iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

model = KMeans(n_clusters=3)
model.fit(X)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# Plot the original classifications
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# Plot the model's classifications
plt.subplot(1, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

print('The accuracy score of K-Means: ', sm.accuracy_score(y, model.labels_))
print('The confusion matrix of K-Means: ', sm.confusion_matrix(y, model.labels_))

# Standardize the features before fitting the Gaussian mixture
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
# xs.sample(5)

from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs)

plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

print('The accuracy score of EM: ', sm.accuracy_score(y, y_gmm))
print('The confusion matrix of EM: ', sm.confusion_matrix(y, y_gmm))
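Cluster IDs are arbitrary, so comparing them directly against the true labels with accuracy_score only works if the numbering happens to line up. A hedged sketch (requires scipy >= 1.9 for the keepdims argument) that remaps each cluster to its majority true class before scoring:
from scipy.stats import mode

def remap(labels, truth):
    # Replace each cluster ID with the most common true class inside that cluster
    remapped = np.zeros_like(labels)
    for c in np.unique(labels):
        remapped[labels == c] = mode(truth[labels == c], keepdims=False).mode
    return remapped

truth = y.Targets.values
print('Remapped K-Means accuracy:', sm.accuracy_score(truth, remap(model.labels_, truth)))
print('Remapped EM accuracy:     ', sm.accuracy_score(truth, remap(y_gmm, truth)))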
8) Aim: Demonstrate and analyse the results of classification based on the KNN algorithm.
Program: Write a program to implement the k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets

iris = datasets.load_iris()
x = iris.data
y = iris.target
print('sepal-length', 'sepal-width', 'petal-length', 'petal-width')
print(x)
print('class: 0-Iris-Setosa, 1-Iris-Versicolour, 2-Iris-Virginica')
print(y)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Train the model with K=5 nearest neighbours
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)

# Make predictions on the test data
y_pred = classifier.predict(x_test)
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))
print('Accuracy Metrics')
print(classification_report(y_test, y_pred))
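The aim also calls for printing correct and wrong predictions, which the listing above omits; a small addition that labels each test prediction:
# Flag each test-set prediction as Correct or Wrong
for i in range(len(y_test)):
    status = 'Correct' if y_pred[i] == y_test[i] else 'Wrong'
    print(f'{status}: actual={y_test[i]}, predicted={y_pred[i]}')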
OUTPUT
class: 0-Iris-Setosa, 1- Iris-Versicolour, 2- Iris-Virginica
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
Confusion Matrix
[[18 0 0]
[ 0 16 1]
[ 0 0 10]]
Accuracy Metrics
precision recall f1-score support
0 1.00 1.00 1.00 18
1 1.00 0.94 0.97 17
2 0.91 1.00 0.95 10
accuracy 0.98 45
macro avg 0.97 0.98 0.97 45
weighted avg 0.98 0.98 0.98 45
9) Aim: Understand and analyse the concept of regression algorithm techniques.
Program: Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate data set for your experiment and draw graphs.
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot

# Enable Bokeh to display the plots in the notebook
output_notebook()

def local_regression(x0, X, Y, tau):
    # Add bias term to the query point
    x0 = np.r_[1, x0]
    # Add bias term to X
    X = np.c_[np.ones(len(X)), X]
    # Fit model: weighted normal equations with a radial kernel
    xw = X.T * radial_kernel(x0, X, tau)    # X^T * W
    beta = np.linalg.pinv(xw @ X) @ xw @ Y  # @ is matrix multiplication
    # Predict value at x0
    return x0 @ beta

def radial_kernel(x0, X, tau):
    # Gaussian weights centred at the query point x0
    return np.exp(np.sum((X - x0) ** 2, axis=1) / (-2 * tau * tau))

# Generate dataset
n = 1000
X = np.linspace(-3, 3, num=n)
print("The Data Set (10 Samples) X :\n", X[:10])
Y = np.log(np.abs(X ** 2 - 1) + .5)
print("The Fitting Curve Data Set (10 Samples) Y :\n", Y[:10])

# Jitter X
X += np.random.normal(scale=.1, size=n)
print("Jittered (10 Samples) X :\n", X[:10])

# Domain for prediction
domain = np.linspace(-3, 3, num=300)
print("X0 Domain Space (10 Samples) :\n", domain[:10])

def plot_lwr(tau):
    # Prediction through locally weighted regression over the domain
    prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
    plot = figure(width=400, height=400)
    plot.title.text = f'tau={tau}'
    plot.scatter(X, Y, alpha=.3)
    plot.line(domain, prediction, line_width=2, color='red')
    return plot

# Create and display the plots for decreasing bandwidths
plots = [
    [plot_lwr(10.), plot_lwr(1.)],
    [plot_lwr(0.1), plot_lwr(0.01)]
]
show(gridplot(plots))
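Each local fit solves the weighted normal equations beta = (X^T W X)^(-1) X^T W y, where W holds the radial-kernel weights centred at the query point; smaller tau gives more local, wigglier fits. A one-off sanity check (assuming the arrays above are in scope):
# Predict at a single query point with a narrow bandwidth
print("Prediction at x0 = 0.5, tau = 0.1:", local_regression(0.5, X, Y, tau=0.1))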
OUTPUT
10) Aim: Demonstrate and analyse the result sets obtained from the Bayesian belief network principle.
Program: Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. You can use Python ML library classes/API.
import pandas as pd
import numpy as np
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel  # renamed BayesianNetwork in newer pgmpy releases
from pgmpy.inference import VariableElimination
import networkx as nx
import matplotlib.pyplot as plt

df = pd.read_csv("heartdisease.csv")  # heart-disease CSV (filename assumed; adjust to your file)
df.info()
print(df.head())
print(np.unique(df['age']))

# Structure: risk factors point into heartdisease; findings point out of it
model = BayesianModel([('age', 'heartdisease'), ('gender', 'heartdisease'),
                       ('exang', 'heartdisease'), ('cp', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'chol')])
model.fit(df, estimator=MaximumLikelihoodEstimator)

print('\n Inferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

print('\n 1. Probability of HeartDisease given evidence = gender')
q1 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'gender': 1})
print(q1)

print('\n 2. Probability of HeartDisease given evidence = restecg')
q2 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'restecg': 1})
print(q2)

# Draw the network structure
graph = nx.DiGraph(model.edges())
nx.draw_networkx(graph, with_labels=True)
plt.show()
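To inspect what was learned, pgmpy's get_cpds() returns the fitted conditional probability tables; a minimal check:
# Print the learned conditional probability distribution of each node
for cpd in model.get_cpds():
    print(cpd)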
OUTPUT