1.
Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data
from a .CSV file.
import csv

with open('enjoysport.csv', 'r') as f:   # training data CSV (filename assumed)
    reader = csv.reader(f)
    your_list = list(reader)

# start with the most specific hypothesis
h = [['0', '0', '0', '0', '0', '0']]

for i in your_list:
    print(i)
    if i[-1] == "True":              # consider only positive examples
        j = 0
        for x in i:
            if x != "True":
                if x != h[0][j] and h[0][j] == '0':
                    h[0][j] = x      # first positive example: copy the attribute value
                elif x != h[0][j] and h[0][j] != '0':
                    h[0][j] = '?'    # conflicting value: generalize the constraint
                else:
                    pass
                j = j + 1

print("Most specific hypothesis is")
print(h)
Output
'Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same',True
'Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same',True
'Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change',False
'Sunny', 'Warm', 'High', 'Strong', 'Cool','Change',True
Maximally Specific set
[['Sunny', 'Warm', '?', 'Strong', '?', '?']]
2. For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set of all
hypotheses consistent with the training examples.
class Holder:
    factors = {}      # dictionary mapping each attribute to its possible values
    attributes = ()   # tuple of attribute names

    '''
    Constructor of class Holder holding two parameters,
    self refers to the instance of the class
    '''
    def __init__(self, attr):
        self.attributes = attr
        for i in attr:
            self.factors[i] = []

    def add_values(self, factor, values):
        self.factors[factor] = values
class CandidateElimination:
    Positive = {}  # Initialize positive empty dictionary
    Negative = {}  # Initialize negative empty dictionary

    def __init__(self, data, fact):
        self.num_factors = len(data[0][0])
        self.factors = fact.factors
        self.attr = fact.attributes
        self.dataset = data

    def run_algorithm(self):
        '''
        Initialize the specific and general boundaries, and loop the dataset against the
        algorithm
        '''
        G = self.initializeG()
        S = self.initializeS()
        '''
        Programmatically populate list in the iterating variable trial_set
        '''
        count = 0
        for trial_set in self.dataset:
            if self.is_positive(trial_set):  # if the example is positive
                G = self.remove_inconsistent_G(G, trial_set[0])  # remove inconsistent hypotheses from the general boundary
                S_new = S[:]  # copy the current specific boundary
                print(S_new)
                for s in S:
                    if not self.consistent(s, trial_set[0]):
                        S_new.remove(s)
                        generalization = self.generalize_inconsistent_S(s, trial_set[0])
                        generalization = self.get_general(generalization, G)
                        if generalization:
                            S_new.append(generalization)
                S = S_new[:]
                S = self.remove_more_general(S)
                print(S)
            else:  # if the example is negative
                S = self.remove_inconsistent_S(S, trial_set[0])  # remove inconsistent hypotheses from the specific boundary
                G_new = G[:]  # copy the current general boundary
                print(G_new)
                for g in G:
                    if self.consistent(g, trial_set[0]):
                        G_new.remove(g)
                        specializations = self.specialize_inconsistent_G(g, trial_set[0])
                        specializations = self.get_specific(specializations, S)
                        if specializations != []:
                            G_new += specializations
                G = G_new[:]
                G = self.remove_more_specific(G)
                print(G)
        print(S)
        print(G)
    def initializeS(self):
        ''' Initialize the specific boundary '''
        S = tuple(['-' for factor in range(self.num_factors)])  # 6 constraints in the vector
        return [S]

    def initializeG(self):
        ''' Initialize the general boundary '''
        G = tuple(['?' for factor in range(self.num_factors)])  # 6 constraints in the vector
        return [G]

    def is_positive(self, trial_set):
        ''' Check if a given training trial_set is positive '''
        if trial_set[1] == 'Y':
            return True
        elif trial_set[1] == 'N':
            return False
        else:
            raise TypeError("invalid target value")

    def match_factor(self, value1, value2):
        ''' Check whether two factor values match,
        necessary while checking the consistency of a
        training trial_set with a hypothesis '''
        if value1 == '?' or value2 == '?':
            return True
        elif value1 == value2:
            return True
        return False

    def consistent(self, hypothesis, instance):
        ''' Check whether the instance is part of the hypothesis '''
        for i, factor in enumerate(hypothesis):
            if not self.match_factor(factor, instance[i]):
                return False
        return True

    def remove_inconsistent_G(self, hypotheses, instance):
        ''' For a positive trial_set, the hypotheses in G
        inconsistent with it should be removed '''
        G_new = hypotheses[:]
        for g in hypotheses:
            if not self.consistent(g, instance):
                G_new.remove(g)
        return G_new

    def remove_inconsistent_S(self, hypotheses, instance):
        ''' For a negative trial_set, the hypotheses in S
        consistent with it should be removed '''
        S_new = hypotheses[:]
        for s in hypotheses:
            if self.consistent(s, instance):
                S_new.remove(s)
        return S_new
    def remove_more_general(self, hypotheses):
        ''' After generalizing S for a positive trial_set, any hypothesis in S
        more general than another in S should be removed '''
        S_new = hypotheses[:]
        for old in hypotheses:
            for new in S_new:
                if old != new and self.more_general(new, old):
                    S_new.remove(new)
        return S_new

    def more_general(self, hyp1, hyp2):
        ''' Check whether hyp1 is more general than hyp2 '''
        for i, j in zip(hyp1, hyp2):
            if i == '?':
                continue
            elif j == '?':
                if i != '?':
                    return False
            elif i != j:
                return False
            else:
                continue
        return True

    def more_specific(self, hyp1, hyp2):
        ''' hyp1 more specific than hyp2 is
        equivalent to hyp2 being more general than hyp1 '''
        return self.more_general(hyp2, hyp1)
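run_algorithm above also relies on remove_more_specific, generalize_inconsistent_S, specialize_inconsistent_G, get_general and get_specific, which are not shown in this listing. A minimal sketch of these helpers, written to match the conventions of the class above (these exact bodies are an assumption, not part of the original listing), could be:

    def remove_more_specific(self, hypotheses):
        ''' After specializing G for a negative trial_set, any hypothesis in G
        more specific than another in G should be removed '''
        G_new = hypotheses[:]
        for old in hypotheses:
            for new in G_new:
                if old != new and self.more_specific(new, old):
                    G_new.remove(new)
        return G_new

    def generalize_inconsistent_S(self, hypothesis, instance):
        ''' Minimally generalize a specific hypothesis so that it covers the positive instance '''
        hypo = list(hypothesis)
        for i, factor in enumerate(hypo):
            if factor == '-':
                hypo[i] = instance[i]
            elif not self.match_factor(factor, instance[i]):
                hypo[i] = '?'
        return tuple(hypo)

    def specialize_inconsistent_G(self, hypothesis, instance):
        ''' Minimally specialize a general hypothesis so that it excludes the negative instance '''
        specializations = []
        hypo = list(hypothesis)
        for i, factor in enumerate(hypo):
            if factor == '?':
                for value in self.factors[self.attr[i]]:
                    if instance[i] != value:
                        hyp = hypo[:]
                        hyp[i] = value
                        specializations.append(tuple(hyp))
        return specializations

    def get_general(self, generalization, G):
        ''' Keep a generalization only if some member of G is more general than it '''
        for g in G:
            if self.more_general(g, generalization):
                return generalization
        return None

    def get_specific(self, specializations, S):
        ''' Keep only the specializations that are more general than some member of S '''
        valid = []
        for hypo in specializations:
            for s in S:
                if self.more_specific(s, hypo) or s == self.initializeS()[0]:
                    valid.append(hypo)
        return valid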
dataset = [(('sunny','warm','normal','strong','warm','same'),'Y'),
           (('sunny','warm','high','strong','warm','same'),'Y'),
           (('rainy','cold','high','strong','warm','change'),'N'),
           (('sunny','warm','high','strong','cool','change'),'Y')]
attributes =('Sky','Temp','Humidity','Wind','Water','Forecast')
f = Holder(attributes)
f.add_values('Sky',('sunny','rainy','cloudy')) #sky can be sunny rainy or cloudy
f.add_values('Temp',('cold','warm')) #Temp can be cold or warm
f.add_values('Humidity',('normal','high')) #Humidity can be normal or high
f.add_values('Wind',('weak','strong')) #wind can be weak or strong
f.add_values('Water',('warm','cold')) #water can be warm or cold
f.add_values('Forecast',('same','change')) #Forecast can be same or change
a = CandidateElimination(dataset, f)  # pass the dataset to the algorithm class and call the run_algorithm method
a.run_algorithm()
Output
[('sunny', 'warm', 'normal', 'strong', 'warm', 'same')]
[('sunny', 'warm', 'normal', 'strong', 'warm','same')]
[('sunny', 'warm', '?', 'strong', 'warm', 'same')]
[('?', '?', '?', '?', '?', '?')]
[('sunny', '?', '?', '?', '?', '?'), ('?', 'warm', '?', '?', '?', '?'), ('?', '?', '?', '?', '?', 'same')]
[('sunny', 'warm', '?', 'strong', 'warm', 'same')]
[('sunny', 'warm', '?', 'strong', '?', '?')]
[('sunny', 'warm', '?', 'strong', '?', '?')]
[('sunny', '?', '?', '?', '?', '?'), ('?', 'warm', '?', '?', '?', '?')]
3. Write a program to demonstrate the working of the decision tree based ID3 algorithm.
Use an appropriate data set for building the decision tree and apply this knowledge to
classify a new sample.
import numpy as np
import math
from data_loader import read_data
class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    dict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0], 1), dtype=np.int32)

    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1

    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)

    return items, dict
def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0

    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums

def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))

    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)

    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy / iv
def create_node(data, metadata):
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, dict = subtables(data, split, delete=True)

    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    return node
def empty(size):
    s = ""
    for x in range(size):
        s += " "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("tennisdata.csv")   # training data CSV (filename assumed)
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
data_loader.py
import csv
def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)
tennisdata.csv
outlook,temperature,humidity,wind,answer
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes
rain,cool,normal,weak,yes
rain,cool,normal,strong,no
overcast,cool,normal,strong,yes
sunny,mild,high,weak,no
sunny,cool,normal,weak,yes
rain,mild,normal,weak,yes
sunny,mild,normal,strong,yes
overcast,mild,high,strong,yes
overcast,hot,normal,weak,yes
rain,mild,high,strong,no
Output
outlook
overcast
b'yes'
rain
wind
b'strong'
b'no'
b'weak'
b'yes'
sunny
humidity
b'high'
b'no'
b'normal'
b'yes'
4. Build an Artificial Neural Network by implementing the Backpropagation
algorithm and test the same using appropriate data sets.
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalize by the column-wise maximum of X
y = y / 100

#Sigmoid Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

#Derivative of Sigmoid Function
def derivatives_sigmoid(x):
    return x * (1 - x)

#Variable initialization
epoch = 7000            #Setting training iterations
lr = 0.1                #Setting learning rate
inputlayer_neurons = 2  #number of features in data set
hiddenlayer_neurons = 3 #number of hidden layer neurons
output_neurons = 1      #number of neurons at output layer

#weight and bias initialization
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))
#draws a random range of numbers uniformly of dim x*y
for i in range(epoch):
    #Forward Propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    #Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)  #how much hidden layer weights contributed to error
    d_hiddenlayer = EH * hiddengrad
    wout += hlayer_act.T.dot(d_output) * lr       #dot product of next-layer error and current-layer output
    # bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
Output
Input:
[[ 0.66666667 1. ]
[ 0.33333333 0.55555556]
[ 1. 0.66666667]]
Actual Output:
[[ 0.92]
[ 0.86]
[ 0.89]]
Predicted Output:
[[ 0.89559591]
[ 0.88142069]
[ 0.8928407 ]]
5. Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data
sets.
import csv
import random
import math
def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        #converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset
def splitDataset(dataset, splitRatio):
    #67% training size
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        #generate random indices into the dataset list to pick elements for the training data
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]
def separateByClass(dataset):
    separated = {}
    #creates a dictionary of classes 1 and 0 where the values are the instances belonging to each class
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated
def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0
def main():
    filename = 'data.csv'   # training data CSV (filename assumed)
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
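main() above calls summarizeByClass and getPredictions, which are not included in this listing. A minimal sketch of those helpers, assuming the usual Gaussian naive Bayes formulation (class-wise mean and standard deviation per attribute; the exact bodies are an assumption, not part of the original listing), might look like:

def summarize(instances):
    # mean and stdev of each attribute, ignoring the class column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*instances)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    return {classValue: summarize(instances) for classValue, instances in separated.items()}

def calculateProbability(x, mean, stdev):
    # Gaussian probability density of x under N(mean, stdev^2)
    exponent = math.exp(-(pow(x - mean, 2) / (2 * pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            m, s = classSummaries[i]
            probabilities[classValue] *= calculateProbability(inputVector[i], m, s)
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    return [predict(summaries, testSet[i]) for i in range(len(testSet))]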
Output
confusion matrix is as
follows [[17 0 0]
[ 0 17 0]
[ 0 0 11]]
Accuracy metrics
precision recall f1-score support
0 1.00 1.00 1.00 17
1 1.00 1.00 1.00 17
2 1.00 1.00 1.00 11
avg / total 1.00 1.00 1.00 45
6. Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Built-in Java classes/API can be used to write
the program. Calculate the accuracy, precision, and recall for your data set.
import pandas as pd

msg = pd.read_csv('document.csv', names=['message', 'label'])   # training data CSV (filename assumed)
print('The dimensions of the dataset', msg.shape)
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
print(X)
print(y)
#splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print(xtrain.shape)
print(xtest.shape)
print(ytrain.shape)
print(ytest.shape)
#output of count vectoriser is a sparse matrix
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
print(count_vect.get_feature_names())
df = pd.DataFrame(xtrain_dtm.toarray(), columns=count_vect.get_feature_names())
print(df)#tabular representation
print(xtrain_dtm) #sparse matrix representation
# Training Naive Bayes (NB) classifier on training data.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)
#printing accuracy metrics
from sklearn import metrics
print('Accuracy metrics')
print('Accuracy of the classifier is',metrics.accuracy_score(ytest,predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
print('Recall and Precision')
print(metrics.recall_score(ytest,predicted))
print(metrics.precision_score(ytest,predicted))
'''docs_new = ['I like this place', 'My boss is not my saviour']
X_new_counts = count_vect.transform(docs_new)
predictednew = clf.predict(X_new_counts)
for doc, category in zip(docs_new, predictednew):
    print('%s->%s' % (doc, msg.labelnum[category]))'''
I love this sandwich,pos
This is an amazing place,pos
I feel very good about these beers,pos
This is my best work,pos
What an awesome view,pos
I do not like this restaurant,neg
I am tired of this stuff,neg
I can't deal with this,neg
He is my sworn enemy,neg
My boss is horrible,neg
This is an awesome place,pos
I do not like the taste of this juice,neg
I love to dance,pos
I am sick and tired of this place,neg
What a great holiday,pos
That is a bad locality to stay,neg
We will have good fun tomorrow,pos
I went to my enemy's house today,neg
OUTPUT
['about', 'am', 'amazing', 'an', 'and', 'awesome', 'beers', 'best', 'boss', 'can', 'deal',
'do', 'enemy', 'feel', 'fun', 'good', 'have', 'horrible', 'house', 'is', 'like', 'love', 'my',
'not', 'of', 'place', 'restaurant', 'sandwich', 'sick', 'stuff', 'these', 'this', 'tired', 'to',
'today', 'tomorrow', 'very', 'view', 'we', 'went', 'what', 'will', 'with', 'work']
about am amazing an and awesome beers best boss can ... today \
0 1 0 0 0 0 0 1 0 0 0 ... 0
1 0 0 0 0 0 0 0 1 0 0 ... 0
2 0 0 1 1 0 0 0 0 0 0 ... 0
3 0 0 0 0 0 0 0 0 0 0 ... 1
4 0 0 0 0 0 0 0 0 0 0 ... 0
5 01 0 0 1 0 0 0 0 0 ... 0
6 0 0 0 0 0 0 0 0 0 1 ... 0
7 0 0 0 0 0 0 0 0 0 0 ... 0
8 0 1 0 0 0 0 0 0 0 0 ... 0
9 0 0 0 1 0 1 0 0 0 0 ... 0
10 0 0 0 0 0 0 0 0 0 0 ... 0
11 0 0 0 0 0 0 0 0 1 0 ... 0
12 0 0 0 1 0 1 0 0 0 0 ... 0
tomorrow very view we went what will with work
0 0 1 0 0 00 0 00
1 0 0 0 0 00 0 0 1
2 0 0 0 0 00 0 0 0
3 0 0 0 0 10 0 0 0
4 0 0 0 0 00 0 0 0
5 0 0 0 0 00 0 0 0
6 0 0 0 0 00 0 1 0
7 1 0 0 1 00 1 0 0
8 0 0 0 0 00 0 0 0
7. Write a program to construct a Bayesian network considering medical data. Use
this model to demonstrate the diagnosis of heart patients using standard Heart
Disease Data Set. You can use Java/Python ML library classes/API.
from pomegranate import *

asia = DiscreteDistribution({'True': 0.5, 'False': 0.5})

tuberculosis = ConditionalProbabilityTable(
    [['True', 'True', 0.2],
     ['True', 'False', 0.8],
     ['False', 'True', 0.01],
     ['False', 'False', 0.98]], [asia])

smoking = DiscreteDistribution({'True': 0.5, 'False': 0.5})

lung = ConditionalProbabilityTable(
    [['True', 'True', 0.75],
     ['True', 'False', 0.25],
     ['False', 'True', 0.02],
     ['False', 'False', 0.98]], [smoking])

bronchitis = ConditionalProbabilityTable(
    [['True', 'True', 0.92],
     ['True', 'False', 0.08],
     ['False', 'True', 0.03],
     ['False', 'False', 0.98]], [smoking])

tuberculosis_or_cancer = ConditionalProbabilityTable(
    [['True', 'True', 'True', 1.0],
     ['True', 'True', 'False', 0.0],
     ['True', 'False', 'True', 1.0],
     ['True', 'False', 'False', 0.0],
     ['False', 'True', 'True', 1.0],
     ['False', 'True', 'False', 0.0],
     ['False', 'False', 'True', 1.0],
     ['False', 'False', 'False', 0.0]], [tuberculosis, lung])

xray = ConditionalProbabilityTable(
    [['True', 'True', 0.885],
     ['True', 'False', 0.115],
     ['False', 'True', 0.04],
     ['False', 'False', 0.96]], [tuberculosis_or_cancer])

dyspnea = ConditionalProbabilityTable(
    [['True', 'True', 'True', 0.96],
     ['True', 'True', 'False', 0.04],
     ['True', 'False', 'True', 0.89],
     ['True', 'False', 'False', 0.11],
     ['False', 'True', 'True', 0.96],
     ['False', 'True', 'False', 0.04],
     ['False', 'False', 'True', 0.89],
     ['False', 'False', 'False', 0.11]], [tuberculosis_or_cancer, bronchitis])

s0 = State(asia, name="asia")
s1 = State(tuberculosis, name="tuberculosis")
s2 = State(smoking, name="smoker")

network = BayesianNetwork("asia")
network.add_states(s0, s1, s2)
network.add_edge(s0, s1)
network.add_edge(s1, s2)
network.bake()
print(network.predict_proba({'tuberculosis': 'True'}))
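The listing above wraps only three of the distributions in State objects before baking the network. A sketch of how the remaining nodes and edges could be declared (these additions would have to appear before the network.bake() call, and this completion is an assumption, not part of the original listing) is:

s3 = State(lung, name="lung")
s4 = State(bronchitis, name="bronchitis")
s5 = State(tuberculosis_or_cancer, name="tuberculosis_or_cancer")
s6 = State(xray, name="xray")
s7 = State(dyspnea, name="dyspnea")

network.add_states(s3, s4, s5, s6, s7)
# edges follow the structure implied by the conditional probability tables above
network.add_edge(s2, s3)   # smoker -> lung
network.add_edge(s2, s4)   # smoker -> bronchitis
network.add_edge(s1, s5)   # tuberculosis -> tuberculosis_or_cancer
network.add_edge(s3, s5)   # lung -> tuberculosis_or_cancer
network.add_edge(s5, s6)   # tuberculosis_or_cancer -> xray
network.add_edge(s5, s7)   # tuberculosis_or_cancer -> dyspnea
network.add_edge(s4, s7)   # bronchitis -> dyspnea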
8. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data
set for clustering using k-Means algorithm. Compare the results of these two
algorithms and comment on the quality of clustering. You can add Java/Python ML
library classes/API in the program.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs

X, y_true = make_blobs(n_samples=100, centers=4, cluster_std=0.60, random_state=0)
X = X[:, ::-1]  # flip axes for better plotting

from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=4).fit(X)
labels = gmm.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis')

probs = gmm.predict_proba(X)
print(probs[:5].round(3))

size = 50 * probs.max(1) ** 2  # square emphasizes differences
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', s=size)
from matplotlib.patches import Ellipse

def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw an ellipse with a given position and covariance"""
    ax = ax or plt.gca()
    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)
    # Draw the ellipse
    for nsig in range(1, 4):
        ax.add_patch(Ellipse(position, nsig * width, nsig * height, angle, **kwargs))
def plot_gmm(gmm, X, label=True, ax=None):
    ax = ax or plt.gca()
    labels = gmm.fit(X).predict(X)
    if label:
        ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2)
    else:
        ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2)
    ax.axis('equal')
    w_factor = 0.2 / gmm.weights_.max()
    for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_):
        draw_ellipse(pos, covar, alpha=w * w_factor)

gmm = GaussianMixture(n_components=4, random_state=42)
plot_gmm(gmm, X)

gmm = GaussianMixture(n_components=4, covariance_type='full', random_state=42)
plot_gmm(gmm, X)
Output
[[1 ,0, 0, 0]
[0 ,0, 1, 0]
[1 ,0, 0, 0]
[1 ,0, 0, 0]
[1 ,0, 0, 0]]
K-means
from sklearn.cluster import KMeans
#from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

data = pd.read_csv("driverdata.csv")   # driver data CSV (filename assumed)
df1 = pd.DataFrame(data)
print(df1)
f1 = df1['Distance_Feature'].values
f2 = df1['Speeding_Feature'].values

X = np.array(list(zip(f1, f2)))

plt.plot()
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Dataset')
plt.ylabel('Speeding_Feature')
plt.xlabel('Distance_Feature')
plt.scatter(f1, f2)
plt.show()

# create new plot and data
plt.plot()
colors = ['b', 'g', 'r']
markers = ['o', 'v', 's']

# KMeans algorithm
#K = 3
kmeans_model = KMeans(n_clusters=3).fit(X)

plt.plot()
for i, l in enumerate(kmeans_model.labels_):
    plt.plot(f1[i], f2[i], color=colors[l], marker=markers[l], ls='None')
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.show()
Driver_ID,Distance_Feature,Speeding_Feature
3423311935,71.24,28
3423313212,52.53,25
3423313724,64.54,27
3423311373,55.69,22
3423310999,54.58,25
3423313857,41.91,10
3423312432,58.64,20
3423311434,52.02,8
3423311328,31.25,34
3423312488,44.31,19
3423311254,49.35,40
3423312943,58.07,45
3423312536,44.22,22
3423311542,55.73,19
3423312176,46.63,43
3423314176,52.97,32
3423314202,46.25,35
3423311346,51.55,27
3423310666,57.05,26
3423313527,58.45,30
3423312182,43.42,23
3423313590,55.68,37
3423312268,55.15,18
9. Write a program to implement k-Nearest Neighbour algorithm to classify the iris
data set. Print both correct and wrong predictions. Java/Python ML library classes
can be used for this problem.
import csv
import random
import math
import operator
def loadDataset(filename, split, trainingSet=[], testSet=[]):
    with open(filename, 'r') as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
        for x in range(len(dataset) - 1):
            for y in range(4):
                dataset[x][y] = float(dataset[x][y])
            if random.random() < split:
                trainingSet.append(dataset[x])
            else:
                testSet.append(dataset[x])
def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((instance1[x] - instance2[x]), 2)
    return math.sqrt(distance)

def getNeighbors(trainingSet, testInstance, k):
    distances = []
    length = len(testInstance) - 1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length)
        distances.append((trainingSet[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors
def getResponse(neighbors):
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]
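The listing stops at getResponse; the accuracy computation and the driver that produce the output below are not shown. A minimal sketch of that missing part, assuming the iris data is stored in a CSV file (the filename, split ratio, and k value here are assumptions), could be:

def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    trainingSet = []
    testSet = []
    loadDataset('iris.csv', 0.67, trainingSet, testSet)   # filename assumed
    predictions = []
    k = 3
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][-1]))
    print('Accuracy: ' + repr(getAccuracy(testSet, predictions)) + '%')

main()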
OUTPUT
Confusion matrix is as follows
[[11 0 0]
[0 9 1]
[0 1 8]]
Accuracy metrics
            precision  recall  f1-score  support
0           1.00       1.00    1.00      11
1           0.90       0.90    0.90      10
2           0.89       0.89    0.89      9
Avg/Total   0.93       0.93    0.93      30
10. Implement the non-parametric Locally Weighted Regression algorithm in order
to fit data points. Select appropriate data set for your experiment and drawgraphs.
from numpy import *
import operator
from os import listdir
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np1
import numpy.linalg as np
from scipy.stats.stats import pearsonr
def kernel(point, xmat, k):
    m, n = np1.shape(xmat)
    weights = np1.mat(np1.eye((m)))
    for j in range(m):
        diff = point - X[j]
        weights[j, j] = np1.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights

def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    W = (X.T * (wei * X)).I * (X.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np1.shape(xmat)
    ypred = np1.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred
# load data points
data = pd.read_csv('tips.csv')   # bill/tip data CSV (filename assumed)
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)

# preparing and add 1 in bill
mbill = np1.mat(bill)
mtip = np1.mat(tip)
m = np1.shape(mbill)[1]
one = np1.mat(np1.ones(m))
X = np1.hstack((one.T, mbill.T))

# set k here
ypred = localWeightRegression(X, mtip, 2)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]
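The listing computes ypred and the sorted x values but stops before drawing the graph that the problem statement asks for. A minimal plotting sketch, assuming the usual scatter-of-data-plus-fitted-curve figure (this block is an assumption, not part of the original listing), could be:

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')                                # raw data points
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)    # locally weighted regression fit
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()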
Output