Apriori Algorithm:
Program:
from itertools import combinations

def load_data():
    # Sample transactional dataset: one list of items per transaction
    dataset = [
        ['bread', 'milk', 'beer'],
        ['bread', 'diaper', 'beer', 'egg'],
        ['milk', 'diaper', 'beer', 'cola'],
        ['bread', 'milk', 'diaper', 'beer'],
        ['bread', 'milk', 'diaper', 'cola']
    ]
    return dataset
def create_c1(dataset):
    # Candidate 1-itemsets: every distinct item, wrapped in a frozenset
    # so it can later serve as a dictionary key
    c1 = set()
    for transaction in dataset:
        for item in transaction:
            c1.add(frozenset([item]))
    return c1
def filter_candidates(dataset, candidates, min_support):
    # Count how many transactions contain each candidate, then keep
    # only the candidates whose support meets the threshold
    candidate_counts = {}
    for transaction in dataset:
        for candidate in candidates:
            if candidate.issubset(transaction):
                candidate_counts[candidate] = candidate_counts.get(candidate, 0) + 1
    num_transactions = float(len(dataset))
    qualified_items = []
    support_data = {}
    for candidate, count in candidate_counts.items():
        support = count / num_transactions
        if support >= min_support:
            qualified_items.append(candidate)
        support_data[candidate] = support
    return qualified_items, support_data
def generate_candidates(Lk, k):
    # Join step: two frequent (k-1)-itemsets merge into a candidate
    # k-itemset when their first k-2 items agree; sorting must happen
    # before slicing, since sets have no reliable iteration order
    candidates = []
    for i in range(len(Lk)):
        for j in range(i + 1, len(Lk)):
            prefix_i = sorted(Lk[i])[:k - 2]
            prefix_j = sorted(Lk[j])[:k - 2]
            if prefix_i == prefix_j:
                candidates.append(Lk[i] | Lk[j])
    return candidates
def apriori(dataset, min_support=0.5):
    C1 = create_c1(dataset)
    D = list(map(set, dataset))  # transactions as sets for subset tests
    L1, support_data = filter_candidates(D, C1, min_support)
    L = [L1]
    k = 2
    # Keep joining frequent (k-1)-itemsets until no candidates survive
    while len(L[k - 2]) > 0:
        candidates = generate_candidates(L[k - 2], k)
        Lk, support_k = filter_candidates(D, candidates, min_support)
        support_data.update(support_k)
        L.append(Lk)
        k += 1
    return L, support_data
def generate_rules(L, support_data, min_confidence=0.7):
    # For each frequent itemset, test rules of the form
    # (itemset - subset) => subset, where
    # confidence = support(itemset) / support(antecedent)
    rules = []
    for i in range(1, len(L)):
        for freq_set in L[i]:
            for subset in combinations(freq_set, r=i):
                antecedent = freq_set - set(subset)
                confidence = support_data[freq_set] / support_data[antecedent]
                if confidence >= min_confidence:
                    rules.append((set(antecedent), set(subset), confidence))
    return rules
def print_results(L, support_data, rules):
    for i, itemsets in enumerate(L):
        print(f"Frequent {i + 1}-itemsets")
        print("===================")
        for itemset in itemsets:
            print(f"{itemset} - support: {support_data[itemset]}")
        print()
    print("Association Rules")
    print("===================")
    for antecedent, consequent, confidence in rules:
        print(f"{antecedent} => {consequent} - confidence: {confidence:.2f}")
if __name__ == "__main__":
    dataset = load_data()
    L, support_data = apriori(dataset, min_support=0.4)
    rules = generate_rules(L, support_data, min_confidence=0.7)
    print_results(L, support_data, rules)
Output:
Frequent 1-itemsets
===================
frozenset({'bread'}) - support: 0.8
frozenset({'beer'}) - support: 0.8
frozenset({'milk'}) - support: 0.8
frozenset({'diaper'}) - support: 0.8
frozenset({'cola'}) - support: 0.4
Frequent 2-itemsets
===================
frozenset({'beer', 'bread'}) - support: 0.6
frozenset({'milk', 'bread'}) - support: 0.6
frozenset({'beer', 'milk'}) - support: 0.6
frozenset({'diaper', 'bread'}) - support: 0.6
frozenset({'beer', 'diaper'}) - support: 0.6
frozenset({'diaper', 'milk'}) - support: 0.6
frozenset({'milk', 'cola'}) - support: 0.4
frozenset({'diaper', 'cola'}) - support: 0.4
Frequent 3-itemsets
===================
frozenset({'beer', 'milk', 'bread'}) - support: 0.4
frozenset({'beer', 'diaper', 'bread'}) - support: 0.4
frozenset({'beer', 'diaper', 'milk'}) - support: 0.4
frozenset({'diaper', 'milk', 'cola'}) - support: 0.4
frozenset({'diaper', 'milk', 'bread'}) - support: 0.4
Frequent 4-itemsets
===================
Association Rules
===================
{'beer'} => {'bread'} - confidence: 0.75
{'bread'} => {'beer'} - confidence: 0.75
{'milk'} => {'bread'} - confidence: 0.75
{'bread'} => {'milk'} - confidence: 0.75
{'beer'} => {'milk'} - confidence: 0.75
{'milk'} => {'beer'} - confidence: 0.75
{'diaper'} => {'bread'} - confidence: 0.75
{'bread'} => {'diaper'} - confidence: 0.75
{'beer'} => {'diaper'} - confidence: 0.75
{'diaper'} => {'beer'} - confidence: 0.75
{'diaper'} => {'milk'} - confidence: 0.75
{'milk'} => {'diaper'} - confidence: 0.75
{'cola'} => {'milk'} - confidence: 1.00
{'cola'} => {'diaper'} - confidence: 1.00
{'cola'} => {'diaper', 'milk'} - confidence: 1.00
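Note: the handwritten implementation can be sanity-checked against a library version. The sketch below is optional; it assumes the mlxtend package is installed (see the install note in the next program) and that it runs in the same script as load_data(). mlxtend's apriori is imported under an alias so it does not shadow the function defined above.
# Optional cross-check against mlxtend's Apriori implementation
# (assumes mlxtend is installed and load_data is defined above)
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori as mlx_apriori

dataset = load_data()
te = TransactionEncoder()
encoded = te.fit(dataset).transform(dataset)  # boolean transaction matrix
df = pd.DataFrame(encoded, columns=te.columns_)
print(mlx_apriori(df, min_support=0.4, use_colnames=True))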
FP-Growth Algorithm:
Program:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
import pandas as pd

dataset = [
    ['milk', 'bread', 'biscuit'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter'],
    ['milk', 'bread'],
    ['milk', 'bread', 'biscuit', 'butter'],
    ['jam', 'butter'],
    ['jam', 'bread'],
    ['milk', 'jam'],
    ['bread', 'butter']
]

# One-hot encode the transactions into a boolean matrix. TransactionEncoder
# yields exactly one column per distinct item, avoiding the duplicate
# columns that pd.get_dummies produces on a NaN-padded DataFrame.
te = TransactionEncoder()
encoded = te.fit(dataset).transform(dataset)
encoded_df = pd.DataFrame(encoded, columns=te.columns_)

frequent_itemsets = fpgrowth(encoded_df, min_support=0.2, use_colnames=True)
print(frequent_itemsets)
Note: Before running the program, install the mlxtend package by
typing "pip install mlxtend" in a terminal.
Output:
     support                itemsets
0   0.777778                 (bread)
1   0.555556                  (milk)
2   0.222222               (biscuit)
3   0.555556                (butter)
4   0.333333                   (jam)
5   0.444444           (milk, bread)
6   0.222222         (biscuit, milk)
7   0.222222        (biscuit, bread)
8   0.222222  (biscuit, milk, bread)
9   0.444444         (butter, bread)
10  0.222222          (butter, milk)
11  0.222222   (butter, milk, bread)
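As an optional follow-up, mlxtend can derive association rules directly from these frequent itemsets. A minimal sketch, run after the program above (the 0.7 confidence threshold is illustrative, not part of the exercise):
from mlxtend.frequent_patterns import association_rules

# Keep only rules whose confidence is at least 0.7 (illustrative threshold)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])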
Naïve Bayes Algorithm:
Program:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the Iris dataset (150 samples, 4 features, 3 classes)
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a Gaussian Naive Bayes classifier and evaluate on the test set
naive_bayes = GaussianNB()
naive_bayes.fit(X_train, y_train)
y_pred = naive_bayes.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
Output:
Accuracy: 1.0
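Accuracy alone can mask per-class behaviour. As an optional extension, the same predictions can be summarised per class with scikit-learn's standard metrics; a sketch meant to run at the end of the script above:
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision, recall and F1, plus the confusion matrix,
# for the predictions computed above
print(classification_report(y_test, y_pred, target_names=iris.target_names))
print(confusion_matrix(y_test, y_pred))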