-
Notifications
You must be signed in to change notification settings - Fork 97
Expand file tree
/
Copy pathAprioriAlgorithmViaTries.m
More file actions
135 lines (99 loc) · 4.5 KB
/
AprioriAlgorithmViaTries.m
File metadata and controls
135 lines (99 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
(*
Implementation of the Apriori algorithm via Tries in Mathematica
Copyright (C) 2022 Anton Antonov
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Written by Anton Antonov,
ʇǝu˙oǝʇsod@ǝqnɔuouoʇuɐ,
Windermere, Florida, USA.
*)
(*
Mathematica is (C) Copyright 1988-2022 Wolfram Research, Inc.
Protected by copyright law and international treaties.
Unauthorized reproduction or distribution subject to severe civil
and criminal penalties.
Mathematica is a registered trademark of Wolfram Research, Inc.
*)
If[Length[DownValues[TriesWithFrequencies`TriesCreate]] == 0,
Echo["TriesWithFrequencies.m", "Importing from GitHub:"];
Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/TriesWithFrequencies.m"]
];
(***********************************************************)
(* Package definition *)
(***********************************************************)
BeginPackage["AprioriAlgorithmViaTries`"];
Apriori::usage = "Apriori[baskets : {_List ..}, minSupport_?NumericQ, minItemsNumber_Integer, maxItemsNumberArg : (_Integer | Infinity)]";
Begin["`Private`"];
Needs["TriesWithFrequencies`"];
Clear[ScanBasket];
ScanBasket[basket_List, k_Integer, aFreqSets_?AssociationQ] :=
Block[{candidates},
candidates = Subsets[basket, {k, k}];
Select[candidates, Lookup[aFreqSets, Key@Most[#], False] && Lookup[aFreqSets, Key@{Last[#]}, False] &]
] /; k > 1;
Clear[Apriori];
Options[Apriori] = {Counts -> False};
Apriori[
lsBasketsArg : {_List ..},
minSupportArg_?NumericQ,
minItemsNumber_Integer : 1,
maxItemsNumberArg : (_Integer | Infinity) : Infinity,
opts :OptionsPattern[]] :=
Block[{lsBaskets = lsBasketsArg, minSupport = minSupportArg, maxItemsNumber, countsQ,
aFreqSets, lsFreqSets,
trBase, trSets, trSets2, aAllTries, k = 1, contQ = True,
res},
(*Max number of items processing*)
minSupport = If[ 0 <= minSupport <= 1, minSupport * Length[lsBaskets], minSupport];
(*Max number of items processing*)
maxItemsNumber = Min[maxItemsNumberArg, Max[Length /@ lsBaskets]];
(*To use counts or not*)
countsQ = TrueQ @ OptionValue[Apriori, Counts];
(*Make sure all baskets unique items*)
lsBaskets = Union /@ lsBaskets;
(*Make single items baskets trie*)
trBase = TrieCreate[List /@ Flatten[lsBaskets]];
(*Remove the items that are not frequent enough*)
trBase = TrieThresholdRemove[trBase, minSupport, "Postfix" -> None];
(*Verify early stop*)
If[TrieDepth[trBase] == 1,
Echo[Row[{"All items have support smaller than", Spacer[3], minSupport}]];
Return[{}]
];
(*Initial set of frequent sets*)
aFreqSets =
AssociationThread[List /@ Select[Tally[Flatten[lsBaskets]], #[[2]] >= minSupport &][[All, 1]], True];
(*First gathered trie*)
aAllTries = <|k -> trBase|>;
(*Main loop*)
While[contQ && k < maxItemsNumber,
k++;
(*Scan the baskets and make trie with viable candidates*)
trSets = TrieCreate[Join @@ Map[ScanBasket[#, k, aFreqSets] &, lsBaskets]];
(*Remove baskets that are not frequent enough*)
trSets2 = TrieThresholdRemove[trSets, minSupport, "Postfix" -> None];
(*Get frequent sets from the trie*)
lsNew = Select[Rest /@ TrieGetWords[trSets2], Length[#] == k &];
(*Update frequent sets*)
If[Length[lsNew] == 0,
contQ = False,
(*ELSE*)
aFreqSets = Join[aFreqSets, AssociationThread[lsNew, True]];
(*Add to gathered tries*)
AppendTo[aAllTries, k -> trSets2]
];
];
lsFreqSets = Select[Keys[aFreqSets], Length[#] >= minItemsNumber &];
res = Association@Map[# -> N[TrieRetrieve[aAllTries[Length[#]], #][$TrieValue]] &, lsFreqSets];
If[countsQ, res, res / Length[lsBaskets]]
];
End[];
EndPackage[];