9
9
>>> find_frequent_itemsets(transactions, minimum_support)
10
10
"""
11
11
12
- from collections import defaultdict
12
+ from collections import defaultdict , namedtuple
13
13
from itertools import imap
14
14
15
15
__author__ = 'Eric Naeseth <[email protected] >'
16
16
__copyright__ = 'Copyright © 2009 Eric Naeseth'
17
17
__license__ = 'MIT License'
18
18
19
- def find_frequent_itemsets (transactions , minimum_support ):
19
+ def find_frequent_itemsets (transactions , minimum_support , include_support = False ):
20
20
"""
21
- Finds frequent itemsets in the given transactions using FP-growth. This
21
+ Find frequent itemsets in the given transactions using FP-growth. This
22
22
function returns a generator instead of an eagerly-populated list of items.
23
23
24
24
The `transactions` parameter can be any iterable of iterables of items.
@@ -27,6 +27,9 @@ def find_frequent_itemsets(transactions, minimum_support):
27
27
28
28
Each item must be hashable (i.e., it must be valid as a member of a
29
29
dictionary or a set).
30
+
31
+ If `include_support` is true, yield (itemset, support) pairs instead of
32
+ just the itemsets.
30
33
"""
31
34
items = defaultdict (lambda : 0 ) # mapping from items to their supports
32
35
processed_transactions = []
@@ -41,10 +44,8 @@ def find_frequent_itemsets(transactions, minimum_support):
41
44
processed_transactions .append (processed )
42
45
43
46
# Remove infrequent items from the item support dictionary.
44
- items = dict (items )
45
- for item , support in items .items ():
46
- if support < minimum_support :
47
- del items [item ]
47
+ items = dict ((item , support ) for item , support in items .iteritems ()
48
+ if support >= minimum_support )
48
49
49
50
# Build our FP-tree. Before any transactions can be added to the tree, they
50
51
# must be stripped of infrequent items and their surviving items must be
@@ -64,7 +65,7 @@ def find_with_suffix(tree, suffix):
64
65
if support >= minimum_support and item not in suffix :
65
66
# New winner!
66
67
found_set = [item ] + suffix
67
- yield found_set
68
+ yield ( found_set , support ) if include_support else found_set
68
69
69
70
# Build a conditional tree and recursively search for frequent
70
71
# itemsets within it.
@@ -74,8 +75,8 @@ def find_with_suffix(tree, suffix):
74
75
yield s # pass along the good news to our caller
75
76
76
77
# Search for frequent itemsets, and yield the results we find.
77
- for s in find_with_suffix (master , []):
78
- yield s
78
+ for itemset in find_with_suffix (master , []):
79
+ yield itemset
79
80
80
81
class FPTree (object ):
81
82
"""
0 commit comments