Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 710608f

Browse files
committed
Clean up whitespace
I was a young and naive TextMate user when I wrote all this.
1 parent a72fe93 commit 710608f

File tree

2 files changed

+99
-99
lines changed

2 files changed

+99
-99
lines changed

fp_growth.py

Lines changed: 58 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,17 @@ def find_frequent_itemsets(transactions, minimum_support):
2020
"""
2121
Finds frequent itemsets in the given transactions using FP-growth. This
2222
function returns a generator instead of an eagerly-populated list of items.
23-
23+
2424
The `transactions` parameter can be any iterable of iterables of items.
2525
`minimum_support` should be an integer specifying the minimum number of
2626
occurrences of an itemset for it to be accepted.
27-
27+
2828
Each item must be hashable (i.e., it must be valid as a member of a
2929
dictionary or a set).
3030
"""
3131
items = defaultdict(lambda: 0) # mapping from items to their supports
3232
processed_transactions = []
33-
33+
3434
# Load the passed-in transactions and count the support that individual
3535
# items have.
3636
for transaction in transactions:
@@ -39,71 +39,71 @@ def find_frequent_itemsets(transactions, minimum_support):
3939
items[item] += 1
4040
processed.append(item)
4141
processed_transactions.append(processed)
42-
42+
4343
# Remove infrequent items from the item support dictionary.
4444
items = dict(items)
4545
for item, support in items.items():
4646
if support < minimum_support:
4747
del items[item]
48-
48+
4949
# Build our FP-tree. Before any transactions can be added to the tree, they
5050
# must be stripped of infrequent items and their surviving items must be
5151
# sorted in decreasing order of frequency.
5252
def clean_transaction(transaction):
5353
transaction = filter(lambda v: v in items, transaction)
5454
transaction.sort(key=lambda v: items[v], reverse=True)
5555
return transaction
56-
56+
5757
master = FPTree()
5858
for transaction in imap(clean_transaction, processed_transactions):
5959
master.add(transaction)
60-
60+
6161
def find_with_suffix(tree, suffix):
6262
for item, nodes in tree.items():
6363
support = sum(n.count for n in nodes)
6464
if support >= minimum_support and item not in suffix:
6565
# New winner!
6666
found_set = [item] + suffix
6767
yield found_set
68-
68+
6969
# Build a conditional tree and recursively search for frequent
7070
# itemsets within it.
7171
cond_tree = conditional_tree_from_paths(tree.prefix_paths(item),
7272
minimum_support)
7373
for s in find_with_suffix(cond_tree, found_set):
7474
yield s # pass along the good news to our caller
75-
75+
7676
# Search for frequent itemsets, and yield the results we find.
7777
for s in find_with_suffix(master, []):
7878
yield s
7979

8080
class FPTree(object):
8181
"""
8282
An FP tree.
83-
83+
8484
This object may only store transaction items that are hashable (i.e., all
8585
items must be valid as dictionary keys or set members).
8686
"""
8787
def __init__(self):
    """Create an empty tree consisting of a root node and no routes."""
    # Maps each item to a two-element [head, tail] list delimiting the
    # chain of "neighbor" links that visits every node holding that item.
    self._routes = {}

    # The root node carries neither an item nor a count.
    self._root = FPNode(self, None, None)
94-
94+
9595
@property
def root(self):
    """Read-only access to the node stored as this tree's root."""
    return self._root
99-
99+
100100
def add(self, transaction):
101101
"""
102102
Adds a transaction to the tree.
103103
"""
104-
104+
105105
point = self._root
106-
106+
107107
for item in transaction:
108108
next_point = point.search(item)
109109
if next_point:
@@ -115,25 +115,25 @@ def add(self, transaction):
115115
# currently looking at.
116116
next_point = FPNode(self, item)
117117
point.add(next_point)
118-
118+
119119
# Update the route of nodes that contain this item to include
120120
# our new node.
121121
self._update_route(next_point)
122-
122+
123123
point = next_point
124-
124+
125125
def _update_route(self, point):
    """Add the given node to the route through all nodes for its item.

    `self._routes` maps each item to a two-element list `[head, tail]`.
    When a route already exists for `point.item`, the current tail's
    `neighbor` is set to `point` and `point` becomes the new tail.
    Otherwise a new route is started with `point` as both head and tail.
    """
    assert self is point.tree

    # Keep the `try` limited to the lookup so that a KeyError raised while
    # linking the node is not mistaken for "no route exists yet".
    try:
        route = self._routes[point.item]
    except KeyError:
        # First node for this item; start a new route.
        self._routes[point.item] = [point, point]
    else:
        route[1].neighbor = point  # route[1] is the tail
        route[1] = point
136-
136+
137137
def items(self):
138138
"""
139139
Generate one 2-tuple for each item represented in the tree. The first
@@ -142,37 +142,37 @@ def items(self):
142142
"""
143143
for item in self._routes.iterkeys():
144144
yield (item, self.nodes(item))
145-
145+
146146
def nodes(self, item):
    """
    Yields, in route order, every node that contains the given item.

    Nothing is yielded when the tree has no route for `item`.
    """
    route = self._routes.get(item)
    if route is None:
        return

    # route[0] is the head; follow the neighbor links until they run out.
    current = route[0]
    while current:
        yield current
        current = current.neighbor
159-
159+
160160
def prefix_paths(self, item):
    """Generates the prefix paths that end with the given item.

    Each path is a list of nodes ordered from the node nearest the root
    down to a node containing `item`; the root node itself is excluded.
    """

    def climb(start):
        # Walk the parent links upward, stopping at the root (or when the
        # chain of parents runs out), then flip into top-down order.
        upward = []
        current = start
        while current and not current.root:
            upward.append(current)
            current = current.parent
        return upward[::-1]

    return (climb(node) for node in self.nodes(item))
172-
172+
173173
def _removed(self, node):
174174
"""Called when `node` is removed from the tree; performs cleanup."""
175-
175+
176176
head, tail = self._routes[node.item]
177177
if node is head:
178178
if node is tail or not node.neighbor:
@@ -193,14 +193,14 @@ def conditional_tree_from_paths(paths, minimum_support):
193193
tree = FPTree()
194194
condition_item = None
195195
items = set()
196-
196+
197197
# Import the nodes in the paths into the new tree. Only the counts of the
198198
# leaf nodes matter; the remaining counts will be reconstructed from the
199199
# leaf counts.
200200
for path in paths:
201201
if condition_item is None:
202202
condition_item = path[-1].item
203-
203+
204204
point = tree.root
205205
for node in path:
206206
next_point = point.search(node.item)
@@ -212,17 +212,17 @@ def conditional_tree_from_paths(paths, minimum_support):
212212
point.add(next_point)
213213
tree._update_route(next_point)
214214
point = next_point
215-
215+
216216
assert condition_item is not None
217-
217+
218218
# Calculate the counts of the non-leaf nodes.
219219
for path in tree.prefix_paths(condition_item):
220220
count = None
221221
for node in reversed(path):
222222
if count is not None:
223223
node._count += count
224224
count = node.count
225-
225+
226226
# Eliminate the nodes for any items that are no longer frequent.
227227
for item in items:
228228
support = sum(n.count for n in tree.nodes(item))
@@ -231,47 +231,47 @@ def conditional_tree_from_paths(paths, minimum_support):
231231
for node in tree.nodes(item):
232232
if node.parent is not None:
233233
node.parent.remove(node)
234-
234+
235235
# Finally, remove the nodes corresponding to the item for which this
236236
# conditional tree was generated.
237237
for node in tree.nodes(condition_item):
238238
if node.parent is not None: # the node might already be an orphan
239239
node.parent.remove(node)
240-
240+
241241
return tree
242-
242+
243243
class FPNode(object):
244244
"""A node in an FP tree."""
245-
245+
246246
def __init__(self, tree, item, count=1):
    """Create a node holding `item` in `tree` with an initial `count`.

    The node starts out with no parent, no children and no neighbor.
    """
    self._tree, self._item, self._count = tree, item, count
    self._parent = None
    self._neighbor = None
    self._children = {}
253-
253+
254254
def add(self, child):
    """Adds the given FPNode `child` as a child of this node.

    Raises TypeError if `child` is not an FPNode. If this node already has
    a child for the same item, the call leaves this node unchanged.
    """
    if not isinstance(child, FPNode):
        raise TypeError("Can only add other FPNodes as children")

    if child.item not in self._children:
        self._children[child.item] = child
        child.parent = self
263-
263+
264264
def search(self, item):
    """
    Looks up the child node of this node that holds the given item.
    Returns that child, or `None` when there is no such child.
    """
    return self._children.get(item)
274-
274+
275275
def remove(self, child):
276276
try:
277277
if self._children[child.item] is child:
@@ -293,41 +293,41 @@ def remove(self, child):
293293
raise ValueError("that node is not a child of this node")
294294
except KeyError:
295295
raise ValueError("that node is not a child of this node")
296-
296+
297297
def __contains__(self, item):
    """True if this node has a child node for the given item."""
    return item in self._children
299-
299+
300300
@property
def tree(self):
    """Read-only access to the tree this node was created for."""
    return self._tree
304-
304+
305305
@property
def item(self):
    """Read-only access to the item stored in this node."""
    return self._item
309-
309+
310310
@property
def count(self):
    """Read-only access to the count stored for this node's item."""
    return self._count
314-
314+
315315
def increment(self):
    """Adds one to the count associated with this node's item.

    Raises ValueError when the node's count is `None`.
    """
    if self._count is not None:
        self._count += 1
        return
    raise ValueError("Root nodes have no associated count.")
320-
320+
321321
@property
def root(self):
    """True when this node has neither an item nor a count; else False."""
    has_item = self._item is not None
    has_count = self._count is not None
    return not (has_item or has_count)
325-
325+
326326
@property
def leaf(self):
    """True when this node has no child nodes; False otherwise."""
    return not self._children
330-
330+
331331
def parent():
332332
doc = "The node's parent."
333333
def fget(self):
@@ -340,7 +340,7 @@ def fset(self, value):
340340
self._parent = value
341341
return locals()
342342
parent = property(**parent())
343-
343+
344344
def neighbor():
345345
doc = """
346346
The node's neighbor; the one with the same value that is "to the right"
@@ -356,13 +356,13 @@ def fset(self, value):
356356
self._neighbor = value
357357
return locals()
358358
neighbor = property(**neighbor())
359-
359+
360360
@property
def children(self):
    """The nodes that are children of this node, as a tuple."""
    # `values()` exists on both Python 2 and Python 3 dictionaries, whereas
    # `itervalues()` is Python 2 only.
    return tuple(self._children.values())
364-
365-
364+
365+
366366
def __repr__(self):
367367
if self.root:
368368
return "<%s (root)>" % type(self).__name__
@@ -372,16 +372,16 @@ def __repr__(self):
372372
if __name__ == '__main__':
373373
from optparse import OptionParser
374374
import csv
375-
375+
376376
p = OptionParser(usage='%prog data_file')
377377
p.add_option('-s', '--minimum-support', dest='minsup', type='int',
378378
help='Minimum itemset support (default: 2)')
379379
p.set_defaults(minsup=2)
380-
380+
381381
options, args = p.parse_args()
382382
if len(args) < 1:
383383
p.error('must provide the path to a CSV file to read')
384-
384+
385385
f = open(args[0])
386386
try:
387387
for itemset in find_frequent_itemsets(csv.reader(f), options.minsup):

0 commit comments

Comments
 (0)