Created
November 17, 2015 20:53
-
-
Save josh-howes/439ef4fd7d1893ec9cf5 to your computer and use it in GitHub Desktop.
Han, Pei and Yin's novel frequent pattern tree (FP-tree) structure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Mining Frequent Patterns without Candidate Generation. | |
Naive implementation of the data structures in the following paper <http://hanj.cs.illinois.edu/pdf/sigmod00.pdf> | |
""" | |
class HeaderTable(dict): | |
"""Header Table. | |
To facilitate tree traversal, an item header table is built in which each item points to its | |
occurrence in the tree via a head of node-link. Nodes with the same item-name are linked in | |
sequence via such node-links. | |
""" | |
def follow_node_links(self, item_name): | |
if item_name not in self.keys(): | |
return None | |
header_node = self.get(item_name) | |
node = header_node | |
while node.node_link: | |
node = none.node_link | |
return node | |
class FPTree(object): | |
def __init__(self, item_name='null', count=0, node_link=None, parent=None): | |
"""FP Node. | |
Item Prefix Subtree of the Frequent Pattern Tree. | |
Arguments | |
--------- | |
item_name : str | |
Registers which item this node represents | |
count : int | |
The number of transactions represented by the portion of the path reaching this node. | |
children : list | |
A set of item prefix subtrees as the children of the root. | |
node_link : `FPNode` | |
Node in the FP-tree carrying the same item-name or null if there is none. | |
header_table : dict | |
Frequent-item header table. | |
""" | |
self.item_name = item_name | |
self.count = count | |
self.node_link = node_link | |
self.subtrees = list() | |
self._lookup = dict() | |
self._tree_count = 0 | |
def __repr__(self): | |
return '<FPNode(name="{0}", count={1})>'.format(self.item_name, self.count) | |
def has_child(self, name): | |
return name in self._lookup.keys() | |
def get_subtree(self, name): | |
return self.subtrees[self._lookup[name]] | |
def insert_tree(self, transactions): | |
head, rest = transactions[0], transactions[1:] | |
if self.has_child(head): | |
tree = self.get_subtree(head) | |
tree.count += 1 | |
else: | |
tree = FPTree(item_name=head, | |
count=1, | |
parent=self) | |
self._lookup[head] = self._tree_count | |
self._tree_count += 1 | |
self.subtrees.append(tree) | |
# Add node_links | |
parent = header_table.follow_node_links(head) | |
if not parent: | |
header_table[head] = tree | |
else: | |
parent.node_link = tree | |
if rest: | |
tree.insert_tree(rest) | |
header_table = HeaderTable() | |
def create_fptree(transactions): | |
"""Create a Frequent Pattern Tree from a list of transactions. | |
Arguments | |
--------- | |
transactions : list | |
List of transactions, where a transaction is a set of items. | |
Returns | |
------- | |
fp_tree : `FPTree` | |
""" | |
counts = defaultdict(int) | |
for trans in transactions: | |
for item in trans: | |
counts[item] += 1 | |
tree = FPTree(None) | |
for trans in transactions: | |
trans.sort(key=lambda x: counts[x], reverse=True) | |
tree.insert_tree(trans) | |
return tree | |
def print_tree(tree): | |
"""Print Tree. | |
Print the structure of a Frequent Pattern Tree to stdout. | |
Arguments | |
--------- | |
tree : `FPTree` | |
""" | |
def _print_tree(tree, level=0): | |
assert isinstance(tree, FPTree) | |
print("{pad}+-{node}".format(pad=" " * (level), node=str(tree))) | |
for child in tree.subtrees: | |
_print_tree(child, level+1) | |
_print_tree(tree) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment