Last active
October 28, 2021 04:55
-
-
Save abhijeet-talaulikar/1fe1baac6eef64a5bc74c01bc306090f to your computer and use it in GitHub Desktop.
fp-skeleton-partC.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fp_growth(transaction_db, min_sup, fp_list, prefix = []): | |
####### STEP A: CONCATENATE ####### | |
# Get one itemsets which meet minimum support | |
one_itemset_dict = get_one_itemsets(transaction_db, min_sup) | |
# Concanenate frequent patterns with prefix | |
freq_patterns = [] | |
for key,val in one_itemset_dict.items(): | |
pattern = prefix + [key] | |
freq_patterns.append((pattern,val)) | |
for pat in freq_patterns: | |
if pat not in fp_list: | |
fp_list.append(pat[0]) | |
####### STEP B: GENERATE FP-TREE ####### | |
# Initialize node link dictionary | |
node_link_dict = {item:[] for item in one_itemset_dict.keys()} | |
# Create tree data structure as a list of dictionary nodes | |
# Insert root node as {name:None, count:None, parent: None} | |
fp_tree = [{'name':"Root", 'count':None, 'parent': None}] | |
root_node = fp_tree[0] | |
for t_tup in transaction_db.list_items: | |
# Sort t according to 1-itemset support counts | |
# This is required only at the root node | |
if prefix == []: | |
t = sort_transaction(t_tup[0], one_itemset_dict) | |
else: | |
t = [i for i in t_tup[0] if i in one_itemset_dict.keys()] | |
if len(t) > 1: | |
insert_tree(t[0], t[1:], root_node, fp_tree, node_link_dict, t_tup[1]) | |
####### STEP C: GENERATE CONDITIONAL PATTERN BASE ####### | |
for item in one_itemset_dict.keys(): | |
cpb_transactions = [] | |
cond_pattern_base = [] | |
# Iterate over all node links of item | |
for node in node_link_dict[item]: | |
cur_node = node['parent'] | |
cur_route = [] | |
if cur_node is root_node: | |
continue | |
while cur_node is not root_node: | |
cur_route.insert(0, cur_node) | |
cur_node = cur_node['parent'] | |
cond_pattern_base.append((cur_route, node['count'])) | |
cpb_transactions.append(([path['name'] for path in cur_route], node['count'])) | |
cpb_db = pd.DataFrame({'list_items':cpb_transactions}) | |
if cpb_db.shape[0] > 0: | |
fp_growth(cpb_db, min_sup, fp_list, prefix + [item]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment