Created
December 1, 2021 03:23
-
-
Save billju/ee0ed4d08b974d8ba25e8aebac65e1d5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
# `permutations` is no longer used below; the import is kept in case other
# code sharing this namespace still relies on it.
from itertools import combinations, permutations

import pandas as pd

# Transaction table: one row per transaction, one 0/1 column per item.
# The first column is the transaction id and is dropped after parsing.
CSV = """
交易紀錄 牛奶(A) 麵包(B) 餅乾(C) 柳橙汁(D) 汽水(E) 泡麵(F) 水果(G)
101 1 1 1 1 0 0 0
102 0 1 1 0 1 1 0
103 1 0 1 0 0 0 1
104 1 1 0 1 0 1 1
105 0 0 1 0 1 0 1
"""
# Any run of whitespace separates fields, so the table parses whether it is
# delimited by single spaces, several spaces, or tabs.
df = pd.read_csv(io.StringIO(CSV), sep=r'\s+').drop(columns=['交易紀錄'])


def prob(cols):
    """Return the fraction of transactions that contain every item in *cols*.

    *cols* is any iterable of column names of ``df``. A row counts when the
    sum of its selected 0/1 cells equals the number of selected columns,
    i.e. when all of the items are present.
    """
    cols = list(cols)  # a tuple would be treated as a single key by df[...]
    return (df[cols].sum(axis=1) == len(cols)).mean()


min_support = 0.4  # minimum fraction of transactions containing an itemset
min_confidence = 0.8  # minimum support(A and B) / support(A) for a rule A -> B

# Single items that reach the support threshold.
cols = [c for c in df.columns if prob([c]) >= min_support]

# Itemsets of two or more frequent items that reach the support threshold.
comb = [
    tup
    for size in range(2, len(cols) + 1)
    for tup in combinations(cols, size)
    if prob(tup) >= min_support
]

# Candidate rules (A, B): every way of splitting an itemset into a non-empty
# antecedent A and a non-empty consequent B. Antecedents are drawn with
# `combinations` so each rule appears exactly once; slicing permutations
# would repeat the same rule with its items in a different order.
perm = [
    (antecedent, tuple(item for item in tup if item not in antecedent))
    for tup in comb
    for size in range(1, len(tup))
    for antecedent in combinations(tup, size)
]

result = []
for A, B in perm:
    support = prob([*A, *B])
    # Confidence of A -> B is measured against the antecedent A.
    confidence = support / prob(A)
    if confidence < min_confidence:
        continue  # keep only rules that meet the confidence threshold
    # Lift compares the confidence with the baseline frequency of B.
    lift = confidence / prob(B)
    result.append([A, B, support, confidence, lift])

rules = pd.DataFrame(result, columns=['A', 'B', 'SUP', 'CONF', 'LIFT'])
rules  # bare expression so a notebook cell still displays the table
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment