Skip to content

Instantly share code, notes, and snippets.

@samirsaci
Created March 19, 2021 20:52
Show Gist options
  • Save samirsaci/a341a83447d3da193515f5d3e5935297 to your computer and use it in GitHub Desktop.
Save samirsaci/a341a83447d3da193515f5d3e5935297 to your computer and use it in GitHub Desktop.
Create nodes
def order_brand(PATH_IN):
    """List the brands present in each order and flag them per order.

    Args:
        PATH_IN: Source of the order lines. Either an already-loaded
            ``DataFrame`` or anything ``pd.read_excel`` accepts (typically a
            file path). The data must expose ``ORDER_NUMBER`` and ``BRAND``
            columns.

    Returns:
        A 4-tuple ``(list_brand, df_ordbr, df_con, df_rec)``:

        * ``list_brand`` -- distinct brands, in order of first appearance.
        * ``df_ordbr`` -- one row per ``ORDER_NUMBER`` with a ``list_brand``
          column (array of the order's distinct brands) followed by one
          boolean column per brand telling whether the order contains it.
        * ``df_con`` -- the distinct combinations of those boolean columns,
          indexed by the position of their first occurrence.
        * ``df_rec`` -- the raw order lines.
    """
    # Accept an in-memory frame so callers are not forced through Excel;
    # every other value is still handed to read_excel as before.
    if isinstance(PATH_IN, pd.DataFrame):
        df_rec = PATH_IN
    else:
        df_rec = pd.read_excel(PATH_IN)

    # One row per order holding the array of distinct brands it contains.
    df_ordbr = (
        df_rec.groupby(['ORDER_NUMBER'])['BRAND'].unique().to_frame('list_brand')
    )

    # Every brand seen in the data becomes one boolean column.
    list_brand = list(df_rec['BRAND'].unique())

    for br in list_brand:
        # br is bound as a default so the lambda never depends on the loop
        # variable's later value.
        df_ordbr[br] = df_ordbr['list_brand'].apply(
            lambda brands, br=br: br in brands
        )

    # Skip the leading 'list_brand' column and keep each distinct
    # combination of brand flags once.
    df_con = df_ordbr.iloc[:, 1:].reset_index(drop=True).drop_duplicates()

    return list_brand, df_ordbr, df_con, df_rec
def create_nodes(df_con, n_groups):
    """Build the node table (one node per brand) from brand combinations.

    For each column of ``df_con`` the function keeps the rows where that
    column is ``True`` and counts how many columns have at least one ``True``
    in those rows (the column itself included). The counts are then bucketed
    into groups: the higher the count, the lower the group number.

    Args:
        df_con: DataFrame of boolean flags, one column per brand and one row
            per combination of brands.
        n_groups: Number of buckets used to split the range of counts.

    Returns:
        DataFrame indexed by ``name`` (the column labels of ``df_con``) with a
        single integer ``group`` column. Values lie between 0 and
        ``n_groups``: 0 is reached by a count equal to ``n_groups`` times the
        bucket width. If every count is 0, all nodes get ``n_groups``.
    """
    names = list(df_con.columns)

    # Number of columns that co-occur with each column (itself included).
    counts = [
        (df_con[df_con[col].eq(True)].sum() > 0).sum()
        for col in names
    ]

    df_nodes = pd.DataFrame({'name': names, 'group': counts}).set_index('name')

    # Nothing to bucket: return the empty table with an integer column.
    if df_nodes.empty:
        df_nodes['group'] = df_nodes['group'].astype(int)
        return df_nodes

    # Width of one bucket, so that the largest count falls in the last one.
    range_value = np.ceil(df_nodes['group'].max() / n_groups)

    if range_value != 0:
        buckets = np.floor(df_nodes['group'] / range_value).astype(int)
    else:
        # All counts are 0: dividing would give NaN and the integer cast
        # would raise, so every node goes in the lowest bucket instead.
        buckets = 0

    df_nodes['group'] = n_groups - buckets
    return df_nodes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment