Skip to content

Instantly share code, notes, and snippets.

@rnyak
Last active November 8, 2022 16:02
Show Gist options
  • Save rnyak/f494d86cb22b7cf4e3be3837301bd859 to your computer and use it in GitHub Desktop.
Save rnyak/f494d86cb22b7cf4e3be3837301bd859 to your computer and use it in GitHub Desktop.
import nvtabular as nvt
from merlin.datasets.synthetic import generate_data
from merlin.schema.tags import Tags
from nvtabular.ops import AddMetadata
# Ask the synthetic-data generator for 10,000 rows shaped like the
# "dressipi2022-preprocessed" dataset; set_sizes=(0.8, 0.2) yields the two
# datasets bound to `train` and `valid`.
train, valid = generate_data(
    "dressipi2022-preprocessed",
    num_rows=10000,
    set_sizes=(0.8, 0.2),
)

# Item-feature columns used by this pipeline: 'f_47' and 'f_68'.
item_features_names = [f"f_{col}" for col in (47, 68)]

# `+` binds tighter than `>>`, so the complete column list (the nested
# ['item_id', 'purchase_id'] group plus the item-feature names) is what gets
# piped into Categorify.
# NOTE(review): the nested list presumably requests one shared encoding for
# item_id and purchase_id -- confirm against the Categorify documentation.
cat_features = (
    [['item_id', 'purchase_id']] + item_features_names
) >> nvt.ops.Categorify()

# Untouched session/time columns plus the categorified columns; this is the
# input of the session-level group-by.
features = ['session_id', 'timestamp', 'date'] + cat_features
# Aggregations to compute per column when rows are grouped into sessions.
# Keys are input column names, values are the aggregation names to apply.
to_aggregate = {
    'date': ["first"],
    'item_id': ["last", "list"],
    'purchase_id': ["first"],
}

# Each item-feature column is aggregated with 'list' as well.  The loop body
# must be indented under the `for`; without the indent the script does not
# even parse.
for name in item_features_names:
    to_aggregate[name] = ['list']
# Group the selected columns by session_id, ordering rows by `date`, and
# apply the aggregations declared in `to_aggregate`.  name_sep="_" is the
# separator used in the output column names, which is why later selections
# refer to columns such as 'item_id_last' and 'date_first'.
groupby_features = features >> nvt.ops.Groupby(
    groupby_cols=["session_id"],
    sort_cols=["date"],
    aggs=to_aggregate,
    name_sep="_",
)
# Attach schema tags to the aggregated columns.

# 'item_id_last' is tagged as an item / item-id column.
item_last = groupby_features['item_id_last'] >> AddMetadata(
    tags=[Tags.ITEM, Tags.ITEM_ID],
)

# 'item_id_list' gets the item tags plus the list / sequence tags.
item_list = groupby_features['item_id_list'] >> AddMetadata(
    tags=[Tags.ITEM, Tags.ITEM_ID, Tags.LIST, Tags.SEQUENCE],
)

# The '<feature>_list' columns are tagged as sequential item features.
feature_list = groupby_features[
    [f"{name}_list" for name in item_features_names]
] >> AddMetadata(
    tags=[Tags.SEQUENCE, Tags.ITEM, Tags.LIST],
)

# Columns carried along without additional tags.
other_features = groupby_features['session_id', 'date_first']

# Recombine everything into a single node.  The name `groupby_features` is
# rebound here; the right-hand side (including the 'purchase_id_first'
# selection) is evaluated against the previous node before the rebinding.
groupby_features = (
    item_last
    + item_list
    + feature_list
    + other_features
    + groupby_features['purchase_id_first']
)
# Number of trailing list elements retained per session.
SESSIONS_MAX_LENGTH = 3

# Split the aggregated columns into list-valued and scalar ones.
list_features = [f"{name}_list" for name in item_features_names] + ['item_id_list']
nonlist_features = ['session_id', 'date_first', 'item_id_last', 'purchase_id_first']

# Slice every list column with a negative start (per the ListSlice docs this
# keeps the last SESSIONS_MAX_LENGTH items of each row) and append '_seq' to
# the resulting column names.
truncated_features = (
    groupby_features[list_features]
    >> nvt.ops.ListSlice(-SESSIONS_MAX_LENGTH)
    >> nvt.ops.Rename(postfix='_seq')
)

# Final output: the scalar columns together with the truncated sequences.
final_features = groupby_features[nonlist_features] + truncated_features

workflow = nvt.Workflow(final_features)

# Fit the workflow on the training data and transform it in the same call;
# `train` is rebound to the transformed result.
train = workflow.fit_transform(train)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment