This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import tensorflow as tf | |
import merlin.models.tf as mm | |
from nvtabular.workflow import Workflow | |
from merlin.systems.dag.ops.tensorflow import PredictTensorflow | |
from merlin.systems.dag.ensemble import Ensemble | |
from merlin.systems.dag.ops.workflow import TransformWorkflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ["TF_GPU_ALLOCATOR"]="cuda_malloc_async" | |
import glob | |
import numpy as np | |
import pandas as pd | |
import gc | |
import calendar | |
import datetime | |
import cudf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ["TF_GPU_ALLOCATOR"]="cuda_malloc_async" | |
import glob | |
import numpy as np | |
import pandas as pd | |
import gc | |
import calendar | |
import datetime | |
import cudf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ["TF_GPU_ALLOCATOR"]="cuda_malloc_async" | |
import glob | |
import numpy as np | |
import pandas as pd | |
import gc | |
import calendar | |
import datetime | |
import cudf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import nvtabular as nvt | |
from nvtabular.ops import * | |
from merlin.schema.tags import Tags | |
import merlin.models.tf as mm | |
from merlin.io.dataset import Dataset | |
import tensorflow as tf | |
DATA_FOLDER = os.environ.get("DATA_FOLDER", "./data/") | |
# define output path for the processed parquet files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import glob | |
import numpy as np | |
import pandas as pd | |
import cudf | |
import cupy as cp | |
import nvtabular as nvt | |
from nvtabular.ops import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: "0_transformworkflow" | |
input { | |
name: "item_id" | |
data_type: TYPE_INT32 | |
dims: -1 | |
dims: 1 | |
} | |
input { | |
name: "category" | |
data_type: TYPE_INT32 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import glob | |
import numpy as np | |
import pandas as pd | |
import cudf | |
import cupy as cp | |
import nvtabular as nvt | |
from nvtabular.ops import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from merlin.datasets.synthetic import generate_data | |
train, valid = generate_data("dressipi2022-preprocessed", num_rows=10000, set_sizes=(0.8, 0.2)) | |
item_features_names = ['f_' + str(col) for col in [47, 68]] | |
cat_features = [['item_id', 'purchase_id']] + item_features_names >> nvt.ops.Categorify() | |
features = ['session_id', 'timestamp', 'date'] + cat_features | |
to_aggregate = {} | |
to_aggregate['date'] = ["first"] | |
to_aggregate['item_id'] = ["last", "list"] | |
to_aggregate['purchase_id'] = ["first"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import pandas as pd | |
import nvtabular as nvt | |
NUM_ROWS = 1000 | |
long_tailed_item_distribution = np.clip(np.random.lognormal(3., 1., NUM_ROWS).astype(np.int32), 1, 50000) | |
# generate random item interaction features | |
df = pd.DataFrame(np.random.randint(70000, 80000, NUM_ROWS), columns=['session_id']) |
NewerOlder