Skip to content

Instantly share code, notes, and snippets.

# External dependencies
import os
# Select the GPU through environment variables. They are assigned here, before
# cudf and nvtabular are imported below — presumably so the GPU libraries see
# them when they initialize (NOTE(review): confirm this ordering is required).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import time
import glob
import numpy as np
import pandas as pd
import cudf
import nvtabular as nvt
@rnyak
rnyak / training_evalutation.py
Last active September 22, 2021 17:29
training_and_evalutation
# Set training args and hyperparameters for training
# NOTE(review): `T4RecTrainingArguments` is not imported in this excerpt.
# NOTE(review): the trailing `...` below stands in for arguments omitted from
# this excerpt; a positional `...` after keyword arguments is not valid Python,
# so replace it with the real keyword arguments before running.
training_args = T4RecTrainingArguments(
data_loader_engine='nvtabular', dataloader_drop_last=False,
num_train_epochs=3, per_device_train_batch_size = 256,
per_device_eval_batch_size = 32, learning_rate=0.000666, ...
)
trainer = Trainer(
model=model, args=training_args,
train_dataset_or_path=train_path,
eval_dataset_or_path=eval_path,
@rnyak
rnyak / NextItemPrediction.py
Last active September 22, 2021 17:29
NextItemPrediction
# Define one or multiple prediction-tasks
prediction_task = tr.NextItemPredictionTask()
# Combine `body` with the prediction task in a Head and convert it to a model.
# NOTE(review): `tr` and `body` are not bound in this excerpt; this file binds
# them in later snippets (`from transformers4rec import torch as tr`,
# `body = tr.SequentialBlock(...)`), which must run before this one.
model: tr.Model = tr.Head(
body,
prediction_task,
).to_model()
# Build the XLNet configuration that is passed to tr.TransformerBlock.
# total_seq_length=20 equals the max_sequence_length=20 given to
# TabularSequenceFeatures.from_schema in this file — presumably the two must
# stay in sync (TODO confirm).
transformer_config = tr.XLNetConfig.build(
d_model=64, n_head=4, n_layer=2, total_seq_length=20
)
# Define the model block including: inputs, masking and transformer block.
# The blocks are listed in the order: input features, MLP, transformer.
# The last MLP size (64) equals d_model=64 in the XLNet config used in this
# file — presumably so the MLP output matches the transformer width (TODO confirm).
body = tr.SequentialBlock(
    input_module,
    tr.MLPBlock([128, 64]),
    # Take the masking from `input_module`, the name the input block is bound
    # to; the previous `inputs.masking` referenced a name that is never defined.
    tr.TransformerBlock(transformer_config, masking=input_module.masking)
)
from transformers4rec import torch as tr
# Defines input module to process tabular sequential input features
# NOTE(review): `schema` is not bound in this excerpt; this file builds it in a
# separate snippet via Schema().from_proto_text('schema.pb').
# max_sequence_length=20 equals total_seq_length=20 in the XLNet config used in
# this file — presumably these must agree (TODO confirm).
input_module = tr.TabularSequenceFeatures.from_schema(
schema,
max_sequence_length=20,
d_output=256,
aggregation="concat",
masking="causal",
)
@rnyak
rnyak / schema.py
Last active September 22, 2021 17:28
from merlin_standard_lib import Schema
# Loads the schema from a protobuf text file
# 'schema.pb' is a relative path — presumably resolved against the current
# working directory (verify where the file is expected to live).
schema = Schema().from_proto_text('schema.pb')
# Keeps only a few columns from the schema
# `schema` is rebound to the result, so only the two named columns remain
# available to the code that consumes it afterwards.
schema = schema.select_by_name(['item_id-list', 'category_id-list'])
@rnyak
rnyak / example_nbs_run_instructions.md
Last active September 15, 2021 16:21
example_nbs_run_instructions
  1. Clone Transformers4Rec from GitHub:
git clone https://github.com/NVIDIA-Merlin/Transformers4Rec.git

  2. Download the data files and folders from Google Drive (https://drive.google.com/drive/u/0/folders/1nTuG6UHWOEaZnBJj7YSIVvnphE1zGc1h), copy that directory into the Transformers4Rec directory, and mount it into the container.

  3. Pull the Merlin Docker image and link it to the Transformers4Rec volume:
import os
import numpy as np
import pandas as pd
import glob
import cudf
import cupy
import nvtabular as nvt
from nvtabular import ColumnSelector
# External dependencies
import os
from time import time
import cudf
import tritonclient.grpc as grpcclient
import nvtabular.inference.triton as nvt_triton
#!curl -i triton:8000/v2/health/ready
import os
import numpy as np
import pandas as pd
import glob
import shutil
import cudf
import cupy
import nvtabular as nvt
from nvtabular import ColumnSelector