Skip to content

Instantly share code, notes, and snippets.

@Shoeboxam
Last active June 21, 2019 18:05
Show Gist options
  • Select an option

  • Save Shoeboxam/3469bb7b5980fdaf5da9bec314ab3100 to your computer and use it in GitHub Desktop.

Select an option

Save Shoeboxam/3469bb7b5980fdaf5da9bec314ab3100 to your computer and use it in GitHub Desktop.
Constructing pipelines for D3M TA2s via ta3ta2-api-compatible JSON
import d3m
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata.base import ArgumentType
from google.protobuf.json_format import MessageToJson
from ta3ta2_api import utils
import json
# Value types passed to utils.encode_pipeline_description further down.
# NOTE(review): presumably the value encodings the pipeline description is
# allowed to use -- confirm against the ta3ta2_api documentation.
types = [
    utils.ValueType.RAW,
    utils.ValueType.CSV_URI,
    utils.ValueType.DATASET_URI,
    utils.ValueType.PICKLE_BLOB,
    utils.ValueType.PICKLE_URI,
]
# NOTE(review): presumably populates the primitive index so that the
# d3m.primitives.* paths used below resolve -- confirm against d3m docs.
d3m.index.load_all()

# Empty pipeline with a single input named 'inputs' (referenced as 'inputs.0').
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# First step added: remove_columns, configured with columns [1, 2] and fed
# from the pipeline input.
step_inside = PrimitiveStep(
    primitive=d3m.primitives.data_transformation.remove_columns.DataFrameCommon,
)
step_inside.add_argument(
    name='inputs',
    argument_type=ArgumentType.CONTAINER,
    data_reference='inputs.0',
)
step_inside.add_hyperparameter(
    name="columns",
    argument_type=ArgumentType.VALUE,
    data=[1, 2],
)
step_inside.add_output('produce')
pipeline_description.add_step(step_inside)

# Second step added: dataset_map, also fed from the pipeline input.
# Its 'primitive' hyperparameter is given as ArgumentType.PRIMITIVE with
# data=0 -- presumably the index of the remove_columns step above (TODO confirm).
step_mapper = PrimitiveStep(
    primitive=d3m.primitives.operator.dataset_map.DataFrameCommon,
)
step_mapper.add_argument(
    name='inputs',
    argument_type=ArgumentType.CONTAINER,
    data_reference='inputs.0',
)
step_mapper.add_hyperparameter(
    name="produce_method",
    argument_type=ArgumentType.VALUE,
    data="produce",
)
step_mapper.add_hyperparameter(
    name="primitive",
    argument_type=ArgumentType.PRIMITIVE,
    data=0,
)
step_mapper.add_hyperparameter(
    name="resources",
    argument_type=ArgumentType.CONTAINER,
    data="inputs.0",
)
step_mapper.add_output('produce')
pipeline_description.add_step(step_mapper)

# Third step added: a placeholder whose input is 'steps.1.produce'.
step_placeholder = d3m.metadata.pipeline.PlaceholderStep()
step_placeholder.add_input('steps.1.produce')
step_placeholder.add_output('produce')
pipeline_description.add_step(step_placeholder)

# Left disabled by the author:
# pipeline_description.to_json_structure()

# The pipeline's single output, 'output', is wired to 'steps.2.produce'.
pipeline_description.add_output(
    name='output',
    data_reference='steps.2.produce',
)
# Encode the pipeline via ta3ta2_api's utils, passing the value types and the
# '/working_dir' path, then render the resulting message as JSON text.
template = MessageToJson(
    utils.encode_pipeline_description(
        pipeline_description,
        types,
        '/working_dir',
    )
)

# Round-trip through json so the text is re-serialised with json.dumps
# defaults (a single line).
# NOTE: the value is neither bound nor printed, so it is only visible when
# this is the last expression evaluated in a REPL/notebook cell.
json.dumps(json.loads(template))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment