Created
June 4, 2022 16:52
-
-
Save Pierian-Data/976db31437ee442bf4e4491980784775 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
"artifact": {
"name": "cdap-data-pipeline",
"version": "6.1.1",
"scope": "SYSTEM"
},
"description": "Pipeline that cleans raw shipment data and loads it into a logistics data lake.",
"name": "Shipment-Data-Cleansing",
"config": {
"resources": {
"memoryMB": 1024,
"virtualCores": 1
},
"driverResources": {
"memoryMB": 1024,
"virtualCores": 1
},
"connections": [
{
"from": "Raw Shipping Data",
"to": "Parse and Clean"
},
{
"from": "Parse and Clean",
"to": "Cleaned Shipments"
},
{
"from": "Parse and Clean",
"to": "Cleanup Errors"
},
{
"from": "Cleanup Errors",
"to": "Invalid Shipment Data"
}
],
"comments": [],
"postActions": [],
"properties": {},
"processTimingEnabled": true,
"stageLoggingEnabled": true,
"stages": [
{
"name": "Raw Shipping Data",
"plugin": {
"name": "GCSFile",
"type": "batchsource",
"label": "Raw Shipping Data",
"artifact": {
"name": "google-cloud",
"version": "0.13.2",
"scope": "SYSTEM"
},
"properties": {
"filenameOnly": "false",
"copyHeader": "false",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"}]}",
"path": "gs://lineage-tutorial/logistics/",
"format": "text",
"project": "dis-user-guide",
"recursive": "false",
"referenceName": "Raw_Shipping_Data",
"serviceFilePath": "auto-detect"
}
},
"outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"}]}"
},
{
"name": "Parse and Clean",
"plugin": {
"name": "Wrangler",
"type": "transform",
"label": "Parse and Clean",
"artifact": {
"name": "wrangler-transform",
"version": "4.1.4",
"scope": "SYSTEM"
},
"properties": {
"workspaceId": "1921ec2484659206a9967b497dc93dd9",
"directives": "parse-as-csv :body ',' false\ndrop body\nset columns shipment_id,shipping_date,shipping_time,origin_country,origin_city,destination_country,destination_city,customer_id,size,product,region_id,hazardous_goods,handle_with_care,time_to_ship,signature_required,weight,zipcode,price\nset-type :price float\nset-type :zipcode integer\nset-type :weight float\nfind-and-replace signature_required s/^Y$/true/g\nfind-and-replace signature_required s/^N$/false/g\nset-type :signature_required boolean\nfind-and-replace hazardous_goods s/^Y$/true/g\nfind-and-replace hazardous_goods s/^N$/false/g\nset-type :hazardous_goods boolean\nfind-and-replace handle_with_care s/^Y$/true/g\nfind-and-replace handle_with_care s/^N$/false/g",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"shipment_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_time\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"size\",\"type\":[\"string\",\"null\"]},{\"name\":\"product\",\"type\":[\"string\",\"null\"]},{\"name\":\"region_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"hazardous_goods\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"handle_with_care\",\"type\":[\"string\",\"null\"]},{\"name\":\"time_to_ship\",\"type\":[\"string\",\"null\"]},{\"name\":\"signature_required\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"weight\",\"type\":[\"float\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"price\",\"type\":[\"float\",\"null\"]}]}",
"field": "*",
"precondition": "false",
"threshold": "1"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"shipment_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_time\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"size\",\"type\":[\"string\",\"null\"]},{\"name\":\"product\",\"type\":[\"string\",\"null\"]},{\"name\":\"region_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"hazardous_goods\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"handle_with_care\",\"type\":[\"string\",\"null\"]},{\"name\":\"time_to_ship\",\"type\":[\"string\",\"null\"]},{\"name\":\"signature_required\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"weight\",\"type\":[\"float\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"price\",\"type\":[\"float\",\"null\"]}]}"
}
],
"inputSchema": [
{
"name": "Raw Shipping Data",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"}]}"
}
]
},
{
"name": "Cleaned Shipments",
"plugin": {
"name": "BigQueryTable",
"type": "batchsink",
"label": "Cleaned Shipments",
"artifact": {
"name": "google-cloud",
"version": "0.13.2",
"scope": "SYSTEM"
},
"properties": {
"project": "auto-detect",
"serviceFilePath": "auto-detect",
"operation": "insert",
"truncateTable": "false",
"allowSchemaRelaxation": "false",
"location": "US",
"createPartitionedTable": "false",
"partitionFilterRequired": "false",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"shipment_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_time\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"size\",\"type\":[\"string\",\"null\"]},{\"name\":\"product\",\"type\":[\"string\",\"null\"]},{\"name\":\"region_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"hazardous_goods\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"handle_with_care\",\"type\":[\"string\",\"null\"]},{\"name\":\"time_to_ship\",\"type\":[\"string\",\"null\"]},{\"name\":\"signature_required\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"weight\",\"type\":[\"float\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"price\",\"type\":[\"float\",\"null\"]}]}",
"referenceName": "Cleaned-Shipments",
"dataset": "logistics_demo",
"table": "delayed_shipments_us"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"shipment_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_time\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"size\",\"type\":[\"string\",\"null\"]},{\"name\":\"product\",\"type\":[\"string\",\"null\"]},{\"name\":\"region_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"hazardous_goods\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"handle_with_care\",\"type\":[\"string\",\"null\"]},{\"name\":\"time_to_ship\",\"type\":[\"string\",\"null\"]},{\"name\":\"signature_required\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"weight\",\"type\":[\"float\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"price\",\"type\":[\"float\",\"null\"]}]}"
}
],
"inputSchema": [
{
"name": "Parse and Clean",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"shipment_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"shipping_time\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"origin_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_country\",\"type\":[\"string\",\"null\"]},{\"name\":\"destination_city\",\"type\":[\"string\",\"null\"]},{\"name\":\"customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"size\",\"type\":[\"string\",\"null\"]},{\"name\":\"product\",\"type\":[\"string\",\"null\"]},{\"name\":\"region_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"hazardous_goods\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"handle_with_care\",\"type\":[\"string\",\"null\"]},{\"name\":\"time_to_ship\",\"type\":[\"string\",\"null\"]},{\"name\":\"signature_required\",\"type\":[\"boolean\",\"null\"]},{\"name\":\"weight\",\"type\":[\"float\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"price\",\"type\":[\"float\",\"null\"]}]}"
}
]
},
{
"name": "Cleanup Errors",
"plugin": {
"name": "ErrorCollector",
"type": "errortransform",
"label": "Cleanup Errors",
"artifact": {
"name": "core-plugins",
"version": "2.3.4",
"scope": "SYSTEM"
},
"properties": {
"messageField": "msg",
"codeField": "code",
"stageField": "node"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"msg\",\"type\":\"string\"},{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"node\",\"type\":\"string\"}]}"
}
],
"inputSchema": [
{
"name": "Parse and Clean",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"}]}"
}
]
},
{
"name": "Invalid Shipment Data",
"plugin": {
"name": "BigQueryTable",
"type": "batchsink",
"label": "Invalid Shipment Data",
"artifact": {
"name": "google-cloud",
"version": "0.13.2",
"scope": "SYSTEM"
},
"properties": {
"project": "auto-detect",
"serviceFilePath": "auto-detect",
"operation": "insert",
"truncateTable": "false",
"allowSchemaRelaxation": "false",
"location": "US",
"createPartitionedTable": "false",
"partitionFilterRequired": "false",
"referenceName": "Invalid-Shipment-Data",
"table": "invalid_shipment_data",
"dataset": "logistics_demo",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"msg\",\"type\":\"string\"},{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"node\",\"type\":\"string\"}]}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"msg\",\"type\":\"string\"},{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"node\",\"type\":\"string\"}]}"
}
],
"inputSchema": [
{
"name": "Cleanup Errors",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"body\",\"type\":\"string\"},{\"name\":\"msg\",\"type\":\"string\"},{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"node\",\"type\":\"string\"}]}"
}
]
}
],
"schedule": "0 * * * *",
"engine": "mapreduce",
"numOfRecordsPreview": 100,
"description": "Pipeline that cleans raw shipment data and loads it into a logistics data lake.",
"maxConcurrentRuns": 1
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment