Created
May 8, 2020 18:06
-
-
Save jeff303/ed1401dfbbda87ee1878394c01b65032 to your computer and use it in GitHub Desktop.
StreamSets Data Collector field restructuring example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"pipelineConfig" : { | |
"schemaVersion" : 6, | |
"version" : 16, | |
"pipelineId" : "FieldRestructuring5b840130-fc26-4363-8e1d-b075a6db10a2", | |
"title" : "Field Restructuring", | |
"description" : "", | |
"uuid" : "23890b10-6da9-4de1-b7e0-a8c32bd3e679", | |
"configuration" : [ { | |
"name" : "executionMode", | |
"value" : "STANDALONE" | |
}, { | |
"name" : "edgeHttpUrl", | |
"value" : "http://localhost:18633" | |
}, { | |
"name" : "deliveryGuarantee", | |
"value" : "AT_LEAST_ONCE" | |
}, { | |
"name" : "testOriginStage", | |
"value" : "streamsets-datacollector-dev-lib::com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource::3" | |
}, { | |
"name" : "startEventStage", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1" | |
}, { | |
"name" : "stopEventStage", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1" | |
}, { | |
"name" : "shouldRetry", | |
"value" : true | |
}, { | |
"name" : "triggerInterval", | |
"value" : 2000 | |
}, { | |
"name" : "retryAttempts", | |
"value" : -1 | |
}, { | |
"name" : "ludicrousMode", | |
"value" : false | |
}, { | |
"name" : "ludicrousModeInputCount", | |
"value" : false | |
}, { | |
"name" : "advancedErrorHandling", | |
"value" : false | |
}, { | |
"name" : "notifyOnStates", | |
"value" : [ "RUN_ERROR", "STOPPED", "FINISHED" ] | |
}, { | |
"name" : "emailIDs", | |
"value" : [ ] | |
}, { | |
"name" : "constants", | |
"value" : [ ] | |
}, { | |
"name" : "badRecordsHandling", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1" | |
}, { | |
"name" : "errorRecordPolicy", | |
"value" : "ORIGINAL_RECORD" | |
}, { | |
"name" : "statsAggregatorStage", | |
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_StatsDpmDirectlyDTarget::1" | |
}, { | |
"name" : "workerCount", | |
"value" : 0 | |
}, { | |
"name" : "clusterSlaveMemory", | |
"value" : 2048 | |
}, { | |
"name" : "clusterSlaveJavaOpts", | |
"value" : "-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -Dlog4j.debug" | |
}, { | |
"name" : "clusterLauncherEnv", | |
"value" : [ ] | |
}, { | |
"name" : "mesosDispatcherURL", | |
"value" : null | |
}, { | |
"name" : "logLevel", | |
"value" : "INFO" | |
}, { | |
"name" : "hdfsS3ConfDir", | |
"value" : null | |
}, { | |
"name" : "rateLimit", | |
"value" : 0 | |
}, { | |
"name" : "maxRunners", | |
"value" : 0 | |
}, { | |
"name" : "shouldCreateFailureSnapshot", | |
"value" : true | |
}, { | |
"name" : "runnerIdleTIme", | |
"value" : 60 | |
}, { | |
"name" : "webhookConfigs", | |
"value" : [ ] | |
}, { | |
"name" : "sparkConfigs", | |
"value" : [ ] | |
}, { | |
"name" : "preprocessScript", | |
"value" : "/*\nThe following script defines a method\nthat increments an integer by 1 \nand registers it as a UDF with \nthe SparkSession, which can be accessed\nusing the variable named \"spark\":\ndef inc(i: Integer): Integer = {\n i + 1\n}\nspark.udf.register (\"inc\", inc _)\n\n*/" | |
}, { | |
"name" : "clusterConfig.clusterType", | |
"value" : "LOCAL" | |
}, { | |
"name" : "clusterConfig.sparkMasterUrl", | |
"value" : "local[*]" | |
}, { | |
"name" : "clusterConfig.deployMode", | |
"value" : "CLIENT" | |
}, { | |
"name" : "clusterConfig.hadoopUserName", | |
"value" : null | |
}, { | |
"name" : "clusterConfig.sparkAppName", | |
"value" : "${pipeline:title()}" | |
}, { | |
"name" : "clusterConfig.stagingDir", | |
"value" : "/streamsets" | |
}, { | |
"name" : "clusterConfig.useYarnKerberosKeytab", | |
"value" : false | |
}, { | |
"name" : "clusterConfig.yarnKerberosKeytabSource", | |
"value" : "PROPERTIES_FILE" | |
}, { | |
"name" : "clusterConfig.yarnKerberosKeytab", | |
"value" : null | |
}, { | |
"name" : "clusterConfig.yarnKerberosPrincipal", | |
"value" : "name@DOMAIN" | |
}, { | |
"name" : "databricksConfig.baseUrl", | |
"value" : null | |
}, { | |
"name" : "databricksConfig.credentialType", | |
"value" : null | |
}, { | |
"name" : "databricksConfig.username", | |
"value" : "" | |
}, { | |
"name" : "databricksConfig.password", | |
"value" : "" | |
}, { | |
"name" : "databricksConfig.token", | |
"value" : "" | |
}, { | |
"name" : "databricksConfig.provisionNewCluster", | |
"value" : true | |
}, { | |
"name" : "databricksConfig.clusterId", | |
"value" : null | |
}, { | |
"name" : "databricksConfig.clusterConfig", | |
"value" : "{\n \"num_workers\": 8,\n \"spark_version\": \"5.3.x-scala2.11\",\n \"node_type_id\": \"i3.xlarge\"\n}" | |
}, { | |
"name" : "databricksConfig.terminateCluster", | |
"value" : false | |
}, { | |
"name" : "livyConfig.baseUrl", | |
"value" : "https://localhost:30443/gateway/default/livy/v1/" | |
}, { | |
"name" : "livyConfig.username", | |
"value" : "" | |
}, { | |
"name" : "livyConfig.password", | |
"value" : "" | |
}, { | |
"name" : "amazonEMRConfig.userRegion", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.userRegionCustom", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.accessKey", | |
"value" : "" | |
}, { | |
"name" : "amazonEMRConfig.secretKey", | |
"value" : "" | |
}, { | |
"name" : "amazonEMRConfig.s3StagingUri", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.provisionNewCluster", | |
"value" : false | |
}, { | |
"name" : "amazonEMRConfig.clusterId", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.clusterPrefix", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.terminateCluster", | |
"value" : false | |
}, { | |
"name" : "amazonEMRConfig.loggingEnabled", | |
"value" : true | |
}, { | |
"name" : "amazonEMRConfig.s3LogUri", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.enableEMRDebugging", | |
"value" : true | |
}, { | |
"name" : "amazonEMRConfig.serviceRole", | |
"value" : "EMR_DefaultRole" | |
}, { | |
"name" : "amazonEMRConfig.jobFlowRole", | |
"value" : "EMR_EC2_DefaultRole" | |
}, { | |
"name" : "amazonEMRConfig.visibleToAllUsers", | |
"value" : true | |
}, { | |
"name" : "amazonEMRConfig.ec2SubnetId", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.masterSecurityGroup", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.slaveSecurityGroup", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.instanceCount", | |
"value" : 2 | |
}, { | |
"name" : "amazonEMRConfig.masterInstanceType", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.masterInstanceTypeCustom", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.slaveInstanceType", | |
"value" : null | |
}, { | |
"name" : "amazonEMRConfig.slaveInstanceTypeCustom", | |
"value" : null | |
} ], | |
"uiInfo" : { | |
"previewConfig" : { | |
"previewSource" : "CONFIGURED_SOURCE", | |
"batchSize" : 10, | |
"timeout" : 30000, | |
"writeToDestinations" : false, | |
"executeLifecycleEvents" : false, | |
"showHeader" : false, | |
"showFieldType" : true, | |
"rememberMe" : false | |
} | |
}, | |
"fragments" : [ ], | |
"stages" : [ { | |
"instanceName" : "DevRawDataSource_01", | |
"library" : "streamsets-datacollector-dev-lib", | |
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource", | |
"stageVersion" : "3", | |
"configuration" : [ { | |
"name" : "rawData", | |
"value" : "{\n \"data1\": [\n {\n \"vlan\": \"195\",\n \"vlanname\": \"Subnet-54.14.195\"\n },\n {\n \"vlan\": \"195\",\n \"vlanname\": \"Subnet-54.14.193\"\n }\n ]\n}" | |
}, { | |
"name" : "stopAfterFirstBatch", | |
"value" : false | |
}, { | |
"name" : "eventData", | |
"value" : null | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
} ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "SOURCE", | |
"icon" : "dev.png", | |
"description" : "", | |
"label" : "Input Data", | |
"xPos" : 60 | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ "DevRawDataSource_01OutputLane15889586173500" ], | |
"eventLanes" : [ ], | |
"services" : [ { | |
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService", | |
"serviceVersion" : 1, | |
"configuration" : [ { | |
"name" : "displayFormats", | |
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML" | |
}, { | |
"name" : "dataFormat", | |
"value" : "JSON" | |
}, { | |
"name" : "dataFormatConfig.compression", | |
"value" : "NONE" | |
}, { | |
"name" : "dataFormatConfig.filePatternInArchive", | |
"value" : "*" | |
}, { | |
"name" : "dataFormatConfig.charset", | |
"value" : "UTF-8" | |
}, { | |
"name" : "dataFormatConfig.removeCtrlChars", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.textMaxLineLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.useCustomDelimiter", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customDelimiter", | |
"value" : "\\r\\n" | |
}, { | |
"name" : "dataFormatConfig.includeCustomDelimiterInTheText", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.jsonContent", | |
"value" : "MULTIPLE_OBJECTS" | |
}, { | |
"name" : "dataFormatConfig.jsonMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.csvFileFormat", | |
"value" : "CSV" | |
}, { | |
"name" : "dataFormatConfig.csvHeader", | |
"value" : "NO_HEADER" | |
}, { | |
"name" : "dataFormatConfig.csvAllowExtraColumns", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvExtraColumnPrefix", | |
"value" : "_extra_" | |
}, { | |
"name" : "dataFormatConfig.csvMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.csvCustomDelimiter", | |
"value" : "|" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterFieldDelimiter", | |
"value" : "||" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterLineDelimiter", | |
"value" : "${str:unescapeJava('\\\\n')}" | |
}, { | |
"name" : "dataFormatConfig.csvCustomEscape", | |
"value" : "\\" | |
}, { | |
"name" : "dataFormatConfig.csvCustomQuote", | |
"value" : "\"" | |
}, { | |
"name" : "dataFormatConfig.csvEnableComments", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvCommentMarker", | |
"value" : "#" | |
}, { | |
"name" : "dataFormatConfig.csvIgnoreEmptyLines", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.csvRecordType", | |
"value" : "LIST_MAP" | |
}, { | |
"name" : "dataFormatConfig.csvSkipStartLines", | |
"value" : 0 | |
}, { | |
"name" : "dataFormatConfig.parseNull", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.nullConstant", | |
"value" : "\\\\N" | |
}, { | |
"name" : "dataFormatConfig.xmlRecordElement", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.includeFieldXpathAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xPathNamespaceContext", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.outputFieldAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xmlMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.logMode", | |
"value" : "COMMON_LOG_FORMAT" | |
}, { | |
"name" : "dataFormatConfig.logMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.retainOriginalLine", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customLogFormat", | |
"value" : "%h %l %u %t \"%r\" %>s %b" | |
}, { | |
"name" : "dataFormatConfig.regex", | |
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)" | |
}, { | |
"name" : "dataFormatConfig.fieldPathsToGroupName", | |
"value" : [ { | |
"fieldPath" : "/", | |
"group" : 1 | |
} ] | |
}, { | |
"name" : "dataFormatConfig.grokPatternDefinition", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.grokPattern", | |
"value" : "%{COMMONAPACHELOG}" | |
}, { | |
"name" : "dataFormatConfig.onParseError", | |
"value" : "ERROR" | |
}, { | |
"name" : "dataFormatConfig.maxStackTraceLines", | |
"value" : 50 | |
}, { | |
"name" : "dataFormatConfig.enableLog4jCustomLogFormat", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.log4jCustomLogFormat", | |
"value" : "%r [%t] %-5p %c %x - %m%n" | |
}, { | |
"name" : "dataFormatConfig.avroSchemaSource", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.avroSchema", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaRegistryUrls", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.basicAuth", | |
"value" : "" | |
}, { | |
"name" : "dataFormatConfig.schemaLookupMode", | |
"value" : "SUBJECT" | |
}, { | |
"name" : "dataFormatConfig.subject", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaId", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.avroSkipUnionIndex", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.protoDescriptorFile", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.messageType", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.isDelimited", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.binaryMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.datagramMode", | |
"value" : "SYSLOG" | |
}, { | |
"name" : "dataFormatConfig.typesDbPath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.convertTime", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excludeInterval", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.authFilePath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesMode", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSize", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMs", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.wholeFileMaxObjectLen", | |
"value" : 8192 | |
}, { | |
"name" : "dataFormatConfig.rateLimit", | |
"value" : "-1" | |
}, { | |
"name" : "dataFormatConfig.verifyChecksum", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excelHeader", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.excelSkipCellsWithNoHeader", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excelReadAllSheets", | |
"value" : true | |
}, { | |
"name" : "dataFormatConfig.excelSheetNames", | |
"value" : [ ] | |
} ] | |
} ] | |
}, { | |
"instanceName" : "FieldMapper_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "fieldMapperConfig.operateOn", | |
"value" : "FIELD_NAMES" | |
}, { | |
"name" : "fieldMapperConfig.conditionalExpression", | |
"value" : "${f:name() == 'vlan'}" | |
}, { | |
"name" : "fieldMapperConfig.mappingExpression", | |
"value" : "${str:concat('new_', str:concat(f:name(), '_map'))}" | |
}, { | |
"name" : "fieldMapperConfig.aggregationExpression", | |
"value" : "" | |
}, { | |
"name" : "fieldMapperConfig.structureChangeAllowed", | |
"value" : true | |
}, { | |
"name" : "fieldMapperConfig.appendListValues", | |
"value" : false | |
}, { | |
"name" : "fieldMapperConfig.maintainOriginalPaths", | |
"value" : true | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "PROCESSOR", | |
"icon" : "iconfinder_thefreeforty_map_1243687.png", | |
"description" : "", | |
"label" : "Add vlan_map", | |
"xPos" : 280 | |
}, | |
"inputLanes" : [ "DevRawDataSource_01OutputLane15889586173500" ], | |
"outputLanes" : [ "FieldMapper_01OutputLane15889586435950" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "FieldMapper_02", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "fieldMapperConfig.operateOn", | |
"value" : "FIELD_VALUES" | |
}, { | |
"name" : "fieldMapperConfig.conditionalExpression", | |
"value" : "${f:name() == 'new_vlan_map'}" | |
}, { | |
"name" : "fieldMapperConfig.mappingExpression", | |
"value" : "${emptyMap()}" | |
}, { | |
"name" : "fieldMapperConfig.aggregationExpression", | |
"value" : "${fields}" | |
}, { | |
"name" : "fieldMapperConfig.structureChangeAllowed", | |
"value" : true | |
}, { | |
"name" : "fieldMapperConfig.appendListValues", | |
"value" : false | |
}, { | |
"name" : "fieldMapperConfig.maintainOriginalPaths", | |
"value" : false | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "PROCESSOR", | |
"icon" : "iconfinder_thefreeforty_map_1243687.png", | |
"description" : "", | |
"label" : "Change vlan_map to Map", | |
"xPos" : 500 | |
}, | |
"inputLanes" : [ "FieldMapper_01OutputLane15889586435950" ], | |
"outputLanes" : [ "FieldMapper_02OutputLane15889587787500" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "FieldMapper_04", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "fieldMapperConfig.operateOn", | |
"value" : "FIELD_NAMES" | |
}, { | |
"name" : "fieldMapperConfig.conditionalExpression", | |
"value" : "${f:name() == 'vlan'}" | |
}, { | |
"name" : "fieldMapperConfig.mappingExpression", | |
"value" : "${str:concat(f:name(), 'id')}" | |
}, { | |
"name" : "fieldMapperConfig.aggregationExpression", | |
"value" : "${fields}" | |
}, { | |
"name" : "fieldMapperConfig.structureChangeAllowed", | |
"value" : true | |
}, { | |
"name" : "fieldMapperConfig.appendListValues", | |
"value" : false | |
}, { | |
"name" : "fieldMapperConfig.maintainOriginalPaths", | |
"value" : false | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "PROCESSOR", | |
"icon" : "iconfinder_thefreeforty_map_1243687.png", | |
"description" : "", | |
"label" : "Suffix id", | |
"xPos" : 720 | |
}, | |
"inputLanes" : [ "FieldMapper_02OutputLane15889587787500" ], | |
"outputLanes" : [ "FieldMapper_04OutputLane15889601175990" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "FieldMapper_03", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "fieldMapperConfig.operateOn", | |
"value" : "FIELD_PATHS" | |
}, { | |
"name" : "fieldMapperConfig.conditionalExpression", | |
"value" : "${str:startsWith(f:name(), 'vlan')}" | |
}, { | |
"name" : "fieldMapperConfig.mappingExpression", | |
"value" : "${str:replace(f:path(), '/vlan', '/new_vlan_map/')}" | |
}, { | |
"name" : "fieldMapperConfig.aggregationExpression", | |
"value" : "" | |
}, { | |
"name" : "fieldMapperConfig.structureChangeAllowed", | |
"value" : true | |
}, { | |
"name" : "fieldMapperConfig.appendListValues", | |
"value" : false | |
}, { | |
"name" : "fieldMapperConfig.maintainOriginalPaths", | |
"value" : false | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "PROCESSOR", | |
"icon" : "iconfinder_thefreeforty_map_1243687.png", | |
"description" : "", | |
"label" : "Move vlan fields into vlan_map", | |
"xPos" : 940 | |
}, | |
"inputLanes" : [ "FieldMapper_04OutputLane15889601175990" ], | |
"outputLanes" : [ "FieldMapper_03OutputLane15889588568720" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "FieldMapper_05", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_processor_mapper_FieldMapperDProcessor", | |
"stageVersion" : "1", | |
"configuration" : [ { | |
"name" : "fieldMapperConfig.operateOn", | |
"value" : "FIELD_NAMES" | |
}, { | |
"name" : "fieldMapperConfig.conditionalExpression", | |
"value" : "${f:name() == 'new_vlan_map'}" | |
}, { | |
"name" : "fieldMapperConfig.mappingExpression", | |
"value" : "vlan" | |
}, { | |
"name" : "fieldMapperConfig.aggregationExpression", | |
"value" : "${fields}" | |
}, { | |
"name" : "fieldMapperConfig.structureChangeAllowed", | |
"value" : true | |
}, { | |
"name" : "fieldMapperConfig.appendListValues", | |
"value" : false | |
}, { | |
"name" : "fieldMapperConfig.maintainOriginalPaths", | |
"value" : false | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
}, { | |
"name" : "stageRequiredFields", | |
"value" : [ ] | |
}, { | |
"name" : "stageRecordPreconditions", | |
"value" : [ ] | |
} ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "PROCESSOR", | |
"icon" : "iconfinder_thefreeforty_map_1243687.png", | |
"description" : "", | |
"label" : "Rename back to vlan", | |
"xPos" : 1160 | |
}, | |
"inputLanes" : [ "FieldMapper_03OutputLane15889588568720" ], | |
"outputLanes" : [ "FieldMapper_05OutputLane15889601773700" ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, { | |
"instanceName" : "Trash_01", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_NullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "TARGET", | |
"icon" : "trash.png", | |
"description" : "", | |
"label" : "Trash", | |
"xPos" : 1380 | |
}, | |
"inputLanes" : [ "FieldMapper_05OutputLane15889601773700" ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
} ], | |
"errorStage" : { | |
"instanceName" : "Discard_ErrorStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "TARGET", | |
"icon" : "", | |
"description" : "", | |
"label" : "Error Records - Discard", | |
"xPos" : 919 | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, | |
"info" : { | |
"pipelineId" : "FieldRestructuring5b840130-fc26-4363-8e1d-b075a6db10a2", | |
"title" : "Field Restructuring", | |
"description" : "", | |
"created" : 1588958611231, | |
"lastModified" : 1588960341758, | |
"creator" : "admin", | |
"lastModifier" : "admin", | |
"lastRev" : "0", | |
"uuid" : "23890b10-6da9-4de1-b7e0-a8c32bd3e679", | |
"valid" : true, | |
"metadata" : { | |
"labels" : [ ] | |
}, | |
"name" : "FieldRestructuring5b840130-fc26-4363-8e1d-b075a6db10a2", | |
"sdcVersion" : "3.13.0", | |
"sdcId" : "92c115fc-9150-11ea-86cc-1d4da6ab472f" | |
}, | |
"metadata" : { | |
"labels" : [ ] | |
}, | |
"statsAggregatorStage" : { | |
"instanceName" : "statsAggregatorStageInstance", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_StatsDpmDirectlyDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"stageType" : "TARGET", | |
"label" : "Stats Aggregator - Write Directly to Control Hub - statistics are not aggregated across Data Collectors" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
}, | |
"startEventStages" : [ { | |
"instanceName" : "Discard_StartEventStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "TARGET", | |
"icon" : "", | |
"description" : "", | |
"label" : "Start Event - Discard", | |
"xPos" : 280 | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
} ], | |
"stopEventStages" : [ { | |
"instanceName" : "Discard_StopEventStage", | |
"library" : "streamsets-datacollector-basic-lib", | |
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget", | |
"stageVersion" : "1", | |
"configuration" : [ ], | |
"uiInfo" : { | |
"yPos" : 50, | |
"stageType" : "TARGET", | |
"icon" : "", | |
"description" : "", | |
"label" : "Stop Event - Discard", | |
"xPos" : 280 | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ ], | |
"eventLanes" : [ ], | |
"services" : [ ] | |
} ], | |
"testOriginStage" : { | |
"instanceName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStage", | |
"library" : "streamsets-datacollector-dev-lib", | |
"stageName" : "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource", | |
"stageVersion" : "3", | |
"configuration" : [ { | |
"name" : "rawData", | |
"value" : "{\n \"f1\": \"abc\",\n \"f2\": \"xyz\",\n \"f3\": \"lmn\"\n}" | |
}, { | |
"name" : "stopAfterFirstBatch", | |
"value" : false | |
}, { | |
"name" : "eventData", | |
"value" : null | |
}, { | |
"name" : "stageOnRecordError", | |
"value" : "TO_ERROR" | |
} ], | |
"uiInfo" : { | |
"stageType" : "SOURCE", | |
"label" : "Test Origin - Dev Raw Data Source" | |
}, | |
"inputLanes" : [ ], | |
"outputLanes" : [ "com_streamsets_pipeline_stage_devtest_rawdata_RawDataDSource_TestOriginStageOutputLane1" ], | |
"eventLanes" : [ ], | |
"services" : [ { | |
"service" : "com.streamsets.pipeline.api.service.dataformats.DataFormatParserService", | |
"serviceVersion" : 1, | |
"configuration" : [ { | |
"name" : "displayFormats", | |
"value" : "DELIMITED,JSON,LOG,SDC_JSON,TEXT,XML" | |
}, { | |
"name" : "dataFormat", | |
"value" : "JSON" | |
}, { | |
"name" : "dataFormatConfig.compression", | |
"value" : "NONE" | |
}, { | |
"name" : "dataFormatConfig.filePatternInArchive", | |
"value" : "*" | |
}, { | |
"name" : "dataFormatConfig.charset", | |
"value" : "UTF-8" | |
}, { | |
"name" : "dataFormatConfig.removeCtrlChars", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.textMaxLineLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.useCustomDelimiter", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customDelimiter", | |
"value" : "\\r\\n" | |
}, { | |
"name" : "dataFormatConfig.includeCustomDelimiterInTheText", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.jsonContent", | |
"value" : "MULTIPLE_OBJECTS" | |
}, { | |
"name" : "dataFormatConfig.jsonMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.csvFileFormat", | |
"value" : "CSV" | |
}, { | |
"name" : "dataFormatConfig.csvHeader", | |
"value" : "NO_HEADER" | |
}, { | |
"name" : "dataFormatConfig.csvAllowExtraColumns", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvExtraColumnPrefix", | |
"value" : "_extra_" | |
}, { | |
"name" : "dataFormatConfig.csvMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.csvCustomDelimiter", | |
"value" : "|" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterFieldDelimiter", | |
"value" : "||" | |
}, { | |
"name" : "dataFormatConfig.multiCharacterLineDelimiter", | |
"value" : "${str:unescapeJava('\\\\n')}" | |
}, { | |
"name" : "dataFormatConfig.csvCustomEscape", | |
"value" : "\\" | |
}, { | |
"name" : "dataFormatConfig.csvCustomQuote", | |
"value" : "\"" | |
}, { | |
"name" : "dataFormatConfig.csvEnableComments", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvCommentMarker", | |
"value" : "#" | |
}, { | |
"name" : "dataFormatConfig.csvIgnoreEmptyLines", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.csvRecordType", | |
"value" : "LIST_MAP" | |
}, { | |
"name" : "dataFormatConfig.csvSkipStartLines", | |
"value" : 0 | |
}, { | |
"name" : "dataFormatConfig.parseNull", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.nullConstant", | |
"value" : "\\\\N" | |
}, { | |
"name" : "dataFormatConfig.xmlRecordElement", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.includeFieldXpathAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xPathNamespaceContext", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.outputFieldAttributes", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.xmlMaxObjectLen", | |
"value" : 4096 | |
}, { | |
"name" : "dataFormatConfig.logMode", | |
"value" : "COMMON_LOG_FORMAT" | |
}, { | |
"name" : "dataFormatConfig.logMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.retainOriginalLine", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.customLogFormat", | |
"value" : "%h %l %u %t \"%r\" %>s %b" | |
}, { | |
"name" : "dataFormatConfig.regex", | |
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)" | |
}, { | |
"name" : "dataFormatConfig.fieldPathsToGroupName", | |
"value" : [ { | |
"fieldPath" : "/", | |
"group" : 1 | |
} ] | |
}, { | |
"name" : "dataFormatConfig.grokPatternDefinition", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.grokPattern", | |
"value" : "%{COMMONAPACHELOG}" | |
}, { | |
"name" : "dataFormatConfig.onParseError", | |
"value" : "ERROR" | |
}, { | |
"name" : "dataFormatConfig.maxStackTraceLines", | |
"value" : 50 | |
}, { | |
"name" : "dataFormatConfig.enableLog4jCustomLogFormat", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.log4jCustomLogFormat", | |
"value" : "%r [%t] %-5p %c %x - %m%n" | |
}, { | |
"name" : "dataFormatConfig.avroSchemaSource", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.avroSchema", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaRegistryUrls", | |
"value" : [ ] | |
}, { | |
"name" : "dataFormatConfig.basicAuth", | |
"value" : "" | |
}, { | |
"name" : "dataFormatConfig.schemaLookupMode", | |
"value" : "SUBJECT" | |
}, { | |
"name" : "dataFormatConfig.subject", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.schemaId", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.avroSkipUnionIndex", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.protoDescriptorFile", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.messageType", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.isDelimited", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.binaryMaxObjectLen", | |
"value" : 1024 | |
}, { | |
"name" : "dataFormatConfig.datagramMode", | |
"value" : "SYSLOG" | |
}, { | |
"name" : "dataFormatConfig.typesDbPath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.convertTime", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excludeInterval", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.authFilePath", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesMode", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSize", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMs", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.netflowOutputValuesModeDatagram", | |
"value" : "RAW_AND_INTERPRETED" | |
}, { | |
"name" : "dataFormatConfig.maxTemplateCacheSizeDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.templateCacheTimeoutMsDatagram", | |
"value" : -1 | |
}, { | |
"name" : "dataFormatConfig.wholeFileMaxObjectLen", | |
"value" : 8192 | |
}, { | |
"name" : "dataFormatConfig.rateLimit", | |
"value" : "-1" | |
}, { | |
"name" : "dataFormatConfig.verifyChecksum", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excelHeader", | |
"value" : null | |
}, { | |
"name" : "dataFormatConfig.excelSkipCellsWithNoHeader", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excelReadAllSheets", | |
"value" : false | |
}, { | |
"name" : "dataFormatConfig.excelSheetNames", | |
"value" : [ ] | |
} ] | |
} ] | |
}, | |
"valid" : true, | |
"issues" : { | |
"pipelineIssues" : [ ], | |
"stageIssues" : { }, | |
"issueCount" : 0 | |
}, | |
"previewable" : true | |
}, | |
"pipelineRules" : { | |
"schemaVersion" : 3, | |
"version" : 2, | |
"metricsRuleDefinitions" : [ { | |
"id" : "badRecordsAlertID", | |
"alertText" : "High incidence of Error Records", | |
"metricId" : "pipeline.batchErrorRecords.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > 100}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1588958611278, | |
"valid" : true | |
}, { | |
"id" : "stageErrorAlertID", | |
"alertText" : "High incidence of Stage Errors", | |
"metricId" : "pipeline.batchErrorMessages.counter", | |
"metricType" : "COUNTER", | |
"metricElement" : "COUNTER_COUNT", | |
"condition" : "${value() > 100}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1588958611278, | |
"valid" : true | |
}, { | |
"id" : "idleGaugeID", | |
"alertText" : "Pipeline is Idle", | |
"metricId" : "RuntimeStatsGauge.gauge", | |
"metricType" : "GAUGE", | |
"metricElement" : "TIME_OF_LAST_RECEIVED_RECORD", | |
"condition" : "${time:now() - value() > 120000}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1588958611278, | |
"valid" : true | |
}, { | |
"id" : "batchTimeAlertID", | |
"alertText" : "Batch taking more time to process", | |
"metricId" : "RuntimeStatsGauge.gauge", | |
"metricType" : "GAUGE", | |
"metricElement" : "CURRENT_BATCH_AGE", | |
"condition" : "${value() > 200}", | |
"sendEmail" : false, | |
"enabled" : false, | |
"timestamp" : 1588958611278, | |
"valid" : true | |
} ], | |
"dataRuleDefinitions" : [ ], | |
"driftRuleDefinitions" : [ ], | |
"uuid" : "71aafe13-1763-42ee-9753-1edf48008dc9", | |
"configuration" : [ { | |
"name" : "emailIDs", | |
"value" : [ ] | |
}, { | |
"name" : "webhookConfigs", | |
"value" : [ ] | |
} ], | |
"ruleIssues" : [ ], | |
"configIssues" : [ ] | |
}, | |
"libraryDefinitions" : null | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment