Created
July 11, 2018 16:23
-
-
Save mattyb149/ee134bbe63a1315eb643a8141ed9ae13 to your computer and use it in GitHub Desktop.
NiFi SplitRecord example that converts CSV to Avro while splitting files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  NiFi flow template "SplitRecord_w_Conversion".

  Contents of the snippet:
    * one CSVReader controller service,
    * one AvroRecordSetWriter controller service,
    * one SplitRecord processor whose Record Reader / Record Writer properties
      reference those two services by id.

  The snippet contains no connections and no upstream or downstream processors.
  NOTE(review): after import, the "failure" and "splits" relationships of
  SplitRecord presumably have to be connected (or auto-terminated) before the
  processor can be started; confirm in the target flow.
-->
<template encoding-version="1.2">
    <description>NiFi SplitRecord example that converts CSV to Avro while splitting files</description>
    <groupId>89f8c6b8-0164-1000-d937-82c55b6fdffd</groupId>
    <name>SplitRecord_w_Conversion</name>
    <snippet>
        <!--
          Controller service: CSVReader.
          Referenced by the SplitRecord processor's "Record Reader" property.
        -->
        <controllerServices>
            <id>350009ab-3f44-32bf-0000-000000000000</id>
            <parentGroupId>fa0118f1-ff03-3030-0000-000000000000</parentGroupId>
            <bundle>
                <artifact>nifi-record-serialization-services-nar</artifact>
                <group>org.apache.nifi</group>
                <version>1.8.0-SNAPSHOT</version>
            </bundle>
            <comments></comments>
            <!-- Property descriptors: one entry per property the service exposes. -->
            <descriptors>
                <entry>
                    <key>schema-access-strategy</key>
                    <value>
                        <name>schema-access-strategy</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-registry</key>
                    <value>
                        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
                        <name>schema-registry</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-name</key>
                    <value>
                        <name>schema-name</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-version</key>
                    <value>
                        <name>schema-version</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-branch</key>
                    <value>
                        <name>schema-branch</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-text</key>
                    <value>
                        <name>schema-text</name>
                    </value>
                </entry>
                <entry>
                    <key>csv-reader-csv-parser</key>
                    <value>
                        <name>csv-reader-csv-parser</name>
                    </value>
                </entry>
                <entry>
                    <key>Date Format</key>
                    <value>
                        <name>Date Format</name>
                    </value>
                </entry>
                <entry>
                    <key>Time Format</key>
                    <value>
                        <name>Time Format</name>
                    </value>
                </entry>
                <entry>
                    <key>Timestamp Format</key>
                    <value>
                        <name>Timestamp Format</name>
                    </value>
                </entry>
                <entry>
                    <key>CSV Format</key>
                    <value>
                        <name>CSV Format</name>
                    </value>
                </entry>
                <entry>
                    <key>Value Separator</key>
                    <value>
                        <name>Value Separator</name>
                    </value>
                </entry>
                <entry>
                    <key>Skip Header Line</key>
                    <value>
                        <name>Skip Header Line</name>
                    </value>
                </entry>
                <entry>
                    <key>ignore-csv-header</key>
                    <value>
                        <name>ignore-csv-header</name>
                    </value>
                </entry>
                <entry>
                    <key>Quote Character</key>
                    <value>
                        <name>Quote Character</name>
                    </value>
                </entry>
                <entry>
                    <key>Escape Character</key>
                    <value>
                        <name>Escape Character</name>
                    </value>
                </entry>
                <entry>
                    <key>Comment Marker</key>
                    <value>
                        <name>Comment Marker</name>
                    </value>
                </entry>
                <entry>
                    <key>Null String</key>
                    <value>
                        <name>Null String</name>
                    </value>
                </entry>
                <entry>
                    <key>Trim Fields</key>
                    <value>
                        <name>Trim Fields</name>
                    </value>
                </entry>
                <entry>
                    <key>csvutils-character-set</key>
                    <value>
                        <name>csvutils-character-set</name>
                    </value>
                </entry>
            </descriptors>
            <name>CSVReader</name>
            <persistsState>false</persistsState>
            <!--
              Property values. An entry with a key but no value element carries no
              explicit setting in this template (presumably the NiFi default applies
              on import; verify against the CSVReader documentation).
            -->
            <properties>
                <entry>
                    <key>schema-access-strategy</key>
                    <value>schema-text-property</value>
                </entry>
                <entry>
                    <key>schema-registry</key>
                </entry>
                <entry>
                    <key>schema-name</key>
                </entry>
                <entry>
                    <key>schema-version</key>
                </entry>
                <entry>
                    <key>schema-branch</key>
                </entry>
                <!--
                  The schema text is taken from the expression ${inferred.avro.schema}.
                  NOTE(review): nothing in this template sets that attribute; it is
                  presumably populated upstream (for example by a schema inference
                  step) before FlowFiles reach SplitRecord. Confirm in the target flow.
                -->
                <entry>
                    <key>schema-text</key>
                    <value>${inferred.avro.schema}</value>
                </entry>
                <entry>
                    <key>csv-reader-csv-parser</key>
                    <value>jackson-csv</value>
                </entry>
                <entry>
                    <key>Date Format</key>
                </entry>
                <entry>
                    <key>Time Format</key>
                </entry>
                <entry>
                    <key>Timestamp Format</key>
                </entry>
                <entry>
                    <key>CSV Format</key>
                </entry>
                <entry>
                    <key>Value Separator</key>
                </entry>
                <!-- Both header-related switches are explicitly set to true. -->
                <entry>
                    <key>Skip Header Line</key>
                    <value>true</value>
                </entry>
                <entry>
                    <key>ignore-csv-header</key>
                    <value>true</value>
                </entry>
                <entry>
                    <key>Quote Character</key>
                </entry>
                <entry>
                    <key>Escape Character</key>
                </entry>
                <entry>
                    <key>Comment Marker</key>
                </entry>
                <entry>
                    <key>Null String</key>
                </entry>
                <entry>
                    <key>Trim Fields</key>
                </entry>
                <entry>
                    <key>csvutils-character-set</key>
                </entry>
            </properties>
            <state>ENABLED</state>
            <type>org.apache.nifi.csv.CSVReader</type>
        </controllerServices>
        <!--
          Controller service: AvroRecordSetWriter.
          Referenced by the SplitRecord processor's "Record Writer" property.
          Every property entry below is left without an explicit value.
        -->
        <controllerServices>
            <id>8cd2c752-582f-327b-0000-000000000000</id>
            <parentGroupId>fa0118f1-ff03-3030-0000-000000000000</parentGroupId>
            <bundle>
                <artifact>nifi-record-serialization-services-nar</artifact>
                <group>org.apache.nifi</group>
                <version>1.8.0-SNAPSHOT</version>
            </bundle>
            <comments></comments>
            <descriptors>
                <entry>
                    <key>Schema Write Strategy</key>
                    <value>
                        <name>Schema Write Strategy</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-access-strategy</key>
                    <value>
                        <name>schema-access-strategy</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-registry</key>
                    <value>
                        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
                        <name>schema-registry</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-name</key>
                    <value>
                        <name>schema-name</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-version</key>
                    <value>
                        <name>schema-version</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-branch</key>
                    <value>
                        <name>schema-branch</name>
                    </value>
                </entry>
                <entry>
                    <key>schema-text</key>
                    <value>
                        <name>schema-text</name>
                    </value>
                </entry>
                <entry>
                    <key>compression-format</key>
                    <value>
                        <name>compression-format</name>
                    </value>
                </entry>
            </descriptors>
            <name>AvroRecordSetWriter</name>
            <persistsState>false</persistsState>
            <properties>
                <entry>
                    <key>Schema Write Strategy</key>
                </entry>
                <entry>
                    <key>schema-access-strategy</key>
                </entry>
                <entry>
                    <key>schema-registry</key>
                </entry>
                <entry>
                    <key>schema-name</key>
                </entry>
                <entry>
                    <key>schema-version</key>
                </entry>
                <entry>
                    <key>schema-branch</key>
                </entry>
                <entry>
                    <key>schema-text</key>
                </entry>
                <entry>
                    <key>compression-format</key>
                </entry>
            </properties>
            <state>ENABLED</state>
            <type>org.apache.nifi.avro.AvroRecordSetWriter</type>
        </controllerServices>
        <!--
          Processor: SplitRecord.
          Reads with the CSVReader service and writes with the AvroRecordSetWriter
          service declared above, with "Records Per Split" set to 10000.
        -->
        <processors>
            <id>8acee69c-23ad-315a-0000-000000000000</id>
            <parentGroupId>fa0118f1-ff03-3030-0000-000000000000</parentGroupId>
            <position>
                <x>0.0</x>
                <y>0.0</y>
            </position>
            <bundle>
                <artifact>nifi-standard-nar</artifact>
                <group>org.apache.nifi</group>
                <version>1.8.0-SNAPSHOT</version>
            </bundle>
            <config>
                <bulletinLevel>WARN</bulletinLevel>
                <comments></comments>
                <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
                <descriptors>
                    <entry>
                        <key>Record Reader</key>
                        <value>
                            <identifiesControllerService>org.apache.nifi.serialization.RecordReaderFactory</identifiesControllerService>
                            <name>Record Reader</name>
                        </value>
                    </entry>
                    <entry>
                        <key>Record Writer</key>
                        <value>
                            <identifiesControllerService>org.apache.nifi.serialization.RecordSetWriterFactory</identifiesControllerService>
                            <name>Record Writer</name>
                        </value>
                    </entry>
                    <entry>
                        <key>Records Per Split</key>
                        <value>
                            <name>Records Per Split</name>
                        </value>
                    </entry>
                </descriptors>
                <executionNode>ALL</executionNode>
                <lossTolerant>false</lossTolerant>
                <penaltyDuration>30 sec</penaltyDuration>
                <properties>
                    <!-- Value is the id of the CSVReader controller service in this snippet. -->
                    <entry>
                        <key>Record Reader</key>
                        <value>350009ab-3f44-32bf-0000-000000000000</value>
                    </entry>
                    <!-- Value is the id of the AvroRecordSetWriter controller service in this snippet. -->
                    <entry>
                        <key>Record Writer</key>
                        <value>8cd2c752-582f-327b-0000-000000000000</value>
                    </entry>
                    <entry>
                        <key>Records Per Split</key>
                        <value>10000</value>
                    </entry>
                </properties>
                <runDurationMillis>0</runDurationMillis>
                <schedulingPeriod>0 sec</schedulingPeriod>
                <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
                <yieldDuration>1 sec</yieldDuration>
            </config>
            <executionNodeRestricted>false</executionNodeRestricted>
            <name>SplitRecord</name>
            <!-- Only "original" is auto-terminated; "failure" and "splits" are not. -->
            <relationships>
                <autoTerminate>false</autoTerminate>
                <name>failure</name>
            </relationships>
            <relationships>
                <autoTerminate>true</autoTerminate>
                <name>original</name>
            </relationships>
            <relationships>
                <autoTerminate>false</autoTerminate>
                <name>splits</name>
            </relationships>
            <state>STOPPED</state>
            <style/>
            <type>org.apache.nifi.processors.standard.SplitRecord</type>
        </processors>
    </snippet>
    <timestamp>07/11/2018 12:20:51 EDT</timestamp>
</template>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment