Created
February 17, 2017 06:15
-
-
Save alopresto/09b30565c9c2632658bc2423a6b3a752 to your computer and use it in GitHub Desktop.
Generates a flowfile with example CSV multiline text with newlines inside quoted strings. Replaces the newlines inside quotes with an escaped character, then splits each line. Includes LogAttributes processors to debug.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" ?> | |
<template encoding-version="1.0"> | |
<description>Generates a flowfile with example CSV multiline text with newlines inside quoted strings. Replaces the newlines inside quotes with an escaped character, then splits each line. Includes LogAttributes processors to debug. </description> | |
<groupId>4aa66ece-015a-1000-4348-efd94f63c4df</groupId> | |
<name>ReplaceText and SplitText</name> | |
<snippet> | |
<connections> | |
<id>015a1001-919e-1aa8-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<destination> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>015a1000-919e-1aa8-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>success</selectedRelationships> | |
<source> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>4aa731e3-015a-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<connections> | |
<id>015a1002-919e-1aa8-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<destination> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>4aa7e7dc-015a-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>success</selectedRelationships> | |
<source> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>015a1000-919e-1aa8-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<connections> | |
<id>4aa7ca80-015a-1000-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<destination> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>4aa731e3-015a-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>success</selectedRelationships> | |
<source> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>4aa69dbb-015a-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<connections> | |
<id>4aa8e1bb-015a-1000-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<destination> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>4aa8919e-015a-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>splits</selectedRelationships> | |
<source> | |
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId> | |
<id>4aa7e7dc-015a-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<processors> | |
<id>015a1000-919e-1aa8-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>7.0</x> | |
<y>410.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Log Level</key> | |
<value> | |
<name>Log Level</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value> | |
<name>Log Payload</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
<value> | |
<name>Attributes to Log</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
<value> | |
<name>Attributes to Ignore</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
<value> | |
<name>Log prefix</name> | |
</value> | |
</entry> | |
</descriptors> | |
<executionNode>ALL</executionNode> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<properties> | |
<entry> | |
<key>Log Level</key> | |
<value>info</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value>true</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>LogAttribute</name> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.LogAttribute</type> | |
</processors> | |
<processors> | |
<id>4aa69dbb-015a-1000-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>0.0</x> | |
<y>0.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>File Size</key> | |
<value> | |
<name>File Size</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Batch Size</key> | |
<value> | |
<name>Batch Size</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Data Format</key> | |
<value> | |
<name>Data Format</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Unique FlowFiles</key> | |
<value> | |
<name>Unique FlowFiles</name> | |
</value> | |
</entry> | |
<entry> | |
<key>generate-ff-custom-text</key> | |
<value> | |
<name>generate-ff-custom-text</name> | |
</value> | |
</entry> | |
</descriptors> | |
<executionNode>ALL</executionNode> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<properties> | |
<entry> | |
<key>File Size</key> | |
<value>0B</value> | |
</entry> | |
<entry> | |
<key>Batch Size</key> | |
<value>1</value> | |
</entry> | |
<entry> | |
<key>Data Format</key> | |
<value>Text</value> | |
</entry> | |
<entry> | |
<key>Unique FlowFiles</key> | |
<value>false</value> | |
</entry> | |
<entry> | |
<key>generate-ff-custom-text</key> | |
<value>No,NAme,ID,Description | |
1,Stack,232,"ABCDEFGHIJKLMNO | |
-- Jiuaslkm asdasdasd" | |
2,Queue,454,"PQRSTUVWXYZ | |
-- Other words here"</value> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>2 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>GenerateFlowFile</name> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type> | |
</processors> | |
<processors> | |
<id>4aa731e3-015a-1000-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>1.0</x> | |
<y>228.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Regular Expression</key> | |
<value> | |
<name>Regular Expression</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Replacement Value</key> | |
<value> | |
<name>Replacement Value</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Character Set</key> | |
<value> | |
<name>Character Set</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Maximum Buffer Size</key> | |
<value> | |
<name>Maximum Buffer Size</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Replacement Strategy</key> | |
<value> | |
<name>Replacement Strategy</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Evaluation Mode</key> | |
<value> | |
<name>Evaluation Mode</name> | |
</value> | |
</entry> | |
</descriptors> | |
<executionNode>ALL</executionNode> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<properties> | |
<entry> | |
<key>Regular Expression</key> | |
<value>"(.*?)(\n)(.*?)"</value> | |
</entry> | |
<entry> | |
<key>Replacement Value</key> | |
<value>"$1\\n$3"</value> | |
</entry> | |
<entry> | |
<key>Character Set</key> | |
<value>UTF-8</value> | |
</entry> | |
<entry> | |
<key>Maximum Buffer Size</key> | |
<value>1 MB</value> | |
</entry> | |
<entry> | |
<key>Replacement Strategy</key> | |
<value>Regex Replace</value> | |
</entry> | |
<entry> | |
<key>Evaluation Mode</key> | |
<value>Entire text</value> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>ReplaceText</name> | |
<relationships> | |
<autoTerminate>true</autoTerminate> | |
<name>failure</name> | |
</relationships> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.ReplaceText</type> | |
</processors> | |
<processors> | |
<id>4aa7e7dc-015a-1000-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>485.0</x> | |
<y>408.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Line Split Count</key> | |
<value> | |
<name>Line Split Count</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Maximum Fragment Size</key> | |
<value> | |
<name>Maximum Fragment Size</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Header Line Count</key> | |
<value> | |
<name>Header Line Count</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Header Line Marker Characters</key> | |
<value> | |
<name>Header Line Marker Characters</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Remove Trailing Newlines</key> | |
<value> | |
<name>Remove Trailing Newlines</name> | |
</value> | |
</entry> | |
</descriptors> | |
<executionNode>ALL</executionNode> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<properties> | |
<entry> | |
<key>Line Split Count</key> | |
<value>1</value> | |
</entry> | |
<entry> | |
<key>Maximum Fragment Size</key> | |
</entry> | |
<entry> | |
<key>Header Line Count</key> | |
<value>1</value> | |
</entry> | |
<entry> | |
<key>Header Line Marker Characters</key> | |
</entry> | |
<entry> | |
<key>Remove Trailing Newlines</key> | |
<value>true</value> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>SplitText</name> | |
<relationships> | |
<autoTerminate>true</autoTerminate> | |
<name>failure</name> | |
</relationships> | |
<relationships> | |
<autoTerminate>true</autoTerminate> | |
<name>original</name> | |
</relationships> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>splits</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.SplitText</type> | |
</processors> | |
<processors> | |
<id>4aa8919e-015a-1000-0000-000000000000</id> | |
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>3.0</x> | |
<y>669.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Log Level</key> | |
<value> | |
<name>Log Level</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value> | |
<name>Log Payload</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
<value> | |
<name>Attributes to Log</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
<value> | |
<name>Attributes to Ignore</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
<value> | |
<name>Log prefix</name> | |
</value> | |
</entry> | |
</descriptors> | |
<executionNode>ALL</executionNode> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<properties> | |
<entry> | |
<key>Log Level</key> | |
<value>info</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value>true</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>LogAttribute</name> | |
<relationships> | |
<autoTerminate>true</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.LogAttribute</type> | |
</processors> | |
</snippet> | |
<timestamp>02/16/2017 22:15:07 PST</timestamp> | |
</template> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment