Created February 18, 2017 00:08
-
-
Save alopresto/2f26863afee6cc3109c1a4c5d33c1234 to your computer and use it in GitHub Desktop.
Lists files from a directory, then updates the attributes so a processor can run a command against "pairs" of files (i.e. sam.txt and sam.txt.gz) from a single flowfile.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" ?>
<template encoding-version="1.0">
<description>Lists files from a directory, then updates the attributes so a processor can run a command against "pairs" of files (i.e. sam.txt and sam.txt.gz) from a single flowfile. </description>
<groupId>4e3e7f99-015a-1000-b05e-27a45fe36d70</groupId>
<name>ListFiles</name>
<snippet>
<!-- Connection: carries the "success" relationship from the LogAttribute processor
     with id 4e6fdfe4-015a-1000-0000-000000000000 to the UpdateAttribute processor
     with id 015a1011-dfe4-1e6f-0000-000000000000. -->
<connections>
  <id>015a1012-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <!-- Back pressure thresholds for this queue: 1 GB of data or 10000 flowfiles. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1011-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>4e6fdfe4-015a-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: carries the "success" relationship from the UpdateAttribute processor
     with id 015a1011-dfe4-1e6f-0000-000000000000 to the ExecuteStreamCommand processor
     with id 015a1028-dfe4-1e6f-0000-000000000000. -->
<connections>
  <id>015a1029-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <!-- Back pressure thresholds for this queue: 1 GB of data or 10000 flowfiles. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1028-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1011-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: carries the "output stream" relationship from the ExecuteStreamCommand
     processor with id 015a1028-dfe4-1e6f-0000-000000000000 to the LogAttribute processor
     with id 015a1005-dfe4-1e6f-0000-000000000000. -->
<connections>
  <id>015a102a-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <!-- Back pressure thresholds for this queue: 1 GB of data or 10000 flowfiles. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1005-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>output stream</selectedRelationships>
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1028-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: carries the "success" relationship from the ListFile processor
     with id 4e6f0fcf-015a-1000-0000-000000000000 to the LogAttribute processor
     with id 4e6fdfe4-015a-1000-0000-000000000000. -->
<connections>
  <id>4e7020c6-015a-1000-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <!-- Back pressure thresholds for this queue: 1 GB of data or 10000 flowfiles. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>4e6fdfe4-015a-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>4e6f0fcf-015a-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Canvas label (annotation only). Its rectangle spans roughly x 0 to 540 and
     y 572 to 796, which contains the position of the ExecuteStreamCommand
     processor (x 157.2, y 624.0). -->
<labels>
  <id>015a102b-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>0.0</x>
    <y>571.6000076293946</y>
  </position>
  <height>224.3999481201172</height>
  <label>You could also fetch the files here, etc.</label>
  <style>
    <entry>
      <key>font-size</key>
      <value>12px</value>
    </entry>
  </style>
  <width>539.9999389648438</width>
</labels>
<!-- LogAttribute processor that receives the "output stream" relationship of
     ExecuteStreamCommand (see the connection whose destination id is 015a1005-...).
     Its "success" relationship is auto-terminated, so this is the end of the flow. -->
<processors>
  <id>015a1005-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>856.799934387207</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property name that appears under <properties>. -->
    <descriptors>
      <entry>
        <key>Log Level</key>
        <value>
          <name>Log Level</name>
        </value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>
          <name>Log Payload</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
        <value>
          <name>Attributes to Log</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
        <value>
          <name>Attributes to Ignore</name>
        </value>
      </entry>
      <entry>
        <key>Log prefix</key>
        <value>
          <name>Log prefix</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Entries with a key but no value element are properties left unset. -->
    <properties>
      <entry>
        <key>Log Level</key>
        <value>info</value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
      </entry>
      <entry>
        <key>Log prefix</key>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>LogAttribute</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<!-- UpdateAttribute processor: adds two user-defined attributes to each flowfile,
     image_filename = ${filename} and gzip_filename = ${filename}.gz, so that one
     flowfile names both members of a file "pair". -->
<processors>
  <id>015a1011-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>405.59998321533203</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property name that appears under <properties>. -->
    <descriptors>
      <entry>
        <key>Delete Attributes Expression</key>
        <value>
          <name>Delete Attributes Expression</name>
        </value>
      </entry>
      <entry>
        <key>Store State</key>
        <value>
          <name>Store State</name>
        </value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
        <value>
          <name>Stateful Variables Initial Value</name>
        </value>
      </entry>
      <entry>
        <key>gzip_filename</key>
        <value>
          <name>gzip_filename</name>
        </value>
      </entry>
      <entry>
        <key>image_filename</key>
        <value>
          <name>image_filename</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Entries with a key but no value element are properties left unset. -->
    <properties>
      <entry>
        <key>Delete Attributes Expression</key>
      </entry>
      <entry>
        <key>Store State</key>
        <value>Do not store state</value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
      </entry>
      <!-- NOTE(review): ${filename} is presumably the bare file name without its
           directory. A command that has to open the files would likely need the
           directory prepended as well (for example from an attribute written by
           ListFile); confirm before replacing the demo command. -->
      <entry>
        <key>gzip_filename</key>
        <value>${filename}.gz</value>
      </entry>
      <entry>
        <key>image_filename</key>
        <value>${filename}</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>UpdateAttribute</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<!-- ExecuteStreamCommand processor: runs the command named in "Command Path" (echo)
     with the image_filename / gzip_filename attributes of the incoming flowfile as
     arguments. The "output stream" relationship is connected onward; "original" is
     auto-terminated. -->
<processors>
  <id>015a1028-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>624.0000076293945</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property name that appears under <properties>. -->
    <descriptors>
      <entry>
        <key>Command Arguments</key>
        <value>
          <name>Command Arguments</name>
        </value>
      </entry>
      <entry>
        <key>Command Path</key>
        <value>
          <name>Command Path</name>
        </value>
      </entry>
      <entry>
        <key>Ignore STDIN</key>
        <value>
          <name>Ignore STDIN</name>
        </value>
      </entry>
      <entry>
        <key>Working Directory</key>
        <value>
          <name>Working Directory</name>
        </value>
      </entry>
      <entry>
        <key>Argument Delimiter</key>
        <value>
          <name>Argument Delimiter</name>
        </value>
      </entry>
      <entry>
        <key>Output Destination Attribute</key>
        <value>
          <name>Output Destination Attribute</name>
        </value>
      </entry>
      <entry>
        <key>Max Attribute Length</key>
        <value>
          <name>Max Attribute Length</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Entries with a key but no value element are properties left unset. -->
    <properties>
      <!-- The argument string is split on the "Argument Delimiter" configured below
           (a semicolon), so every flag and every value is its own semicolon-separated
           token. A space-separated string would reach the command as one single
           argument; echo prints the same text either way, but a real command would
           not see separate flags and file names. -->
      <entry>
        <key>Command Arguments</key>
        <value>--image;${image_filename};--gzip;${gzip_filename}</value>
      </entry>
      <!-- Demo command; replace with the real executable. -->
      <entry>
        <key>Command Path</key>
        <value>echo</value>
      </entry>
      <entry>
        <key>Ignore STDIN</key>
        <value>false</value>
      </entry>
      <entry>
        <key>Working Directory</key>
      </entry>
      <entry>
        <key>Argument Delimiter</key>
        <value>;</value>
      </entry>
      <entry>
        <key>Output Destination Attribute</key>
      </entry>
      <entry>
        <key>Max Attribute Length</key>
        <value>256</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>ExecuteStreamCommand</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>original</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>output stream</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.ExecuteStreamCommand</type>
</processors>
<!-- ListFile processor named "List Images (Not Gzipped)": the entry point of the flow.
     Its "success" relationship feeds the LogAttribute processor with id 4e6fdfe4-...
     (see the connection whose source id is 4e6f0fcf-...). -->
<processors>
  <id>4e6f0fcf-015a-1000-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>0.0</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property name that appears under <properties>. -->
    <descriptors>
      <entry>
        <key>Input Directory</key>
        <value>
          <name>Input Directory</name>
        </value>
      </entry>
      <entry>
        <key>Recurse Subdirectories</key>
        <value>
          <name>Recurse Subdirectories</name>
        </value>
      </entry>
      <entry>
        <key>Input Directory Location</key>
        <value>
          <name>Input Directory Location</name>
        </value>
      </entry>
      <entry>
        <key>File Filter</key>
        <value>
          <name>File Filter</name>
        </value>
      </entry>
      <entry>
        <key>Path Filter</key>
        <value>
          <name>Path Filter</name>
        </value>
      </entry>
      <entry>
        <key>Minimum File Age</key>
        <value>
          <name>Minimum File Age</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Age</key>
        <value>
          <name>Maximum File Age</name>
        </value>
      </entry>
      <entry>
        <key>Minimum File Size</key>
        <value>
          <name>Minimum File Size</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Size</key>
        <value>
          <name>Maximum File Size</name>
        </value>
      </entry>
      <entry>
        <key>Ignore Hidden Files</key>
        <value>
          <name>Ignore Hidden Files</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Entries with a key but no value element are properties left unset. -->
    <properties>
      <!-- Absolute path on the template author's machine; change it to a directory
           that exists on the importing NiFi host. -->
      <entry>
        <key>Input Directory</key>
        <value>/Users/alopresto/Workspace/scratch/listfiles</value>
      </entry>
      <entry>
        <key>Recurse Subdirectories</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Input Directory Location</key>
        <value>Local</value>
      </entry>
      <!-- Regex: any run of non-dot characters followed by ".txt". Assuming the
           pattern must match the whole file name (confirm against the ListFile
           documentation), "sam.txt" is listed and "sam.txt.gz" is not, so only the
           non-gzipped member of each pair produces a flowfile. -->
      <entry>
        <key>File Filter</key>
        <value>[^\.]*\.txt</value>
      </entry>
      <entry>
        <key>Path Filter</key>
      </entry>
      <entry>
        <key>Minimum File Age</key>
        <value>0 sec</value>
      </entry>
      <entry>
        <key>Maximum File Age</key>
      </entry>
      <entry>
        <key>Minimum File Size</key>
        <value>0 B</value>
      </entry>
      <entry>
        <key>Maximum File Size</key>
      </entry>
      <entry>
        <key>Ignore Hidden Files</key>
        <value>true</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>List Images (Not Gzipped)</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.ListFile</type>
</processors>
<!-- LogAttribute processor that sits between ListFile and UpdateAttribute (see the
     connections whose destination and source id is 4e6fdfe4-...). Its "success"
     relationship is not auto-terminated because it is connected onward. -->
<processors>
  <id>4e6fdfe4-015a-1000-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>202.79996490478516</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property name that appears under <properties>. -->
    <descriptors>
      <entry>
        <key>Log Level</key>
        <value>
          <name>Log Level</name>
        </value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>
          <name>Log Payload</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
        <value>
          <name>Attributes to Log</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
        <value>
          <name>Attributes to Ignore</name>
        </value>
      </entry>
      <entry>
        <key>Log prefix</key>
        <value>
          <name>Log prefix</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Entries with a key but no value element are properties left unset. -->
    <properties>
      <entry>
        <key>Log Level</key>
        <value>info</value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
      </entry>
      <entry>
        <key>Log prefix</key>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>LogAttribute</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
</snippet>
<timestamp>02/17/2017 16:07:39 PST</timestamp>
</template>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment