Created
October 10, 2016 16:50
-
-
Save mattyb149/8f2c133af5bc864c4a29bc5cd4148e9e to your computer and use it in GitHub Desktop.
Template to look up table names from a file and match them against ListDatabaseTables, to filter which tables to send to ExecuteSQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" ?> | |
<template encoding-version="1.0"> | |
<description>This template uses ExecuteScript and Groovy to read in (from a file) a list of tables to fetch. If the incoming table name (from ListDatabaseTables) is in that list, the flow file is sent to success (for use by ExecuteSQL); otherwise it is sent to failure.</description> | |
<groupId>af725e75-0157-1000-3844-d085884a56db</groupId> | |
<name>LookupFilter</name> | |
<snippet> | |
<!-- Connection: routes the "success" relationship of the ListDatabaseTables
     processor (af731ca8) into the ExecuteScript processor (af727e79). -->
<connections> | |
<id>af73282a-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<!-- Destination: the processor named ExecuteScript. -->
<destination> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af727e79-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>success</selectedRelationships> | |
<!-- Source: the processor named ListDatabaseTables. -->
<source> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af731ca8-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<!-- Connection: routes the "failure" relationship of the ExecuteScript
     processor (af727e79) into the LogAttribute processor named
     "Blacklisted tables" (af7463eb). -->
<connections> | |
<id>af74752e-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<!-- Destination: the LogAttribute processor named "Blacklisted tables". -->
<destination> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af7463eb-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>failure</selectedRelationships> | |
<!-- Source: the processor named ExecuteScript. -->
<source> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af727e79-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<!-- Connection: routes the "success" relationship of the ExecuteScript
     processor (af727e79) into the ExecuteSQL processor named
     "Whitelisted tables" (af7b00b9). -->
<connections> | |
<id>af7b4775-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<!-- Destination: the ExecuteSQL processor named "Whitelisted tables". -->
<destination> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af7b00b9-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>success</selectedRelationships> | |
<!-- Source: the processor named ExecuteScript. -->
<source> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af727e79-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<!-- Connection: routes the "success" relationship of the ExecuteSQL processor
     named "Whitelisted tables" (af7b00b9) into the LogAttribute processor
     named "Do other stuff" (af729a11). -->
<connections> | |
<id>af7b5d29-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> | |
<backPressureObjectThreshold>10000</backPressureObjectThreshold> | |
<!-- Destination: the LogAttribute processor named "Do other stuff". -->
<destination> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af729a11-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</destination> | |
<flowFileExpiration>0 sec</flowFileExpiration> | |
<labelIndex>1</labelIndex> | |
<name></name> | |
<selectedRelationships>success</selectedRelationships> | |
<!-- Source: the ExecuteSQL processor named "Whitelisted tables". -->
<source> | |
<groupId>af725e75-0157-1000-0000-000000000000</groupId> | |
<id>af7b00b9-0157-1000-0000-000000000000</id> | |
<type>PROCESSOR</type> | |
</source> | |
<zIndex>0</zIndex> | |
</connections> | |
<!-- Controller service "MySQLConnectionPool" (type DBCPConnectionPool).
     Its id (b28b426b) is the value of the connection-pool property on both
     the ListDatabaseTables and the ExecuteSQL processors in this template. -->
<controllerServices> | |
<id>b28b426b-0156-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<comments></comments> | |
<!-- Descriptors: one entry per property the service declares. -->
<descriptors> | |
<entry> | |
<key>Database Connection URL</key> | |
<value> | |
<name>Database Connection URL</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Database Driver Class Name</key> | |
<value> | |
<name>Database Driver Class Name</name> | |
</value> | |
</entry> | |
<entry> | |
<key>database-driver-locations</key> | |
<value> | |
<name>database-driver-locations</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Database User</key> | |
<value> | |
<name>Database User</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Password</key> | |
<value> | |
<name>Password</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Max Wait Time</key> | |
<value> | |
<name>Max Wait Time</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Max Total Connections</key> | |
<value> | |
<name>Max Total Connections</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Validation-query</key> | |
<value> | |
<name>Validation-query</name> | |
</value> | |
</entry> | |
</descriptors> | |
<name>MySQLConnectionPool</name> | |
<!-- Property values. NOTE(review): the connection URL, the driver jar
     location and the user below are specific to the original author's
     environment and will need to be changed before this service is used
     elsewhere. -->
<properties> | |
<entry> | |
<key>Database Connection URL</key> | |
<value>jdbc:mysql://192.168.99.100:32768/test_schema</value> | |
</entry> | |
<entry> | |
<key>Database Driver Class Name</key> | |
<value>com.mysql.jdbc.Driver</value> | |
</entry> | |
<entry> | |
<key>database-driver-locations</key> | |
<value>file:///Users/mburgess/jdbc_drivers/mysql-connector-java-5.1.38-bin.jar</value> | |
</entry> | |
<entry> | |
<key>Database User</key> | |
<value>root</value> | |
</entry> | |
<!-- No value is stored for Password in this template. -->
<entry> | |
<key>Password</key> | |
</entry> | |
<entry> | |
<key>Max Wait Time</key> | |
<value>500 millis</value> | |
</entry> | |
<entry> | |
<key>Max Total Connections</key> | |
<value>8</value> | |
</entry> | |
<!-- No validation query is configured. -->
<entry> | |
<key>Validation-query</key> | |
</entry> | |
</properties> | |
<state>ENABLED</state> | |
<type>org.apache.nifi.dbcp.DBCPConnectionPool</type> | |
</controllerServices> | |
<!-- Processor "ExecuteScript" (af727e79): the table-name filter.
     Receives flow files from ListDatabaseTables and runs the Groovy script in
     "Script Body", which routes each flow file to "success" when its
     db.table.name attribute is listed in the filter file and to "failure"
     otherwise. "success" is connected to the ExecuteSQL processor named
     "Whitelisted tables"; "failure" to the LogAttribute processor named
     "Blacklisted tables". -->
<processors> | |
<id>af727e79-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>574.0</x> | |
<y>0.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Script Engine</key> | |
<value> | |
<name>Script Engine</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Script File</key> | |
<value> | |
<name>Script File</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Script Body</key> | |
<value> | |
<name>Script Body</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Module Directory</key> | |
<value> | |
<name>Module Directory</name> | |
</value> | |
</entry> | |
</descriptors> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<properties> | |
<entry> | |
<key>Script Engine</key> | |
<value>Groovy</value> | |
</entry> | |
<entry> | |
<key>Script File</key> | |
</entry> | |
<!-- The script is written without any literal less-than characters so it
     can sit in XML text unescaped. The filter file path is specific to the
     original author's machine and must be adjusted. -->
<entry> | |
<key>Script Body</key> | |
<value>def flowFile = session.get()
if (!flowFile) return
// Load the allowed table names (one per line), trimming whitespace and
// dropping blank lines so stray spaces in the file cannot break matching.
def tableFilter = new File('/Users/mburgess/filter.txt').readLines().collect { it.trim() }.findAll { it } as Set
// Listed tables go to success (on to ExecuteSQL); everything else goes to failure.
session.transfer(flowFile, tableFilter.contains(flowFile.getAttribute('db.table.name')) ? REL_SUCCESS : REL_FAILURE)</value> | |
</entry> | |
<entry> | |
<key>Module Directory</key> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>ExecuteScript</name> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>failure</name> | |
</relationships> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.script.ExecuteScript</type> | |
</processors> | |
<!-- Processor "Do other stuff" (af729a11), type LogAttribute.
     Destination of the "success" connection from the ExecuteSQL processor
     named "Whitelisted tables"; its own "success" relationship is
     auto-terminated. -->
<processors> | |
<id>af729a11-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>576.0</x> | |
<y>466.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Log Level</key> | |
<value> | |
<name>Log Level</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value> | |
<name>Log Payload</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
<value> | |
<name>Attributes to Log</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
<value> | |
<name>Attributes to Ignore</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
<value> | |
<name>Log prefix</name> | |
</value> | |
</entry> | |
</descriptors> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<!-- Logs at level "info" with payload logging off; the attribute
     include/ignore lists and the log prefix are left unset. -->
<properties> | |
<entry> | |
<key>Log Level</key> | |
<value>info</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value>false</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>Do other stuff</name> | |
<relationships> | |
<autoTerminate>true</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.LogAttribute</type> | |
</processors> | |
<!-- Processor "ListDatabaseTables" (af731ca8): the start of the flow.
     Uses the MySQLConnectionPool controller service (b28b426b); its "success"
     relationship is connected to the ExecuteScript processor. -->
<processors> | |
<id>af731ca8-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>0.0</x> | |
<y>1.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>list-db-tables-db-connection</key> | |
<value> | |
<identifiesControllerService>org.apache.nifi.dbcp.DBCPService</identifiesControllerService> | |
<name>list-db-tables-db-connection</name> | |
</value> | |
</entry> | |
<entry> | |
<key>list-db-tables-catalog</key> | |
<value> | |
<name>list-db-tables-catalog</name> | |
</value> | |
</entry> | |
<entry> | |
<key>list-db-tables-schema-pattern</key> | |
<value> | |
<name>list-db-tables-schema-pattern</name> | |
</value> | |
</entry> | |
<entry> | |
<key>list-db-tables-name-pattern</key> | |
<value> | |
<name>list-db-tables-name-pattern</name> | |
</value> | |
</entry> | |
<entry> | |
<key>list-db-tables-types</key> | |
<value> | |
<name>list-db-tables-types</name> | |
</value> | |
</entry> | |
<entry> | |
<key>list-db-include-count</key> | |
<value> | |
<name>list-db-include-count</name> | |
</value> | |
</entry> | |
<entry> | |
<key>list-db-refresh-interval</key> | |
<value> | |
<name>list-db-refresh-interval</name> | |
</value> | |
</entry> | |
</descriptors> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<!-- Catalog, schema pattern and name pattern are left unset; table types
     is set to TABLE and the include-count option is off. -->
<properties> | |
<!-- Value is the id of the MySQLConnectionPool controller service. -->
<entry> | |
<key>list-db-tables-db-connection</key> | |
<value>b28b426b-0156-1000-0000-000000000000</value> | |
</entry> | |
<entry> | |
<key>list-db-tables-catalog</key> | |
</entry> | |
<entry> | |
<key>list-db-tables-schema-pattern</key> | |
</entry> | |
<entry> | |
<key>list-db-tables-name-pattern</key> | |
</entry> | |
<entry> | |
<key>list-db-tables-types</key> | |
<value>TABLE</value> | |
</entry> | |
<entry> | |
<key>list-db-include-count</key> | |
<value>false</value> | |
</entry> | |
<entry> | |
<key>list-db-refresh-interval</key> | |
<value>0 sec</value> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>ListDatabaseTables</name> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.ListDatabaseTables</type> | |
</processors> | |
<!-- Processor "Blacklisted tables" (af7463eb), type LogAttribute.
     Destination of the "failure" connection from the ExecuteScript processor,
     i.e. flow files whose table name was not found in the filter file; its
     own "success" relationship is auto-terminated. -->
<processors> | |
<id>af7463eb-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>23.0</x> | |
<y>251.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Log Level</key> | |
<value> | |
<name>Log Level</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value> | |
<name>Log Payload</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
<value> | |
<name>Attributes to Log</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
<value> | |
<name>Attributes to Ignore</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
<value> | |
<name>Log prefix</name> | |
</value> | |
</entry> | |
</descriptors> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<!-- Logs at level "info" with payload logging off; the attribute
     include/ignore lists and the log prefix are left unset. -->
<properties> | |
<entry> | |
<key>Log Level</key> | |
<value>info</value> | |
</entry> | |
<entry> | |
<key>Log Payload</key> | |
<value>false</value> | |
</entry> | |
<entry> | |
<key>Attributes to Log</key> | |
</entry> | |
<entry> | |
<key>Attributes to Ignore</key> | |
</entry> | |
<entry> | |
<key>Log prefix</key> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>Blacklisted tables</name> | |
<relationships> | |
<autoTerminate>true</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.LogAttribute</type> | |
</processors> | |
<!-- Processor "Whitelisted tables" (af7b00b9), type ExecuteSQL.
     Destination of the "success" connection from the ExecuteScript processor.
     Uses the MySQLConnectionPool controller service (b28b426b). "failure" is
     auto-terminated; "success" is connected to the LogAttribute processor
     named "Do other stuff". -->
<processors> | |
<id>af7b00b9-0157-1000-0000-000000000000</id> | |
<parentGroupId>af725e75-0157-1000-0000-000000000000</parentGroupId> | |
<position> | |
<x>574.0</x> | |
<y>254.0</y> | |
</position> | |
<config> | |
<bulletinLevel>WARN</bulletinLevel> | |
<comments></comments> | |
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> | |
<descriptors> | |
<entry> | |
<key>Database Connection Pooling Service</key> | |
<value> | |
<identifiesControllerService>org.apache.nifi.dbcp.DBCPService</identifiesControllerService> | |
<name>Database Connection Pooling Service</name> | |
</value> | |
</entry> | |
<entry> | |
<key>SQL select query</key> | |
<value> | |
<name>SQL select query</name> | |
</value> | |
</entry> | |
<entry> | |
<key>Max Wait Time</key> | |
<value> | |
<name>Max Wait Time</name> | |
</value> | |
</entry> | |
<entry> | |
<key>dbf-normalize</key> | |
<value> | |
<name>dbf-normalize</name> | |
</value> | |
</entry> | |
</descriptors> | |
<lossTolerant>false</lossTolerant> | |
<penaltyDuration>30 sec</penaltyDuration> | |
<properties> | |
<!-- Value is the id of the MySQLConnectionPool controller service. -->
<entry> | |
<key>Database Connection Pooling Service</key> | |
<value>b28b426b-0156-1000-0000-000000000000</value> | |
</entry> | |
<!-- The query references the db.table.name attribute, the same attribute
     the ExecuteScript filter checks, so the table selected is the one named
     on the incoming flow file. -->
<entry> | |
<key>SQL select query</key> | |
<value>select * from ${db.table.name}</value> | |
</entry> | |
<entry> | |
<key>Max Wait Time</key> | |
<value>0 seconds</value> | |
</entry> | |
<entry> | |
<key>dbf-normalize</key> | |
<value>false</value> | |
</entry> | |
</properties> | |
<runDurationMillis>0</runDurationMillis> | |
<schedulingPeriod>0 sec</schedulingPeriod> | |
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> | |
<yieldDuration>1 sec</yieldDuration> | |
</config> | |
<name>Whitelisted tables</name> | |
<relationships> | |
<autoTerminate>true</autoTerminate> | |
<name>failure</name> | |
</relationships> | |
<relationships> | |
<autoTerminate>false</autoTerminate> | |
<name>success</name> | |
</relationships> | |
<style></style> | |
<type>org.apache.nifi.processors.standard.ExecuteSQL</type> | |
</processors> | |
</snippet> | |
<timestamp>10/10/2016 12:49:12 EDT</timestamp> | |
</template> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment