Created
May 19, 2017 19:04
-
-
Save mattyb149/6c9ac2d0961b8ff38ad716646f45b073 to your computer and use it in GitHub Desktop.
A Groovy script for the NiFi ExecuteScript processor that derives an Avro record schema from the header line of a CSV file and stores it in the flow file's `avro.schema` attribute
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import groovy.json.*

// NiFi ExecuteScript body.
//
// Reads the first line of the incoming flow file, treats it as a CSV header, and
// builds an Avro record schema named 'csv_record' in which every column becomes a
// nullable string field. The schema JSON is stored in the 'avro.schema' attribute
// and the flow file is transferred to REL_SUCCESS; any error transfers it to
// REL_FAILURE instead. The flow file content itself is not modified.

def flowFile = session.get()
if (!flowFile) return

// Column delimiter: ',' unless a 'delimiter' variable with a non-empty value is
// bound to the script.
def delim = ','
try {
    delim = delimiter?.value ?: ','
} catch (MissingPropertyException ignored) {
    // No 'delimiter' binding was supplied to the script; keep the default ','.
}

try {
    def headerLine = null
    // withReader closes the reader and the underlying stream when the closure
    // exits (normally or via exception), so no separate close() is needed.
    // NOTE(review): the header is decoded with the platform default charset —
    // confirm that matches the encoding of the incoming CSV files.
    session.read(flowFile).withReader { reader -> headerLine = reader.readLine() }

    // readLine() returns null for an empty flow file; fail with a clear message
    // rather than a NullPointerException from tokenize().
    if (headerLine == null) {
        throw new IllegalStateException('Flow file is empty: no CSV header line to build a schema from')
    }

    // tokenize() treats each character of delim as a separator and drops empty
    // tokens. Names are trimmed so that a header such as "id, name" does not
    // produce a field name with leading whitespace.
    def schemaFields = headerLine.tokenize(delim).collect { col ->
        ['name': col.trim(), 'type': ['null', 'string']]
    }

    // Literal maps keep insertion order, so the keys are emitted as
    // type, name, fields.
    def json = new JsonBuilder([
        'type'  : 'record',
        'name'  : 'csv_record',
        'fields': schemaFields
    ])

    flowFile = session.putAttribute(flowFile, 'avro.schema', json.toString())
    session.transfer(flowFile, REL_SUCCESS)
} catch (Exception e) {
    log.error('Error processing file, transferring to failure', e)
    session.transfer(flowFile, REL_FAILURE)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment