Skip to content

Instantly share code, notes, and snippets.

@kmizumar
Last active October 20, 2017 04:06
Show Gist options
  • Save kmizumar/9b9b65026722986d7f8c5ca4501bce11 to your computer and use it in GitHub Desktop.
Save kmizumar/9b9b65026722986d7f8c5ca4501bce11 to your computer and use it in GitHub Desktop.
Kafka Source to HDFS Sink + Spark Sink
# Agent "tier1": one Kafka source feeding two channels, each drained by its own sink.
tier1.sources = source1
tier1.sinks = sink1 sink2
tier1.channels = channel1 channel2

# source1: consumes the game_txs topic from Kafka.
# No channel selector is configured, so Flume's default selector applies and
# every event is delivered to both channel1 and channel2.
tier1.sources.source1.type = org.apache.flume.source.kafka.KafkaSource
tier1.sources.source1.kafka.bootstrap.servers = dev-kk1.globalcomrci.com:9092
tier1.sources.source1.kafka.topics = game_txs
# A batch is handed to the channels once it holds batchSize events or
# batchDurationMillis has elapsed, whichever comes first.
tier1.sources.source1.batchSize = 5000
tier1.sources.source1.batchDurationMillis = 2000
tier1.sources.source1.channels = channel1 channel2
# In-memory channels (events are lost if the agent process stops).
# transactionCapacity must be at least as large as the biggest batch a source
# puts in one transaction. source1 uses batchSize = 5000, so the previous value
# of 1000 would make full batches fail with a ChannelException. capacity stays
# at 10000, which is still >= transactionCapacity as the memory channel requires.

# channel1: buffers events for the HDFS sink (sink1).
tier1.channels.channel1.type = memory
tier1.channels.channel1.capacity = 10000
tier1.channels.channel1.transactionCapacity = 5000

# channel2: buffers events for the Spark sink (sink2).
tier1.channels.channel2.type = memory
tier1.channels.channel2.capacity = 10000
tier1.channels.channel2.transactionCapacity = 5000
# sink1: writes events from channel1 to HDFS as plain text files.
tier1.sinks.sink1.type = hdfs
tier1.sinks.sink1.channel = channel1

# Output layout: one directory per Kafka topic and per day.
# %{topic} is taken from the event's "topic" header; %y-%m-%d needs a timestamp.
tier1.sinks.sink1.hdfs.path = /tmp/kafka/%{topic}/%y-%m-%d
tier1.sinks.sink1.hdfs.filePrefix = game_txs
# Resolve the date escapes in hdfs.path from the agent's local clock instead of
# the event's "timestamp" header. (The key was previously misspelled as
# "userLocalTimeStamp", which Flume silently ignores.)
tier1.sinks.sink1.hdfs.useLocalTimeStamp = true

# Write raw event bodies as text, without SequenceFile wrapping or compression.
tier1.sinks.sink1.hdfs.fileType = DataStream
tier1.sinks.sink1.hdfs.writeFormat = Text

# Roll files on time only: every 5 seconds; size- and count-based rolling are
# disabled (0). NOTE(review): a 5 s interval produces many small HDFS files --
# confirm this is intended outside of development/testing.
tier1.sinks.sink1.hdfs.rollInterval = 5
tier1.sinks.sink1.hdfs.rollSize = 0
tier1.sinks.sink1.hdfs.rollCount = 0

# Kerberos credentials for secure HDFS. The $KERBEROS_* placeholders are not
# expanded by Flume itself -- presumably substituted by the deployment tooling
# (e.g. Cloudera Manager); verify before using this file standalone.
tier1.sinks.sink1.hdfs.kerberosPrincipal = $KERBEROS_PRINCIPAL
tier1.sinks.sink1.hdfs.kerberosKeytab = $KERBEROS_KEYTAB
# sink2: Spark Streaming pull-based sink. It buffers events taken from channel2
# and serves them to a Spark receiver that connects to hostname:port below.
# The SparkSink class must be available on the Flume agent's classpath.
tier1.sinks.sink2.type = org.apache.spark.streaming.flume.sink.SparkSink
tier1.sinks.sink2.channel = channel2
tier1.sinks.sink2.hostname = dev-dn3.globalcomrci.com
tier1.sinks.sink2.port = 33333
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment