Last active
August 30, 2016 16:16
-
-
Save ibuenros/3cb4c9293edc7f43ab41c0d0d59cb586 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gobblin job definition: pull page revisions from Wikipedia and write them to Kafka.
# NOTE: property values run to the end of the line, so keep comments on their own
# lines and do not leave stray trailing characters after a value.

# Job metadata
job.name=PullFromWikipediaToKafka
job.group=Wikipedia
job.description=Pull from Wikipedia and write to Kafka

# Schedule
# Cron expression with a leading seconds field (Quartz-style): second 0 of every
# 2nd minute. NOTE(review): presumably evaluated by Gobblin's job scheduler - confirm.
job.schedule=0 0/2 * * * ?

# Source configuration
extract.namespace=gobblin.example.wikipedia
source.class=gobblin.example.wikipedia.WikipediaSource

# Wikipedia source configuration
# Comma-separated list of page titles to pull.
source.page.titles=LinkedIn,Wikipedia:Sandbox
gobblin.wikipediaSource.maxRevisionsPerPage=20
wikipedia.api.rooturl=https://en.wikipedia.org/w/api.php
# Avro record schema (single-line JSON); every field is a nullable union.
wikipedia.avro.schema={"namespace": "example.wikipedia.avro","type": "record","name": "WikipediaArticle","fields": [{"name": "pageid", "type": ["double", "null"]},{"name": "title", "type": ["string", "null"]},{"name": "user", "type": ["string", "null"]},{"name": "anon", "type": ["string", "null"]},{"name": "userid", "type": ["double", "null"]},{"name": "timestamp", "type": ["string", "null"]},{"name": "size", "type": ["double", "null"]},{"name": "contentformat", "type": ["string", "null"]},{"name": "contentmodel", "type": ["string", "null"]}]}
# ISO-8601 period syntax (P10D = 10 days).
# NOTE(review): presumably how far back the first run looks for revisions - confirm.
wikipedia.source.bootstrap.lookback=P10D

# Converter configuration
converter.classes=gobblin.converter.json.JsonToStringConverter

# Writer configuration
writer.builder.class=gobblin.kafka.writer.KafkaDataWriterBuilder
writer.kafka.topic=WikipediaExample
# Keys under writer.kafka.producerConfig. look like Kafka producer settings
# (bootstrap.servers, value.serializer). NOTE(review): confirm the prefix is
# stripped before they reach the producer.
writer.kafka.producerConfig.bootstrap.servers=localhost:9092
writer.kafka.producerConfig.value.serializer=org.apache.kafka.common.serialization.StringSerializer
writer.output.format=TEXT

# Publisher configuration
data.publisher.type=gobblin.publisher.NoopPublisher

# Alternative 1: use Confluent Schema Registry and serializers.
# Uncomment the lines below (and replace the StringSerializer setting above) to enable.
# writer.kafka.producerConfig.value.serializer=io.confluent.kafka.serializers.KafkaAvroSerializer
# writer.kafka.producerConfig.key.serializer=io.confluent.kafka.serializers.KafkaAvroSerializer
# writer.kafka.producerConfig.schema.registry.url=http://localhost:8081

# Alternative 2: use the local (config-driven) schema registry and serializers.
# NOTE(review): the schema below has an extra "content" field that the active
# wikipedia.avro.schema above does not declare - confirm which one is intended.
#writer.kafka.producerConfig.value.serializer=gobblin.kafka.serialize.LiAvroSerializer
#writer.kafka.producerConfig.kafka.schemaRegistry.class=gobblin.kafka.schemareg.ConfigDrivenMd5SchemaRegistry
#writer.kafka.producerConfig.schemaRegistry.schema.name=WikipediaExample
#writer.kafka.producerConfig.schemaRegistry.schema.value={"namespace": "example.wikipedia.avro","type": "record","name": "WikipediaArticle","fields": [{"name": "pageid", "type": ["double", "null"]},{"name": "title", "type": ["string", "null"]},{"name": "user", "type": ["string", "null"]},{"name": "anon", "type": ["string", "null"]},{"name": "userid", "type": ["double", "null"]},{"name": "timestamp", "type": ["string", "null"]},{"name": "size", "type": ["double", "null"]},{"name": "contentformat", "type": ["string", "null"]},{"name": "contentmodel", "type": ["string", "null"]},{"name": "content", "type": ["string", "null"]}]}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment