Skip to content

Instantly share code, notes, and snippets.

@vsouza
Last active October 5, 2016 03:04
Show Gist options
  • Save vsouza/6f761a516961628508ae361f488e73b1 to your computer and use it in GitHub Desktop.
Save vsouza/6f761a516961628508ae361f488e73b1 to your computer and use it in GitHub Desktop.
from __future__ import print_function
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kinesis import KinesisUtils, InitialPositionInStream
import datetime
import json
from pyspark.sql import SQLContext, Row
from pyspark.sql.types import *
# ---------------------------------------------------------------------------
# Kinesis / Spark Streaming configuration.
# NOTE(review): the code that consumes these values is not visible in this
# part of the file; the descriptions below are inferred from the names and
# should be confirmed against the call site.
# ---------------------------------------------------------------------------

# AWS region the Kinesis stream lives in.
aws_region = 'us-east-1'

# Name of the Kinesis stream to read from (placeholder value).
kinesis_stream = 'stream_name'

# Regional Kinesis service endpoint. Built from aws_region so the two can
# never disagree when the region is changed in one place only.
# NOTE(review): assumes the standard "amazonaws.com" partition -- confirm
# before pointing this at a region with a different DNS suffix.
kinesis_endpoint = 'https://kinesis.{}.amazonaws.com/'.format(aws_region)

# Application name (placeholder value); presumably the Kinesis consumer
# application name -- confirm against the call site.
kinesis_app_name = 'app_name'

# Where to start reading the stream; LATEST is the enum member imported from
# pyspark.streaming.kinesis.
kinesis_initial_position = InitialPositionInStream.LATEST

# Checkpoint interval and streaming batch interval.
# NOTE(review): presumably both are in seconds -- confirm against the
# KinesisUtils / StreamingContext calls that use them.
kinesis_checkpoint_interval = 5
spark_batch_interval = 5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment