Skip to content

Instantly share code, notes, and snippets.

@coingraham
Last active June 6, 2018 17:19
Show Gist options
  • Select an option

  • Save coingraham/8e107c8fbd83128596c82ba55ce2e911 to your computer and use it in GitHub Desktop.

Select an option

Save coingraham/8e107c8fbd83128596c82ba55ce2e911 to your computer and use it in GitHub Desktop.
Glue Python Hello World Job Dataframe
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.dynamicframe import DynamicFrame
## @params: [JOB_NAME]
# Resolve the job arguments from the command line; only JOB_NAME is requested.
args = getResolvedOptions(sys.argv, ['JOB_NAME'])

# Build the Spark and Glue contexts, then initialise the job with its
# resolved arguments.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

# Assign the target bucket to a variable
s3_bucket = "s3://my-bucket-name/output/hello_world/"

# Create Hello World Dataframe (a single row with three string columns)
dataframe = spark.createDataFrame([("Hello", "World", "!")])

# Coalesce the data to 1 partition so that a single output file is written.
# DataFrame.coalesce() returns a NEW DataFrame and leaves the receiver
# untouched, so the result has to be rebound; calling it as a bare statement
# has no effect.
# NOTE(review): an earlier note here said the coalesce is ignored once the
# data is converted to a Dynamic Frame. That was most likely a symptom of the
# discarded return value -- confirm the number of output files after a run.
dataframe = dataframe.coalesce(1)

# Convert to Dynamic Frame
hello_world_dynamic_frame = DynamicFrame.fromDF(
    dataframe, glueContext, "hello_world_dynamic_frame"
)

# Output the Dynamic Frame to S3 as a CSV
output_result = glueContext.write_dynamic_frame.from_options(
    frame=hello_world_dynamic_frame,
    connection_type="s3",
    connection_options={"path": s3_bucket},
    format="csv",
)

# Mark the job run as complete.
job.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment