Skip to content

Instantly share code, notes, and snippets.

@datitran
Last active November 8, 2016 10:34
Show Gist options
  • Save datitran/f4fa5ce20e0aed5e9eac389ab37b2b9e to your computer and use it in GitHub Desktop.
Save datitran/f4fa5ce20e0aed5e9eac389ab37b2b9e to your computer and use it in GitHub Desktop.
def add_step(self, job_flow_id, master_dns):
    """Queue the two PySpark setup steps on an existing EMR job flow.

    The first step copies ``pyspark_quick_setup.sh`` from the script bucket
    to ``/home/hadoop/``; the second runs that script with ``sudo bash``,
    passing ``master_dns`` as its only argument. Both steps are executed
    through ``command-runner.jar`` and use ``CANCEL_AND_WAIT`` on failure.

    Args:
        job_flow_id: Value forwarded as ``JobFlowId`` to
            ``add_job_flow_steps``.
        master_dns: Value appended to the setup script's command line.

    Returns:
        Whatever ``add_job_flow_steps`` returns; it is also logged at
        INFO level before being returned.
    """
    # NOTE(review): self.boto_client is presumably a boto3 client factory
    # defined elsewhere on this class -- confirm against the full file.
    emr_client = self.boto_client("emr")

    setup_script_uri = 's3://{script_bucket_name}/pyspark_quick_setup.sh'.format(
        script_bucket_name=self.script_bucket_name)

    copy_files_step = {
        'Name': 'setup - copy files',
        'ActionOnFailure': 'CANCEL_AND_WAIT',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': ['aws', 's3', 'cp', setup_script_uri, '/home/hadoop/']
        }
    }
    # Runs after the copy step, so the script is expected at this local path.
    run_setup_step = {
        'Name': 'setup pyspark with conda',
        'ActionOnFailure': 'CANCEL_AND_WAIT',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': ['sudo', 'bash', '/home/hadoop/pyspark_quick_setup.sh', master_dns]
        }
    }

    response = emr_client.add_job_flow_steps(
        JobFlowId=job_flow_id,
        Steps=[copy_files_step, run_setup_step]
    )
    logger.info(response)
    return response
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment