Skip to content

Instantly share code, notes, and snippets.

@datitran
Last active November 8, 2016 10:34
Show Gist options
  • Save datitran/90b625c9958ef6719992ba1d5c491709 to your computer and use it in GitHub Desktop.
Save datitran/90b625c9958ef6719992ba1d5c491709 to your computer and use it in GitHub Desktop.
def load_cluster(self):
    """Request a new EMR cluster with Spark installed and return the response.

    Calls ``run_job_flow`` on the client obtained from
    ``self.boto_client("emr")`` (presumably a boto3 EMR client -- confirm
    against the enclosing class), using the configuration stored on the
    instance: ``cluster_name``, ``log_uri``, ``software_version``,
    ``instance_count``, ``key_name`` and ``script_bucket_name``.

    Returns:
        Whatever ``run_job_flow`` returns; the value is also logged at
        INFO level through the module-level ``logger``.
    """
    # Resolve the client first, exactly as the original chained call did.
    emr_client = self.boto_client("emr")

    response = emr_client.run_job_flow(
        Name=self.cluster_name,
        LogUri=self.log_uri,
        ReleaseLabel=self.software_version,
        Instances={
            # Master and slave nodes use the same fixed instance type.
            'MasterInstanceType': 'm3.xlarge',
            'SlaveInstanceType': 'm3.xlarge',
            'InstanceCount': self.instance_count,
            # Keep the job flow alive when it has no steps, so steps can
            # be submitted after this call returns.
            'KeepJobFlowAliveWhenNoSteps': True,
            'TerminationProtected': False,
            'Ec2KeyName': self.key_name
        },
        Applications=[
            {
                'Name': 'Spark'
            }
        ],
        BootstrapActions=[
            {
                'Name': 'Install Conda',
                'ScriptBootstrapAction': {
                    # The bootstrap script is read from the configured
                    # script bucket on S3.
                    'Path': 's3://{script_bucket_name}/bootstrap_actions.sh'.format(
                        script_bucket_name=self.script_bucket_name),
                }
            },
        ],
        VisibleToAllUsers=True,
        JobFlowRole='EMR_EC2_DefaultRole',
        ServiceRole='EMR_DefaultRole'
    )
    logger.info(response)
    return response
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment