Created
February 27, 2016 12:57
-
-
Save tomron/6ebc60cd3450478c7fc4 to your computer and use it in GitHub Desktop.
Example Python code, for use in an AWS Lambda function, that submits a Spark job as an EMR step to a running AWS EMR cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import time | |
import boto3 | |
def lambda_handler(event, context):
    """Submit a spark-submit step to the first available EMR cluster.

    Lists the account's EMR clusters, picks the first one whose state is
    RUNNING or WAITING, and adds a script-runner step that launches
    spark-submit on the master node. Exits the process if no suitable
    cluster exists.

    Args:
        event: Lambda invocation payload (unused here; standard signature).
        context: Lambda runtime context object (unused here).

    Returns:
        A confirmation string containing the AddJobFlowSteps response.
    """
    conn = boto3.client("emr")
    # Choose the first cluster which is RUNNING or WAITING.
    # Could instead select by name, or use an already-known cluster id.
    clusters = conn.list_clusters()
    clusters = [c["Id"] for c in clusters["Clusters"]
                if c["Status"]["State"] in ("RUNNING", "WAITING")]
    if not clusters:
        sys.stderr.write("No valid clusters\n")
        # BUG FIX: the original called sys.stderr.exit(), but file objects
        # have no exit() method (it raised AttributeError). sys.exit(1)
        # terminates with a nonzero status as intended.
        sys.exit(1)
    # Take the first relevant cluster.
    cluster_id = clusters[0]
    # Code location on your EMR master node.
    CODE_DIR = "/home/hadoop/code/"
    # spark-submit invocation example.
    # BUG FIX: spark-submit's configuration flag is --conf (PROP=VALUE),
    # not --spark-conf, which spark-submit rejects as an unknown option.
    step_args = ["/usr/bin/spark-submit", "--conf", "your-configuration",
                 CODE_DIR + "your_file.py", '--your-parameters', 'parameters']
    # Name the step with a timestamp so repeated submissions are distinguishable.
    step = {"Name": "what_you_do-" + time.strftime("%Y%m%d-%H:%M"),
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 's3n://elasticmapreduce/libs/script-runner/script-runner.jar',
                'Args': step_args
            }
            }
    action = conn.add_job_flow_steps(JobFlowId=cluster_id, Steps=[step])
    return "Added step: %s" % (action)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey tomron, can you tell me what the event looks like?
What have you written inside it?