Skip to content

Instantly share code, notes, and snippets.

@elprup
Created June 25, 2013 02:55
Show Gist options
  • Save elprup/5855555 to your computer and use it in GitHub Desktop.
Save elprup/5855555 to your computer and use it in GitHub Desktop.
emr ini config and run tools
'''
EMR tools
Easy EMR Python console helpers
author: yaowei
created: 2013-06-24
'''
import copy
import os.path
import boto.emr
def parse_steps(cfgs):
    '''
    Build boto EMR step objects from the step entries of a job config.

    cfgs: iterable of sequences; item 0 is the step type. Only
          'script_runner' is recognised: item 1 is used as the step name
          and the remaining items are passed as step_args.
    returns: list of ScriptRunnerStep objects, in input order. Entries of
             any other type are skipped.
    '''
    return [
        boto.emr.step.ScriptRunnerStep(name=entry[1], step_args=entry[2:])
        for entry in cfgs
        if entry[0] == 'script_runner'
    ]
def get_conf_ini(file_path):
    '''
    Load job definitions from an ini file.

    config file format: ini
    Each section is one job; each option is one job parameter (the
    parameters are based on boto.emr job parameters).

    rules:
    - Every option value is evaluated as a Python expression, so strings
      must be quoted ('m1.small') and None/True/False/numbers/lists are
      written as in Python source.
    - A 'steps' option is converted with parse_steps().
    - A 'base' option names another section: that section's parameters
      are deep-copied first and then overridden by this section's own.
    - Sections are processed in sorted name order, so a base section must
      sort before every section that refers to it.

    example:
    [job_1]
    num_instances=2
    master_instance_type='m1.small'
    [job_extend_job_1]
    base='job_1'
    num_instances=4

    file_path: path of the ini file. A path that cannot be opened is
               skipped by ConfigParser.read, giving an empty result.
    returns: dict mapping section name to its dict of job parameters
    raises: KeyError if 'base' names a section that has not been loaded
    '''
    # The module was renamed in Python 3; accept either name.
    try:
        import ConfigParser as configparser
    except ImportError:
        import configparser

    result = {}
    cfg = configparser.ConfigParser()
    cfg.read(file_path)
    for section in sorted(cfg.sections()):
        job = {}
        for option in cfg.options(section):
            # SECURITY: eval() runs whatever expression the ini file
            # contains. Only load configuration files you trust.
            v = eval(cfg.get(section, option))
            if option == 'steps':
                v = parse_steps(v)
            job[option] = v
        if 'base' in job:
            base_name = job.pop('base')
            if base_name not in result:
                raise KeyError(
                    "section [%s]: base %r is not defined in a section "
                    "that sorts before it" % (section, base_name))
            # Deep copy so that a derived job never shares mutable
            # values (lists, step objects) with its base.
            base_job = copy.deepcopy(result[base_name])
            base_job.update(job)
            job = base_job
        result[section] = job
    return result
# Alias: get_conf is the same function object as get_conf_ini.
get_conf = get_conf_ini
# Template ini content that init() writes to ./jobs.ini when that file is
# missing. Every value is written as a Python expression because
# get_conf_ini() eval()s each option value.
# NOTE(review): the leading "0_" in the section name presumably makes it sort
# first so that other sections can name it as their base -- confirm.
example_ini='''
[0_default]
log_uri=None
ec2_keyname=None
availability_zone=None
master_instance_type='m1.small'
slave_instance_type='m1.small'
num_instances=1
action_on_failure='TERMINATE_JOB_FLOW'
keep_alive=False
enable_debugging=False
hadoop_version=None
steps=[]
bootstrap_actions=[]
instance_groups=None
additional_info=None
ami_version=None
api_params=None
visible_to_all_users=None
job_flow_role=None
'''
def init():
    '''
    Create ./jobs.ini from the example_ini template if it does not exist.

    An existing ./jobs.ini is left untouched.
    '''
    cfg_file = './jobs.ini'
    if not os.path.exists(cfg_file):
        # 'with' closes the file even if the write raises.
        with open(cfg_file, 'w') as f:
            f.write(example_ini)
# EMR connection for region 'us-west-1', created when this module is loaded;
# add_job() reads it as a global.
conn = boto.emr.connect_to_region('us-west-1')
def add_job(conf, name):
    '''
    Start a job flow through the module-level EMR connection.

    conf: dict of job parameters, passed to conn.run_jobflow as keyword
          arguments. It must not contain a 'name' key, since name is
          passed separately.
    name: name given to the job flow
    returns: the value returned by conn.run_jobflow (the job id), which
             is also printed
    '''
    jobid = conn.run_jobflow(name=name, **conf)
    # The parenthesised form prints the same on Python 2 and is valid
    # syntax on Python 3.
    print(jobid)
    return jobid
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment