Created
June 3, 2012 14:11
-
-
Save gerigk/2863668 to your computer and use it in GitHub Desktop.
A .mrjob.conf to run Pandas with EMR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# mrjob configuration (.mrjob.conf): options for the EMR runner and the
# local runner. All values containing "your..." or "keyname" are placeholders.
runners:
  emr:
    # Placeholder credentials: replace with your own and keep this file out
    # of version control.
    aws_access_key_id: youraccountid
    # Your region; US East is used by default when this is left unset.
    # aws_region: us-west-1
    aws_secret_access_key: yoursecretkey
    bootstrap_actions:
      # probably this is a good idea
      - s3://elasticmapreduce/bootstrap-actions/configurations/latest/memory-intensive
    # We disable this since it is run before our shell script and installs
    # mrjob for Python 2.6.
    bootstrap_mrjob: false
    # Uploaded to the cluster so the bootstrap command below can run it.
    bootstrap_files:
      - s3n://yourbucket/emr_resources/bootstrap.sh
    bootstrap_cmds:
      - chmod 755 bootstrap.sh;./bootstrap.sh
    # Specifying an SSH key pair allows us to SSH-tunnel to the job tracker
    # and fetch logs via SSH.
    ec2_key_pair: keyname
    ec2_key_pair_file: /home/$USER/.ssh/keyname.pem
    # Use beefier instances in production.
    ec2_instance_type: m1.large
    # Master is large because otherwise it is small and may end up being
    # 32-bit, which would need 32-bit builds.
    ec2_master_instance_type: m1.large
    # But only use one instance unless overridden.
    num_ec2_instances: 1
    # Bid prices are quoted so they stay strings instead of being parsed as
    # floats.
    ec2_master_instance_bid_price: '0.15'
    ec2_core_instance_bid_price: '0.15'
    # ec2_task_instance_bid_price: '0.15'
    cmdenv:
      # Adjust this to your time zone.
      TZ: Europe/Berlin
    s3_log_uri: s3://yourbucket/tmp/logs/
    s3_scratch_uri: s3://yourbucket/tmp/
  local:
    # Put any folder of your choice here. It should exist.
    base_tmp_dir: /home/$USER/emr/output/local
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment