Created
October 15, 2016 18:09
-
-
Save ericmjonas/0b44db5873d309960613e337b27e7662 to your computer and use it in GitHub Desktop.
A fabfile to launch an AWS GPU instance with additional local storage and TensorFlow set up
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Fabric file to help with launching EC2 P2 instances and
getting GPU support set up. Also installs the latest
Anaconda and then TensorFlow. Use:

    fab launch
    # wait until you can ssh into the instance with
    fab -R mygpu ssh
    # install everything
    fab -R mygpu basic_setup cuda_setup anaconda_setup tf_setup
    # when you're done, terminate
    fab -R mygpu terminate

Took inspiration from:
https://aws.amazon.com/blogs/aws/new-p2-instance-type-for-amazon-ec2-up-to-16-gpus/
"""
from fabric.api import local, env, run, put, cd, task, sudo, settings, warn_only, lcd, path, get | |
from fabric.contrib import project | |
import boto3 | |
# AMI passed as ImageId when launching the instance.
tgt_ami = 'ami-b04e92d0'
# AWS region used for every boto3 EC2 resource created in this file.
region = 'us-west-2'
# Value of the EC2 "Name" tag used to tag, find and terminate the instance.
unique_instance_name = 'p2_instance'
# Name of the EC2 key pair passed as KeyName when launching.
my_aws_key = 'ec2-us-west-2'
# Fabric role name under which the instance's host string is registered.
instance_name = "mygpu"
def tags_to_dict(d):
    """Convert a boto3-style tag list into a plain dict.

    Args:
        d: Iterable of ``{'Key': ..., 'Value': ...}`` mappings, or ``None``.
            boto3 reports ``tags`` as ``None`` for an instance without any
            tags, so ``None`` is treated like an empty list.

    Returns:
        A dict mapping each tag key to its value (empty when there are no
        tags).
    """
    return {a['Key']: a['Value'] for a in d or ()}
def get_target_instance():
    """Build the Fabric role definition for the running GPU instance(s).

    Scans every EC2 instance in ``region`` and keeps those that are running
    and whose ``Name`` tag equals ``unique_instance_name``.

    Returns:
        A one-entry dict ``{instance_name: [host_string, ...]}`` where each
        host string is ``ec2-user@<public DNS name>``. The list is empty when
        no matching instance is running.
    """
    ec2 = boto3.resource('ec2', region_name=region)
    res = []
    for i in ec2.instances.all():
        if i.state['Name'] != 'running':
            continue
        # Untagged instances report tags as None, and tagged ones need not
        # carry a Name tag; neither case may raise, because this function
        # runs while the fabfile is being imported.
        d = tags_to_dict(i.tags or [])
        if d.get('Name') == unique_instance_name:
            res.append('ec2-user@{}'.format(i.public_dns_name))
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("found {}".format(res))
    return {instance_name: res}
# Runs at import time: queries EC2 so the role is populated before any task
# executes (this means every `fab` invocation performs an AWS API call).
env.roledefs.update(get_target_instance())
@task
def launch(instance_type='p2.xlarge', volume_size=50):
    """Launch one EC2 instance from ``tgt_ami`` and tag it for later lookup.

    Blocks until the instance is running, then sets its ``Name`` tag to
    ``unique_instance_name``.

    Args:
        instance_type: EC2 instance type to request.
        volume_size: Size of the ``/dev/xvda`` EBS volume (GiB per the EC2
            API). Converted with ``int()`` because Fabric hands command-line
            task arguments (``fab launch:volume_size=100``) over as strings.
    """
    ec2 = boto3.resource('ec2', region_name=region)
    block_device_mappings = [
        {
            'DeviceName': '/dev/xvda',
            'Ebs': {
                'VolumeSize': int(volume_size),
                'DeleteOnTermination': True,
                'VolumeType': 'standard',
                'SnapshotId': 'snap-c87f35ec',
            },
        },
    ]
    instances = ec2.create_instances(
        ImageId=tgt_ami,
        MinCount=1,
        MaxCount=1,
        KeyName=my_aws_key,
        InstanceType=instance_type,
        BlockDeviceMappings=block_device_mappings
    )
    inst = instances[0]
    print(inst)
    inst.wait_until_running()
    # Refresh the cached attributes now that the instance is running.
    inst.reload()
    # The Instance resource supplies its own id as the resource to tag, so
    # only the tags need to be passed.
    inst.create_tags(
        Tags=[
            {
                'Key': 'Name',
                'Value': unique_instance_name,
            },
        ]
    )
@task
def ssh():
    """Run a local ``ssh -A`` (agent forwarding) to the current Fabric host."""
    command = "ssh -A " + env.host_string
    local(command)
@task
def basic_setup():
    """Update the system and install build tools and kernel headers via yum."""
    # Use Fabric's sudo() instead of run("sudo ..."), as cuda_setup does.
    sudo("yum update -q -y")
    sudo("yum groupinstall 'Development Tools' -q -y")
    # The C++ compiler package is named gcc-c++; yum has no package "g++".
    sudo("yum install -q -y emacs tmux gcc gcc-c++")
    # Headers matching the running kernel (presumably for building the
    # NVIDIA kernel module in cuda_setup).
    sudo("yum install -y kernel-devel-`uname -r`")
@task
def cuda_setup():
    """Download and install the NVIDIA 352.99 driver and CUDA 7.5.18.

    Afterwards applies a fixed set of nvidia-smi settings and appends the
    CUDA library directories to /etc/ld.so.conf before running ldconfig.
    """
    driver = "NVIDIA-Linux-x86_64-352.99.run"
    toolkit = "cuda_7.5.18_linux.run"

    # NOTE(review): both installers are fetched over plain HTTP and later
    # executed as root.
    run("wget http://us.download.nvidia.com/XFree86/Linux-x86_64/352.99/" + driver)
    run("wget http://developer.download.nvidia.com/compute/cuda/7.5/Prod/local_installers/" + toolkit)
    for installer in (driver, toolkit):
        run("chmod +x " + installer)

    # still requires a few prompts
    sudo("./" + driver + " --silent")
    # Don't install driver, just install CUDA and sample
    sudo("./" + toolkit + " --silent --toolkit --samples")

    # GPU settings, applied in this order; see `nvidia-smi --help` for flags.
    for option in ("-pm 1", "-acp 0", "--auto-boost-permission=0", "-ac 2505,875"):
        sudo("nvidia-smi " + option)

    # Each call appends, so re-running the task adds duplicate lines.
    for lib_dir in ("/usr/local/cuda/lib/", "/usr/local/cuda/lib64/"):
        sudo('echo "{}" >> /etc/ld.so.conf'.format(lib_dir))
    sudo('ldconfig')
@task
def anaconda_setup():
    """Install Anaconda2 4.2.0 into $HOME/anaconda and add extra packages."""
    installer = "Anaconda2-4.2.0-Linux-x86_64.sh"
    run("wget https://repo.continuum.io/archive/" + installer)
    run("chmod +x " + installer)
    # -b: batch (non-interactive) install; -p: installation prefix.
    run("./{} -b -p $HOME/anaconda".format(installer))
    # Appended to .bash_profile so that later commands find conda on PATH.
    run('echo "export PATH=$HOME/anaconda/bin:$PATH" >> .bash_profile')
    run("conda upgrade -q -y --all")
    packages = ("pandas", "scikit-learn", "scikit-image", "matplotlib", "seaborn")
    run("conda install -q -y " + " ".join(packages))
# Wheel installed by tf_setup. Per its file name: TensorFlow 0.11.0rc0,
# GPU build, CPython 2.7, Linux x86_64.
TF_URL = "https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl"
@task
def tf_setup():
    """Install the TensorFlow wheel at TF_URL with pip on the remote host."""
    pip_command = "pip install --ignore-installed --upgrade " + TF_URL
    run(pip_command)
@task
def terminate():
    """Terminate every running instance whose Name tag is unique_instance_name.

    Instances in any other state, and instances without a matching Name tag,
    are left untouched.
    """
    ec2 = boto3.resource('ec2', region_name=region)
    for i in ec2.instances.all():
        if i.state['Name'] != 'running':
            continue
        # Untagged instances report tags as None and a Name tag may be
        # absent; skip those instead of aborting the whole scan.
        d = tags_to_dict(i.tags or [])
        if d.get('Name') == unique_instance_name:
            i.terminate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment