Created
April 12, 2020 22:34
-
-
Save al102964/2ea8f0509176d74b86982eda7db7abf1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Template header. The stack provisions two EC2 instances (Airflow and MLflow),
# one Postgres RDS instance used by both, two S3 buckets and the IAM roles,
# instance profiles and security groups that connect them.
AWSTemplateFormatVersion: '2010-09-09'
Description: Airflow and MLflow servers on EC2 backed by a shared Postgres RDS instance
# Stack inputs. Names, types, defaults and constraints are unchanged; only the
# human-readable descriptions were corrected.
Parameters:
  # Key pair referenced by both the Airflow and the MLflow EC2 instances.
  KeyName:
    Description: Name of an existing EC2 KeyPair to enable SSH access into the Airflow and MLflow EC2 instances
    Type: AWS::EC2::KeyPair::KeyName
    ConstraintDescription: Must be the name of an existing EC2 KeyPair
  # Name of the bucket the Airflow instance uploads the Movielens files to.
  S3BucketNameAirflow:
    Default: al102964-airflow
    Description: REQUIRED - A new S3 Bucket name. This bucket will be used to read and write the Movielens dataset.
    Type: String
    AllowedPattern: '.+'
  # Name of the bucket created for the MLflow server.
  S3BucketNameMLflow:
    Default: al102964-mlflow
    Description: REQUIRED - A new S3 Bucket name. This bucket is created for the MLflow server's artifacts.
    Type: String
    AllowedPattern: '.+'
  # Master password of the RDS instance; the same credentials are used by the
  # Airflow metadata database and by the MLflow backend store.
  # NOTE(review): a well-known default value undermines NoEcho - consider
  # requiring callers to supply the password explicitly.
  DBPassword:
    Default: airflowpassword
    NoEcho: 'true'
    Description: Airflow database admin account password (also used by the MLflow server)
    Type: String
    MinLength: '6'
    MaxLength: '41'
    AllowedPattern: '[a-zA-Z0-9]*'
    ConstraintDescription: Must contain only alphanumeric characters
# Region -> AMI lookup table consumed by the !FindInMap calls on both EC2
# instances. Only us-west-2 has an entry, so the lookup has nothing to resolve
# for any other region.
Mappings:
  RegionMap:
    us-west-2: {AMI: ami-f2d3638a}
Resources: | |
# EC2 host for the MLflow server. On first boot the user-data script installs
# the Postgres client, Python 3.6 and MLflow 1.6.0, creates the `mlflow_db`
# database on the shared RDS instance and then starts `mlflow server` in the
# foreground, bound to 0.0.0.0.
#
# Changes versus the previous revision:
#   * the artifact root now follows the S3BucketNameMLflow parameter instead of
#     a hard-coded bucket name, so non-default bucket names work;
#   * psql uses the RDS endpoint port attribute, matching the backend-store URI;
#   * shell comments describe what each step actually does.
#
# NOTE(review): `sudo yum update` runs without -y; in non-interactive user data
# it probably does not apply any updates - confirm whether that is intended.
MLflowInstance:
  Type: AWS::EC2::Instance
  Properties:
    KeyName: !Ref 'KeyName'
    SecurityGroups: [!Ref 'MLflowEC2SecurityGroup']
    InstanceType: 't2.medium'
    IamInstanceProfile:
      Ref: EC2MLflowInstanceProfile
    Tags:
      -
        Key: Name
        Value: MLflow
    ImageId: !FindInMap
      - RegionMap
      - !Ref 'AWS::Region'
      - AMI
    UserData:
      Fn::Base64: !Sub |
        #!/bin/bash
        set -x
        exec > >(tee /var/log/user-data.log|logger -t user-data ) 2>&1
        # Get the latest CloudFormation package
        echo "Installing aws-cfn"
        yum install -y aws-cfn-bootstrap
        # Start cfn-init
        /opt/aws/bin/cfn-init -v -c install --stack ${AWS::StackId} --resource MLflowInstance --region ${AWS::Region}
        # Install the Postgres client and the Python 3.6 tooling
        sudo yum update
        sudo yum install postgresql8.x86_64 -y
        sudo yum install -y python36 python36-virtualenv python36-pip
        # Install git
        sudo yum install -y git
        # Install boto3 and the psycopg2 Postgres driver
        sudo pip-3.6 install boto3
        sudo pip-3.6 install psycopg2-binary
        # Install MLflow using pip
        echo "Install MLflow"
        sudo SLUGIFY_USES_TEXT_UNIDECODE=yes pip-3.6 install mlflow==1.6.0
        # Put /usr/local/bin on the PATH for the remaining commands
        echo 'export PATH=/usr/local/bin:$PATH' >> /root/.bash_profile
        source /root/.bash_profile
        # Create the MLflow database on the shared RDS instance
        echo "CREATE DATABASE mlflow_db;" >> /home/ec2-user/init.sql
        PGPASSWORD=${DBPassword} psql -h ${DBInstance.Endpoint.Address} -p ${DBInstance.Endpoint.Port} -U airflow --dbname=airflowdb -f /home/ec2-user/init.sql
        # Run the MLflow server with Postgres as backend store and the stack's MLflow bucket as artifact root
        mlflow server --backend-store-uri postgresql://airflow:${DBPassword}@${DBInstance.Endpoint.Address}:${DBInstance.Endpoint.Port}/mlflow_db --default-artifact-root s3://${S3BucketNameMLflow}/ --host 0.0.0.0
  Metadata:
    # cfn-init config set "install": makes sure gcc is present via yum.
    AWS::CloudFormation::Init:
      configSets:
        install:
          - gcc
      gcc:
        packages:
          yum:
            gcc: []
  # The script talks to the database and references the bucket at boot, so
  # both must exist before the instance is launched.
  DependsOn:
    - DBInstance
    - MLflowEC2SecurityGroup
    - S3BucketMLflow
# IAM role for the MLflow EC2 instance (attached through
# EC2MLflowInstanceProfile). Trusts the EC2 service and carries the AWS managed
# AmazonS3FullAccess policy.
EC2MLflowRole:
  Type: AWS::IAM::Role
  Properties:
    RoleName: MLflowInstanceRole
    ManagedPolicyArns:
      - arn:aws:iam::aws:policy/AmazonS3FullAccess
    AssumeRolePolicyDocument:
      Version: '2012-10-17'
      Statement:
        - Effect: Allow
          Action: ['sts:AssumeRole']
          Principal:
            Service: [ec2.amazonaws.com]
# Instance profile wrapping EC2MLflowRole; referenced by MLflowInstance.
EC2MLflowInstanceProfile:
  Type: AWS::IAM::InstanceProfile
  Properties:
    Roles: [!Ref EC2MLflowRole]
    InstanceProfileName: MLflowInstanceProfile
# EC2 host for the Airflow web server. On first boot the user-data script
# downloads the Movielens dataset and copies it to the Airflow S3 bucket,
# installs Apache Airflow, points it at the shared RDS Postgres database with
# the LocalExecutor, copies the project files from the git repository into
# ~/airflow and finally starts `airflow webserver` in the foreground.
#
# Changes versus the previous revision:
#   * DependsOn now includes S3BucketAirflow: the script uploads to that bucket
#     at boot, so the bucket has to exist before the instance is launched;
#   * stale or misplaced shell comments were corrected.
#
# NOTE(review): apache-airflow is installed unpinned while the script calls
# `airflow initdb`; confirm the release pip resolves still provides that command.
EC2Instance:
  Type: AWS::EC2::Instance
  Properties:
    KeyName: !Ref 'KeyName'
    SecurityGroups: [!Ref 'AirflowEC2SecurityGroup']
    InstanceType: 't2.medium'
    IamInstanceProfile:
      Ref: EC2InstanceProfile
    Tags:
      -
        Key: Name
        Value: Airflow
    ImageId: !FindInMap
      - RegionMap
      - !Ref 'AWS::Region'
      - AMI
    UserData:
      Fn::Base64: !Sub |
        #!/bin/bash
        set -x
        exec > >(tee /var/log/user-data.log|logger -t user-data ) 2>&1
        # Get the latest CloudFormation package
        echo "Installing aws-cfn"
        yum install -y aws-cfn-bootstrap
        # Start cfn-init
        /opt/aws/bin/cfn-init -v -c install --stack ${AWS::StackId} --resource EC2Instance --region ${AWS::Region}
        # Download and unzip the Movielens dataset
        wget http://files.grouplens.org/datasets/movielens/ml-latest.zip && unzip ml-latest.zip
        # Upload the movielens dataset files to the S3 bucket
        aws s3 cp ml-latest s3://${S3BucketNameAirflow} --recursive
        # Install git
        sudo yum install -y git
        # Clone the git repository
        git clone https://github.com/al102964/airflow-mlflow-aws.git
        # Install boto3
        sudo pip install boto3
        # Install airflow using pip
        echo "Install Apache Airflow"
        sudo SLUGIFY_USES_TEXT_UNIDECODE=yes pip install -U apache-airflow
        # Encrypt connection passwords in metadata db
        sudo pip install apache-airflow[crypto]
        # Postgres operators and hook, support as an Airflow backend
        sudo pip install apache-airflow[postgres]
        # Adjust the versions of six, MarkupSafe and SQLAlchemy
        sudo -H pip install six==1.10.0
        sudo pip install --upgrade six
        sudo pip install markupsafe
        sudo pip install --upgrade MarkupSafe
        sudo pip install SQLAlchemy==1.3.15
        # Put /usr/local/bin on the PATH for the remaining commands
        echo 'export PATH=/usr/local/bin:$PATH' >> /root/.bash_profile
        source /root/.bash_profile
        # Initialize Airflow (creates ~/airflow/airflow.cfg with the default settings)
        airflow initdb
        # Update the RDS connection in the Airflow Config file
        sed -i '/sql_alchemy_conn/s/^/#/g' ~/airflow/airflow.cfg
        sed -i '/sql_alchemy_conn/ a sql_alchemy_conn = postgresql://airflow:${DBPassword}@${DBInstance.Endpoint.Address}:${DBInstance.Endpoint.Port}/airflowdb' ~/airflow/airflow.cfg
        # Update the type of executor in the Airflow Config file
        sed -i '/executor = SequentialExecutor/s/^/#/g' ~/airflow/airflow.cfg
        sed -i '/executor = SequentialExecutor/ a executor = LocalExecutor' ~/airflow/airflow.cfg
        # Re-initialize Airflow, this time against the RDS database
        airflow initdb
        # Move all the files to the ~/airflow directory. The Airflow config file is setup to hold all the DAG related files in the ~/airflow/ folder.
        mv airflow-mlflow-aws/* ~/airflow/
        # Delete the higher-level git repository directory
        rm -rf airflow-mlflow-aws
        # Replace the '<s3-bucket>' placeholder in the files under dags/transform with the name of the Airflow bucket.
        sed -i 's/<s3-bucket>/${S3BucketNameAirflow}/g' /root/airflow/dags/transform/*
        # Run Airflow webserver
        airflow webserver
  Metadata:
    # cfn-init config set "install": makes sure gcc is present via yum.
    AWS::CloudFormation::Init:
      configSets:
        install:
          - gcc
      gcc:
        packages:
          yum:
            gcc: []
  # The script talks to the database and uploads to the bucket at boot, so
  # both must exist before the instance is launched.
  DependsOn:
    - DBInstance
    - AirflowEC2SecurityGroup
    - S3BucketAirflow
# Security group attached to the Airflow EC2 instance. Opens TCP ports 80,
# 8080 and 22 to every IPv4 address.
AirflowEC2SecurityGroup:
  Type: AWS::EC2::SecurityGroup
  Properties:
    GroupName: AirflowEC2SG
    GroupDescription: Enable HTTP access via port 80 + SSH access
    SecurityGroupIngress:
      - {IpProtocol: tcp, FromPort: 80, ToPort: 80, CidrIp: '0.0.0.0/0'}
      - {IpProtocol: tcp, FromPort: 8080, ToPort: 8080, CidrIp: '0.0.0.0/0'}
      - {IpProtocol: tcp, FromPort: 22, ToPort: 22, CidrIp: '0.0.0.0/0'}
# Security group named AirflowEMRMasterSG. It declares no inline rules; its
# only ingress rule is added by AirflowEMRMasterInboundRule.
AirflowEMRMasterEC2SecurityGroup:
  Type: AWS::EC2::SecurityGroup
  DependsOn: AirflowEC2SecurityGroup
  Properties:
    GroupDescription: Airflow EMR Master SG
    GroupName: AirflowEMRMasterSG
# Ingress rule on the EMR master group: allows TCP 8998 from members of the
# Airflow EC2 security group.
AirflowEMRMasterInboundRule:
  Type: AWS::EC2::SecurityGroupIngress
  Properties:
    GroupName: !Ref AirflowEMRMasterEC2SecurityGroup
    SourceSecurityGroupName: !Ref AirflowEC2SecurityGroup
    IpProtocol: tcp
    FromPort: "8998"
    ToPort: "8998"
# Security group named AirflowEMRSlaveSG; no rules are defined for it in this template.
AirflowEMRSlaveEC2SecurityGroup:
  Type: AWS::EC2::SecurityGroup
  Properties:
    GroupDescription: Airflow EMR Slave SG
    GroupName: AirflowEMRSlaveSG
# Postgres RDS instance shared by both servers: it is created with the
# `airflowdb` database, and the MLflow instance adds `mlflow_db` at boot.
# DeletionPolicy is Delete, so the database is removed together with the stack.
DBInstance:
  Type: AWS::RDS::DBInstance
  DeletionPolicy: Delete
  Properties:
    Engine: postgres
    DBInstanceClass: db.t2.small
    AllocatedStorage: 5
    DBName: airflowdb
    MasterUsername: airflow
    MasterUserPassword: !Ref DBPassword
    DBSecurityGroups: [!Ref DBSecurityGroup]
# IAM role for the Airflow EC2 instance (attached through EC2InstanceProfile).
# Trusts the EC2 service and carries the AWS managed full-access policies for
# S3 and Elastic MapReduce.
EC2Role:
  Type: AWS::IAM::Role
  Properties:
    RoleName: AirflowInstanceRole
    ManagedPolicyArns:
      - arn:aws:iam::aws:policy/AmazonS3FullAccess
      - arn:aws:iam::aws:policy/AmazonElasticMapReduceFullAccess
    AssumeRolePolicyDocument:
      Version: '2012-10-17'
      Statement:
        - Effect: Allow
          Action: ['sts:AssumeRole']
          Principal:
            Service: [ec2.amazonaws.com]
# Instance profile wrapping EC2Role; referenced by the Airflow EC2Instance.
EC2InstanceProfile:
  Type: AWS::IAM::InstanceProfile
  Properties:
    Roles: [!Ref EC2Role]
    InstanceProfileName: AirflowInstanceProfile
# IAM role named EmrRole. Can be assumed by the Elastic MapReduce and S3
# service principals; carries the AWS managed full-access policies for S3 and
# Elastic MapReduce. Nothing else in this template references it.
EmrRole:
  Type: AWS::IAM::Role
  Properties:
    RoleName: EmrRole
    ManagedPolicyArns:
      - arn:aws:iam::aws:policy/AmazonS3FullAccess
      - arn:aws:iam::aws:policy/AmazonElasticMapReduceFullAccess
    AssumeRolePolicyDocument:
      Version: '2012-10-17'
      Statement:
        - Effect: Allow
          Action: ['sts:AssumeRole']
          Principal:
            Service:
              - elasticmapreduce.amazonaws.com
              - s3.amazonaws.com
# IAM role named EmrEc2Role (exposed through EmrEc2InstanceProfile). Trusts the
# EC2 service and carries the AmazonElasticMapReduceforEC2Role and
# AmazonS3FullAccess managed policies.
EmrEc2Role:
  Type: AWS::IAM::Role
  Properties:
    RoleName: EmrEc2Role
    ManagedPolicyArns:
      - arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role
      - arn:aws:iam::aws:policy/AmazonS3FullAccess
    AssumeRolePolicyDocument:
      Version: '2012-10-17'
      Statement:
        - Effect: Allow
          Action: ['sts:AssumeRole']
          Principal:
            Service: [ec2.amazonaws.com]
# Instance profile wrapping EmrEc2Role.
EmrEc2InstanceProfile:
  Type: AWS::IAM::InstanceProfile
  Properties:
    Roles: [!Ref EmrEc2Role]
    InstanceProfileName: EmrEc2InstanceProfile
# Security group attached to the MLflow EC2 instance. Opens TCP ports 5000 and
# 22 to every IPv4 address.
MLflowEC2SecurityGroup:
  Type: AWS::EC2::SecurityGroup
  DeletionPolicy: Delete
  Properties:
    GroupName: MLflowEC2SG
    GroupDescription: Enable HTTP access via port 5000 + SSH access
    SecurityGroupIngress:
      - {IpProtocol: tcp, FromPort: 5000, ToPort: 5000, CidrIp: '0.0.0.0/0'}
      - {IpProtocol: tcp, FromPort: 22, ToPort: 22, CidrIp: '0.0.0.0/0'}
# RDS security group used by DBInstance. Grants database access to members of
# the MLflow and Airflow EC2 security groups.
DBSecurityGroup:
  Type: AWS::RDS::DBSecurityGroup
  DeletionPolicy: Delete
  Properties:
    GroupDescription: Frontend Access
    DBSecurityGroupIngress:
      - EC2SecurityGroupName: !Ref MLflowEC2SecurityGroup
      - EC2SecurityGroupName: !Ref AirflowEC2SecurityGroup
# Bucket named by the S3BucketNameAirflow parameter; the Airflow instance
# copies the Movielens files into it at boot.
S3BucketAirflow:
  Type: AWS::S3::Bucket
  DeletionPolicy: Delete
  Properties:
    BucketName: !Ref S3BucketNameAirflow
    AccessControl: BucketOwnerFullControl
# Bucket named by the S3BucketNameMLflow parameter; MLflowInstance waits for it
# through DependsOn.
S3BucketMLflow:
  Type: AWS::S3::Bucket
  DeletionPolicy: Delete
  Properties:
    BucketName: !Ref S3BucketNameMLflow
    AccessControl: BucketOwnerFullControl
# Stack outputs: ready-to-open URLs built from each instance's public DNS name.
Outputs:
  MLflowEC2PublicDNSName:
    Description: Public DNS Name of the MLflow EC2 instance
    Value: !Sub 'http://${MLflowInstance.PublicDnsName}:5000'
  AirflowEC2PublicDNSName:
    Description: Public DNS Name of the Airflow EC2 instance
    Value: !Sub 'http://${EC2Instance.PublicDnsName}:8080'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment