shaypal5 · February 3, 2021 13:04
diff --git a/lifecycle_core.sh b/lifecycle_core.sh
 #!/bin/bash


 # --- script documentation
 # receives two arrays as input using the length prefix convention. See:
 # https://stackoverflow.com/questions/43686878/pass-multiple-arrays-as-arguments-to-a-bash-script

 # add mandatory packages here
 # packages+=("pandas")
 # [*] joins each array into the single word that echo prints; embedding
 # [@] inside a quoted string splits it into several arguments instead.
 echo ">>> conda_envs: ${conda_envs[*]}"
 echo ">>> packages: ${packages[*]}"
 # echo ">>> vars: ${vars[*]}"

 # ==== Part 1: Clone Repository ====

 # Clones the right branch of the right repository
 # using the credentials of the right user
 # (so commits from SageMaker are legible).
 # Reads REPO, BRANCH, GITHUB_UNAME and GITHUB_ORG, none of which are
 # set in this section.

 echo ">>> === Part 1 - Repo Cloning ==="

 if [ -d "./SageMaker/$REPO/" ]; then
    echo ">>> $REPO repository already cloned!"
 else
    # The clone runs in a subshell so that a failed cd or clone can never
    # leave the rest of the script running from the wrong directory.
    (
        cd SageMaker || exit 1
        echo ">>> Cloning $REPO and checking out branch $BRANCH ..."
        # NOTE(review): the URL below looks mangled ("[email protected]" is
        # presumably an obfuscated "<token variable>@github.com") - restore
        # the credential part before relying on this clone.
        git clone --branch "$BRANCH" "https://$GITHUB_UNAME:[email protected]/$GITHUB_ORG/$REPO.git" || exit 1
        echo ">>> Done cloning!"
        cd "$REPO" || exit 1
        echo ">>> See git status output:"
        git status
    ) || echo ">>> Cloning or inspecting $REPO failed!" >&2
    echo ">>> Back to working dir!"
 fi


 # ==== Part 2: MLflow-on-DataBricks integration ====

 echo ">>> === Part 2 - Databricks Integration  ==="

 echo "Injecting env vars into all desired conda environments..."
 # Iterating over all conda environments
 # (Note that "base" is special environment name, include it there as well)
 # Each entry of vars is handed to conda and appended verbatim to the
 # environment's activate.d/env_vars.sh file.
 for env in base "${conda_envs[@]}"; do

    # 2.1. databricks-required env var setup
    echo ">>> Injecting env vars into $env conda environment..."
    activate_dir="/home/ec2-user/anaconda3/envs/$env/etc/conda/activate.d"
    mkdir -p "$activate_dir"
    # touch so the file exists even when no vars were supplied
    touch "$activate_dir/env_vars.sh"
    for var in "${vars[@]}"; do
        # -n targets the environment of this iteration; without it the
        # command does not reference $env at all.
        conda env config vars set -n "$env" "$var"
        echo "$var" >> "$activate_dir/env_vars.sh"
    done
 done
 echo "Done injecting env vars into all desired conda environments..."


 # ==== Part 3: Installing Python Packages ====

 echo ">>> === Part 3 - Python Packages Installs ==="

 # Iterate over SOME conda environments, and
 # 1) Install all required Python packages from pip
 # 2) Install the local repo's components as packages

 for env in "${conda_envs[@]}"; do
    # 3.0 Installing packages in the Jupyter system environment can affect stability
    # of your SageMaker Notebook Instance. You can remove this check if you'd like to
    # install Jupyter extensions, etc.
    # The check runs before activation so that nothing (not even the pip
    # upgrade) is installed there and no activation is left without its
    # matching "conda deactivate".
    if [ "$env" = 'JupyterSystemEnv' ]; then
        echo ">>> Skipping installation inside the Jupyer system environment."
        continue
    fi

    # installing required python packages on all remaining conda environments:
    echo ">>> Installing pip packages in $env ..."

    # 3.1. Activate the current conda environment
    conda activate "$env"
    # source /home/ec2-user/anaconda3/bin/activate $(basename "$env")
    echo ">>> Check active conda env:"
    conda env list

    pip install --upgrade pip

    # 3.2. prevent installation errors when installing mlflow:
    # pip uninstall -y enum34

    # 3.3. Running pip installs of packages
    for package in "${packages[@]}"; do
        echo ">>> - Installing $package package..."
        pip install "$package"
    done
    # (You can also perform "conda install" here as well)
    echo ">>> Finished installing packages from PyPI. Check list:"
    pip list

    # 3.4. pip install local research repo components
    echo ">>> Installing cloned repo as a package"
    pip install -e "./SageMaker/$REPO[test]"

    # 3.5. Deactivate the current conda environment
    # source /home/ec2-user/anaconda3/bin/deactivate
    conda deactivate
 done
 echo "Done installing packages!"


 # ===== Part 4: Logging Utilization Metrics to CloudWatch ====

 echo ">>> === Part 4 - Utilization Logging  ==="

 # Installs the cloud watch agent on the notebook instance in order to collect load and utilization metrics.

 # OVERVIEW
 # This script publishes the system-level metrics from the Notebook instance to Cloudwatch.
 #
 # Note that this script will fail if either condition is not met
 #   1. Ensure the Notebook Instance has internet connectivity to fetch the example config
 #   2. Ensure the Notebook Instance execution role permissions to cloudwatch:PutMetricData to publish the system-level metrics
 #
 # https://aws.amazon.com/cloudwatch/pricing/

 # PARAMETERS
 NOTEBOOK_INSTANCE_NAME=$(jq --raw-output '.ResourceName' \
                      /opt/ml/metadata/resource-metadata.json)
 CLOUDWATCH_CONFIG_FILE="amazon-cloudwatch-agent.json"
 CLOUDWATCH_CONFIG_URL="https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/master/scripts/publish-instance-metrics/amazon-cloudwatch-agent.json"

 echo "Fetching the CloudWatch agent configuration file."
 # -O pins the output name, so a stale copy in the working directory is
 # overwritten instead of wget saving the download under a ".1" suffix.
 if wget -O "$CLOUDWATCH_CONFIG_FILE" "$CLOUDWATCH_CONFIG_URL"; then
    # The sample config carries a placeholder instance name; substitute the real one.
    sed -i -- "s/MyNotebookInstance/$NOTEBOOK_INSTANCE_NAME/g" "$CLOUDWATCH_CONFIG_FILE"

    echo "Starting the CloudWatch agent on the Notebook Instance."
    sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a \
        fetch-config -m ec2 -c "file://$(pwd)/$CLOUDWATCH_CONFIG_FILE" -s
 else
    echo "Failed to fetch the CloudWatch agent configuration file; not starting the agent." >&2
 fi

 # -f: after a failed download the file may be missing or empty.
 rm -f -- "$CLOUDWATCH_CONFIG_FILE"
	#!/bin/bash


	# --- script documentation
	# receives two arrays as input using the length prefix convention. See:
	# https://stackoverflow.com/questions/43686878/pass-multiple-arrays-as-arguments-to-a-bash-script

	# add mandatory packages here
	# packages+=("pandas")
	# [*] joins each array into the single word that echo prints; embedding
	# [@] inside a quoted string splits it into several arguments instead.
	echo ">>> conda_envs: ${conda_envs[*]}"
	echo ">>> packages: ${packages[*]}"
	# echo ">>> vars: ${vars[*]}"

	# ==== Part 1: Clone Repository ====

	# Clones the right branch of the right repository
	# using the credentials of the right user
	# (so commits from SageMaker are legible).
	# Reads REPO, BRANCH, GITHUB_UNAME and GITHUB_ORG, none of which are
	# set in this section.

	echo ">>> === Part 1 - Repo Cloning ==="

	if [ -d "./SageMaker/$REPO/" ]; then
	    echo ">>> $REPO repository already cloned!"
	else
	    # The clone runs in a subshell so that a failed cd or clone can never
	    # leave the rest of the script running from the wrong directory.
	    (
	        cd SageMaker || exit 1
	        echo ">>> Cloning $REPO and checking out branch $BRANCH ..."
	        # NOTE(review): the URL below looks mangled ("[email protected]" is
	        # presumably an obfuscated "<token variable>@github.com") - restore
	        # the credential part before relying on this clone.
	        git clone --branch "$BRANCH" "https://$GITHUB_UNAME:[email protected]/$GITHUB_ORG/$REPO.git" || exit 1
	        echo ">>> Done cloning!"
	        cd "$REPO" || exit 1
	        echo ">>> See git status output:"
	        git status
	    ) || echo ">>> Cloning or inspecting $REPO failed!" >&2
	    echo ">>> Back to working dir!"
	fi


	# ==== Part 2: MLflow-on-DataBricks integration ====

	echo ">>> === Part 2 - Databricks Integration ==="

	echo "Injecting env vars into all desired conda environments..."
	# Iterating over all conda environments
	# (Note that "base" is special environment name, include it there as well)
	# Each entry of vars is handed to conda and appended verbatim to the
	# environment's activate.d/env_vars.sh file.
	for env in base "${conda_envs[@]}"; do

	    # 2.1. databricks-required env var setup
	    echo ">>> Injecting env vars into $env conda environment..."
	    activate_dir="/home/ec2-user/anaconda3/envs/$env/etc/conda/activate.d"
	    mkdir -p "$activate_dir"
	    # touch so the file exists even when no vars were supplied
	    touch "$activate_dir/env_vars.sh"
	    for var in "${vars[@]}"; do
	        # -n targets the environment of this iteration; without it the
	        # command does not reference $env at all.
	        conda env config vars set -n "$env" "$var"
	        echo "$var" >> "$activate_dir/env_vars.sh"
	    done
	done
	echo "Done injecting env vars into all desired conda environments..."


	# ==== Part 3: Installing Python Packages ====

	echo ">>> === Part 3 - Python Packages Installs ==="

	# Iterate over SOME conda environments, and
	# 1) Install all required Python packages from pip
	# 2) Install the local repo's components as packages

	for env in "${conda_envs[@]}"; do
	    # 3.0 Installing packages in the Jupyter system environment can affect stability
	    # of your SageMaker Notebook Instance. You can remove this check if you'd like to
	    # install Jupyter extensions, etc.
	    # The check runs before activation so that nothing (not even the pip
	    # upgrade) is installed there and no activation is left without its
	    # matching "conda deactivate".
	    if [ "$env" = 'JupyterSystemEnv' ]; then
	        echo ">>> Skipping installation inside the Jupyer system environment."
	        continue
	    fi

	    # installing required python packages on all remaining conda environments:
	    echo ">>> Installing pip packages in $env ..."

	    # 3.1. Activate the current conda environment
	    conda activate "$env"
	    # source /home/ec2-user/anaconda3/bin/activate $(basename "$env")
	    echo ">>> Check active conda env:"
	    conda env list

	    pip install --upgrade pip

	    # 3.2. prevent installation errors when installing mlflow:
	    # pip uninstall -y enum34

	    # 3.3. Running pip installs of packages
	    for package in "${packages[@]}"; do
	        echo ">>> - Installing $package package..."
	        pip install "$package"
	    done
	    # (You can also perform "conda install" here as well)
	    echo ">>> Finished installing packages from PyPI. Check list:"
	    pip list

	    # 3.4. pip install local research repo components
	    echo ">>> Installing cloned repo as a package"
	    pip install -e "./SageMaker/$REPO[test]"

	    # 3.5. Deactivate the current conda environment
	    # source /home/ec2-user/anaconda3/bin/deactivate
	    conda deactivate
	done
	echo "Done installing packages!"


	# ===== Part 4: Logging Utilization Metrics to CloudWatch ====

	echo ">>> === Part 4 - Utilization Logging ==="

	# Installs the cloud watch agent on the notebook instance in order to collect load and utilization metrics.

	# OVERVIEW
	# This script publishes the system-level metrics from the Notebook instance to Cloudwatch.
	#
	# Note that this script will fail if either condition is not met
	# 1. Ensure the Notebook Instance has internet connectivity to fetch the example config
	# 2. Ensure the Notebook Instance execution role permissions to cloudwatch:PutMetricData to publish the system-level metrics
	#
	# https://aws.amazon.com/cloudwatch/pricing/

	# PARAMETERS
	NOTEBOOK_INSTANCE_NAME=$(jq --raw-output '.ResourceName' \
	    /opt/ml/metadata/resource-metadata.json)
	CLOUDWATCH_CONFIG_FILE="amazon-cloudwatch-agent.json"
	CLOUDWATCH_CONFIG_URL="https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/master/scripts/publish-instance-metrics/amazon-cloudwatch-agent.json"

	echo "Fetching the CloudWatch agent configuration file."
	# -O pins the output name, so a stale copy in the working directory is
	# overwritten instead of wget saving the download under a ".1" suffix.
	if wget -O "$CLOUDWATCH_CONFIG_FILE" "$CLOUDWATCH_CONFIG_URL"; then
	    # The sample config carries a placeholder instance name; substitute the real one.
	    sed -i -- "s/MyNotebookInstance/$NOTEBOOK_INSTANCE_NAME/g" "$CLOUDWATCH_CONFIG_FILE"

	    echo "Starting the CloudWatch agent on the Notebook Instance."
	    sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a \
	        fetch-config -m ec2 -c "file://$(pwd)/$CLOUDWATCH_CONFIG_FILE" -s
	else
	    echo "Failed to fetch the CloudWatch agent configuration file; not starting the agent." >&2
	fi

	# -f: after a failed download the file may be missing or empty.
	rm -f -- "$CLOUDWATCH_CONFIG_FILE"