Last active
November 3, 2015 10:13
-
-
Save LewisW/328d0bb7b7406ece009f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#-- | |
# Copyright 2014 Red Hat, Inc. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
#++ | |
# Purpose: This script grows the root filesystem and sets up LVM volumes | |
# for docker metadata and data. | |
# Author: Andy Grimm <[email protected]> | |
set -e | |
# This section reads the config file (/etc/sysconfig/docker-storage-setup. | |
# Currently supported options: | |
# DEVS: A quoted, space-separated list of devices to be used. This currently | |
# expects the devices to be unpartitioned drives. If "VG" is not | |
# specified, then use of the root disk's extra space is implied. | |
# | |
# VG: The volume group to use for docker storage. Defaults to the volume | |
# group where the root filesystem resides. If VG is specified and the | |
# volume group does not exist, it will be created (which requires that | |
# "DEVS" be nonempty, since we don't currently support putting a second | |
# partition on the root disk). | |
# | |
# The options below should be specified as values acceptable to 'lvextend -L': | |
# | |
# ROOT_SIZE: The size to which the root filesystem should be grown. | |
# | |
# DATA_SIZE: The desired size for the docker data LV. Defaults to using all | |
# free space in the VG after the root LV and docker metadata LV | |
# have been allocated/grown. | |
# | |
# Other possibilities: | |
# * Support lvm raid setups for docker data? This would not be very difficult | |
# if given multiple PVs and another variable; options could be just a simple | |
# "mirror" or "stripe", or something more detailed. | |
# In lvm thin pool , effectively data LV is named as pool LV. lvconvert | |
# takes the data lv name and uses it as pool lv name. And later even to | |
# resize the data lv, one has to use pool lv name. So name data lv | |
# appropriately. | |
# Note: lvm2 version should be same or higher than lvm2-2.02.112 for lvm | |
# thin pool functionality to work properly. | |
POOL_LV_NAME="docker-pool" | |
DATA_LV_NAME=$POOL_LV_NAME | |
META_LV_NAME="${POOL_LV_NAME}meta" | |
DOCKER_STORAGE="/etc/sysconfig/docker-storage" | |
STORAGE_DRIVERS="devicemapper overlay" | |
get_docker_version() { | |
local version | |
# docker version command exits with error as daemon is not running at this | |
# point of time. So continue despite the error. | |
version=`docker version --format='{{.Client.Version}}' 2>/dev/null` || true | |
echo $version | |
} | |
get_deferred_removal_string() { | |
local version major minor | |
if ! version=$(get_docker_version);then | |
return 0 | |
fi | |
[ -z "$version" ] && return 0 | |
major=$(echo $version | cut -d "." -f1) | |
minor=$(echo $version | cut -d "." -f2) | |
[ -z "$major" ] && return 0 | |
[ -z "$minor" ] && return 0 | |
# docker 1.7 onwards supports deferred device removal. Enable it. | |
if [ $major -gt 1 ] || ([ $major -eq 1 ] && [ $minor -ge 7 ]);then | |
echo "--storage-opt dm.use_deferred_removal=true" | |
fi | |
} | |
get_deferred_deletion_string() { | |
local version major minor | |
if ! version=$(get_docker_version);then | |
return 0 | |
fi | |
[ -z "$version" ] && return 0 | |
major=$(echo $version | cut -d "." -f1) | |
minor=$(echo $version | cut -d "." -f2) | |
[ -z "$major" ] && return 0 | |
[ -z "$minor" ] && return 0 | |
# docker 1.9 onwards supports deferred device removal. Enable it. | |
if [ $major -gt 1 ] || ([ $major -eq 1 ] && [ $minor -ge 9 ]);then | |
echo "--storage-opt dm.use_deferred_deletion=true" | |
fi | |
} | |
get_devicemapper_config_options() { | |
local storage_options | |
# docker expects device mapper device and not lvm device. Do the conversion. | |
eval $( lvs --nameprefixes --noheadings -o lv_name,kernel_major,kernel_minor $VG | while read line; do | |
eval $line | |
if [ "$LVM2_LV_NAME" = "$DATA_LV_NAME" ]; then | |
echo POOL_DEVICE_PATH=/dev/mapper/$( cat /sys/dev/block/${LVM2_LV_KERNEL_MAJOR}:${LVM2_LV_KERNEL_MINOR}/dm/name ) | |
fi | |
done ) | |
storage_options="DOCKER_STORAGE_OPTIONS=--storage-driver devicemapper --storage-opt dm.fs=xfs --storage-opt dm.thinpooldev=$POOL_DEVICE_PATH $(get_deferred_removal_string) $(get_deferred_deletion_string)" | |
echo $storage_options | |
} | |
get_overlay_config_options() { | |
echo "DOCKER_STORAGE_OPTIONS=--storage-driver overlay" | |
} | |
write_storage_config_file () { | |
local storage_options | |
if [ "$STORAGE_DRIVER" == "devicemapper" ]; then | |
if ! storage_options=$(get_devicemapper_config_options); then | |
return 1 | |
fi | |
elif [ "$STORAGE_DRIVER" == "overlay" ];then | |
if ! storage_options=$(get_overlay_config_options); then | |
return 1 | |
fi | |
fi | |
cat <<EOF > $DOCKER_STORAGE.tmp | |
$storage_options | |
EOF | |
mv $DOCKER_STORAGE.tmp $DOCKER_STORAGE | |
} | |
create_metadata_lv() { | |
# If metadata lvm already exists (failures from previous run), then | |
# don't create it. | |
# TODO: Modify script to cleanup meta and data lvs if failure happened | |
# later. Don't exit with error leaving partially created lvs behind. | |
if lvs -a $VG/${META_LV_NAME} --noheadings &>/dev/null; then | |
echo "Metadata volume $META_LV_NAME already exists. Not creating a new one." | |
return 0 | |
fi | |
# Reserve 0.1% of the free space in the VG for docker metadata. | |
# Calculating the based on actual data size might be better, but is | |
# more difficult do to the range of possible inputs. | |
VG_SIZE=$( vgs --noheadings --nosuffix --units s -o vg_size $VG ) | |
META_SIZE=$(( $VG_SIZE / 1000 + 1 )) | |
if [ ! -n "$META_LV_SIZE" ]; then | |
lvcreate -L ${META_SIZE}s -n $META_LV_NAME $VG | |
fi | |
} | |
convert_size_in_bytes() { | |
local size=$1 prefix suffix | |
# if it is all numeric, it is valid as by default it will be MiB. | |
if [[ $size =~ ^[[:digit:]]+$ ]]; then | |
echo $(($size*1024*1024)) | |
return 0 | |
fi | |
# supprt G, G[bB] or Gi[bB] inputs. | |
prefix=${size%[bBsSkKmMgGtTpPeE]i[bB]} | |
prefix=${prefix%[bBsSkKmMgGtTpPeE][bB]} | |
prefix=${prefix%[bBsSkKmMgGtTpPeE]} | |
# if prefix is not all numeric now, it is an error. | |
if ! [[ $prefix =~ ^[[:digit:]]+$ ]]; then | |
return 1 | |
fi | |
suffix=${data_size#$prefix} | |
case $suffix in | |
b*|B*) echo $prefix;; | |
s*|S*) echo $(($prefix*512));; | |
k*|K*) echo $(($prefix*2**10));; | |
m*|M*) echo $(($prefix*2**20));; | |
g*|G*) echo $(($prefix*2**30));; | |
t*|T*) echo $(($prefix*2**40));; | |
p*|P*) echo $(($prefix*2**50));; | |
e*|E*) echo $(($prefix*2**60));; | |
*) return 1;; | |
esac | |
} | |
data_size_in_bytes() { | |
local data_size=$1 | |
local bytes vg_size free_space percent | |
# -L compatible syntax | |
if [[ $DATA_SIZE != *%* ]]; then | |
bytes=`convert_size_in_bytes $data_size` | |
[ $? -ne 0 ] && return 1 | |
# If integer overflow took place, value is too large to handle. | |
if [ $bytes -lt 0 ];then | |
echo "DATA_SIZE=$data_size is too large to handle." 1>&2 | |
return 1 | |
fi | |
echo $bytes | |
return 0 | |
fi | |
if [[ $DATA_SIZE == *%FREE ]];then | |
free_space=$(vgs --noheadings --nosuffix --units b -o vg_free $VG) | |
percent=${DATA_SIZE%\%FREE} | |
echo $((percent*free_space/100)) | |
return 0 | |
fi | |
if [[ $DATA_SIZE == *%VG ]];then | |
vg_size=$(vgs --noheadings --nosuffix --units b -o vg_size $VG) | |
percent=${DATA_SIZE%\%VG} | |
echo $((percent*vg_size/100)) | |
fi | |
return 0 | |
} | |
check_min_data_size_condition() { | |
local min_data_size_bytes data_size_bytes free_space | |
[ -z $MIN_DATA_SIZE ] && return 0 | |
if ! check_numeric_size_syntax $MIN_DATA_SIZE; then | |
echo "MIN_DATA_SIZE value $MIN_DATA_SIZE is invalid." | |
exit 1 | |
fi | |
if ! min_data_size_bytes=$(convert_size_in_bytes $MIN_DATA_SIZE);then | |
echo "Failed to convert MIN_DATA_SIZE to bytes" | |
exit 1 | |
fi | |
# If integer overflow took place, value is too large to handle. | |
if [ $min_data_size_bytes -lt 0 ];then | |
echo "MIN_DATA_SIZE=$MIN_DATA_SIZE is too large to handle." | |
exit 1 | |
fi | |
free_space=$(vgs --noheadings --nosuffix --units b -o vg_free $VG) | |
if [ $free_space -lt $min_data_size_bytes ];then | |
echo "There is not enough free space in volume group $VG to create data volume of size MIN_DATA_SIZE=${MIN_DATA_SIZE}." | |
exit 1 | |
fi | |
if ! data_size_bytes=$(data_size_in_bytes $DATA_SIZE);then | |
echo "Failed to convert desired data size to bytes" | |
exit 1 | |
fi | |
if [ $data_size_bytes -lt $min_data_size_bytes ]; then | |
# Increasing DATA_SIZE to meet minimum data size requirements. | |
echo "DATA_SIZE=${DATA_SIZE} is smaller than MIN_DATA_SIZE=${MIN_DATA_SIZE}. Will create data volume of size specified by MIN_DATA_SIZE." | |
DATA_SIZE=$MIN_DATA_SIZE | |
fi | |
} | |
create_data_lv() { | |
if [ ! -n "$DATA_SIZE" ]; then | |
echo "Data volume creation failed. No DATA_SIZE specified" | |
exit 1 | |
fi | |
if ! check_data_size_syntax $DATA_SIZE; then | |
echo "DATA_SIZE value $DATA_SIZE is invalid." | |
exit 1 | |
fi | |
check_min_data_size_condition | |
# TODO: Error handling when DATA_SIZE > available space. | |
if [[ $DATA_SIZE == *%* ]]; then | |
lvcreate -y -l $DATA_SIZE -n $DATA_LV_NAME $VG | |
else | |
lvcreate -y -L $DATA_SIZE -n $DATA_LV_NAME $VG | |
fi | |
} | |
create_lvm_thin_pool () { | |
if [ -z "$DEVS" ] && [ -z "$VG_EXISTS" ]; then | |
echo "Specified volume group $VG does not exists, and no devices were specified" >&2 | |
exit 1 | |
fi | |
# First create metadata lv. Down the line let lvm2 create it automatically. | |
create_metadata_lv | |
create_data_lv | |
if [ -n "$CHUNK_SIZE" ]; then | |
CHUNK_SIZE_ARG="-c $CHUNK_SIZE" | |
fi | |
lvconvert -y --zero n $CHUNK_SIZE_ARG --thinpool $VG/$DATA_LV_NAME --poolmetadata $VG/$META_LV_NAME | |
} | |
setup_lvm_thin_pool () { | |
if ! lvm_pool_exists; then | |
create_lvm_thin_pool | |
write_storage_config_file | |
fi | |
# Enable or disable automatic pool extension | |
if [ "$AUTO_EXTEND_POOL" == "yes" ];then | |
enable_auto_pool_extension ${VG} ${POOL_LV_NAME} | |
else | |
disable_auto_pool_extension ${VG} ${POOL_LV_NAME} | |
fi | |
} | |
setup_overlay () { | |
write_storage_config_file | |
} | |
lvm_pool_exists() { | |
local lv_data | |
local lvname lv lvsize | |
lv_data=$( lvs --noheadings -o lv_name,lv_attr --separator , $VG | sed -e 's/^ *//') | |
SAVEDIFS=$IFS | |
for lv in $lv_data; do | |
IFS=, | |
read lvname lvattr <<< "$lv" | |
# pool logical volume has "t" as first character in its attributes | |
if [ "$lvname" == "$POOL_LV_NAME" ] && [[ $lvattr == t* ]]; then | |
IFS=$SAVEDIFS | |
return 0 | |
fi | |
done | |
IFS=$SAVEDIFS | |
return 1 | |
} | |
# If a /etc/sysconfig/docker-storage file is present and if it contains | |
# dm.datadev or dm.metadatadev entries, that means we have used old mode | |
# in the past. | |
is_old_data_meta_mode() { | |
if [ ! -f "$DOCKER_STORAGE" ];then | |
return 1 | |
fi | |
if ! grep -e "^DOCKER_STORAGE_OPTIONS=.*dm\.datadev" -e "^DOCKER_STORAGE_OPTIONS=.*dm\.metadatadev" $DOCKER_STORAGE > /dev/null 2>&1;then | |
return 1 | |
fi | |
return 0 | |
} | |
grow_root_pvs() { | |
# Grow root pvs only if user asked for it through config file. | |
[ "$GROWPART" != "true" ] && return | |
if [ ! -x "/usr/bin/growpart" ];then | |
echo "GROWPART=true is specified and /usr/bin/growpart executable is not available. Install /usr/bin/growpart and try again." | |
return 1 | |
fi | |
# Note that growpart is only variable here because we may someday support | |
# using separate partitions on the same disk. Today we fail early in that | |
# case. Also note that the way we are doing this, it should support LVM | |
# RAID for the root device. In the mirrored or striped case, we are growing | |
# partitions on all disks, so as long as they match, growing the LV should | |
# also work. | |
for pv in $ROOT_PVS; do | |
# Split device & partition. Ick. | |
growpart $( echo $pv | sed -r 's/([^0-9]*)([0-9]+)/\1 \2/' ) || true | |
pvresize $pv | |
done | |
} | |
grow_root_lv_fs() { | |
if [ -n "$ROOT_SIZE" ]; then | |
# TODO: Error checking if specified size is <= current size | |
lvextend -r -L $ROOT_SIZE $ROOT_DEV || true | |
fi | |
} | |
create_disk_partitions() { | |
for dev in $DEVS; do | |
if expr match $dev ".*[0-9]" > /dev/null; then | |
echo "Partition specification unsupported at this time." >&2 | |
exit 1 | |
fi | |
if [[ $dev != /dev/* ]]; then | |
dev=/dev/$dev | |
fi | |
# Use a single partition of a whole device | |
# TODO: | |
# * Consider gpt, or unpartitioned volumes | |
# * Error handling when partition(s) already exist | |
# * Deal with loop/nbd device names. See growpart code | |
PARTS=$( awk "\$4 ~ /"$( basename $dev )"[0-9]/ { print \$4 }" /proc/partitions ) | |
if [ -n "$PARTS" ]; then | |
echo "$dev has partitions: $PARTS" | |
exit 1 | |
fi | |
size=$(( $( awk "\$4 ~ /"$( basename $dev )"/ { print \$3 }" /proc/partitions ) * 2 - 2048 )) | |
cat <<EOF | sfdisk $dev | |
unit: sectors | |
${dev}1 : start= 2048, size= ${size}, Id=8e | |
EOF | |
pvcreate ${dev}1 | |
PVS="$PVS ${dev}1" | |
done | |
} | |
create_extend_volume_group() { | |
if [ -z "$VG_EXISTS" ]; then | |
vgcreate $VG $PVS | |
else | |
# TODO: | |
# * Error handling when PV is already part of a VG | |
vgextend $VG $PVS | |
fi | |
} | |
# Auto extension logic. Create a profile for pool and attach that profile | |
# the pool volume. | |
enable_auto_pool_extension() { | |
local volume_group=$1 | |
local pool_volume=$2 | |
local profileName="${volume_group}--${pool_volume}-extend" | |
local profileFile="${profileName}.profile" | |
local profileDir | |
local tmpFile=`mktemp -t tmp.XXXXX` | |
profileDir=$(lvm dumpconfig | grep "profile_dir" | cut -d "=" -f2 | sed 's/"//g') | |
[ -n "$profileDir" ] || return 1 | |
if [ ! -n "$POOL_AUTOEXTEND_THRESHOLD" ];then | |
echo "POOL_AUTOEXTEND_THRESHOLD not specified" | |
return 1 | |
fi | |
if [ ! -n "$POOL_AUTOEXTEND_PERCENT" ];then | |
echo "POOL_AUTOEXTEND_PERCENT not specified" | |
return 1 | |
fi | |
cat <<EOF > $tmpFile | |
activation { | |
thin_pool_autoextend_threshold=${POOL_AUTOEXTEND_THRESHOLD} | |
thin_pool_autoextend_percent=${POOL_AUTOEXTEND_PERCENT} | |
} | |
EOF | |
mv $tmpFile ${profileDir}/${profileFile} | |
lvchange --metadataprofile ${profileName} ${volume_group}/${pool_volume} | |
} | |
disable_auto_pool_extension() { | |
local volume_group=$1 | |
local pool_volume=$2 | |
local profileName="${volume_group}--${pool_volume}-extend" | |
local profileFile="${profileName}.profile" | |
local profileDir | |
profileDir=$(lvm dumpconfig | grep "profile_dir" | cut -d "=" -f2 | sed 's/"//g') | |
[ -n "$profileDir" ] || return 1 | |
lvchange --detachprofile ${volume_group}/${pool_volume} | |
rm -f ${profileDir}/${profileFile} | |
} | |
# Gets the current DOCKER_STORAGE_OPTIONS= string. | |
get_docker_storage_options() { | |
local options | |
if options=$(grep -e "^DOCKER_STORAGE_OPTIONS=" $DOCKER_STORAGE | sed 's/DOCKER_STORAGE_OPTIONS=//' | sed 's/^ *//');then | |
echo $options | |
return 0 | |
fi | |
return 1 | |
} | |
is_valid_storage_driver() { | |
local driver=$1 d | |
for d in $STORAGE_DRIVERS;do | |
[ "$driver" == "$d" ] && return 0 | |
done | |
return 1 | |
} | |
# Gets the existing storage driver configured in /etc/sysconfig/docker-storage | |
get_existing_storage_driver() { | |
local options driver | |
if [ ! -f "$DOCKER_STORAGE" ];then | |
return 0 | |
fi | |
if ! options=$(get_docker_storage_options); then | |
return 1 | |
fi | |
# DOCKER_STORAGE_OPTIONS= is empty there is no storage driver configured yet. | |
[ -z "$options" ] && return 0 | |
# Check if -storage-driver <driver> is there. | |
if ! driver=$(echo $options | sed -n 's/.*\(--storage-driver [ ]*[a-z]*\).*/\1/p' | sed 's/--storage-driver *//');then | |
return 1 | |
fi | |
if [ -n "$driver" ] && [ ! "$driver" == "$options" ];then | |
echo $driver | |
return 0 | |
fi | |
# Check if -s <driver> is there. | |
if ! driver=$(echo $options | sed -n 's/.*\(-s [ ]*[a-z]*\).*/\1/p' | sed 's/-s *//');then | |
return 1 | |
fi | |
# If pattern does not match then driver == options. | |
if [ -n "$driver" ] && [ ! "$driver" == "$options" ];then | |
echo $driver | |
return 0 | |
fi | |
# We shipped some versions where we did not specify -s devicemapper. | |
# If dm.thinpooldev= is present driver is devicemapper. | |
if echo $options | grep -q -e "--storage-opt dm.thinpooldev=";then | |
echo "devicemapper" | |
return 0 | |
fi | |
#Failed to determine existing storage driver. | |
return 1 | |
} | |
setup_storage() { | |
local current_driver | |
if [ "$STORAGE_DRIVER" == "" ];then | |
echo "No storage driver specified. Specify one using STORAGE_DRIVER option." | |
exit 0 | |
fi | |
if ! is_valid_storage_driver $STORAGE_DRIVER;then | |
echo "Invalid storage driver: ${STORAGE_DRIVER}." | |
exit 1 | |
fi | |
if ! current_driver=$(get_existing_storage_driver);then | |
echo "Failed to determine existing storage driver." | |
exit 1 | |
fi | |
# If storage is configured and new driver should match old one. | |
if [ -n "$current_driver" ] && [ "$current_driver" != "$STORAGE_DRIVER" ];then | |
echo "Storage is already configured with ${current_driver} driver. Can't configure it with ${STORAGE_DRIVER} driver. To override, remove $DOCKER_STORAGE and retry." | |
exit 1 | |
fi | |
# Set up lvm thin pool LV | |
if [ "$STORAGE_DRIVER" == "devicemapper" ]; then | |
setup_lvm_thin_pool | |
elif [ "$STORAGE_DRIVER" == "overlay" ];then | |
setup_overlay | |
fi | |
} | |
usage() { | |
echo "Usage: $1 [OPTIONS]" | |
echo | |
echo "Grows the root filesystem and sets up storage for docker." | |
echo | |
echo "Options:" | |
echo " -h, --help Print help message." | |
} | |
# Main Script | |
if [ $# -gt 0 ]; then | |
usage $0 | |
exit 0 | |
fi | |
# Source library | |
if [ -e /usr/lib/docker-storage-setup/libdss.sh ]; then | |
source /usr/lib/docker-storage-setup/libdss.sh | |
fi | |
if [ -e /usr/lib/docker-storage-setup/docker-storage-setup ]; then | |
source /usr/lib/docker-storage-setup/docker-storage-setup | |
fi | |
# If user has overridden any settings in /etc/sysconfig/docker-storage-setup | |
# take that into account. | |
if [ -e /etc/sysconfig/docker-storage-setup ]; then | |
source /etc/sysconfig/docker-storage-setup | |
fi | |
# Read mounts | |
ROOT_DEV=$( awk '$2 ~ /^\/$/ && $1 !~ /rootfs/ { print $1 }' /proc/mounts ) | |
if ! ROOT_VG=$(lvs --noheadings -o vg_name $ROOT_DEV);then | |
ROOT_VG= | |
else | |
ROOT_VG=$(echo $ROOT_VG | sed -e 's/^ *//' -e 's/ *$//') | |
fi | |
ROOT_PVS=$( pvs --noheadings -o pv_name,vg_name | awk "\$2 ~ /^$ROOT_VG\$/ { print \$1 }" ) | |
VG_EXISTS= | |
if [ -z "$VG" ]; then | |
# At this point of time, either user should pass in a volume group name | |
# which either exists or can be created. Or there needs to be a valid root | |
# device volume group on which this script can operate. If none of that | |
# happens, we can't make progress. Error out. | |
if [ -z "$ROOT_VG" ]; then | |
echo "No volume group has been specified and root device volume group could not be determined. Exiting." | |
exit 1 | |
fi | |
VG=$ROOT_VG | |
VG_EXISTS=1 | |
else | |
for vg_name in $( vgs --noheadings -o vg_name ); do | |
if [ "$vg_name" == "$VG" ]; then | |
VG_EXISTS=1 | |
break | |
fi | |
done | |
fi | |
if [ -n "$DEVS" ] ; then | |
create_disk_partitions | |
create_extend_volume_group | |
fi | |
grow_root_pvs | |
# NB: We are growing root here first, because when root and docker share a | |
# disk, we'll default to giving some portion of remaining space to docker. | |
grow_root_lv_fs | |
if is_old_data_meta_mode; then | |
echo "ERROR: Old mode of passing data and metadata logical volumes to docker is not supported. Exiting." | |
exit 1 | |
fi | |
setup_storage |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment