Last active
June 7, 2016 00:55
-
-
Save JustinGrote/094af97e665300e202f5 to your computer and use it in GitHub Desktop.
Destage Files to Another File System using High/Low Watermark Method
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Cache Destaging Script
# Author: Justin Grote <[email protected]>
# License: MIT
#
# Synopsis: Move files from one directory to another when a high watermark is reached.
#
# This was written to move Plex files from a fast SSD cache to slower cloud storage
# on an automatic basis. It uses FIFO logic: the oldest file in the cache is the first
# to be destaged.
# It is designed to run as a cron job; running it every 5 minutes is recommended because
# the script ensures that only one copy is ever running at a time.
#
# The original version of this script worked on a per-file basis, which works fine but
# led to fragmented filesystems and to the risk of a fragmented library in the event
# the cache died. So I changed it to work on a per-directory basis with special
# consideration for TV shows.
# NOTE(review): the destage loop in this file still selects and moves one file at a time.
#
# Written using advice from http://www.davidpashley.com/articles/writing-robust-shell-scripts/

# Bash unofficial "strict" mode: stop on errors and on uninitialized variables.
# IFS must be assigned on its own line. Written as an argument to `set` it only
# becomes positional parameter $1 and the field separator is left unchanged.
set -eu
IFS=$'\n\t'

#DEBUG
#set -x

###SETTINGS
# Path of your source cache. If using encfs, point this to the raw encrypted files,
# NOT the decrypt mount point.
CACHEPATH=/home/plex/.local
export PATH=/usr/local/bin:$PATH

# Path to exclude (a find -path style pattern).
EXCLUDEPATH='./fOTaL90/*'

# Where to destage files from cache to (usually a slower but larger filesystem).
# If using encfs, point this to the raw encrypted files, NOT the decrypt mount point.
# If using ACDCLI, specify the path relative to the root of your Amazon Cloud Drive
# where you want to save to.
DESTAGEPATH=/Plex
#DESTAGEPATH=/home/plex/.acd/Plex

# Percentage of filesystem full where this script should act
HIGHMARK=60

# Script will move files until it reaches this level
LOWMARK=40

# Which tool to use for moving files.
# "acdcli" uses acd_cli upload instead of mv: better performance and "safer" compared
# to using mv onto an acd_cli FUSE filesystem.
# Any other value uses the basic mkdir + mv method, which is good for destaging to
# systems other than Amazon Cloud Drive (any other local or FUSE filesystem, for example)
# or when you need mv for compatibility reasons (such as having spaces in your paths).
# THE acdcli METHOD WILL BREAK IF YOU HAVE SPACES IN YOUR PATHS. USE ANOTHER VALUE
# IF YOU NEED SPACES SUPPORT.
MOVEMETHOD=acdcli

# Only perform this number of move actions before bailing out.
# This is infinite loop protection in case there is a permissions problem or something.
# The script picks up where it left off on the next run, assuming the cache is still
# above the high water mark.
# This number should be about 50% more than the average number of move actions you
# expect to transfer per session.
# Too high and it may take the script a while to bail out if there is a problem,
# potentially causing damage.
# Too low and you may not hit your low water mark before bailing, and have to wait
# until you hit your high water mark again.
MOVEACTIONCOUNT=1000

# Where to store the lockfile so that this only has one running instance at a time.
# The process needs write access to this location.
LOCKDIR=/var/lib/plexmediaserver
###MAIN SCRIPT START
LOCKFILE=$LOCKDIR/cache_destage_acd.pid

# Single-instance guard: refuse to start when the PID recorded in the lockfile
# belongs to a live process. kill -0 sends no signal and prints nothing; it reports
# only through its exit status, so the status (not its output) is what gets tested.
# A lockfile holding a dead or unreadable PID is treated as stale and is overwritten.
if [[ -e "$LOCKFILE" ]]; then
  LOCKPID=$(cat "$LOCKFILE" 2>/dev/null) || LOCKPID=
  if [[ "$LOCKPID" =~ ^[0-9]+$ ]] && kill -0 "$LOCKPID" 2>/dev/null; then
    echo "$0 already running. Please wait for it to complete or kill the other running process" 1>&2
    set +e; exit 3
  fi
fi

# make sure the lockfile is cleared when we exit and then claim it
trap 'rm -f "$LOCKFILE"; exit' INT TERM EXIT
echo $$ > "$LOCKFILE"
# Get the total capacity of the cache filesystem, in 1K blocks.
# -P keeps every filesystem on a single output line even when the device name is long;
# -k pins the unit so that it always matches the du -k figure used below.
FSTOTAL=$(df -Pk "$CACHEPATH" | awk 'NR == 2 { print $2 }')
if ! [[ "$FSTOTAL" =~ ^[0-9]+$ ]] || [ "$FSTOTAL" -eq 0 ]; then
  echo "Unable to determine the size of the filesystem holding $CACHEPATH" 1>&2
  set +e; exit 5
fi

# Measure the current usage of the cache path (CACHESIZE, 1K blocks) and express it
# as a rounded percentage of the filesystem capacity (CACHEUTIL).
# The directory is measured rather than the whole filesystem in case some other
# directory (e.g. logs) is filling the filesystem: we don't want that to erroneously
# starve the cache instead of prompting a fix for the other disk space issue.
refresh_cacheutil() {
  CACHESIZE=$(du -sk "$CACHEPATH" | awk '{ print $1 }')
  CACHEUTIL=$(awk -v used="$CACHESIZE" -v total="$FSTOTAL" \
    'BEGIN { pc = 100 * used / total; i = int(pc); print (pc - i < 0.5) ? i : i + 1 }')
}

refresh_cacheutil
#echo "DEBUG: CACHESIZE=$CACHESIZE FSTOTAL=$FSTOTAL CACHEUTIL=$CACHEUTIL"

if [ "$CACHEUTIL" -lt "$HIGHMARK" ]; then
  echo "DEBUG High Watermark Not Met ($CACHEUTIL/$HIGHMARK)"
  set +e; exit 0
fi

###Move oldest files to the destage area until we reach our low mark.
#Do an ACD_CLI Sync if that method is being used
if [[ "$MOVEMETHOD" == "acdcli" ]]; then
  acd_cli -nl sync
fi

# find prints paths that start with $CACHEPATH, so a relative exclude pattern
# ("./dir/*") is anchored at the cache root before it is handed to -path.
case "$EXCLUDEPATH" in
  /*) EXCLUDEGLOB=$EXCLUDEPATH ;;
  *) EXCLUDEGLOB="$CACHEPATH/${EXCLUDEPATH#./}" ;;
esac

#Set a dummy number of 9999 to run this loop at least once
#Done because bash has no do...until support that is easily portable
CACHEUTIL=9999
I=1
while [ "$CACHEUTIL" -gt "$LOWMARK" ]; do
  #echo "DEBUG: CacheUtil is greater than LowMark. Moving files."
  # Get the oldest file by modify date in the directory. The exclusion test has to
  # come before -printf, otherwise excluded files are printed anyway.
  FILETOMOVE=$(find "$CACHEPATH" -type f ! -path "$EXCLUDEGLOB" -printf '%T+ %p\n' \
    | LC_ALL=C sort | head -n 1 | cut -d' ' -f 2-)
  if [[ -z "$FILETOMOVE" ]]; then
    echo "No more files eligible for destaging in $CACHEPATH" 1>&2
    break
  fi

  #Get the relative path
  FTMRELATIVE=$(realpath --relative-to="$CACHEPATH" -- "$FILETOMOVE")
  FTMRELATIVEDIR=$(dirname -- "$FTMRELATIVE")
  #echo "DEBUG Loop $I: CacheUtil=$CACHEUTIL LowMark=$LOWMARK FileToMove=$FILETOMOVE"
  #echo "DEBUG Loop $I: DestinationFile=$DESTAGEPATH/$FTMRELATIVE DestinationFolder=$DESTAGEPATH/$FTMRELATIVEDIR"

  case "$MOVEMETHOD" in
    acdcli)
      acd_cli -nl mkdir -p "$DESTAGEPATH/$FTMRELATIVEDIR"
      #echo "DEBUG: acd_cli mkdir status: $?"
      # Capture acd_cli's stderr in a private temp file rather than a fixed /tmp name.
      ACDLOG=$(mktemp)
      ACDSTATUS=0
      acd_cli -v -nl upload --remove-source-files "$FILETOMOVE" "$DESTAGEPATH/$FTMRELATIVEDIR" 2>"$ACDLOG" || ACDSTATUS=$?
      if [ "$ACDSTATUS" -ne 0 ]; then
        # Surface the captured log and stop, without leaving the temp file behind.
        cat "$ACDLOG" 1>&2
        rm -f "$ACDLOG"
        set +e; exit "$ACDSTATUS"
      fi
      #If the file already exists, assume it was successfully uploaded and remove it
      #TODO: Hash Check, but this is generally not necessary unless files are overwritten
      if grep -q "Skipping upload of existing file" "$ACDLOG"; then
        rm -vf "$FILETOMOVE"
      fi
      rm -f "$ACDLOG"
      ;;
    *)
      #Move the file to the same relative path, creating the directory if necessary
      mkdir -vp "$DESTAGEPATH/$FTMRELATIVEDIR"
      mv -vf "$FILETOMOVE" "$DESTAGEPATH/$FTMRELATIVE"
      ;;
  esac

  # Refresh the utilization after the move is complete for the next lowmark test.
  # The size is re-measured first so the percentage reflects the move just made.
  refresh_cacheutil

  I=$((I + 1))
  if [ "$I" -gt "$MOVEACTIONCOUNT" ]; then
    echo "MOVEACTIONCOUNT threshold of $MOVEACTIONCOUNT reached. Terminating script" 1>&2
    set +e; exit 4
  fi
done

# Cleanup empty directories left behind on the local cache drive after the move.
# -mindepth 1 keeps find from deleting the cache root itself once it is empty.
find "$CACHEPATH" -mindepth 1 -type d -empty -delete

#Allow normal error behavior again before exit
set +eu
jgrote@tinycloud /var/lib/plexmediaserver/unionfs_cache_destage (master*) $ cat unionfs_cache_destage.sh | |
#!/bin/bash
# Cache Destaging Script
# Author: Justin Grote <[email protected]>
# License: MIT
#
# Synopsis: Move files from one directory to another when a high watermark is reached.
#
# This was written to move Plex files from a fast SSD cache to slower cloud storage
# on an automatic basis. It uses FIFO logic: the oldest file in the cache is the first
# to be destaged.
# It is designed to run as a cron job; running it every 5 minutes is recommended because
# the script ensures that only one copy is ever running at a time.
#
# The original version of this script worked on a per-file basis, which works fine but
# led to fragmented filesystems and to the risk of a fragmented library in the event
# the cache died. So I changed it to work on a per-directory basis with special
# consideration for TV shows.
# NOTE(review): the destage loop in this file still selects and moves one file at a time.
#
# Written using advice from http://www.davidpashley.com/articles/writing-robust-shell-scripts/

# Bash unofficial "strict" mode: stop on errors and on uninitialized variables.
# IFS must be assigned on its own line. Written as an argument to `set` it only
# becomes positional parameter $1 and the field separator is left unchanged.
set -eu
IFS=$'\n\t'

#DEBUG
#set -x

###SETTINGS
# Path of your source cache. If using encfs, point this to the raw encrypted files,
# NOT the decrypt mount point.
CACHEPATH=/home/plex/.local
export PATH=/usr/local/bin:$PATH

# Path to exclude (a find -path style pattern).
EXCLUDEPATH='./fOTaL90/*'

# Where to destage files from cache to (usually a slower but larger filesystem).
# If using encfs, point this to the raw encrypted files, NOT the decrypt mount point.
# If using ACDCLI, specify the path relative to the root of your Amazon Cloud Drive
# where you want to save to.
DESTAGEPATH=/Plex
#DESTAGEPATH=/home/plex/.acd/Plex

# Percentage of filesystem full where this script should act
HIGHMARK=60

# Script will move files until it reaches this level
LOWMARK=40

# Which tool to use for moving files.
# "acdcli" uses acd_cli upload instead of mv: better performance and "safer" compared
# to using mv onto an acd_cli FUSE filesystem.
# Any other value uses the basic mkdir + mv method, which is good for destaging to
# systems other than Amazon Cloud Drive (any other local or FUSE filesystem, for example)
# or when you need mv for compatibility reasons (such as having spaces in your paths).
# THE acdcli METHOD WILL BREAK IF YOU HAVE SPACES IN YOUR PATHS. USE ANOTHER VALUE
# IF YOU NEED SPACES SUPPORT.
MOVEMETHOD=acdcli

# Only perform this number of move actions before bailing out.
# This is infinite loop protection in case there is a permissions problem or something.
# The script picks up where it left off on the next run, assuming the cache is still
# above the high water mark.
# This number should be about 50% more than the average number of move actions you
# expect to transfer per session.
# Too high and it may take the script a while to bail out if there is a problem,
# potentially causing damage.
# Too low and you may not hit your low water mark before bailing, and have to wait
# until you hit your high water mark again.
MOVEACTIONCOUNT=1000

# Where to store the lockfile so that this only has one running instance at a time.
# The process needs write access to this location.
LOCKDIR=/var/lib/plexmediaserver
###MAIN SCRIPT START
LOCKFILE=$LOCKDIR/cache_destage_acd.pid

# Single-instance guard: refuse to start when the PID recorded in the lockfile
# belongs to a live process. kill -0 sends no signal and prints nothing; it reports
# only through its exit status, so the status (not its output) is what gets tested.
# A lockfile holding a dead or unreadable PID is treated as stale and is overwritten.
if [[ -e "$LOCKFILE" ]]; then
  LOCKPID=$(cat "$LOCKFILE" 2>/dev/null) || LOCKPID=
  if [[ "$LOCKPID" =~ ^[0-9]+$ ]] && kill -0 "$LOCKPID" 2>/dev/null; then
    echo "$0 already running. Please wait for it to complete or kill the other running process" 1>&2
    set +e; exit 3
  fi
fi

# make sure the lockfile is cleared when we exit and then claim it
trap 'rm -f "$LOCKFILE"; exit' INT TERM EXIT
echo $$ > "$LOCKFILE"
# Get the total capacity of the cache filesystem, in 1K blocks.
# -P keeps every filesystem on a single output line even when the device name is long;
# -k pins the unit so that it always matches the du -k figure used below.
FSTOTAL=$(df -Pk "$CACHEPATH" | awk 'NR == 2 { print $2 }')
if ! [[ "$FSTOTAL" =~ ^[0-9]+$ ]] || [ "$FSTOTAL" -eq 0 ]; then
  echo "Unable to determine the size of the filesystem holding $CACHEPATH" 1>&2
  set +e; exit 5
fi

# Measure the current usage of the cache path (CACHESIZE, 1K blocks) and express it
# as a rounded percentage of the filesystem capacity (CACHEUTIL).
# The directory is measured rather than the whole filesystem in case some other
# directory (e.g. logs) is filling the filesystem: we don't want that to erroneously
# starve the cache instead of prompting a fix for the other disk space issue.
refresh_cacheutil() {
  CACHESIZE=$(du -sk "$CACHEPATH" | awk '{ print $1 }')
  CACHEUTIL=$(awk -v used="$CACHESIZE" -v total="$FSTOTAL" \
    'BEGIN { pc = 100 * used / total; i = int(pc); print (pc - i < 0.5) ? i : i + 1 }')
}

refresh_cacheutil
#echo "DEBUG: CACHESIZE=$CACHESIZE FSTOTAL=$FSTOTAL CACHEUTIL=$CACHEUTIL"

if [ "$CACHEUTIL" -lt "$HIGHMARK" ]; then
  echo "DEBUG High Watermark Not Met ($CACHEUTIL/$HIGHMARK)"
  set +e; exit 0
fi

###Move oldest files to the destage area until we reach our low mark.
#Do an ACD_CLI Sync if that method is being used
if [[ "$MOVEMETHOD" == "acdcli" ]]; then
  acd_cli -nl sync
fi

# find prints paths that start with $CACHEPATH, so a relative exclude pattern
# ("./dir/*") is anchored at the cache root before it is handed to -path.
case "$EXCLUDEPATH" in
  /*) EXCLUDEGLOB=$EXCLUDEPATH ;;
  *) EXCLUDEGLOB="$CACHEPATH/${EXCLUDEPATH#./}" ;;
esac

#Set a dummy number of 9999 to run this loop at least once
#Done because bash has no do...until support that is easily portable
CACHEUTIL=9999
I=1
while [ "$CACHEUTIL" -gt "$LOWMARK" ]; do
  #echo "DEBUG: CacheUtil is greater than LowMark. Moving files."
  # Get the oldest file by modify date in the directory. The exclusion test has to
  # come before -printf, otherwise excluded files are printed anyway.
  FILETOMOVE=$(find "$CACHEPATH" -type f ! -path "$EXCLUDEGLOB" -printf '%T+ %p\n' \
    | LC_ALL=C sort | head -n 1 | cut -d' ' -f 2-)
  if [[ -z "$FILETOMOVE" ]]; then
    echo "No more files eligible for destaging in $CACHEPATH" 1>&2
    break
  fi

  #Get the relative path
  FTMRELATIVE=$(realpath --relative-to="$CACHEPATH" -- "$FILETOMOVE")
  FTMRELATIVEDIR=$(dirname -- "$FTMRELATIVE")
  #echo "DEBUG Loop $I: CacheUtil=$CACHEUTIL LowMark=$LOWMARK FileToMove=$FILETOMOVE"
  #echo "DEBUG Loop $I: DestinationFile=$DESTAGEPATH/$FTMRELATIVE DestinationFolder=$DESTAGEPATH/$FTMRELATIVEDIR"

  case "$MOVEMETHOD" in
    acdcli)
      acd_cli -nl mkdir -p "$DESTAGEPATH/$FTMRELATIVEDIR"
      #echo "DEBUG: acd_cli mkdir status: $?"
      # Capture acd_cli's stderr in a private temp file rather than a fixed /tmp name.
      ACDLOG=$(mktemp)
      ACDSTATUS=0
      acd_cli -v -nl upload --remove-source-files "$FILETOMOVE" "$DESTAGEPATH/$FTMRELATIVEDIR" 2>"$ACDLOG" || ACDSTATUS=$?
      if [ "$ACDSTATUS" -ne 0 ]; then
        # Surface the captured log and stop, without leaving the temp file behind.
        cat "$ACDLOG" 1>&2
        rm -f "$ACDLOG"
        set +e; exit "$ACDSTATUS"
      fi
      #If the file already exists, assume it was successfully uploaded and remove it
      #TODO: Hash Check, but this is generally not necessary unless files are overwritten
      if grep -q "Skipping upload of existing file" "$ACDLOG"; then
        rm -vf "$FILETOMOVE"
      fi
      rm -f "$ACDLOG"
      ;;
    *)
      #Move the file to the same relative path, creating the directory if necessary
      mkdir -vp "$DESTAGEPATH/$FTMRELATIVEDIR"
      mv -vf "$FILETOMOVE" "$DESTAGEPATH/$FTMRELATIVE"
      ;;
  esac

  # Refresh the utilization after the move is complete for the next lowmark test.
  # The size is re-measured first so the percentage reflects the move just made.
  refresh_cacheutil

  I=$((I + 1))
  if [ "$I" -gt "$MOVEACTIONCOUNT" ]; then
    echo "MOVEACTIONCOUNT threshold of $MOVEACTIONCOUNT reached. Terminating script" 1>&2
    set +e; exit 4
  fi
done

# Cleanup empty directories left behind on the local cache drive after the move.
# -mindepth 1 keeps find from deleting the cache root itself once it is empty.
find "$CACHEPATH" -mindepth 1 -type d -empty -delete

#Allow normal error behavior again before exit
set +eu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Cache Destaging Script
# Author: Justin Grote [email protected]
# License: MIT
#
# Synopsis: Move files from one directory to another when a high watermark is reached.
#
# This was written to move Plex files from a fast SSD cache to slower cloud storage
# on an automatic basis. It uses FIFO logic: the oldest file in the cache is the first
# to be destaged.
# It is designed to run as a cron job; running it every 5 minutes is recommended because
# the script ensures that only one copy is ever running at a time.
#
# Written using advice from http://www.davidpashley.com/articles/writing-robust-shell-scripts/

# Stop the script if ANY error occurs. Everything should return normal exit codes.
set -e

###Variables
# Where to store the lockfile so that this only has one running instance at a time
LOCKDIR=/var/lib/plexmediaserver

# Percentage of filesystem full where this script should act
HIGHMARK=75

# Script will move files until it reaches this level
LOWMARK=50

# Only move this number of files before bailing out.
# This is infinite loop protection in case there is a permissions problem or something.
# The script picks up where it left off on the next run, assuming the cache is still
# above the high water mark.
FILEMOVECOUNT=50

# Path of your cache. If using encfs, point this to the raw encrypted files,
# NOT the decrypt mount point.
CACHEPATH=/home/plex/.local

# Where to destage files from cache to (usually a slower but larger filesystem).
# If using encfs, point this to the raw encrypted files, NOT the decrypt mount point.
DESTAGEPATH=/home/plex/.acd/Plex
LOCKFILE=$LOCKDIR/fs_cache_destage.pid

# Single-instance guard: refuse to start only when the PID recorded in the lockfile
# belongs to a live process. kill -0 sends no signal; it reports through its exit
# status whether the PID exists, so that status is tested directly. Masking it with
# "|| :" would make every leftover lockfile look like a running instance.
# A lockfile holding a dead or unreadable PID is treated as stale and is overwritten.
if [[ -e "$LOCKFILE" ]]; then
  LOCKPID=$(cat "$LOCKFILE" 2>/dev/null) || LOCKPID=
  if [[ "$LOCKPID" =~ ^[0-9]+$ ]] && kill -0 "$LOCKPID" 2>/dev/null; then
    echo "$0 already running. Please wait for it to complete or kill the other running process" 1>&2
    set +e; exit 3
  fi
fi

# make sure the lockfile is removed when we exit and then claim it
trap 'rm -f "$LOCKFILE"; exit' INT TERM EXIT
echo $$ > "$LOCKFILE"
###Move oldest files to the destage area until we reach our low mark.
I=1

# Get the total capacity of the cache filesystem, in 1K blocks.
# -P keeps every filesystem on a single output line even when the device name is long;
# -k pins the unit so that it always matches the du -k figure used below.
FSTOTAL=$(df -Pk "$CACHEPATH" | awk 'NR == 2 { print $2 }')
if ! [[ "$FSTOTAL" =~ ^[0-9]+$ ]] || [ "$FSTOTAL" -eq 0 ]; then
  echo "Unable to determine the size of the filesystem holding $CACHEPATH" 1>&2
  set +e; exit 5
fi

# Measure the current usage of the cache path (CACHESIZE, 1K blocks) and express it
# as a rounded percentage of the filesystem capacity (CACHEUTIL).
# The directory is measured rather than the whole filesystem in case some other
# directory (e.g. logs) is filling the filesystem: we don't want that to erroneously
# starve the cache instead of prompting a fix for the other problem.
refresh_cacheutil() {
  CACHESIZE=$(du -sk "$CACHEPATH" | awk '{ print $1 }')
  CACHEUTIL=$(awk -v used="$CACHESIZE" -v total="$FSTOTAL" \
    'BEGIN { pc = 100 * used / total; i = int(pc); print (pc - i < 0.5) ? i : i + 1 }')
}

refresh_cacheutil

# Compare the utilization percentage (not the raw size in blocks) with the high mark.
if [ "$CACHEUTIL" -lt "$HIGHMARK" ]; then
  echo "DEBUG High Watermark Not Met ($CACHEUTIL/$HIGHMARK)"
  set +e; exit 0
fi

while [ "$CACHEUTIL" -gt "$LOWMARK" ]; do
  # Oldest file by modify date, skipping hidden files and directories below the
  # cache root. The patterns are anchored at $CACHEPATH because the cache path itself
  # may contain a dot-directory, which an unanchored '*/.*' test would match for
  # every file.
  FILETOMOVE=$(find "$CACHEPATH" -type f \
    ! -path "$CACHEPATH/.*" ! -path "$CACHEPATH/*/.*" \
    -printf '%T+ %p\n' | LC_ALL=C sort | head -n 1 | cut -d' ' -f 2-)
  if [[ -z "$FILETOMOVE" ]]; then
    echo "No more files eligible for destaging in $CACHEPATH" 1>&2
    break
  fi
  echo "DEBUG Loop $I: CacheUtil=$CACHEUTIL LowMark=$LOWMARK FileToMove=$FILETOMOVE"

  # Recreate the file's path relative to the cache root underneath the destage path.
  FTMRELATIVE=$(realpath --relative-to="$CACHEPATH" -- "$FILETOMOVE")
  FTMRELATIVEDIR=$(dirname -- "$FTMRELATIVE")
  mkdir -vp "$DESTAGEPATH/$FTMRELATIVEDIR"
  mv -vf "$FILETOMOVE" "$DESTAGEPATH/$FTMRELATIVE"

  # Refresh the utilization after the move is complete for the next lowmark test.
  # The size is re-measured first so the percentage reflects the move just made.
  refresh_cacheutil

  I=$((I + 1))
  if [ "$I" -gt "$FILEMOVECOUNT" ]; then
    echo "FILEMOVECOUNT threshold of $FILEMOVECOUNT reached. Terminating script" 1>&2
    set +e; exit 4
  fi
done

# Cleanup empty directories left behind on the local cache drive after the move.
# -mindepth 1 keeps find from deleting the cache root itself once it is empty.
find "$CACHEPATH" -mindepth 1 -type d -empty -delete

#Allow normal error behavior again before exit
set +e
I just updated and posted a "working" one I've been using, but it's not all the way there and doesn't handle errors well. I found bash to be way too frustrating for this task, and found it was a lot easier to just set it up to destage everything with acd_cli upload in one command, which also processes the files in parallel.
So now I basically just have a watch, and when it hits 90% it just runs acd_cli upload -d --remove-source-files --deduplicate, which works "good enough" for now.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey Justin, do you have updated destage scripts involving acd_cli? I see you started to add the environment variables, but not the functionality