Last active
November 3, 2020 13:01
-
-
Save mikeggrant-eumetsat/fd6956912d8280024df0c4925709bbdf to your computer and use it in GitHub Desktop.
Script to do a recursive du on an Isilon cluster using the FSA reporting tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# If you have to manage an EMC Isilon cluster and want to use the
# File System Analytics (FSA) tool to do a "du --depth X", you'll quickly
# realise they unbelievably haven't implemented a recursive scan.
# This script does that.
#
# Usage: see the usage message below (CLUSTER_NAME BASE_DIR MAX_DEPTH OUTPUT_FILE).
#
# Must be run on the Isilon cluster machines with sufficient privileges to
# access the FSA reports.
#
# Known issues:
#  - probably won't like funny characters in directory names..
#
# Refs:
#  https://community.emc.com/community/products/isilon/blog/2016/07/25/insightiq-iiqdataexport-utility-under-onefs-v800
#  https://thesanguy.com/2018/01/09/insightiq-data-export-utility/
# Require exactly four positional arguments; otherwise print usage to stderr
# and bail out before touching anything.
if [ $# -ne 4 ] ; then
  echo "Usage: $0 CLUSTER_NAME BASE_DIR MAX_DEPTH OUTPUT_FILE" >&2
  echo >&2
  echo "  CLUSTER_NAME = tcenas, eumetsat [=DSNNAS]" >&2
  echo "  BASE_DIR = /ifs/ERA-CLIM/Repro" >&2
  echo "  MAX_DEPTH = depth to scan to, limited also by FSA resolution" >&2
  echo "  OUTPUT_FILE = where to write the final outputs to" >&2
  echo >&2
  echo "e.g. $0 eumetsat /ifs/ERA-CLIM/Repro 2 results.csv" >&2
  exit 1
fi
CLUSTER_NAME=$1

# BASE_DIR must be /ifs/MODULE/dir (at least 3 levels deep); verify this.
# Use a bash regex match instead of 'echo | grep' + '$?' so the check is a
# single builtin and is safe for values containing spaces or glob characters.
BASE_DIR=$2
if [[ ! "$BASE_DIR" =~ ^/ifs/[^/]*/[^/] ]] ; then
  echo "BASE_DIR must be at 3 levels deep (e.g. /ifs/MODULE/xxx) because the FSA export tool requires a different usage for the MODULE level" >&2
  echo "(actually, I've not tested this, so maybe it works - edit the script and try if you like..)" >&2
  exit 1
fi

MAX_DEPTH=$3
OUTPUT_FILE=$4
# Make a unique place to dump temporary files; abort if we can't.
TMPOUT=$(mktemp -d) || { echo "mktemp -d failed" >&2; exit 1; }

# Need to get the id number of the latest File System Analytics (FSA) report.
# 'iiq_data_export fsa list' output looks like this; we want the last id,
# which sits on the 3rd line from the bottom (data row, separator, blank line):
#
# Available Reports for: tme-sandbox Time Zone: EDT
#================================================================================
# |ID  |FSA Job Start         |FSA Job End           |Size     |
#================================================================================
# |473 |Jun 10 2016, 10:00 PM |Jun 10 2016, 10:30 PM |92.933G  |
#....
#--------------------------------------------------------------------------------
# |492 |Jun 13 2016, 10:00 PM |Jun 13 2016, 10:32 PM |4.794G   |
#--------------------------------------------------------------------------------
# |498 |Jun 14 2016, 10:00 PM |Jun 14 2016, 10:30 PM |4.816G   |
#================================================================================
#(space/empty line)
iiq_data_export fsa list --reports "${CLUSTER_NAME}" > "${TMPOUT}/reports.txt"
# Field 2 of the '|'-delimited row is the report id.
REPORT_ID=$(tail -n 3 "${TMPOUT}/reports.txt" | head -n 1 | cut -f2 -d'|')
echo "Using report id $REPORT_ID ($(tail -n 3 "${TMPOUT}/reports.txt" | head -n 1))"
# First get the base dir contents.
iiq_data_export fsa export -c "${CLUSTER_NAME}" --data-module directories \
  -o "${REPORT_ID}" -r "directory:${BASE_DIR}" -n "${TMPOUT}/basedir_with_header.csv"
# Extract the CSV header for later (it is re-attached in the final output).
head -n 1 "${TMPOUT}/basedir_with_header.csv" > "${TMPOUT}/header.csv"
# Strip the header for the following work; level0.csv seeds the recursion.
tail -n +2 "${TMPOUT}/basedir_with_header.csv" > "${TMPOUT}/level0.csv"

# Output of all these reports looks like:
#path[directory:/ifs/ERA-CLIM/Repro_Temp/mviri/],dir_cnt (count),file_cnt (count),ads_cnt,other_cnt (count),log_size_sum (bytes),phys_size_sum (bytes),log_size_sum_overflow,report_date: 1558306942
#/ifs/ERA-CLIM/Repro/mviri/level0,927,0,0,967506,172233718,2539652608,0
#/ifs/ERA-CLIM/Repro/mviri/level1,894,0,0,933778,166229545,2468796928,0
#/ifs/ERA-CLIM/Repro/mviri/level2,44868,2535868,0,2,1246000052970,2271269414912,0
# For each depth after the first, request reports on each item listed in the
# previous depth.
for (( depth = 1; depth <= MAX_DEPTH; depth++ )); do
  echo "Depth $depth"
  # Scan through the previous level's report, request a dump for each
  # directory listed and combine them into a single report for this level.
  # Read line-by-line (not 'for dir in $(cut ...)') so directory names
  # containing spaces survive intact.
  while IFS= read -r dir; do
    iiq_data_export fsa export -c "${CLUSTER_NAME}" --data-module directories \
      -o "${REPORT_ID}" -r "directory:${dir}" -n "${TMPOUT}/temp_fsa_dump.csv"
    # Drop the per-dump header row before appending.
    tail -n +2 "${TMPOUT}/temp_fsa_dump.csv" >> "${TMPOUT}/level${depth}.csv"
  done < <(cut -f1 -d, "${TMPOUT}/level$((depth - 1)).csv")
  rm -f "${TMPOUT}/temp_fsa_dump.csv"
done
# Final step: combine all levels, sort, and prepend the saved header,
# writing everything to OUTPUT_FILE.
cat "${TMPOUT}/header.csv" > "$OUTPUT_FILE"
# Quote only the prefix so the level*.csv glob still expands.
sort "${TMPOUT}"/level*.csv >> "$OUTPUT_FILE"

# Clean up; ':?' aborts rather than running 'rm -rf /...' if TMPOUT is ever
# empty/unset.
rm -rf -- "${TMPOUT:?}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment