Last active
November 3, 2020 13:01
-
-
Save mikeggrant-eumetsat/fd6956912d8280024df0c4925709bbdf to your computer and use it in GitHub Desktop.
Script to do a recursive du on an Isilon cluster using the FSA reporting tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# If you have to manage an EMC Isilon cluster and want to use the
# File System Analytics (FSA) tool to do a "du --depth X", you'll quickly
# realise they unbelievably haven't implemented a recursive scan.
# This script does that.
#
# Usage: see the usage message below (CLUSTER_NAME BASE_DIR MAX_DEPTH OUTPUT_FILE).
#
# Must be run on the Isilon cluster machines with sufficient privileges to
# access the FSA reports.
#
# Known issues:
#  - probably won't like funny characters in directory names..
#
# Refs:
#  https://community.emc.com/community/products/isilon/blog/2016/07/25/insightiq-iiqdataexport-utility-under-onefs-v800
#  https://thesanguy.com/2018/01/09/insightiq-data-export-utility/
# Require exactly four positional arguments; otherwise print usage to stderr
# and bail out before touching anything.
if [ $# -ne 4 ] ; then
  echo "Usage: $0 CLUSTER_NAME BASE_DIR MAX_DEPTH OUTPUT_FILE" >&2
  echo >&2
  echo "  CLUSTER_NAME = tcenas, eumetsat [=DSNNAS]" >&2
  echo "  BASE_DIR = /ifs/ERA-CLIM/Repro" >&2
  echo "  MAX_DEPTH = depth to scan to, limited also by FSA resolution" >&2
  echo "  OUTPUT_FILE = where to write the final outputs to" >&2
  echo >&2
  echo "e.g. $0 eumetsat /ifs/ERA-CLIM/Repro 2 results.csv" >&2
  exit 1
fi
CLUSTER_NAME=$1

# BASE_DIR must be /ifs/MODULE/dir (at least 3 levels deep); verify this.
# Use a bash regex match instead of 'echo | grep' + '$?' so the check is a
# single builtin and is safe for values containing spaces or glob characters.
BASE_DIR=$2
if [[ ! "$BASE_DIR" =~ ^/ifs/[^/]*/[^/] ]] ; then
  echo "BASE_DIR must be at 3 levels deep (e.g. /ifs/MODULE/xxx) because the FSA export tool requires a different usage for the MODULE level" >&2
  echo "(actually, I've not tested this, so maybe it works - edit the script and try if you like..)" >&2
  exit 1
fi

MAX_DEPTH=$3
OUTPUT_FILE=$4
# Make a unique place to dump temporary files; abort if we can't.
TMPOUT=$(mktemp -d) || { echo "mktemp -d failed" >&2; exit 1; }

# Need to get the id number of the latest File System Analytics (FSA) report.
# 'iiq_data_export fsa list' output looks like this; we want the last id,
# which sits on the 3rd line from the bottom (data row, separator, blank line):
#
# Available Reports for: tme-sandbox Time Zone: EDT
#================================================================================
# |ID  |FSA Job Start         |FSA Job End           |Size     |
#================================================================================
# |473 |Jun 10 2016, 10:00 PM |Jun 10 2016, 10:30 PM |92.933G  |
#....
#--------------------------------------------------------------------------------
# |492 |Jun 13 2016, 10:00 PM |Jun 13 2016, 10:32 PM |4.794G   |
#--------------------------------------------------------------------------------
# |498 |Jun 14 2016, 10:00 PM |Jun 14 2016, 10:30 PM |4.816G   |
#================================================================================
#(space/empty line)
iiq_data_export fsa list --reports "${CLUSTER_NAME}" > "${TMPOUT}/reports.txt"
# Field 2 of the '|'-delimited row is the report id.
REPORT_ID=$(tail -n 3 "${TMPOUT}/reports.txt" | head -n 1 | cut -f2 -d'|')
echo "Using report id $REPORT_ID ($(tail -n 3 "${TMPOUT}/reports.txt" | head -n 1))"
# First get the base dir contents.
iiq_data_export fsa export -c "${CLUSTER_NAME}" --data-module directories \
  -o "${REPORT_ID}" -r "directory:${BASE_DIR}" -n "${TMPOUT}/basedir_with_header.csv"
# Extract the CSV header for later (it is re-attached in the final output).
head -n 1 "${TMPOUT}/basedir_with_header.csv" > "${TMPOUT}/header.csv"
# Strip the header for the following work; level0.csv seeds the recursion.
tail -n +2 "${TMPOUT}/basedir_with_header.csv" > "${TMPOUT}/level0.csv"

# Output of all these reports looks like:
#path[directory:/ifs/ERA-CLIM/Repro_Temp/mviri/],dir_cnt (count),file_cnt (count),ads_cnt,other_cnt (count),log_size_sum (bytes),phys_size_sum (bytes),log_size_sum_overflow,report_date: 1558306942
#/ifs/ERA-CLIM/Repro/mviri/level0,927,0,0,967506,172233718,2539652608,0
#/ifs/ERA-CLIM/Repro/mviri/level1,894,0,0,933778,166229545,2468796928,0
#/ifs/ERA-CLIM/Repro/mviri/level2,44868,2535868,0,2,1246000052970,2271269414912,0
# For each depth after the first, request reports on each item listed in the
# previous depth.
for (( depth = 1; depth <= MAX_DEPTH; depth++ )); do
  echo "Depth $depth"
  # Scan through the previous level's report, request a dump for each
  # directory listed and combine them into a single report for this level.
  # Read line-by-line (not 'for dir in $(cut ...)') so directory names
  # containing spaces survive intact.
  while IFS= read -r dir; do
    iiq_data_export fsa export -c "${CLUSTER_NAME}" --data-module directories \
      -o "${REPORT_ID}" -r "directory:${dir}" -n "${TMPOUT}/temp_fsa_dump.csv"
    # Drop the per-dump header row before appending.
    tail -n +2 "${TMPOUT}/temp_fsa_dump.csv" >> "${TMPOUT}/level${depth}.csv"
  done < <(cut -f1 -d, "${TMPOUT}/level$((depth - 1)).csv")
  rm -f "${TMPOUT}/temp_fsa_dump.csv"
done
# Final step: combine all levels, sort, and prepend the saved header,
# writing everything to OUTPUT_FILE.
cat "${TMPOUT}/header.csv" > "$OUTPUT_FILE"
# Quote only the prefix so the level*.csv glob still expands.
sort "${TMPOUT}"/level*.csv >> "$OUTPUT_FILE"

# Clean up; ':?' aborts rather than running 'rm -rf /...' if TMPOUT is ever
# empty/unset.
rm -rf -- "${TMPOUT:?}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment