Last active
May 3, 2017 06:22
-
-
Save schmohlio/f1b2987e8da7c511a3b9 to your computer and use it in GitHub Desktop.
Delete or warn on empty part files in Hadoop, by extension.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Delete or warn on empty part files in an HDFS directory, by extension.
#
# USAGE: ./remove_empty_part_files.sh <qualified hdfs dir path>
#
# Lists "<dir>/*" with `hadoop fs -ls` and, for every zero-byte file found:
#   *.gz      -> deleted with `hadoop fs -rm -skipTrash`
#   *.gz.tmp  -> reported with a warning and left in place
#   other     -> reported as "not sure" and left in place
#
# Requires the `hadoop` CLI on PATH.

# Options are set here rather than on the shebang line so they still apply
# when the script is started as `bash remove_empty_part_files.sh ...`.
set -euo pipefail

# Print the usage line to stderr.
usage() {
  printf 'USAGE: %s <qualified hdfs dir path>\n' "${0##*/}" >&2
}

#######################################
# Scan one HDFS directory and handle its empty files.
# Arguments: $1 - qualified HDFS directory path
# Outputs:   progress and warning messages on stdout
# Returns:   exits 2 on a missing argument; exits non-zero if a delete fails
#######################################
main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    usage
    exit 2
  fi

  # Drop one trailing slash so the prefix filter below compares against
  # "<dir>/" rather than "<dir>//".
  local hdfs_dir=${1%/}
  local listing line perms size path

  echo "checking for empty files in $1..."

  # The glob is quoted so that hadoop, not the local shell, expands it.
  # A failing listing is treated as empty: the original ignored the listing's
  # exit status too, and hadoop prints its own diagnostics on stderr.
  listing=$(hadoop fs -ls "${hdfs_dir}/*") || true

  # The listing is fed on fd 3 so commands run inside the loop cannot consume
  # it through stdin.
  while IFS= read -r line <&3; do
    # Keep only entries under the requested directory. The comparison is
    # literal, so regex metacharacters in the path cannot mis-filter; header
    # lines that do not mention the directory are dropped here as well.
    [[ "$line" == *"${hdfs_dir}/"* ]] || continue

    # Column layout used here: 1 = permissions, 5 = size, 8.. = path.
    # `path` is the last variable so it keeps any embedded spaces.
    read -r perms _ _ _ size _ _ path <<<"$line"

    # Directories are listed with a size of 0 but are not empty files.
    if [[ "$perms" == d* ]]; then
      continue
    fi

    # Only act when the size column is a literal zero.
    [[ "$size" =~ ^0+$ ]] || continue

    case "$path" in
      *.gz)
        echo "deleting $path ..."
        hadoop fs -rm -skipTrash "$path"
        ;;
      *.gz.tmp)
        echo "warning $line is empty"
        ;;
      *)
        echo "not sure"
        ;;
    esac
  done 3<<<"$listing"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment