@elranu
Created September 4, 2017 18:39
HDFS to S3 compress gzip
#!/bin/bash
# Usage: ./s3check /user/raw/2016
# Recursively checks every matching file under the given HDFS path against the
# equivalent path on S3; any file missing from S3 is copied there gzip-compressed.
if [ -z "$1" ]; then
    echo "usage: $0 directory"
    exit 1
fi

hdfsDir="$1"
echo "hdfs dir: $hdfsDir"
# List all files under the HDFS directory and keep only the file paths (the last ls field).
# Replace FlumeData with the name pattern of the files you want to check.
files=$(hadoop fs -ls -R "$hdfsDir"/*/ | awk '{print $NF}' | grep 'FlumeData')
let count=0
let errors=0
let checked=0
for file in $files
do
    # Check whether an object with this path prefix already exists on S3.
    lsFile=$( aws s3 ls "s3://bucket/directory$file" )   # change the bucket and the directory name
    fileArr=( $lsFile )
    fileSize=${fileArr[2]}   # aws s3 ls prints: date time size key
    if [[ -n "$lsFile" ]] && [[ $fileSize -gt 500 ]]; then
        let checked=checked+1
        echo "Checked: $file"
    else
        echo "copying: $file"
        # Stream the file out of HDFS, gzip it on the fly and upload it to S3.
        # The destination bucket/prefix must match the one checked above.
        if (hdfs dfs -cat "hdfs://$file" | gzip | aws s3 cp - "s3://bucket/directory$file.gz"); then
            let count=count+1
        else
            echo "error on $file"
            let errors=errors+1
        fi
    fi
done
echo "Count: $count"
echo "Errors: $errors"
echo "Checked: $checked"