@BeatriceMoissinac
Created May 1, 2018 23:35
[Unzip Files on S3] #AWS #Shell #S3 #EMR
# UNZIP SESSION FILES
# Reference: https://aws.amazon.com/blogs/big-data/seven-tips-for-using-s3distcp-on-amazon-emr-to-move-data-efficiently-between-hdfs-and-amazon-s3/
# http://docs.aws.amazon.com/emr/latest/ReleaseGuide/UsingEMR_s3distcp.html
# Hadoop must be installed on the cluster (include Name=Hadoop in --applications when creating it)
# Run S3DistCp as an EMR step:
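For reference, a cluster satisfying that requirement could be created as below. This is a hedged sketch: the cluster name, release label, instance type/count, and log URI are placeholders, not part of the original gist.

```shell
# Hypothetical sketch: create an EMR cluster with Hadoop installed.
# Name, release label, instance settings, and log URI are assumptions.
aws emr create-cluster --profile $KEY \
  --name "unzip-cluster" \
  --release-label emr-5.13.0 \
  --applications Name=Hadoop \
  --instance-type m4.large --instance-count 3 \
  --use-default-roles \
  --log-uri s3://osu-is/logs/
```

The command prints a JSON document containing the new ClusterId, which is what $CLUSTER refers to in the steps below.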
aws emr add-steps --profile $KEY --cluster-id $CLUSTER --steps Type=CUSTOM_JAR,Name="S3DistCp",ActionOnFailure=CONTINUE,Jar="command-runner.jar",Args=[s3-dist-cp,--src,s3://osu-is/aruba/raw/Client_Session_1479414885.zip,--dest,hdfs:///output,--outputCodec,none]
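The add-steps call returns a StepIds list but does not wait for completion. A sketch for polling the step state, assuming $STEP_ID was taken from that output:

```shell
# Poll the state of a submitted step ($STEP_ID is assumed to come from
# the "StepIds" field printed by the add-steps call above).
aws emr describe-step --profile $KEY \
  --cluster-id $CLUSTER \
  --step-id $STEP_ID \
  --query 'Step.Status.State' \
  --output text
# States progress through PENDING, RUNNING, then COMPLETED / FAILED / CANCELLED
```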
# Or as a direct command in an SSH console on the master node:
# s3-dist-cp --src s3://osu-is/aruba/raw --dest hdfs:///output --srcPattern .*\.zip --outputCodec=none
# Upload the unzipper.sh script to S3 first:
# aws s3 cp --profile $KEY src/main/shell/unzipper.sh s3://osu-is/jar
# Then run unzipper.sh as a step via script-runner
# (replace "region" in the jar path with the cluster's region, e.g. us-west-2)
aws emr add-steps --profile $KEY --cluster-id $CLUSTER --steps Type=CUSTOM_JAR,Name=CustomJAR,ActionOnFailure=CONTINUE,Jar=s3://region.elasticmapreduce/libs/script-runner/script-runner.jar,Args=["s3://osu-is/jar/unzipper.sh"]
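The gist references unzipper.sh without showing its contents. A minimal sketch of what such a script might look like, assuming the zip archives were already copied to hdfs:///output by the s3-dist-cp step above (file names and paths are assumptions):

```shell
#!/bin/bash
# Hypothetical unzipper.sh: pull each zip out of HDFS, unzip it on the
# local filesystem, and push the extracted contents back to HDFS.
set -e
mkdir -p /tmp/unzipped
for f in $(hadoop fs -ls /output | grep '\.zip$' | awk '{print $NF}'); do
  name=$(basename "$f" .zip)
  hadoop fs -get "$f" /tmp/                   # copy zip to local disk
  unzip -o "/tmp/$name.zip" -d "/tmp/unzipped/$name"
  hadoop fs -mkdir -p "/output/$name"
  hadoop fs -put -f "/tmp/unzipped/$name"/* "/output/$name/"
  rm -rf "/tmp/$name.zip" "/tmp/unzipped/$name"  # clean up local copies
done
```

Note this extracts on the master node's local disk, so it suits moderate archive sizes; very large archives would need a distributed approach.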
# Copy unzipped file back to S3
s3-dist-cp --src /output/Client_Session/ --dest s3://osu-is/aruba/raw-unzipped --outputCodec=keep
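To confirm the copy-back worked, the destination prefix can be listed (bucket and prefix assumed from the command above):

```shell
# Sanity check: list the first few objects under the destination prefix
aws s3 ls --profile $KEY s3://osu-is/aruba/raw-unzipped/ --recursive | head
```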