robinkraft · October 12, 2015 07:08
diff --git a/gistfile1.sh b/gistfile1.sh
 # NB: these are two separate recipes - one for working from the 
 # dev machine, the other from an EC2 instance

 ########################
 # from the dev machine #
 ########################

 # Cut the gzipped occurrence dump into 250 MiB pieces whose names start
 # with "occ.gz_", then push every piece to the S3 bucket.
 # The whole step was observed to take about 10 hours.
 split --bytes=250MiB occurrence_20120802.txt.gz occ.gz_
 s3cmd put occ.gz* s3://gbifsource/occurrence/


 ########################
 # from an EC2 instance #
 ########################

 # setup: open up /mnt and work inside /mnt/data
 cd /mnt/
 # chmod needs a file operand; name /mnt explicitly so the mode change does
 # not depend on the preceding cd having succeeded
 sudo chmod 777 /mnt
 # -p lets the recipe be re-run when data/ already exists; only cd into it
 # if the directory is really there
 mkdir -p data && cd data

 # download, reconstruct and decompress gzip file
 # The URL is quoted so the local shell never tries to glob the trailing '*'.
 # NOTE(review): assumes s3cmd expands that wildcard on the S3 side - confirm
 s3cmd get 's3://gbifsource/occurrence/occ.gz*'
 # concatenate only the chunks (occ.gz_*), so a stale occ.gz left over from
 # an earlier run is never folded into the rebuilt archive
 cat occ.gz_* > occ.gz
 # delete only the chunks: the pattern occ.gz* also matches occ.gz itself
 # and would remove the archive that was just reassembled
 rm occ.gz_*
 gunzip occ.gz

 # split text file into 250mb chunks, upload chunks to S3
 # -C splits on line boundaries, so no record is cut across two chunks
 split -C 250MiB occ occ_
 s3cmd put occ_* s3://gbifsource/occurrence-text/

 # all this stuff on EC2 takes 3-4 hours total on a large instance
	# NB: these are two separate recipes - one for working from the
	# dev machine, the other from an EC2 instance

	########################
	# from the dev machine #
	########################

	# Cut the gzipped occurrence dump into 250 MiB pieces whose names start
	# with "occ.gz_", then push every piece to the S3 bucket.
	# The whole step was observed to take about 10 hours.
	split --bytes=250MiB occurrence_20120802.txt.gz occ.gz_
	s3cmd put occ.gz* s3://gbifsource/occurrence/


	########################
	# from an EC2 instance #
	########################

	# setup: open up /mnt and work inside /mnt/data
	cd /mnt/
	# chmod needs a file operand; name /mnt explicitly so the mode change does
	# not depend on the preceding cd having succeeded
	sudo chmod 777 /mnt
	# -p lets the recipe be re-run when data/ already exists; only cd into it
	# if the directory is really there
	mkdir -p data && cd data

	# download, reconstruct and decompress gzip file
	# The URL is quoted so the local shell never tries to glob the trailing '*'.
	# NOTE(review): assumes s3cmd expands that wildcard on the S3 side - confirm
	s3cmd get 's3://gbifsource/occurrence/occ.gz*'
	# concatenate only the chunks (occ.gz_*), so a stale occ.gz left over from
	# an earlier run is never folded into the rebuilt archive
	cat occ.gz_* > occ.gz
	# delete only the chunks: the pattern occ.gz* also matches occ.gz itself
	# and would remove the archive that was just reassembled
	rm occ.gz_*
	gunzip occ.gz

	# split text file into 250mb chunks, upload chunks to S3
	# -C splits on line boundaries, so no record is cut across two chunks
	split -C 250MiB occ occ_
	s3cmd put occ_* s3://gbifsource/occurrence-text/

	# all this stuff on EC2 takes 3-4 hours total on a large instance
No results found