rsnemmen · June 16, 2017 16:11
diff --git a/compress_sets.py b/compress_sets.py
 #!/usr/bin/env python
 """
 This script solves a very specific problem: I am uploading files
 to figshare and exceeding the limits on the number of uploaded 
 files. Since the file size limit is 5GB and I need to upload a 
 large quantity of files, I want to compress sequences of data files
 and upload only the compressed ones.

 Given a pattern string and a number, this script will compress
 all files that match the pattern. Each compressed file will 
 contain n of the original files.

 Usage:

 >>> compress_sets.py "[pattern]" [n] 

 Example: compress all *dbl files in current dir, each tar 
 file will get 1000 dbl files.

 >>> compress_sets.py "*dbl" 1000
 """

 import sys, os, glob, subprocess, shutil
 import numpy
 import tqdm

 # get command-line arguments: a glob pattern and the number of files per archive
 if len(sys.argv) != 3:
 	# wrong number of arguments: show the usage and report the failure
 	# to the caller through a non-zero exit status
 	print('Usage: '+sys.argv[0]+' \'<pattern>\' <n> \n (note the quotes in the pattern string)')
 	sys.exit(1)

 pattern = sys.argv[1]
 try:
 	n = int(sys.argv[2])
 except ValueError:
 	sys.exit('<n> must be an integer, got: '+sys.argv[2])
 if n < 1:
 	# range() rejects a zero step, and a negative step would silently
 	# produce no archives at all
 	sys.exit('<n> must be a positive integer, got: '+str(n))

 # checks if lbzip2 is present in the system
 if shutil.which("lbzip2") is None:
 	print('Do you have lbzip2 installed?')
 	sys.exit(1)

 # list of files; glob returns matches in arbitrary order, so sort them to
 # make every archive hold a reproducible, consecutive run of files
 flist = sorted(glob.glob(pattern))
 if not flist:
 	sys.exit('No files match the pattern: '+pattern)

 # archives for which tar returned a non-zero exit status
 failed = []

 # loops through the files in chunks of n; j numbers the archives from 0
 for j, i in enumerate(tqdm.tqdm(range(0, len(flist), n))):
 	# text file listing the files of this chunk (read by tar through -T);
 	# the with-block closes it before tar runs, even if a write fails
 	with open('files.txt', 'w') as files:
 		for f in flist[i:i+n]:
 			files.write(f+'\n')

 	# tar command, passed as an argument list so that no shell is involved
 	archive = 'archive.'+str(j)+'.tar.bz2'
 	cmd = ['tar', 'cf', archive, '--use-compress-program', 'lbzip2', '-T', 'files.txt']
 	if subprocess.call(cmd) != 0:
 		# keep going with the remaining chunks, but remember the failure
 		failed.append(archive)

 # report every archive that could not be written and exit non-zero
 if failed:
 	sys.exit('tar failed for: '+', '.join(failed))
	#!/usr/bin/env python
	"""
	This script solves a very specific problem: I am uploading files
	to figshare and exceeding the limits on the number of uploaded
	files. Since the file size limit is 5GB and I need to upload a
	large quantity of files, I want to compress sequences of data files
	and upload only the compressed ones.

	Given a pattern string and a number, this script will compress
	all files that match the pattern. Each compressed file will
	contain n of the original files.

	Usage:

	>>> compress_sets.py "[pattern]" [n]

	Example: compress all *dbl files in current dir, each tar
	file will get 1000 dbl files.

	>>> compress_sets.py "*dbl" 1000
	"""

	import sys, os, glob, subprocess, shutil
	import numpy
	import tqdm

	# get command-line arguments: a glob pattern and the number of files per archive
	if len(sys.argv) != 3:
		# wrong number of arguments: show the usage and report the failure
		# to the caller through a non-zero exit status
		print('Usage: '+sys.argv[0]+' \'<pattern>\' <n> \n (note the quotes in the pattern string)')
		sys.exit(1)

	pattern = sys.argv[1]
	try:
		n = int(sys.argv[2])
	except ValueError:
		sys.exit('<n> must be an integer, got: '+sys.argv[2])
	if n < 1:
		# range() rejects a zero step, and a negative step would silently
		# produce no archives at all
		sys.exit('<n> must be a positive integer, got: '+str(n))

	# checks if lbzip2 is present in the system
	if shutil.which("lbzip2") is None:
		print('Do you have lbzip2 installed?')
		sys.exit(1)

	# list of files; glob returns matches in arbitrary order, so sort them to
	# make every archive hold a reproducible, consecutive run of files
	flist = sorted(glob.glob(pattern))
	if not flist:
		sys.exit('No files match the pattern: '+pattern)

	# archives for which tar returned a non-zero exit status
	failed = []

	# loops through the files in chunks of n; j numbers the archives from 0
	for j, i in enumerate(tqdm.tqdm(range(0, len(flist), n))):
		# text file listing the files of this chunk (read by tar through -T);
		# the with-block closes it before tar runs, even if a write fails
		with open('files.txt', 'w') as files:
			for f in flist[i:i+n]:
				files.write(f+'\n')

		# tar command, passed as an argument list so that no shell is involved
		archive = 'archive.'+str(j)+'.tar.bz2'
		cmd = ['tar', 'cf', archive, '--use-compress-program', 'lbzip2', '-T', 'files.txt']
		if subprocess.call(cmd) != 0:
			# keep going with the remaining chunks, but remember the failure
			failed.append(archive)

	# report every archive that could not be written and exit non-zero
	if failed:
		sys.exit('tar failed for: '+', '.join(failed))
No results found