Last active
June 16, 2017 16:11
-
-
Save rsnemmen/0e02d49f7e8aacdc62cfdf15205a5a06 to your computer and use it in GitHub Desktop.
Given a pattern string and a number n, this script compresses all files that match the pattern. Each compressed file contains n of the original files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """ | |
| This script solves a very specific problem: I am uploading files | |
| to figshare and exceeding the limits on the number of uploaded | |
| files. Since the file size limit is 5GB and I need to upload a | |
| large quantity of files, I want to compress sequences of data files | |
| and upload only the compressed ones. | |
| Given a pattern string and a number, this script will compress | |
| all files that match the pattern. Each compressed file will | |
| contain n of the original files. | |
| Usage: | |
| >>> compress_sets.py "[pattern]" [n] | |
| Example: compress all *dbl files in current dir, each tar | |
| file will get 1000 dbl files. | |
| >>> compress_sets.py "*dbl" 1000 | |
| """ | |
| import sys, os, glob, subprocess, shutil | |
| import numpy | |
| import tqdm | |
# Scratch file handed to tar's -T option; it is rewritten for every chunk
# and left in the working directory after the run.
LIST_FILE = 'files.txt'


def chunked(items, size):
    """Split *items* into consecutive lists of at most *size* elements.

    The final list is shorter when len(items) is not a multiple of
    *size*; an empty input gives an empty result. *size* must be >= 1
    (range() rejects a zero step).
    """
    return [items[start:start + size] for start in range(0, len(items), size)]


def main(argv=None):
    """Compress every file matching a glob pattern into tar.bz2 archives.

    *argv* is the full argument vector (program name, pattern, n) and
    defaults to sys.argv. Archives are written to the current directory
    as archive.0.tar.bz2, archive.1.tar.bz2, ... with n files each.

    Returns the process exit status: 0 on success (including "nothing
    matched"), 1 on a usage error, a missing lbzip2, or a failed tar run.
    """
    argv = sys.argv if argv is None else argv

    # get command-line arguments
    if len(argv) != 3:
        prog = argv[0] if argv else 'compress_sets.py'
        print('Usage: '+prog+' \'<pattern>\' <n> \n (note the quotes in the pattern string)',
              file=sys.stderr)
        return 1

    pattern = argv[1]
    try:
        n = int(argv[2])
    except ValueError:
        print('<n> must be an integer, got ' + repr(argv[2]), file=sys.stderr)
        return 1
    if n < 1:
        # a zero step makes range() raise; a negative one silently matches nothing
        print('<n> must be at least 1, got ' + str(n), file=sys.stderr)
        return 1

    # checks if lbzip2 is present in the system
    if shutil.which("lbzip2") is None:
        print('Do you have lbzip2 installed?', file=sys.stderr)
        return 1

    # glob does not promise any ordering; sort so that each archive holds
    # a reproducible, contiguous run of file names
    flist = sorted(glob.glob(pattern))
    if not flist:
        print('No files match ' + repr(pattern), file=sys.stderr)
        return 0

    # loops through the files in chunks, one archive per chunk
    for j, chunk in enumerate(tqdm.tqdm(chunked(flist, n))):
        # text file listing the files that go into this archive
        with open(LIST_FILE, 'w') as listing:
            listing.writelines(name + '\n' for name in chunk)

        archive = 'archive.' + str(j) + '.tar.bz2'
        # argument list instead of a shell string: nothing is re-parsed by a shell
        status = subprocess.call(
            ['tar', 'cf', archive, '--use-compress-program', 'lbzip2', '-T', LIST_FILE]
        )
        if status != 0:
            # stop rather than keep producing archives after a failure
            print('tar failed with status ' + str(status) + ' while writing ' + archive,
                  file=sys.stderr)
            return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment