Last active
December 21, 2015 06:28
-
-
Save jmealo/6264077 to your computer and use it in GitHub Desktop.
Extracts .tar, .tar.gz, .tar.bz2 and .zip files with an optional progress bar. Raises ExtractException when it encounters an error.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import commands
import os
import re
import subprocess
import sys
def streamCommand(cmd):
    """Run a shell command and return an iterator over its output lines.

    The command is started immediately, through the shell, with stderr
    merged into stdout. Each item produced is one raw output line,
    including its trailing newline.

    Once the output is exhausted (or the iterator is closed or garbage
    collected) the pipe is closed and the child process is waited on, so
    no zombie process or open file descriptor is left behind.

    NOTE: `cmd` is interpreted by the shell; callers are responsible for
    quoting any file names they interpolate into it.
    """
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    def _lines():
        try:
            # readline() instead of plain file iteration: on Python 2,
            # iterating a file object uses a hidden read-ahead buffer,
            # which would delay lines and defeat live progress reporting.
            for line in iter(proc.stdout.readline, b""):
                yield line
        finally:
            proc.stdout.close()
            proc.wait()

    return _lines()
def getFileExtension(filename):
    """Return the extension of `filename`, without the leading dot.

    Two-part archive extensions are kept together, so "backup.tar.gz"
    yields "tar.gz". Only the final path component is examined, so dots
    in directory names are ignored. The original letter case is preserved.

    Returns an empty string when the file name contains no dot.
    """
    parts = os.path.basename(filename).split('.')
    if len(parts) < 2:
        # no dot at all: there is no extension to report
        return ""
    if parts[-2].lower() == 'tar':
        # compound extension such as tar.gz / tar.bz2
        return parts[-2] + "." + parts[-1]
    return parts[-1]
class ExtractException(Exception):
    """Raised by extractArchive when an archive cannot be extracted."""


def _shellQuote(path):
    # Quote a path so the shell passes it on as one literal argument.
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2
    return quote(path)


def _runCommand(cmd):
    # Run a shell command to completion; return (exit status, output text)
    # with stderr merged into the output.
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output = proc.communicate()[0]
    return proc.returncode, output


def _countArchiveEntries(file_type, quoted_src):
    # Return the number of entries the archive lists, or raise
    # ExtractException when the listing cannot be obtained.
    if file_type == 'zip':
        status, listing = _runCommand("unzip -l %s" % quoted_src)
        # the count is on the last line of unzip -l output
        # ex: 196727429 4395 files   (or "1 file" for a single entry)
        match = re.search(r'(\d+)\s+files?\s*$', listing)
        if status != 0 or match is None:
            raise ExtractException(listing)
        return int(match.group(1))
    status, listing = _runCommand("tar -tf %s" % quoted_src)
    if status != 0:
        raise ExtractException(listing)
    return len(listing.splitlines())


def extractArchive(src, progress=True, dst=False):
    """Extract a .tar, .tar.gz, .tar.bz2 or .zip archive.

    The work is done by the external `tar` and `unzip` programs.

    Parameters:
        src      -- path of the archive to extract
        progress -- when true, print a live "Extracted N files [P%]"
                    status line while extracting
        dst      -- directory to extract into; when false/empty the
                    archive is extracted into the current directory

    Returns:
        With progress: True when the number of files reported during
        extraction equals the number of entries listed in the archive,
        False otherwise (the exit status is not inspected in this mode).
        Without progress: True when the extraction command exits with
        status 0.

    Raises:
        ExtractException -- unsupported file type, missing destination
        directory, unreadable archive listing, or (without progress) a
        non-zero exit status from the extraction command.
    """
    file_type = getFileExtension(src).lower()

    extract_commands = {
        "tar": "tar -vxf",
        "tar.gz": "tar -vzxf",
        "tar.bz2": "tar -xvjf",
        "zip": "unzip -o",
    }

    if file_type not in extract_commands:
        # we can't extract this type of file; give up
        raise ExtractException(file_type + " is an unsupported file type.")

    # paths are quoted because the commands are run through the shell
    quoted_src = _shellQuote(src)
    cmd = extract_commands[file_type] + ' ' + quoted_src

    # if a destination directory is specified append the necessary flags
    if dst:
        if not os.path.isdir(dst):
            raise ExtractException(
                "Destination directory %s doesn't exist" % dst)
        flag = "-d" if file_type == 'zip' else "-C"
        cmd += " %s %s" % (flag, _shellQuote(dst))

    if not progress:
        # extract without progress; the exit status decides success
        status, output = _runCommand(cmd)
        if status != 0:
            raise ExtractException(output)
        return True

    # output progress to the terminal
    start_line = "Extracting %s" % os.path.basename(src)
    if dst:
        start_line += " to %s" % dst
    print(start_line)

    file_count = _countArchiveEntries(file_type, quoted_src)

    # pad the running count to the width of the total so the status
    # line keeps a constant length while it is redrawn
    status_format = ("Extracted %" + str(len(str(file_count))) +
                     "d files [%3.2f%%]")

    extracted = 0
    for line in streamCommand(cmd):
        if file_type == 'zip' and line.startswith(b"Archive:"):
            # unzip prints an "Archive: <name>" header that is not a file
            continue
        extracted += 1
        percent = extracted * 100. / file_count if file_count else 0.0
        # carriage return redraws the status in place; flush so the
        # update is visible even though no newline has been written
        sys.stdout.write("\r" + status_format % (extracted, percent))
        sys.stdout.flush()

    # move the cursor to the next line
    sys.stdout.write("\n")
    return extracted == file_count
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment