Created
February 2, 2016 23:12
-
-
Save ambv/909d38bdac4f3e719b7c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Repacks entries in a ZIP file so that they become correctly zipimportable \
in Python 3.5. See https://bugs.python.org/issue25710 for details.
"""
import argparse | |
from pathlib import Path | |
import importlib._bootstrap_external | |
import sys | |
import zipfile | |
def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    The namespace carries two attributes:

    * ``zipfiles`` -- a list with one or more ``Path`` objects, taken from
      the positional FILE arguments;
    * ``strip`` -- a bool, False unless ``--strip`` / ``-s`` was given.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    # Positional: at least one archive to process.
    files_spec = dict(
        metavar='FILE',
        type=Path,
        nargs='+',
        help='paths to existing ZIP files',
    )
    cli.add_argument('zipfiles', **files_spec)

    # Optional switch: drop sources that already have compiled bytecode.
    strip_spec = dict(
        dest='strip',
        action='store_true',
        help='remove .py files when a corresponding .pyc file is present',
    )
    cli.add_argument('--strip', '-s', **strip_spec)

    return cli.parse_args()
def is_suffix_valid(suffix):
    """Return True unless `suffix` begins with a bytecode-cache marker.

    The markers are the filename parts that PEP 3147 caching adds
    (interpreter tags such as ``.cpython-35``, optimization tags such as
    ``.opt-1``) plus the ``.pyc`` / ``.pyo`` extensions themselves. An empty
    suffix counts as valid.
    """
    cache_markers = ('.cpython-', '.pypy-', '.opt-', '.pyc', '.pyo')
    # str.startswith accepts a tuple and is true if any member matches.
    return not suffix.startswith(cache_markers)
def redo_path(path):
    """redo_path(Path) -> Path

    Map a bytecode file inside ``__pycache__`` to the legacy location next
    to its source.

    Converts paths like fb303/__pycache__/__init__.cpython-35.pyc into
    fb303/__init__.pyc.

    `path` must sit directly inside a directory named ``__pycache__``;
    otherwise AssertionError is raised with a plain string message.
    """
    if path.parent.name != '__pycache__':
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError(
            'Unexpected subdirectories in __pycache__: {}'.format(path.parent)
        )

    # Drop the final extension, then keep peeling off trailing components
    # (interpreter tag, optimization tag, ...) until only the module name
    # is left.
    stem = Path(path.stem)
    while not is_suffix_valid(stem.suffix):
        stem = Path(stem.stem)

    # Replace the `__pycache__` component itself with `<module>.pyc`.
    return path.parent.with_name(stem.name + '.pyc')
def scan_file(path, strip=False, compression=zipfile.ZIP_DEFLATED):
    """scan_file(Path('file.zip')) -> Path('file.new.zip')

    Repackages a ZIP file so that it's zip-importable by Python 3.5.

    Every entry found directly inside a `__pycache__` directory is moved to
    the legacy location next to its source (see `redo_path`). The result is
    written to a sibling file with `.new` inserted before the original
    suffix; the input file is only read. Any bytes that precede the first
    ZIP entry (for example a shebang line) are copied verbatim to the start
    of the new file.

    If `strip` is True, .py files are removed when corresponding .pyc files
    can be found.

    File order and all attributes of the ZIP entries are maintained, except
    for the compression method which is always set to `compression`.

    A summary of the archive contents is printed to standard output.

    Returns the path of the newly written archive.
    """
    pure_py_files = set()
    cached_py_files = set()
    output_path = path.with_suffix('.new' + path.suffix)

    with path.open('rb') as sf, zipfile.ZipFile(sf) as sz:
        # One (entry, parsed name, new archive name or None) triple per
        # entry, in archive order.
        plan = []
        for info in sz.infolist():
            p = Path(info.filename)
            target = None
            if p.match('*.py'):
                pure_py_files.add(p)
            elif p.match('**/__pycache__/*') or p.match('__pycache__/*'):
                moved = redo_path(p)
                cached_py_files.add(moved.with_suffix('.py'))
                # ZIP member names use forward slashes on every platform,
                # so do not rely on str(Path), which is OS specific.
                target = moved.as_posix()
            plan.append((info, p, target))

        print('Renames to be done:',
              sum(1 for _, _, target in plan if target is not None))
        print('.py files:', len(pure_py_files))
        print('.pyc files:', len(cached_py_files))
        without_cache = pure_py_files - cached_py_files
        print('.py files without cache:', len(without_cache))
        for p in sorted(without_cache):
            print(' ', p)
        without_source = cached_py_files - pure_py_files
        print('.pyc files without source:', len(without_source))
        for p in sorted(without_source):
            print(' ', p)

        # Everything before the lowest local-header offset is not ZIP data.
        # An archive without entries yields an empty preamble.
        zip_offset = min((info.header_offset for info, _, _ in plan),
                         default=0)
        sf.seek(0)
        preamble = sf.read(zip_offset)
        with output_path.open('wb') as tf:
            tf.write(preamble)

        # Mode 'a' appends a fresh archive after the preamble just written.
        with zipfile.ZipFile(str(output_path), 'a',
                             compression=compression) as tz:
            for info, p, target in plan:
                if strip and p.match('*.py') and p in cached_py_files:
                    continue
                # Read through the ZipInfo, not the name: entries that share
                # a name then each keep their own content.
                content = sz.read(info)
                if target is not None:
                    info.filename = target
                info.compress_type = compression
                tz.writestr(info, content)
    return output_path
if __name__ == '__main__':
    # Repack every archive named on the command line. The exit status is
    # the number of archives that could not be processed (0 = all succeeded).
    args = parse_args()
    failures = 0
    for fp in args.zipfiles:
        try:
            scan_file(fp, strip=args.strip)
        except (OSError, zipfile.BadZipFile) as exc:
            # zipfile.BadZipFile is not an OSError, so it is listed
            # explicitly: one unreadable or non-ZIP input must not stop the
            # remaining files from being processed. Report which file
            # failed and why instead of failing silently.
            print('{}: {}'.format(fp, exc), file=sys.stderr)
            failures += 1
    # Exit statuses wrap modulo 256 on POSIX; cap the count so that a large
    # number of failures can never be reported as success.
    sys.exit(min(failures, 255))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment