Skip to content

Instantly share code, notes, and snippets.

@ambv
Created February 2, 2016 23:12
Show Gist options
  • Save ambv/909d38bdac4f3e719b7c to your computer and use it in GitHub Desktop.
Save ambv/909d38bdac4f3e719b7c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""Repacks entries in a ZIP file so that they become correctly zipimportable \
in Python 3.5. See https://bugs.python.org/issue25710 for details.
"""
import argparse
from pathlib import Path
import importlib._bootstrap_external
import sys
import zipfile
def parse_args():
    """Parse the process command line and return the argparse namespace.

    The namespace carries two attributes:

    * ``zipfiles`` -- a non-empty list of ``Path`` objects, one per FILE
      argument given on the command line;
    * ``strip`` -- ``True`` when ``--strip``/``-s`` was passed, else ``False``.

    The module docstring is reused as the program description in ``--help``.
    """
    files_spec = {
        'metavar': 'FILE',
        'type': Path,
        'nargs': '+',
        'help': 'paths to existing ZIP files',
    }
    strip_spec = {
        'dest': 'strip',
        'action': 'store_true',
        'help': 'remove .py files when a corresponding .pyc file is present',
    }

    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('zipfiles', **files_spec)
    cli.add_argument('--strip', '-s', **strip_spec)
    return cli.parse_args()
def is_suffix_valid(suffix):
    """Return False for filename suffixes that PEP 3147 caching adds.

    A suffix is rejected when it begins with an interpreter tag
    (``.cpython-``, ``.pypy-``), an optimisation tag (``.opt-``) or a
    bytecode extension (``.pyc``, ``.pyo``). Anything else, including the
    empty string, is considered valid.
    """
    pep3147_prefixes = ('.cpython-', '.pypy-', '.opt-', '.pyc', '.pyo')
    # str.startswith accepts a tuple and is true if any prefix matches.
    return not suffix.startswith(pep3147_prefixes)
def redo_path(path):
    """redo_path(Path) -> Path

    Converts paths like fb303/__pycache__/__init__.cpython-35.pyc into
    fb303/__init__.pyc.

    The file is moved one level up (out of ``__pycache__``) and every
    trailing suffix rejected by is_suffix_valid() is dropped before
    ``.pyc`` is appended to the remaining name.

    Raises AssertionError if the immediate parent directory of `path` is
    not named ``__pycache__``.
    """
    # Raised explicitly instead of using an `assert` statement so that the
    # check is not stripped under `python -O`, and with a plain string
    # message rather than an accidental 1-tuple.
    if path.parent.name != '__pycache__':
        raise AssertionError(
            'Unexpected subdirectories in __pycache__: {}'.format(path.parent)
        )

    # Peel off PEP 3147 suffixes one at a time, e.g.
    # "mod.cpython-35.opt-1" -> "mod.cpython-35" -> "mod".
    stem = Path(path.stem)
    while not is_suffix_valid(stem.suffix):
        stem = Path(stem.stem)

    # Replacing the name of the __pycache__ directory itself places the
    # file alongside that directory.
    return path.parent.with_name(stem.name + '.pyc')
def scan_file(path, strip=False, compression=zipfile.ZIP_DEFLATED):
    """scan_file(Path('file.zip')) -> Path('file.new.zip')

    Repackages a ZIP file so that it's zip-importable by Python 3.5.

    Entries located directly inside a ``__pycache__`` directory are renamed
    with redo_path(); every other entry keeps its name.

    If `strip` is True, .py files are removed when corresponding .pyc files
    can be found.

    File order and all attributes of the ZIP entries are maintained, except
    for the compression method which is always set to `compression`.
    Any bytes that precede the first ZIP entry in the source file are copied
    verbatim to the start of the new file.

    A short summary of what was found is printed to stdout. The new archive
    is written next to `path`, with ``.new`` inserted before the original
    suffix, and its path is returned.
    """
    pure_py_files = set()
    cached_py_files = set()
    renames = {}

    # Pass 1: index the archive and decide which entries get renamed.
    with path.open('rb') as zf, zipfile.ZipFile(zf) as z:
        infos = z.infolist()
        for info in infos:
            p = Path(info.filename)
            if p.match('*.py'):
                pure_py_files.add(p)
            elif p.match('**/__pycache__/*') or p.match('__pycache__/*'):
                new_path = redo_path(p)
                # Remember the source file this bytecode corresponds to.
                cached_py_files.add(new_path.with_suffix('.py'))
                info.filename = str(new_path)
                renames[p] = info
        # Everything before the lowest local-header offset is a non-ZIP
        # preamble. The central directory is not guaranteed to list entries
        # in physical order, so take the minimum rather than the first.
        zip_offset = min((info.header_offset for info in infos), default=0)

    print('Renames to be done:', len(renames))
    print('.py files:', len(pure_py_files))
    print('.pyc files:', len(cached_py_files))
    without_cache = pure_py_files - cached_py_files
    print('.py files without cache:', len(without_cache))
    for p in sorted(without_cache):
        print(' ', p)
    without_source = cached_py_files - pure_py_files
    print('.pyc files without source:', len(without_source))
    for p in sorted(without_source):
        print(' ', p)

    new_path = path.with_suffix('.new' + path.suffix)

    # Pass 2: copy the preamble, then re-write every entry in order.
    with path.open('rb') as sf:
        with new_path.open('wb') as tf:
            # A buffered binary read returns the requested amount unless
            # EOF is hit, so one call suffices and cannot spin on EOF.
            tf.write(sf.read(zip_offset))
        sf.seek(0)
        # Mode 'a' appends the new archive after the preamble just written.
        with zipfile.ZipFile(sf) as sz, \
                zipfile.ZipFile(str(new_path), 'a',
                                compression=compression) as tz:
            for info in sz.infolist():
                p = Path(info.filename)
                if strip and p.match('*.py') and p in cached_py_files:
                    continue
                # Read via the ZipInfo itself so that duplicate member names
                # still yield the data belonging to this exact entry.
                content = sz.read(info)
                info = renames.get(p, info)
                info.compress_type = compression
                tz.writestr(info, content)
    return new_path
if __name__ == '__main__':
    # Repack every archive named on the command line. A failure on one file
    # does not stop the remaining files from being processed.
    args = parse_args()
    failures = 0
    for fp in args.zipfiles:
        try:
            scan_file(fp, strip=args.strip)
        except (OSError, zipfile.BadZipFile) as exc:
            # Report the problem instead of swallowing it silently.
            # BadZipFile is not an OSError, so it is listed explicitly to
            # keep one corrupt archive from aborting the whole batch.
            print('{}: {}'.format(fp, exc), file=sys.stderr)
            failures += 1
    # The exit status is the number of failed files. POSIX keeps only the
    # low 8 bits, so cap it to stop 256 failures from reading as success.
    sys.exit(min(failures, 255))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment