Skip to content

Instantly share code, notes, and snippets.

@davidlatwe
Created November 24, 2021 07:52
Show Gist options
  • Save davidlatwe/2fe0e54209b77b190a540ef4b98f3a4f to your computer and use it in GitHub Desktop.
Save davidlatwe/2fe0e54209b77b190a540ef4b98f3a4f to your computer and use it in GitHub Desktop.
Test extracting archive files that contain Unicode characters with Python 2 and 3

Extracting tar and zip archives that contain Unicode filenames with Python 2 & 3

The test resources are taken from Boost: `*/libs/wave/test/testwave/testfiles/utf8-test-*`

References: https://stackoverflow.com/q/37933910/4145300 and https://superuser.com/a/190786/579628

This issue popped up when I was building USD with Python 2; see the log below:

Building with settings:
  USD source directory          c:\..\usd\21.08\build\platform-windows\python-2.7\USD-21.08
  USD install directory         D:\payload
  3rd-party source directory    D:\payload\src
  3rd-party install directory   D:\payload
  Build directory               D:\payload\build
  CMake generator               Default
  CMake toolset                 Default
  Downloader                    curl

  Building                      Shared libraries
    Config                      Release
    Imaging                     On
      Ptex support:             Off
      OpenVDB support:          Off
      OpenImageIO support:      Off
      OpenColorIO support:      Off
      PRMan support:            Off
    UsdImaging                  On
      usdview:                  On
    Python support              On
      Python 3:                 Off
    Documentation               Off
    Tests                       Off
    Examples                    On
    Tutorials                   On
    Tools                       On
    Alembic Plugin              Off
      HDF5 support:             Off
    Draco Plugin                Off
    MaterialX Plugin            Off

  Dependencies                  zlib, boost, TBB, OpenSubdiv
  Build arguments               USD: "-DPXR_STRICT_BUILD_MODE=OFF"
STATUS: Installing zlib...
STATUS: Installing boost...
ERROR: Failed to extract archive boost_1_70_0.tar.gz: [Error 123] The filename, directory name, or volume label syntax is incorrect.: 'D:\\payload\\src\\extract_dir\\boost_1_70_0\\libs\\wave\\test\\testwave\\testfiles
\\utf8-test-\xc3\x9f\xc2\xb5\xe2\x84\xa2\xe2\x88\x83'
Traceback (most recent call last):
  File "c:\..\usd\21.08/rezbuild.py", line 115, in <module>
    targets=sys.argv[1:])
  File "c:\..\usd\21.08/rezbuild.py", line 82, in build
    env=env,
  File "C:\python27_64\lib\subprocess.py", line 190, in check_call
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['python', 'c:\\..\\usd\\21.08\\build\\platform-windows\\python-2.7\\USD-21.08/build_scripts/build_usd.py', '--build-arg=USD,"-DPXR_STRICT_BUILD_MODE=OFF"', 'D:payload']' returned non-zero exit status 1

import os
import sys
import base64
import shutil
import zipfile
import tarfile
if sys.version_info.major >= 3:
    def unicode(s, _=None):
        """Stand-in for the Python 2 ``unicode`` builtin on Python 3.

        Returns ``s`` unchanged; the optional second argument (an encoding
        name at the call sites in this file) is accepted and ignored.
        """
        return s
def testing(filename, dst="."):
    """Extract a tar or zip archive into a freshly created directory.

    The archive type is probed with ``tarfile.is_tarfile`` first and
    ``zipfile.is_zipfile`` second. Tar archives are extracted into
    ``<dst>/archive_tar`` and zip archives into ``<dst>/archive_zip``; an
    existing directory at that location is removed before extraction.

    Member names are passed through ``unicode()`` before extraction (for
    zip archives only when running on Python 2).

    Args:
        filename: Path of the archive to extract.
        dst: Parent directory of the extraction directory.

    Raises:
        Exception: If ``filename`` is neither a tar nor a zip archive.
    """
    running_py2 = sys.version_info.major == 2

    if tarfile.is_tarfile(filename):
        archive = tarfile.open(filename)
        tar_members = archive.getmembers()
        for member in tar_members:
            member.name = unicode(member.name, 'utf-8')
        # Extraction is driven by the TarInfo objects themselves.
        updated = list(tar_members)
        dst = os.path.join(dst, "archive_tar")

    elif zipfile.is_zipfile(filename):
        archive = zipfile.ZipFile(filename)
        updated = []
        for info in archive.filelist:
            if running_py2:
                info.filename = unicode(info.filename)
            # Extraction is driven by member names for zip archives.
            updated.append(info.filename)
        dst = os.path.join(dst, "archive_zip")

    else:
        raise Exception("Unknown archive format.")

    with archive:
        target_dir = unicode(dst)
        # Always start from an empty directory.
        if os.path.isdir(target_dir):
            shutil.rmtree(target_dir)
        os.makedirs(target_dir)
        archive.extractall(target_dir, members=updated)
def setup(archive_b, path):
archive_b = archive_b.replace(b"\n", b"").strip()
with open(path, "wb") as o:
o.write(base64.b64decode(archive_b))
def encode(filename, output="encoded_archive.txt"):
    """Write the base64 encoding of a file's contents to ``output``.

    Args:
        filename: Path of the file to encode; read in binary mode.
        output: Path of the file that receives the base64 text. Defaults
            to "encoded_archive.txt" in the current working directory.
    """
    with open(filename, "rb") as src:
        payload = base64.b64encode(src.read())
    # b64encode() returns bytes, so the output must be opened in binary
    # mode; writing bytes to a text-mode file raises TypeError on Python 3.
    with open(output, "wb") as out_file:
        out_file.write(payload)
# Base64 text of the zip archive used as a test fixture. The script's entry
# point passes it to setup(), which strips the line breaks, decodes it and
# writes the result to "archive.zip".
# NOTE(review): per the accompanying notes the content comes from Boost's
# wave `utf8-test-*` test files (paths with non-ASCII characters) -- confirm.
ZIP = b"""
UEsDBBQAAAAAAE9leFMAAAAAAAAAAAAAAAAKAAAAdGVzdGZpbGVzL1BLAwQUAAAIAACaHIpOA
AAAAAAAAAAAAAAAHwAAAHRlc3RmaWxlcy91dGY4LXRlc3Qtw5/CteKEouKIgy9QSwMEFAAACA
gAmhyKTntHGEnfAAAAygEAACcAAAB0ZXN0ZmlsZXMvdXRmOC10ZXN0LcOfwrXihKLiiIMvZml
sZS5ocHCtkLtug0AQRXu+Yko/kuVRWnKREKJYQWmQknI1LGNYCe+i2SGEvw8hneXSp525V0c3
3h3vSQQLz94HUV/4TQd4gkrQNcgNGH8ZeotOIN/vYWAa2BsKwTP0tmbkeU13IsMhjqdpUvVa5
LmNo/WU+2Fm23YCG7OFLEnSxyxJM3hDlsso8I42ECt4sUHY1qNQA6NriEE6+tdaeyp/lgmZoL
SGXKAH+CQO1jtIVaJgUxEBmj9fdLN1LZxtT2uyPOXFR1XoVCdKfgQWd7NIAcpt76v/bXS8J7t
lF61fT2WhdfQLUEsBAj8AFAAAAAAAT2V4UwAAAAAAAAAAAAAAAAoAJAAAAAAAAAAQAAAAAAAA
AHRlc3RmaWxlcy8KACAAAAAAAAEAGADvi5qv7eDXAe+Lmq/t4NcBkhM6bu3g1wFQSwECPwAUA
AAIAACaHIpOAAAAAAAAAAAAAAAAHwAkAAAAAAAAABAAAAAoAAAAdGVzdGZpbGVzL3V0ZjgtdG
VzdC3Dn8K14oSi4oiDLwoAIAAAAAAAAQAYAADiwZQL79QBtRtbqO3g1wGr9Fqo7eDXAVBLAQI
/ABQAAAgIAJocik57RxhJ3wAAAMoBAAAnACQAAAAAAAAAIAAAAGUAAAB0ZXN0ZmlsZXMvdXRm
OC10ZXN0LcOfwrXihKLiiIMvZmlsZS5ocHAKACAAAAAAAAEAGAAA4sGUC+/UAbUbW6jt4NcBt
RtbqO3g1wFQSwUGAAAAAAMAAwBGAQAAiQEAAAAA
"""
# Base64 text of the gzip-compressed tar archive used as a test fixture. The
# script's entry point passes it to setup(), which strips the line breaks,
# decodes it and writes the result to "archive.tar.gz".
# NOTE(review): per the accompanying notes the content comes from Boost's
# wave `utf8-test-*` test files (paths with non-ASCII characters) -- confirm.
TAR = b"""
H4sICMrCnWEEAHRlc3RmaWxlcy50YXIA7ZPPTsIwHMd35il+RwHZ2jKYIeGgiJFIvJDocSmjs
CawLu1vTs568C1MfAdfQN+EJ3GbMUZi/BORC/tc2qbtL9/00x8Kg1M5F8ax/g1CXOJ5XjYWfB
jf5tSlrtd0KWu7FqGEMNeClrUFEoNcW1mC3eTdf4LTg0a+bDzfPz2ubh9Wdzeb+RQ/8t90W03
qMcpy/y3Wapf+t8E3/vMtO4xj6y9k/fylf4+yNf9tj2T+t+Jkx/07te4mqUDGkVIG7Ut+JTpw
CCPk0YTrCQRqEc8ljxB69TrEWsRaBcIYpWEux5rrZXE7RIw7jpOmqT0uCik9cyrFVk/FSy1nI
cJeUAVGCG0wQhmcco2LBOGMSyO0DcfSoJbjBMUEkmgiNGAoXmMVdUZqiinXAoYyEJER+3AhtJ
EqAmoTG/ZGQgAP8rw8WspoBnkbFDeHg17/fNT3qU9svEbIsgdZKOD4ee6189VKd5PUsnfx/ZP
BsO/7FaukpKTkV7wASpIL5wAMAAA=
"""
if __name__ == "__main__":
    # Materialize each embedded fixture on disk, then run the extraction
    # test on it: the zip archive first, the gzipped tar second.
    for payload, fname in ((ZIP, "archive.zip"), (TAR, "archive.tar.gz")):
        setup(payload, fname)
        testing(fname)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment