Last active
February 17, 2021 14:00
-
-
Save wangjiezhe/7841a350983a147b6d7e to your computer and use it in GitHub Desktop.
解决zip文件中文乱码问题
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
unzip3_gbk.py: Deal with zip files using encoding GB2312/GBK/GB18030 | |
""" | |
import os | |
# import sys | |
import argparse | |
import zipfile | |
# import copy | |
import datetime | |
class GBKZipFile(zipfile.ZipFile):
    """Read-only ZipFile that presents legacy member names as GB18030 text.

    ``zipfile`` decodes every member name that is not flagged as UTF-8 with
    CP437.  Archives produced on Chinese-locale systems actually store those
    names in GB2312/GBK/GB18030, so the names come out garbled.  This class
    reverses the CP437 decoding and decodes the raw bytes as GB18030 (a
    superset of GBK and GB2312), then rebuilds the name lookup table so that
    ``read``, ``extract`` and ``extractall`` work with the corrected names.
    """

    # General-purpose flag bit 11: the member name is stored as UTF-8 and has
    # already been decoded correctly by zipfile, so it must be left alone.
    _UTF8_FLAG = 0x800

    def __init__(self, filename):
        """Open *filename* (a path or a binary file object) for reading."""
        super().__init__(filename, mode='r')
        # Rebuild the name -> ZipInfo map under the corrected names.
        self.NameToInfo = {}
        for zinfo in self.filelist:
            if not zinfo.flag_bits & self._UTF8_FLAG:
                zinfo.filename = self._regbk(zinfo.filename)
            self.NameToInfo[zinfo.filename] = zinfo

    @staticmethod
    def _regbk(name):
        """Return *name* with its CP437 decoding undone and redone as GB18030.

        If the raw bytes are not valid GB18030 the name is returned unchanged
        so that one odd entry does not make the whole archive unreadable.
        """
        try:
            return name.encode('cp437').decode('gb18030')
        except UnicodeError:
            return name

    @staticmethod
    def print_bold(text):
        """Print *text* wrapped in the ANSI bold / reset escape sequences."""
        bold = '\033[1m'
        endc = '\033[0m'
        print(bold + text + endc)

    def pprintdir(self):
        """Print a table of the archive members: size, mtime and name.

        A header with the archive name (and the archive comment, if any) is
        printed first, and a footer with the total size and member count last.
        """
        # self.filename is None when the archive was opened from a file
        # object that has no name.
        self.print_bold('Archive: ' + os.path.basename(self.filename or ''))
        if self.comment:
            # A listing should not fail because of an undecodable comment.
            self.print_bold(
                'Comment: ' + self.comment.decode('gb18030', 'replace'))
        print('{:^10} {:^19} {}'.format('Size', 'Modified', 'File Name'))
        print('{:=^10} {:=^19} {:=<11}'.format('', '', ''))
        size_sum = 0
        for zinfo in self.filelist:
            # Format the raw 6-tuple directly: DOS timestamps may hold a zero
            # month/day or out-of-range time fields, which datetime rejects.
            filetime = '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}'.format(
                *zinfo.date_time)
            print('{:>10} {} {}'.format(
                zinfo.file_size, filetime, zinfo.filename))
            size_sum += zinfo.file_size
        file_sum = len(self.filelist)
        print('{:-^10} {:^19} {:-^11}'.format('', '', ''))
        print('{:>10} {:^19} {}'.format(str(size_sum), '',
                                        str(file_sum) + ' files'))
def cenc(name):
    """Encode *name* to bytes, mapping ``None`` and empty input to ``None``.

    The string is encoded with the default codec of ``str.encode`` (UTF-8).
    An empty string encodes to empty bytes, which is reported as ``None``.
    """
    if name is None:
        return None
    encoded = name.encode()
    if not encoded:
        return None
    return encoded
class MyParser(argparse.ArgumentParser):
    """Command-line parser for the GBK unzip tool.

    Accepts one or more positional ``zipfile`` arguments plus the optional
    ``-l/--list``, ``-o/--outdir`` and ``-p/--password`` switches.
    """

    def __init__(self, prog=None):
        """Create the parser and register every supported argument."""
        super().__init__(
            prog=prog,
            description='Extract files from zipfiles using encoding GBK')
        self.add_argument('zipfile', nargs='+')
        # (flags, keyword settings) for each optional switch, in help order.
        switches = (
            (('-l', '--list'),
             dict(action='store_true', dest='islist',
                  help='list files in zipfiles')),
            (('-o', '--outdir'),
             dict(dest='outdir', help='set output directory')),
            (('-p', '--password'),
             dict(dest='password', help='set password')),
        )
        for flags, settings in switches:
            self.add_argument(*flags, **settings)
def main():
    """Parse the command line, then list or extract each given archive.

    With ``-l/--list`` every archive's table of contents is printed;
    otherwise every archive is extracted into the ``--outdir`` directory.
    """
    args = MyParser().parse_args()
    for archive in args.zipfile:
        with GBKZipFile(archive) as zfp:
            if not args.islist:
                zfp.extractall(path=args.outdir, pwd=cenc(args.password))
                continue
            # Listing mode: register the password only when one was given.
            if args.password:
                zfp.setpassword(cenc(args.password))
            zfp.pprintdir()


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# -*- coding: utf-8 -*- | |
# unzip_gbk.py | |
""" | |
解决用 gbk 编码压缩的 zip 文件在 utf-8 环境下解压产生的中文文件名乱码问题 | |
""" | |
from __future__ import print_function | |
import os | |
import sys | |
import getopt | |
from zipfile import ZipFile | |
from textwrap import dedent | |
# Module-level mode switch: analyse() sets it to True when -l/--list is given,
# and main() then lists archive contents instead of extracting them.
IFLIST = False
def usage():
    """Print the command-line help text to standard output."""
    template = """\
        Usage: %s [options] zipfile1 [zipfile2 ...]
        Options:
        -h --help : display this help
        -l --list : list files
        -o --outdir : set output directory
        -p --password : set password"""
    # Substitute the program name first, then strip the common indentation.
    rendered = template % sys.argv[0]
    print(dedent(rendered))
def analyse(args=None):
    """Parse command-line arguments.

    Args:
        args: list of argument strings; ``None`` (the default) means
            ``sys.argv[1:]`` as it is at call time.

    Returns:
        A tuple ``(outdir, password, zipfiles)``: the output directory
        (the current working directory unless ``-o`` is given), the password
        (``None`` unless ``-p`` is given) and the remaining positional
        arguments.

    Side effects:
        Sets the module-level ``IFLIST`` to True when ``-l/--list`` is given.
        Prints the help and exits on ``-h/--help`` (status 0) or on an
        option error (status 1).
    """
    global IFLIST
    # Resolve the default at call time; a ``sys.argv[1:]`` default would be
    # frozen when the module is imported.
    if args is None:
        args = sys.argv[1:]
    shortargs = "hlo:p:"
    longargs = ["help", "list", "outdir=", "password="]
    # os.getcwdu() exists only on Python 2; on Python 3 os.getcwd() already
    # returns text.
    outdir = getattr(os, 'getcwdu', os.getcwd)()
    password = None
    try:
        opts, zipfiles = getopt.getopt(args, shortargs, longargs)
    except getopt.GetoptError:
        print("Getopt error!")
        usage()
        sys.exit(1)
    for opt, value in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-l", "--list"):
            IFLIST = True
        elif opt in ("-o", "--outdir"):
            # Only byte strings (Python 2 argv) need decoding.
            outdir = value.decode('utf8') if isinstance(value, bytes) else value
        elif opt in ("-p", "--password"):
            password = value
    return outdir, password, zipfiles
def _gbk_member_name(info):
    """Return the text name of zip member *info*, treating legacy names as GBK.

    Members flagged as UTF-8 (general-purpose bit 11) are already decoded by
    ``zipfile`` and are returned unchanged.
    """
    name = info.filename
    if info.flag_bits & 0x800:
        return name
    if isinstance(name, bytes):
        # Python 2 hands back the raw name bytes.
        return name.decode('gbk')
    # Python 3 decoded the raw bytes as CP437; undo that, then decode as GBK.
    return name.encode('cp437').decode('gbk')


def _as_zip_password(password):
    """Return *password* as bytes, which is what ``ZipFile.setpassword`` needs."""
    if password is not None and not isinstance(password, bytes):
        return password.encode('utf-8')
    return password


def listzip(filename, password=None):
    """Print the archive name followed by the name of every member.

    Args:
        filename: path of the zip archive.
        password: optional archive password (text or bytes).
    """
    print("Archive: " + filename)
    with ZipFile(filename, 'r') as infile:
        if password:
            infile.setpassword(_as_zip_password(password))
        for info in infile.infolist():
            print(_gbk_member_name(info))


def unzip(filename, outdir='', password=None):
    """Extract every member of *filename* below *outdir*.

    Member names are converted with the GBK rules of ``_gbk_member_name``.
    Existing files are never overwritten.  Members whose path would end up
    outside *outdir* (absolute names or ``..`` components) are skipped.

    Args:
        filename: path of the zip archive.
        outdir: destination directory; empty means the current directory.
        password: optional archive password (text or bytes).
    """
    print("Unziping " + filename)
    # Absolute destination with a trailing separator, used for the
    # containment check below.
    root_prefix = os.path.join(os.path.abspath(outdir or os.curdir), '')
    with ZipFile(filename, "r") as infile:
        if password:
            infile.setpassword(_as_zip_password(password))
        for info in infile.infolist():
            utf8name = _gbk_member_name(info)
            targetname = os.path.join(outdir, utf8name)
            if not os.path.abspath(targetname).startswith(root_prefix):
                print("Skipping unsafe path " + utf8name)
                continue
            print("Extracting " + utf8name)
            is_dir = utf8name.endswith('/')
            pathname = targetname if is_dir else os.path.dirname(targetname)
            # pathname is '' for a top-level file with the default outdir;
            # os.makedirs('') would raise.
            if pathname and not os.path.isdir(pathname):
                os.makedirs(pathname)
            if is_dir or os.path.exists(targetname):
                continue
            data = infile.read(info)
            # Member data is bytes: write it in binary mode.
            with open(targetname, 'wb') as myfile:
                myfile.write(data)
def main():
    """Program entry point: list or extract every archive on the command line.

    Prints the help and exits when no archive was given; otherwise handles
    each archive according to the module-level ``IFLIST`` switch and exits.
    """
    outdir, password, zipfiles = analyse()
    if not zipfiles:
        print("No file to unzip.")
        usage()
        sys.exit()
    for archive in zipfiles:
        if IFLIST:
            listzip(archive, password)
        else:
            unzip(archive, outdir, password)
    sys.exit()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
压缩包解压的时候提示
Traceback (most recent call last):
File "./unzip-chn-1.py", line 91, in <module>
main()
File "./unzip-chn-1.py", line 87, in main
zfp.extractall(path=args.outdir, pwd=cenc(args.password))
File "/usr/lib/python3.2/zipfile.py", line 1024, in extractall
self.extract(zipinfo, path, pwd)
File "/usr/lib/python3.2/zipfile.py", line 1012, in extract
return self._extract_member(member, path, pwd)
File "/usr/lib/python3.2/zipfile.py", line 1057, in _extract_member
shutil.copyfileobj(source, target)
File "/usr/lib/python3.2/shutil.py", line 65, in copyfileobj
buf = fsrc.read(length)
File "/usr/lib/python3.2/zipfile.py", line 585, in read
data = self.read1(n - len(buf))
File "/usr/lib/python3.2/zipfile.py", line 625, in read1
self._update_crc(data, eof=(self._compress_left==0))
File "/usr/lib/python3.2/zipfile.py", line 600, in _update_crc
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
zipfile.BadZipFile: Bad CRC-32 for file '盒子/box.zip'