Created
June 1, 2024 22:48
-
-
Save platomav/7bd530bf83d8842a4ae7a8873c24baf7 to your computer and use it in GitHub Desktop.
Microsoft CAB Archive Extractor (Python-based)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# coding=utf-8 | |
""" | |
Microsoft CAB Archive Extractor (Python-based) | |
Usage: cabextract.py [-h] [-o OUTPUT_DIR] [paths ...] | |
Copyright (c) 2024 Plato Mavropoulos | |
""" | |
import os | |
import re | |
import struct | |
from argparse import ArgumentParser, Namespace | |
from re import Match, Pattern | |
from cabarchive import CabArchive, CorruptionError, NotSupportedError | |
class CabExtract:
    """Microsoft CAB Archive Extractor.

    Instantiating the class performs the work: every input path is expanded
    into a list of files, each file is read fully into memory, scanned for
    the first CAB header and, when one is found, its members are written
    into the output directory.
    """

    # Offset (from the start of the CAB header) of the little-endian 32-bit
    # value that is used below as the total length of the archive.
    CAB_DAT_LEN_OFF: int = 0x8

    # "MSCF" signature immediately followed by four zero bytes.
    CAB_HDR_PAT_VAL: Pattern[bytes] = re.compile(br'MSCF\x00{4}')

    def __init__(self, input_paths: list, output_dir: str):
        """Extract the first CAB archive found in each input file.

        Args:
            input_paths: Files and/or directories; directories are walked
                recursively and every contained file is processed.
            output_dir: Directory that receives the extracted members. A
                falsy value (e.g. None) means the current working directory.
        """

        for input_path in self._get_files(input_paths):
            with open(input_path, 'rb') as input_object:
                input_data: bytes = input_object.read()

            self._extract_cab(input_data=input_data, output_dir=output_dir)

    @staticmethod
    def _get_files(input_paths: list) -> list[str]:
        """Return the absolute paths of all files named by input_paths.

        Directories are expanded recursively (symbolic links to directories
        are not followed); any other path is returned as-is in absolute
        form, without checking that it exists.
        """

        path_files: list[str] = []

        for input_path in input_paths:
            input_path_abs: str = os.path.abspath(input_path)

            if os.path.isdir(input_path_abs):
                for root_path, _, file_names in os.walk(input_path_abs, followlinks=False):
                    path_files.extend(os.path.join(root_path, file_name) for file_name in file_names)
            else:
                path_files.append(input_path_abs)

        return path_files

    def _extract_cab(self, input_data: bytes, output_dir: str) -> int:
        """Locate the first CAB archive in input_data and extract its files.

        Args:
            input_data: Raw bytes that may contain a CAB archive at any offset.
            output_dir: Directory that receives the extracted members. A
                falsy value (e.g. None) means the current working directory.

        Returns:
            0 on success, 1 when no CAB header is found, 2 when the header
            is truncated or the archive is reported as corrupted/unsupported.
        """

        cab_match: Match[bytes] | None = self.CAB_HDR_PAT_VAL.search(input_data)

        if not cab_match:
            print('\nError: No CAB archive detected!')

            return 1

        cab_offset: int = cab_match.start()

        try:
            cab_length: int = struct.unpack_from('<I', input_data, cab_offset + self.CAB_DAT_LEN_OFF)[0]
        except struct.error:
            # The signature was found but the data ends before the length
            # field; report it instead of aborting the whole batch.
            print('\nError: Truncated CAB header!')

            return 2

        cab_data: bytes = input_data[cab_offset:cab_offset + cab_length]

        # Without this fallback, os.path.join(None, ...) raises TypeError
        # when the caller did not provide an output directory.
        target_dir: str = output_dir or os.getcwd()

        try:
            # NOTE: "flattern" is the keyword spelling used by cabarchive itself.
            cab_archive: CabArchive = CabArchive(buf=cab_data, flattern=True)

            for cab_file in cab_archive:
                cab_entry = cab_archive[cab_file]

                # Drop leading/trailing separators so that the name cannot
                # be treated as an absolute path by os.path.join.
                file_name: str = cab_entry.filename.strip('\\/')

                print(f'\n{file_name} ({cab_entry.date}, {cab_entry.time})')

                file_path: str = os.path.join(target_dir, file_name)

                os.makedirs(target_dir, exist_ok=True)

                with open(file_path, 'wb') as file_object:
                    file_object.write(cab_entry.buf)
        except (NotSupportedError, CorruptionError) as exception:
            print(f'\nError: {exception}!')

            return 2

        return 0
if __name__ == "__main__":
    # Command-line entry point: extract every given file/directory, or show
    # the help text when no input path was supplied.
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument('paths', nargs='*')

    # Default to the current directory so that omitting -o does not pass
    # None on to the extractor.
    parser.add_argument('-o', '--output-dir', default=os.getcwd(), help='extract in given output directory')

    arguments: Namespace = parser.parse_args()

    # The positional argument is registered as "paths"; the parsed namespace
    # has no "files" attribute.
    if arguments.paths:
        CabExtract(input_paths=arguments.paths, output_dir=arguments.output_dir)
    else:
        parser.print_help()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Copyright (c) 2024 Plato Mavropoulos | |
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | |
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | |
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | |
Subject to the terms and conditions of this license, each copyright holder and contributor hereby grants to those receiving rights under this license a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except for failure to satisfy the conditions of this license) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer this software, where such license applies only to those patent claims, already acquired or hereafter acquired, licensable by such copyright holder or contributor that are necessarily infringed by: | |
(a) their Contribution(s) (the licensed copyrights of copyright holders and non-copyrightable additions of contributors, in source or binary form) alone; or | |
(b) combination of their Contribution(s) with the work of authorship to which such Contribution(s) was added by such copyright holder or contributor, if, at the time the Contribution is added, such addition causes such combination to be necessarily infringed. The patent license shall not apply to any other combinations which include the Contribution. | |
Except as expressly stated above, no rights or licenses from any copyright holder or contributor is granted under this license, whether expressly, by implication, estoppel or otherwise. | |
DISCLAIMER | |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cabarchive==0.2.4
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment