Created
June 20, 2022 17:57
-
-
Save 0187773933/f1beeeffc481e1b3563b0d423d6fe06d to your computer and use it in GitHub Desktop.
Download and Extract Chrome Webstore Extension .crx File
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import binascii
import io
import sys
import tempfile
from pathlib import Path
from urllib.parse import urlsplit
from zipfile import ZipFile

import requests
from box import Box
from tqdm import tqdm
def download_file( url , save_path , timeout=60 ):
    """
    Stream ``url`` to ``save_path`` while showing a tqdm progress bar.

    Args:
        url: address to download.
        save_path: destination file path; opened in binary write mode.
        timeout: seconds passed to ``requests.get`` so that a stalled
            server cannot hang the script forever.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status. Raised before ``save_path`` is opened , so an error page
            is never saved in place of the real file.
    """
    # The context managers release the connection , the file and the
    # progress bar even when the transfer fails half way through.
    with requests.get( url , stream=True , timeout=timeout ) as response:
        response.raise_for_status()
        total_size = int( response.headers.get( "content-length" , 0 ) )
        block_size = 1024
        with open( save_path , "wb" ) as f , tqdm( total=total_size , unit="iB" , unit_scale=True ) as progress:
            for data in response.iter_content( block_size ):
                progress.update( len( data ) )
                f.write( data )
            bytes_received = progress.n
    # A content-length of 0 means the server did not announce a size ,
    # so there is nothing to compare against.
    if total_size != 0 and bytes_received != total_size:
        print( "ERROR , something went wrong" )
def parse_extension_id( extension_url ):
    """
    Return the last path segment of ``extension_url`` (the extension id).

    Any query string ( ``?hl=en`` ) , fragment and trailing slash are dropped
    first , so they cannot end up inside the returned id.
    """
    url_path = urlsplit( extension_url ).path.rstrip( "/" )
    return url_path.split( "/" )[ -1 ]
def parse_extension_name( extension_url ):
    """
    Return the second-to-last path segment of ``extension_url`` (the name slug).

    Any query string ( ``?hl=en`` ) , fragment and trailing slash are dropped
    first; a trailing slash would otherwise shift the index and return the
    id instead of the name.

    Raises:
        IndexError: if the URL path has fewer than two segments.
    """
    url_path = urlsplit( extension_url ).path.rstrip( "/" )
    return url_path.split( "/" )[ -2 ]
def build_crx_download_url( extension_id ):
    """
    Build the clients2.google.com update-service URL for ``extension_id``.

    The id is placed inside the percent-encoded ``x=`` parameter
    ( ``id%3D<extension_id>%26installsource%3Dondemand%26uc`` ).
    """
    endpoint = "https://clients2.google.com/service/update2/crx"
    fixed_query = "response=redirect&prodversion=49.0&acceptformat=crx3"
    x_parameter = "x=id%3D" + format( extension_id ) + "%26installsource%3Dondemand%26uc"
    return endpoint + "?" + fixed_query + "&" + x_parameter
def read_binary_file( file_path ):
    """Return the complete contents of ``file_path`` as ``bytes``."""
    # str() is applied to whatever the caller passed before it reaches open().
    location = str( file_path )
    with open( location , "rb" ) as handle:
        contents = handle.read()
    return contents
def extract_crx_file( crx_file_path ):
    """
    Unpack the ZIP archive embedded in a .crx file into a sibling folder.

    The output folder sits next to the .crx file and is named after the
    file's stem ( ``foo.crx`` -> ``foo/`` ). It is created before the file
    is validated.

    Layout handled (every integer is a little-endian uint32):
        bytes 0-3    magic "Cr24"
        bytes 4-7    format version ( 2 or 3 )
        version 3:   bytes 8-11 header length ,
                     ZIP data at 12 + header length
        version 2:   bytes 8-11 public key length ,
                     bytes 12-15 signature length ,
                     ZIP data at 16 + public key length + signature length

    Args:
        crx_file_path: path of the .crx file ( ``str`` or ``pathlib.Path`` ).

    Returns:
        False when the file is not a usable CRX file (wrong magic , or too
        short to hold the fixed-size fields of its version) , otherwise None.

    Raises:
        zipfile.BadZipFile: if the data after the header is not a ZIP archive.
    """
    crx_file_path = Path( crx_file_path )
    output_folder = crx_file_path.parent.joinpath( crx_file_path.stem )
    output_folder.mkdir( parents=True , exist_ok=True )
    crx_binary = crx_file_path.read_bytes()

    crx_magic_header = crx_binary[ 0:4 ]
    if crx_magic_header != b"Cr24":
        print( "Invalid CRX File" )
        return False
    crx_version = crx_binary[ 4:8 ]
    print( "Magic Header === " , crx_magic_header.hex() )
    print( "Version === " , crx_version.hex() )

    # All offsets below are BYTE offsets into crx_binary. Slicing a hex
    # string with them would land in the wrong place (two characters per byte).
    if crx_version == b"\x03\x00\x00\x00":
        if len( crx_binary ) < 12:
            print( "Invalid CRX File" )
            return False
        header_length = int.from_bytes( crx_binary[ 8:12 ] , "little" )
        print( "Header Length === " , header_length )
        # +4 magic , +4 version , +4 header length field , + header itself
        crx_data_offset = ( 12 + header_length )
    elif crx_version == b"\x02\x00\x00\x00":
        print( "you got a version 2 crx" )
        if len( crx_binary ) < 16:
            print( "Invalid CRX File" )
            return False
        public_key_length = int.from_bytes( crx_binary[ 8:12 ] , "little" )
        signature_length = int.from_bytes( crx_binary[ 12:16 ] , "little" )
        print( "Public Key Length === " , public_key_length )
        print( "Signature Length === " , signature_length )
        # +16 fixed fields , then the public key , then the signature
        crx_data_offset = ( 16 + public_key_length + signature_length )
    else:
        print( "unknown crx version" )
        return None

    print( "Data Start Location Offset === " , crx_data_offset )
    crx_data = crx_binary[ crx_data_offset : ]
    # The context manager closes the archive even if extraction fails.
    with ZipFile( io.BytesIO( crx_data ) ) as zf:
        zf.extractall( str( output_folder ) )
    return None
# https://crxextractor.com/
# https://github.com/vladignatyev/crx-extractor/blob/master/js/app.js#L93=
if __name__ == "__main__":
    # The only argument is a Chrome Web Store page URL whose last two path
    # segments are read as <name>/<id>.
    if len( sys.argv ) < 2:
        print( f"Usage: {sys.argv[ 0 ]} <chrome-webstore-extension-url>" )
        sys.exit( 1 )
    chrome_webstore_extension_url = sys.argv[ 1 ]
    print( "1.) Downloading CRX Extension" )
    chrome_webstore_extension_id = parse_extension_id( chrome_webstore_extension_url )
    chrome_webstore_extension_name = parse_extension_name( chrome_webstore_extension_url )
    extension_download_url = build_crx_download_url( chrome_webstore_extension_id )
    # The .crx file is saved in the current working directory , named after the extension.
    extension_download_path = Path.cwd().joinpath( f"{chrome_webstore_extension_name}.crx" )
    download_file( extension_download_url , str( extension_download_path ) )
    print( "2.) Extracting CRX / 'zip' File" )
    # extract_crx_file returns False for a file that is not a CRX; turn that
    # into a non-zero exit status.
    if extract_crx_file( extension_download_path ) is False:
        sys.exit( 1 )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment