Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save 0187773933/f1beeeffc481e1b3563b0d423d6fe06d to your computer and use it in GitHub Desktop.
Save 0187773933/f1beeeffc481e1b3563b0d423d6fe06d to your computer and use it in GitHub Desktop.
Download and Extract Chrome Webstore Extension .crx File
#!/usr/bin/env python3
import sys
from pathlib import Path
from box import Box
import requests
from tqdm import tqdm
import tempfile
import binascii
from zipfile import ZipFile
import io
def download_file( url , save_path ):
    """Stream ``url`` to ``save_path`` while showing a tqdm progress bar.

    Args:
        url: Address fetched with a streaming HTTP GET request.
        save_path: Destination path; opened in binary write mode, so an
            existing file is overwritten.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.

    Prints ``ERROR , something went wrong`` when the server announced a
    non-zero ``content-length`` and a different number of bytes was received.
    """
    block_size = 1024
    received = 0
    # Context managers release the connection, the progress bar and the
    # output file even when the transfer fails midway.
    with requests.get( url , stream=True ) as response:
        # Fail early instead of saving an HTTP error page as the download.
        response.raise_for_status()
        total_size = int( response.headers.get( "content-length" , 0 ) )
        with tqdm( total=total_size , unit="iB" , unit_scale=True ) as progress:
            with open( save_path , "wb" ) as f:
                for data in response.iter_content( block_size ):
                    received += len( data )
                    progress.update( len( data ) )
                    f.write( data )
    # A total of 0 means the server sent no content-length, so there is
    # nothing to compare the received byte count against.
    if total_size != 0 and received != total_size:
        print( "ERROR , something went wrong" )
def parse_extension_id( extension_url ):
    """Return the last path segment of a Chrome Web Store URL (the extension ID).

    The query string (``?hl=en``), the fragment (``#...``) and any trailing
    slashes are removed before splitting, so they cannot end up in the ID.

    Args:
        extension_url: Store URL such as ``.../detail/<name>/<id>``. A string
            without any ``/`` is returned unchanged (minus query/fragment).

    Returns:
        The final ``/``-separated segment of the URL path.
    """
    # Drop the fragment first, then the query: in a URL the query precedes
    # the fragment, so a "?" inside the fragment must not be misread.
    path = extension_url.partition( "#" )[ 0 ].partition( "?" )[ 0 ].rstrip( "/" )
    return path.split( "/" )[ -1 ]
def parse_extension_name( extension_url ):
    """Return the second-to-last path segment of a Chrome Web Store URL.

    For URLs shaped like ``.../detail/<name>/<id>`` this is the extension's
    name slug. The query string, the fragment and trailing slashes are
    removed before splitting so they cannot shift which segment is picked.

    Args:
        extension_url: Store URL, or a bare string without any ``/``.

    Returns:
        The segment preceding the last one; if the input has only a single
        segment (e.g. a bare extension ID), that segment itself.
    """
    # Drop the fragment first, then the query (the query precedes the
    # fragment in a URL), then any trailing slashes.
    path = extension_url.partition( "#" )[ 0 ].partition( "?" )[ 0 ].rstrip( "/" )
    segments = path.split( "/" )
    # Fall back to the only segment instead of raising IndexError.
    return segments[ -2 ] if len( segments ) > 1 else segments[ -1 ]
def build_crx_download_url( extension_id , prod_version="49.0" , accept_format="crx3" ):
    """Build the ``clients2.google.com`` update-service URL for an extension.

    Args:
        extension_id: Extension ID inserted into the ``x`` query parameter.
        prod_version: Value sent as ``prodversion``. Defaults to ``"49.0"``.
            NOTE(review): the service may decline to serve some extensions
            to an old product version — confirm, and pass a newer one if so.
        accept_format: Value sent as ``acceptformat``. Defaults to ``"crx3"``.

    Returns:
        The download URL as a string. With the default arguments it is the
        same URL this function has always produced.
    """
    # The "x" parameter is itself a percent-encoded query string:
    # id=<extension_id>&installsource=ondemand&uc
    x_value = f"id%3D{extension_id}%26installsource%3Dondemand%26uc"
    return (
        "https://clients2.google.com/service/update2/crx"
        f"?response=redirect&prodversion={prod_version}"
        f"&acceptformat={accept_format}&x={x_value}"
    )
def read_binary_file( file_path ):
    """Read the file at ``file_path`` and return its entire contents as bytes.

    ``file_path`` is converted with ``str()`` before opening, so both plain
    strings and path objects are accepted.
    """
    path_text = str( file_path )
    with open( path_text , "rb" ) as stream:
        contents = stream.read()
    return contents
def extract_crx_file( crx_file_path ):
    """Unpack the ZIP payload of a ``.crx`` file into a sibling folder.

    The output folder sits next to the file and is named after its stem
    (``foo.crx`` -> ``foo/``). It is created before the file is validated.

    Layouts handled (all integers are 4-byte little-endian):
        CRX3: magic ``Cr24`` | version | header length | header | ZIP
        CRX2: magic ``Cr24`` | version | public key length |
              signature length | public key | signature | ZIP

    Args:
        crx_file_path: ``pathlib.Path`` of the ``.crx`` file.

    Returns:
        ``False`` if the file does not start with the ``Cr24`` magic bytes,
        otherwise ``None`` (including for an unknown version, where nothing
        is extracted).

    Raises:
        zipfile.BadZipFile: If the bytes after the CRX header are not a
            valid ZIP archive.
    """
    output_folder = crx_file_path.parent.joinpath( crx_file_path.stem )
    output_folder.mkdir( parents=True , exist_ok=True )
    crx_binary = crx_file_path.read_bytes()
    crx_magic_header = crx_binary[ 0:4 ]
    if crx_magic_header != b"Cr24":
        print( "Invalid CRX File" )
        return False
    crx_version = crx_binary[ 4:8 ]
    print( "Magic Header === " , crx_magic_header.hex() )
    print( "Version === " , crx_version.hex() )
    if crx_version == b"\x03\x00\x00\x00":
        header_length = int.from_bytes( crx_binary[ 8:12 ] , "little" )
        print( "Header Length === " , header_length )
        # ZIP data starts after:
        #   4 magic bytes + 4 version bytes + 4 header-length bytes + header
        crx_data_offset = 12 + header_length
    elif crx_version == b"\x02\x00\x00\x00":
        print( "you got a version 2 crx" )
        public_key_length = int.from_bytes( crx_binary[ 8:12 ] , "little" )
        signature_length = int.from_bytes( crx_binary[ 12:16 ] , "little" )
        # ZIP data starts after the 16 fixed bytes, the key and the signature.
        crx_data_offset = 16 + public_key_length + signature_length
    else:
        print( "unknown crx version" )
        return None
    print( "Data Start Location Offset === " , crx_data_offset )
    # Slice the raw bytes: the offset is counted in bytes, so it must not be
    # applied to a hex string, which has two characters per byte.
    with ZipFile( io.BytesIO( crx_binary[ crx_data_offset : ] ) ) as zf:
        zf.extractall( str( output_folder ) )
# https://crxextractor.com/
# https://github.com/vladignatyev/crx-extractor/blob/master/js/app.js#L93=
if __name__ == "__main__":
    # The Chrome Web Store URL of the extension is the only argument.
    # Exit with a usage message instead of an IndexError when it is missing.
    if len( sys.argv ) < 2:
        sys.exit( f"Usage: {sys.argv[ 0 ]} <chrome-webstore-extension-url>" )
    chrome_webstore_extension_url = sys.argv[ 1 ]
    print( "1.) Downloading CRX Extension" )
    chrome_webstore_extension_id = parse_extension_id( chrome_webstore_extension_url )
    chrome_webstore_extension_name = parse_extension_name( chrome_webstore_extension_url )
    extension_download_url = build_crx_download_url( chrome_webstore_extension_id )
    # The .crx is saved in the current working directory, named after the
    # name segment of the store URL.
    extension_download_path = Path.cwd().joinpath( f"{chrome_webstore_extension_name}.crx" )
    download_file( extension_download_url , str( extension_download_path ) )
    print( "2.) Extracting CRX / 'zip' File" )
    extract_crx_file( extension_download_path )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment