Created
December 13, 2017 21:51
-
-
Save gurnec/8e675c291191dbfb14d5ff164aaeb3e9 to your computer and use it in GitHub Desktop.
Extracts the unique P2PKH addresses found in Bitcoin block files (blk*.dat) and writes them to a text file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
import argparse, sys, atexit, hashlib, base64, itertools, struct | |
from os import path | |
from btcrecover.addressset import AddressSet, varint | |
def add(self, address):
    """Insert *address* into the address set *self*; report whether it was new.

    This is a free function that takes the set as its first argument (the
    script calls it as ``add(address_set, hash160)``) and reaches into the
    set's private attributes directly.

    :param self: the set object; must provide ``_find``, ``_bytes_per_addr``,
        ``_hash_bytes``, ``_null_addr``, ``_len``, ``_max_len`` and ``_data``
    :param address: the raw address bytes to insert
    :return: True if the address was stored, False if it was skipped
    :raises ValueError: if the set already holds ``_max_len`` entries
    """
    pos = self._find(address)
    # presumably _find returns the literal True when the address is already
    # stored, and otherwise the offset of a free slot -- verify against AddressSet
    if pos is True:
        return False

    # Only a slice of the address is stored: the trailing _hash_bytes bytes are
    # dropped and the _bytes_per_addr bytes just before them are kept.
    bytes_to_add = address[-(self._bytes_per_addr + self._hash_bytes):-self._hash_bytes]
    if bytes_to_add.endswith(self._null_addr):
        return False  # ignore these invalid addresses

    if self._len >= self._max_len:
        raise ValueError("addition to AddressSet exceeds load factor")

    self._data[pos:pos + self._bytes_per_addr] = bytes_to_add
    self._len += 1
    return True
# The 58-character alphabet, indexed by digit value (0 -> '1', 57 -> 'z')
dec_digit_to_base58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"


def bytes_to_base58(bytes_rep):
    """Convert a byte string to base58, treating it as one big-endian integer.

    Leading zero bytes contribute nothing to the integer value and therefore
    produce no output digits; a value of zero (or an empty input) yields ''.

    :param bytes_rep: the bytes to convert
    :return: the base58 digits as a str, most significant digit first
    """
    if not bytes_rep:
        return ''  # int('', 16) would raise ValueError

    # int() promotes to an arbitrary-precision integer on Python 2 and Python 3
    # alike, so the Python-2-only long() is not needed.
    int_rep = int(base64.b16encode(bytes_rep), 16)

    # Digits come out least significant first; collect them and reverse once
    # rather than repeatedly prepending to a string.
    digits = []
    while int_rep:
        int_rep, remainder = divmod(int_rep, 58)
        digits.append(dec_digit_to_base58[remainder])
    digits.reverse()
    return ''.join(digits)
def hash160_to_base58check(hash160_bytes, version_byte):
    """Encode a 20-byte hash with a version byte and a 4-byte checksum in base58.

    The encoded payload is ``version_byte + hash160_bytes`` followed by the
    first four bytes of the double SHA-256 of that payload.  Each leading zero
    byte of the result is represented by a leading '1' character.

    :param hash160_bytes: exactly 20 bytes
    :param version_byte: exactly one byte (a one-character text string is
        also accepted and converted to its single byte)
    :return: the encoded address as a str
    :raises ValueError: if either argument has the wrong length
    """
    # On Python 2, str is bytes, so this only triggers for unicode; on Python 3
    # it lets callers keep passing a '\0' text literal.
    if not isinstance(version_byte, bytes):
        version_byte = version_byte.encode("latin-1")
    hash160_bytes = bytes(hash160_bytes)

    # Explicit checks rather than assert, which is stripped under "python -O"
    if len(hash160_bytes) != 20:
        raise ValueError("hash160_bytes must be exactly 20 bytes long")
    if len(version_byte) != 1:
        raise ValueError("version_byte must be exactly 1 byte long")

    payload = version_byte + hash160_bytes
    checksum = hashlib.sha256(hashlib.sha256(payload).digest()).digest()[:4]
    all_bytes = payload + checksum

    # bytes_to_base58() drops leading zero bytes, so count them here.  Using
    # lstrip() works whether iterating bytes yields characters (Python 2) or
    # ints (Python 3), and also copes with an all-zero byte string.
    zero_count = len(all_bytes) - len(all_bytes.lstrip(b"\0"))
    return '1' * zero_count + bytes_to_base58(all_bytes)
# ---------------------------------------------------------------------------
# Script body: walks the blk*.dat files in a Bitcoin data directory and writes
# each newly seen P2PKH address to the addresses file, one per line.
# ---------------------------------------------------------------------------

# Python 2/3 compatibility: use the Python 3 builtins where the Python 2 names are missing
try:
    xrange
except NameError:
    xrange = range
try:
    raw_input
except NameError:
    raw_input = input

parser = argparse.ArgumentParser()
parser.add_argument("--datadir", metavar="DIRECTORY", help="the Bitcoin data directory (default: auto)")
parser.add_argument("--force", action="store_true", help="overwrite any existing addresses file")
# Pausing is on by default only when the script was started without any arguments
parser.add_argument("--no-pause", action="store_true", default=len(sys.argv) > 1, help="never pause before exiting (default: auto)")
parser.add_argument("--no-progress", action="store_true", default=not sys.stdout.isatty(), help="disable the progress bar (shows cur. blockfile instead)")
parser.add_argument("addrfilename", nargs="?", default="addresses.txt", help="the name of the addresses file (default: addresses.txt)")
args = parser.parse_args()

if not args.no_pause:
    atexit.register(lambda: raw_input("\nPress Enter to exit ..."))

if not args.force and path.exists(args.addrfilename):
    sys.exit("Addresses file already exists (use --force to overwrite)")

# Locate the Bitcoin data directory
if args.datadir:
    blockdir = args.datadir
elif sys.platform == "win32":
    blockdir = path.expandvars(r"%APPDATA%\Bitcoin")
elif sys.platform.startswith("linux"):
    blockdir = path.expanduser("~/.bitcoin")
elif sys.platform == "darwin":
    blockdir = path.expanduser("~/Library/Application Support/Bitcoin")
else:
    sys.exit("Can't automatically determine Bitcoin data directory (use --datadir)")
blockdir = path.join(blockdir, "blocks")

if not path.isfile(path.join(blockdir, "blk00000.dat")):
    raise ValueError("first block file 'blk00000.dat' doesn't exist in blocks directory '{}'".format(blockdir))

address_set = AddressSet(1 << 29)

with open(args.addrfilename, "w") as addrfile:

    # Set up the progress bar, unless it is disabled or the module is unavailable
    progress_bar = None
    if not args.no_progress:
        try:
            import progressbar
        except ImportError:
            pass  # fall back to printing one line per block file
        else:
            print("Parsing block files ...")
            # Count the consecutively numbered block files to size the progress bar
            blockfile_count = 0
            while path.isfile(path.join(blockdir, "blk{:05}.dat".format(blockfile_count))):
                blockfile_count += 1
            progress_label = progressbar.FormatLabel(" {:11,} addrs. %(elapsed)s, ".format(len(address_set)))
            progress_bar = progressbar.ProgressBar(maxval=blockfile_count, widgets=[
                progressbar.SimpleProgress(), " ",
                progressbar.Bar(left="[", fill="-", right="]"),
                progress_label,
                progressbar.ETA()
            ])
            progress_bar.start()

    if progress_bar is None:
        # NOTE(review): the separator row below is 12 + 1 + 13 characters wide, so this
        # header may originally have had extra padding after "Block file" -- confirm
        print("Block file Address count")
        print("------------ -------------")
        # e.g.   blk00943.dat   255,212,706

    for filenum in itertools.count(0):
        filename = path.join(blockdir, "blk{:05}.dat".format(filenum))
        if not path.isfile(filename):
            break
        address_set.last_filenum = filenum

        with open(filename, "rb") as blockfile:
            if progress_bar is None:
                print(path.basename(filename), end=" ")

            header = blockfile.read(8)  # read in the magic and remaining (after these 8 bytes) block length
            # Stop at end-of-file or at a zero block length
            while len(header) == 8 and header[4:] != b"\0\0\0\0":
                if header[:4] != b"\xf9\xbe\xb4\xd9":  # magic
                    raise ValueError("unexpected magic bytes in block file '{}'".format(filename))
                block_len = struct.unpack_from("<I", header, 4)[0]
                block = blockfile.read(block_len)  # read in the rest of the block
                if len(block) < block_len:
                    break  # incomplete block, e.g. one still being written by a running client

                tx_count, offset = varint(block, 80)  # skips 80 bytes of header
                for tx_num in xrange(tx_count):
                    offset += 4  # skips 4-byte tx version
                    # Compare a one-byte slice: indexing bytes yields an int on Python 3
                    is_bip144 = block[offset:offset + 1] == b"\0"  # bip-144 marker
                    if is_bip144:
                        offset += 2  # skips 1-byte marker & 1-byte flag

                    txin_count, offset = varint(block, offset)
                    for txin_num in xrange(txin_count):
                        sigscript_len, offset = varint(block, offset + 36)  # skips 32-byte tx id & 4-byte tx index
                        offset += sigscript_len + 4  # skips sequence number & sigscript

                    txout_count, offset = varint(block, offset)
                    for txout_num in xrange(txout_count):
                        pkscript_len, offset = varint(block, offset + 8)  # skips 8-byte satoshi count
                        # If this is a P2PKH script (OP_DUP OP_HASH160 PUSH(20) <20 address bytes> OP_EQUALVERIFY OP_CHECKSIG)
                        if (pkscript_len == 25
                                and block[offset:offset + 3] == b"\x76\xa9\x14"
                                and block[offset + 23:offset + 25] == b"\x88\xac"):
                            hash160 = block[offset + 3:offset + 23]
                            # Add the discovered address to the address set and print it if it's new
                            if add(address_set, hash160):
                                print(hash160_to_base58check(hash160, b"\0"), file=addrfile)
                        offset += pkscript_len  # advances past the pubkey script

                    if is_bip144:
                        # One witness stack per input; skip every item of each
                        for txin_num in xrange(txin_count):
                            stackitem_count, offset = varint(block, offset)
                            for stackitem_num in xrange(stackitem_count):
                                stackitem_len, offset = varint(block, offset)
                                offset += stackitem_len  # skips this stack item

                    offset += 4  # skips the 4-byte locktime

                header = blockfile.read(8)  # read in the next magic and remaining block length

        # Report progress once per block file
        if progress_bar is not None:
            progress_label.format = " {:11,} addrs. %(elapsed)s, ".format(len(address_set))  # updates address count
            nextval = progress_bar.currval + 1
            if nextval > progress_bar.maxval:  # can happen if the bitcoin client is left running
                progress_bar.maxval = nextval
            progress_bar.update(nextval)
        else:
            print("{:13,}".format(len(address_set)))

    if progress_bar is not None:
        progress_bar.widgets.pop()  # remove the ETA
        progress_bar.finish()

print("\nDone.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment