Skip to content

Instantly share code, notes, and snippets.

@vindard
Last active October 31, 2022 22:50
Show Gist options
  • Save vindard/4476caed8294a1b473aca50c1355332d to your computer and use it in GitHub Desktop.
Save vindard/4476caed8294a1b473aca50c1355332d to your computer and use it in GitHub Desktop.
A script to pull the whitepaper from the bitcoin blockchain
const axios = require('axios')
const fs = require('fs')
const TXID = "54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713"
/**
 * Fetch the raw transaction hex for a transaction id from mempool.space.
 *
 * @param {string} [txid=TXID] - Transaction id to look up. Defaults to the
 *   module-level TXID so calls without an argument keep working.
 * @returns {Promise<*>} Resolves with the `data` field of the axios response.
 */
const fetchHex = (txid = TXID) => {
  // Build the URL from the caller-supplied id; the previous version accepted
  // a `txid` argument but always requested the module-level TXID instead.
  const endpoint = `https://mempool.space/api/tx/${txid}/hex`;
  return axios.get(endpoint).then((res) => res.data);
};
/**
 * Extract the embedded PDF bytes from a raw transaction hex string.
 *
 * The hex is split on the 8-byte output value marker; the first and last
 * chunks are discarded. From every remaining output three 130-character
 * (65-byte) slices are taken, starting at offset 6 and spaced 132 characters
 * apart. The final output is truncated to 50 characters, the pieces are
 * joined, and the first 16 characters (size and checksum) are dropped.
 *
 * @param {string} hex - Raw transaction as a hex string.
 * @returns {Uint8Array} The decoded PDF bytes.
 * @throws {Error} If the hex contains no data-carrying outputs.
 */
const hexToHexArray = (hex) => {
  const OUTPUT_SEPARATOR = '0100000000000000';
  const FIRST_PART_OFFSET = 6;
  const PART_LEN = 130;
  const SPAN = 132;
  const PARTS_PER_OUTPUT = 3;

  // Split outputs from hex data and drop the leading/trailing chunks.
  const outputs = hex.split(OUTPUT_SEPARATOR);
  const strippedOutputs = outputs.slice(1, outputs.length - 1);
  if (strippedOutputs.length === 0) {
    throw new Error('No data-carrying outputs found in transaction hex');
  }

  // Parse out script-pub-key-hashes from outputs.
  // All locals are declared explicitly: the previous implicit globals
  // (`i`, `end`, `pdfData`) throw a ReferenceError in strict/module code.
  const pdfDataArr = strippedOutputs.map((output) => {
    let parts = '';
    for (let i = 0; i < PARTS_PER_OUTPUT; i++) {
      const start = FIRST_PART_OFFSET + i * SPAN;
      parts += output.substring(start, start + PART_LEN);
    }
    return parts;
  });

  // Last bit is zeroes and is excluded.
  const lastIndex = pdfDataArr.length - 1;
  pdfDataArr[lastIndex] = pdfDataArr[lastIndex].substring(0, 50);

  // Join the pieces and strip size and checksum from the start.
  const pdfData = pdfDataArr.join('').substring(16);

  // Convert the hex string to a byte array; `match` yields null on an
  // empty string, so fall back to an empty list instead of crashing.
  const byteStrings = pdfData.match(/.{1,2}/g) || [];
  return Uint8Array.from(byteStrings, (byte) => parseInt(byte, 16));
};
/**
 * Persist the decoded bytes to `bitcoin.pdf` in the current working directory.
 *
 * @param {Uint8Array} hexArray - Bytes to write to disk.
 * @returns {void}
 */
const hexArrayToDisk = (hexArray) => {
  const outputPath = 'bitcoin.pdf';
  fs.writeFileSync(outputPath, hexArray);
};
/**
 * Run the whole pipeline: download the transaction hex for TXID, decode the
 * embedded bytes, and write them to disk.
 *
 * @returns {Promise<void>} Resolves once the file has been written.
 */
const main = async () => {
  // Fetch, decode and store in one pass; each step feeds the next.
  const rawHex = await fetchHex(TXID);
  const pdfBytes = hexToHexArray(rawHex);
  hexArrayToDisk(pdfBytes);
};
export default main
// ---- Source txn hex data (done in the browser here; the NodeJS variant is commented out) ----
// 1. NodeJS: From local file
// save raw transaction hex from bitcoin-cli to file
// $ bitcoin-cli getrawtransaction 54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713 > whitepaper.txn
// const fs = require('fs');
// let raw_txn = fs.readFileSync('whitepaper.txn', 'utf-8');
// 2. Browser: From web API
// Fetch raw txn from blockcypher api
let txid = "54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713";
let query = `${txid}?includeHex=true`;
let url = `https://api.blockcypher.com/v1/btc/main/txs/${query}`;
const response = await fetch(url);
// Fail loudly on HTTP errors instead of trying to parse an error body below.
if (!response.ok) {
  throw new Error(`Failed to fetch transaction ${txid}: HTTP ${response.status}`);
}
const json = await response.json();
let raw_txn = json.hex;

// ---- Parse pdf from hex data with vanilla JS ----
// Split outputs from hex data; the first and last chunks are dropped
let outputs = raw_txn.split('0100000000000000');
let stripped_outputs = outputs.slice(1, outputs.length - 1);

// Parse out script-pub-key-hashes from outputs
let pdfDataArr = [];
let partLen = 130;
let span = 132;
stripped_outputs.forEach(function (output) {
  let parts = '';
  let start = 6;
  // all outputs, except last 3, are 1-of-3 multisigs
  // `i` and `end` are declared locally; they used to leak as implicit globals
  for (let i = 0; i < 3; i++) {
    const end = start + partLen;
    parts += output.substring(start, end);
    start += span;
  }
  pdfDataArr.push(parts);
});

// last bit is zeroes and is excluded
pdfDataArr[pdfDataArr.length - 1] = pdfDataArr[pdfDataArr.length - 1].substring(0, 50);

// join pdf data array in string (declared explicitly instead of as an implicit global)
let pdfData = pdfDataArr.join('');

// strip size and checksum from start
pdfData = pdfData.substring(16);

// convert string (hex) to binary array
const arrayFromHexString = hexdata =>
  new Uint8Array(hexdata.match(/.{1,2}/g).map(byte => parseInt(byte, 16)));
const hexArray = arrayFromHexString(pdfData);

// ---- Output pdf file (done in the browser here) ----
// 1. NodeJS: write binary array to file
// fs.writeFileSync('bitcoin.pdf', hexArray)
// 2. Browser: prompt save from browser
let blob = new Blob([hexArray], {type: "application/pdf"});
let link = document.createElement('a');
link.href = window.URL.createObjectURL(blob);
link.download = "bitcoin.pdf";
link.click();
# Inspired by:
# > Jimmy Song
# https://bitcoin.stackexchange.com/questions/35959/how-is-the-whitepaper-decoded-from-the-blockchain-tx-with-1000x-m-of-n-multisi/35970#35970
#
# > @mutatrum on Twitter
# https://twitter.com/mutatrum/status/1352288123846324224
from hashlib import sha256
import json
from pathlib import Path
import shutil
import subprocess
from urllib.request import urlopen
TXID = '54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713'
# Fetch full hex of raw txn from a local file, bitcoin-cli, or a public API
def fetch_raw_txn(filename):
    """Return the raw whitepaper transaction hex, or None if no source works.

    Sources are tried in order:
      1. ``filename`` if it already exists (returned as bytes),
      2. ``bitcoin-cli getrawtransaction`` if the binary is on PATH (bytes),
      3. the blockcypher public API (returned as str).

    Args:
        filename: Path of a previously saved raw transaction file.

    Returns:
        bytes or str with the transaction hex, or None when every source
        failed. Failures are printed rather than raised.
    """
    raw_txn = None

    if Path(filename).exists():
        print("Fetching transaction from file...")
        with open(filename, 'rb') as f:
            raw_txn = f.read()
    else:
        if shutil.which('bitcoin-cli') is not None:
            print("Fetching transaction via 'bitcoin-cli'...")
            try:
                raw_txn = subprocess.check_output([
                    "bitcoin-cli",
                    "getrawtransaction",
                    TXID
                ])
            except (subprocess.CalledProcessError, OSError) as e:
                # bitcoin-cli being installed does not mean it can serve the
                # request; report the failure and fall through to the API
                # instead of crashing.
                print(e)

        if raw_txn is None:
            ENDPOINT = 'https://api.blockcypher.com/v1/btc/main/txs'
            QUERY = f'{TXID}?includeHex=true'
            try:
                # The timeout keeps an unresponsive API from hanging the script.
                with urlopen(f"{ENDPOINT}/{QUERY}", timeout=30) as resp:
                    resp_json = json.loads(resp.read())
                    raw_txn = resp_json.get('hex')
            except Exception as e:
                print(e)

    if raw_txn is None:
        print("No whitepaper transaction sources found, exiting...")

    print()
    return raw_txn
def parse_fetched_txn(raw_txn):
    """Return the fetched transaction as a ``(str, bytes)`` pair.

    Bytes input is decoded as UTF-8 for the string half; any other input is
    treated as a string and encoded as UTF-8 for the bytes half. ``None``
    yields ``(None, None)``.
    """
    if raw_txn is None:
        return None, None

    if isinstance(raw_txn, bytes):
        return raw_txn.decode('utf-8'), raw_txn

    return raw_txn, raw_txn.encode('utf-8')
def fetch_txn_hex(filename='whitepaper.txn'):
    """Fetch the raw transaction hex as a string, caching it in ``filename``.

    Args:
        filename: Cache file; it is read by ``fetch_raw_txn`` when present and
            created here when absent.

    Returns:
        The transaction hex as ``str``, or None if it could not be fetched.
    """
    raw_txn = fetch_raw_txn(filename)
    if raw_txn is None:
        return None

    raw_str, raw_bytes = parse_fetched_txn(raw_txn)

    if not Path(filename).exists():
        with open(filename, 'wb') as f:
            # Write the bytes form: the file is opened in binary mode and
            # raw_txn is a str when it came from the HTTP API, which would
            # raise TypeError here.
            f.write(raw_bytes)
        print(f"Saved raw txn to '{filename}'")

    return raw_str
def process_txn(raw_str):
    """Rebuild the PDF bytes from the raw transaction hex string.

    The hex is split on the output value marker and the first and last pieces
    are discarded. Three 130-character slices (offsets 6, 138 and 270) are
    taken from each remaining output and decoded from hex.
    """
    separator = "0100000000000000"
    slice_len, stride, first_offset = 130, 132, 6

    hex_parts = []
    for segment in raw_str.split(separator)[1:-1]:
        offsets = (first_offset + k * stride for k in range(3))
        hex_parts.append(''.join(segment[pos:pos + slice_len] for pos in offsets))

    # Replicating -c17-368600 from 'whitepaper.sh'
    hex_parts[0] = hex_parts[0][16:]
    hex_parts[-1] = hex_parts[-1][:50]

    # Decode each hex chunk and stitch the bytes together
    return b''.join(bytes.fromhex(part) for part in hex_parts)
def check_sha256(pdf, pdf_filename='bitcoin.pdf'):
    """Print the SHA256 digest of ``pdf`` and whether it matches the reference.

    Args:
        pdf: The PDF contents as bytes.
        pdf_filename: Name shown next to the digest in the output. It is a
            parameter with a default rather than a module global, so the
            function also works when this file is imported and the
            ``__main__`` block has not run.

    Returns:
        None. The result is only printed.
    """
    sha_reference = 'b1674191a88ec5cdd733e4240a81803105dc412d6c6708d53ab94fc248f4f553'
    file_check = sha256(pdf).hexdigest()
    pass_fail = 'passed!' if file_check == sha_reference else 'failed :('

    print()
    print("Running SHA256 checksum:")
    print(f"{file_check} {pdf_filename}")
    print(f"Check {pass_fail}")

    return None
if __name__ == "__main__":
    # Fetch transaction
    raw_str = fetch_txn_hex()
    if raw_str is None:
        # No source produced the transaction; stop with a non-zero status
        # instead of letting process_txn(None) raise an AttributeError.
        raise SystemExit(1)

    # Process transaction
    pdf = process_txn(raw_str)

    # Write bytes to pdf file
    pdf_filename = 'bitcoin.pdf'
    with open(pdf_filename, "wb") as f:
        f.write(pdf)
    print(f"File created at '{pdf_filename}'")

    # Run SHA256 check
    check_sha256(pdf)
#!/bin/bash
# Credit: @mutatrum on Twitter
# https://twitter.com/mutatrum/status/1352288123846324224
# Abort on the first failing command, on unset variables, and when any stage
# of a pipeline fails, so that a bitcoin-cli error does not go unnoticed and
# leave an empty or corrupt bitcoin.pdf behind.
set -euo pipefail

# Parse the whitepaper:
#   1. fetch the raw transaction hex
#   2. put every output on its own line (split on the output value marker)
#   3. drop the first chunk
#   4. keep the three 130-character data slices of each output
#   5. join everything back into one line
#   6. keep characters 17-368600 of the joined data
#   7. convert the hex back to binary
bitcoin-cli getrawtransaction 54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713 \
  | sed 's/0100000000000000/\n/g' \
  | tail -n +2 \
  | cut -c7-136,139-268,271-400 \
  | tr -d '\n' \
  | cut -c17-368600 \
  | xxd -p -r > bitcoin.pdf

# Check sha256sum
echo "$ sha256sum bitcoin.pdf"
sha256sum bitcoin.pdf
# Credit: Jimmy Song
# https://bitcoin.stackexchange.com/questions/35959/how-is-the-whitepaper-decoded-from-the-blockchain-tx-with-1000x-m-of-n-multisi/35970#35970
import subprocess
# Ask bitcoin-cli for the full raw transaction hex
raw = subprocess.check_output(
    ["bitcoin-cli", "getrawtransaction",
     "54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713"]
)

# check_output returns bytes; the parsing below works on text
if isinstance(raw, bytes):
    raw = raw.decode('utf-8')

assert isinstance(raw, str), \
    "Raw transaction needs to be fetched in as a string at this point before continuing"

# Split the hex into outputs and collect the embedded data
outputs = raw.split("0100000000000000")
pdf = b''
for output in outputs[1:-2]:
    # each of these outputs carries three 65-byte parts, 132 hex chars apart
    for cur in (6, 138, 270):
        pdf += bytes.fromhex(output[cur:cur+130])

# the second-to-last chunk is taken from offset 6 up to its last 4 characters
pdf += bytes.fromhex(outputs[-2][6:-4])

# Write the bytes to the pdf file, dropping the first and last 8 bytes
with open("bitcoin.pdf", "wb") as f:
    f.write(pdf[8:-8])
@vindard
Copy link
Author

vindard commented Jan 24, 2021

Notes-to-self:

  1. Fetching the raw txn from rpc endpoint via cURL:

    $ curl \
         --data-binary '{"id":"getrawtransaction","params":["54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713"],"method":"getrawtransaction","jsonrpc":"2.0"}' \
         http://$rpcuser:$rpcpassword@127.0.0.1:8332 \
         | jq -r .result
    
  2. Fetching the raw txn from public api via cURL:

    $ curl -s https://api.blockcypher.com/v1/btc/main/txs/54e48e5f5c656b26c3bca14a8c95aa583d07ebe84dde3b7dd4a78f4e4186e713\?includeHex\=true \
         | jq -r .hex
    

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment