Skip to content

Instantly share code, notes, and snippets.

@AdamISZ
Last active January 24, 2025 15:13
Show Gist options
  • Save AdamISZ/ded01ea20703f5325cbb02ef2da8a13d to your computer and use it in GitHub Desktop.
Save AdamISZ/ded01ea20703f5325cbb02ef2da8a13d to your computer and use it in GitHub Desktop.
Script for disambiguating taker vs maker in Joinmarket transactions
#!/usr/bin/env python3
""" Tool to take sets of Joinmarket coinjoin transactions,
and do subset sum iteration to find possible sets of taker
inputs, based on an assumed max possible value of maker fees
(see tolerance).
"""
from itertools import combinations, chain, product
from jmbitcoin import CTransaction, is_jm_tx, CScript
from jmbase import hextobin, bintohex
from jmclient import JsonRpc, JsonRpcError
import sys
import signal
import time
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
def get_transaction(jrpc: JsonRpc, txid: str):
try:
res = jrpc.call("getrawtransaction", [txid])
except JsonRpcError as e:
print("Failed gettransaction call; JsonRpcError: " + repr(e))
return None
except Exception as e:
print("Failed gettransaction call; unexpected error:")
print(str(e))
return None
if res is None:
# happens in case of rpc connection failure:
return None
return CTransaction.deserialize(hextobin(res))
def get_input_values_and_fee(jrpc: JsonRpc, txid, max_ins=15):
"""
Given a transaction ID (txid), returns a list of the values of each input.
Parameters:
txid (str): The transaction ID to query.
max_ins (int): The maximum size of the vector of inputs allowed.
(list of inputs, fee) or (None, fee) if input set is too large,
or None if the transaction inputs have the wrong script type.
"""
tx1 = get_transaction(jrpc, txid)
if len(tx1.vin) > max_ins:
return (None, None)
input_values = []
fee_total = -sum(x.nValue for x in tx1.vout)
for vin in tx1.vin:
input_txid = vin.prevout.hash[::-1]
vout_index = vin.prevout.n
input_tx = get_transaction(jrpc, bintohex(input_txid))
output_value = input_tx.vout[vout_index].nValue
if not input_tx.vout[vout_index].scriptPubKey.is_witness_v0_keyhash():
print("rejecting {} because it has a non p2wpkh input".format(txid))
return None
input_values.append(output_value)
fee_total += output_value # which is actually an input ..
return (input_values, fee_total)
def read_txs_from_file(fn):
lines = []
txs = []
with open(fn, "rb") as f:
lines = f.readlines()
for l in lines:
txs.append(CTransaction.deserialize(
hextobin(l.strip().decode())))
return txs
def get_tx_info(jrpc: JsonRpc, tx: CTransaction, max_ins=15):
hextxid = bintohex(tx.GetTxid()[::-1])
print("Starting analysis of ", hextxid)
# Check script types
if not all([y.scriptPubKey.is_witness_v0_keyhash() for y in tx.vout]):
print("rejecting a tx: {} because its outputs were not all p2wpkh".format(hextxid))
return [None]*5
cj_amt, num = is_jm_tx(tx) #note, a sanity check if your input already passed this filter
res = get_input_values_and_fee(jrpc, txid=hextxid, max_ins=max_ins)
if res is None:
print("Transaction {} has inputs of wrong script type.".format(hextxid))
return [None]*5
if res[0] is None:
print("Transaction's input set is greater than 20, ignoring: {}".format(hextxid))
return [None]*5
in_vals, txfee = res
all_outs = [x.nValue for x in tx.vout]
change_outs = [x.nValue for x in tx.vout if x.nValue != cj_amt]
if len(change_outs) not in [num, num -1]:
print("Not a valid JM pattern transaction: {}, ignoring".format(hextxid))
return [None]*5
return (in_vals, change_outs, num, cj_amt, txfee)
def power_set_indices(l):
"""Generate all subsets of the indices of the input list lazily."""
iil = range(len(l))
for r in range(1, len(iil) + 1): # For subset sizes from 0 to len(l)
for subset in combinations(iil, r): # Yield each combination of size r
yield subset
def power_set2(l):
"""Generate all subsets of the input list lazily."""
for r in range(1, len(l) + 1): # For subset sizes from 1 to len(l)
for subset in combinations(l, r): # Yield each combination of size r
yield subset
def check_no_overlap(S):
x_values = set()
y_values = set()
for x, y in S:
# Check for overlap in y
if y in y_values:
return False
# Check for overlap in x
if x_values.intersection(x):
return False
x_values.update(x)
y_values.add(y)
return True
# strategy:
# first iterate over all subsets, then over all change outs, check if they are valid
# taker or maker combinations. make a taker and maker list of such subsets.
# then, iterate over each valid taker subset+change, and iterate over all
# sets of N maker subsets where N is cjouts -1, then look for any overlap in ins or change outs
# if there are none, the total set is valid.
def jm_sudoku(tx_ins, tx_change_outs, num_cjouts, cjout, txfee, tolerance_factor, tolerance_min):
#if num_cjouts and tx_change_outs and num_cjouts > len(tx_change_outs):
# return "sweep"
if tx_ins is None:
return
tolerance = int(tolerance_factor * cjout)
if tolerance < tolerance_min:
tolerance = tolerance_min
def is_valid_taker(ins_sum, change):
delta = ins_sum - cjout - change
if delta > 0 and delta < txfee + tolerance*(num_cjouts-1):
return True
return False
def is_valid_maker(ins_sum, change):
delta = ins_sum - cjout - change
if delta <= 0 and delta > - tolerance:
return True
return False
valid_taker_sets = []
valid_maker_sets = []
combos_found = 0
for x in power_set_indices(tx_ins):
sumx = sum([tx_ins[i] for i in x])
for co in tx_change_outs:
if is_valid_maker(sumx, co):
valid_maker_sets.append((x, co))
if is_valid_taker(sumx, co):
valid_taker_sets.append((x, co))
if is_valid_taker(sumx, 0):
valid_taker_sets.append((x, 0))
# we now have a much filtered set of subsets that fit maker or taker
# pattern.
print("Found {} valid maker sets and {} valid taker sets".format(
len(valid_maker_sets), len(valid_taker_sets)))
for ts in valid_taker_sets:
nmakers = num_cjouts - 1
for s in combinations(valid_maker_sets, nmakers):
s_with_taker = list(s)
s_with_taker.append(ts)
# check if there is any overlap in choices of inputs or change outs;
# if none, it is valid
if check_no_overlap(s_with_taker):
print("found one valid combination: ", s_with_taker, ts)
combos_found += 1
if combos_found > 4:
return combos_found
print("End of loop for input data: ", (tx_ins, tx_change_outs, num_cjouts, cjout, txfee))
return combos_found
if __name__ == "__main__":
# for testing algo, some fixed data:
#for x in tx_data_list:
# jm_sudoku(*x)
# arguments: the file containing one hex tx per line, and then the limit
# on how many inputs we will try to analyze (maybe 15-20; too many becomes too
# computationally hard to process)
filename = sys.argv[1]
max_ins = int(sys.argv[2])
rpc_host = sys.argv[3]
rpc_port = int(sys.argv[4])
rpc_username = sys.argv[5]
rpc_password = sys.argv[6]
jrpc = JsonRpc(rpc_host, rpc_port, rpc_username, rpc_password)
# btc value assumed to be the max realistic size of the fee to an
# # individual maker, in the join; expressed as a fraction of the coinjoin amount:
tolerance_factor = 0.004
# But if that is too low, we can go with a more realistic maximum sats:
# # (TODO need to better model ranges of fees for different scenarios)
tolerance_min = 1000
txs = read_txs_from_file(filename)
ress = {"sweep": 0, "one": 0, "more": 0, "none": 0}
for tx in txs:
signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(360)
# this outputs "found one valid combination..." as per above
# when it finds any completely valid solution. So if only one,
# the taker set is uniquely determined (modulo the tolerance
# limits being valid), if it outputs more than one, check
# whether the final entry (which is the taker set) is different
# across the multiple solutions.
try:
cf = jm_sudoku(*get_tx_info(jrpc, tx, max_ins=max_ins),
tolerance_factor, tolerance_min)
if cf is None:
continue
if cf == "sweep":
ress["sweep"] += 1
elif cf == 1:
ress["one"] += 1
elif cf > 1:
ress["more"] += 1
elif cf == 0:
ress["none"] += 1
except TimeoutException:
print(f"Transaction {tx} timed out after 360 seconds.")
finally:
# Disable the alarm
signal.alarm(0)
print("result of full run: ", ress)
# earlier data gathered for testing:
# data used:
# first a silly example with round numbers for sanity cheking the algo, then:
# ptodd's example, 4 party:
#([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030)
#ptodd's example, 5 party:
# ([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197)
# example data from block 800K onwards, three transactions:
# 71ccddbacdf18fafdf5aec9941bcc2fd608ce41925b2aff1c6d634ef38a5b5b7
# c9215bf1fdad0ebbef21957abd4483ae4edb56bfcfdd0345c875e16e62159498
# bdc48bc1a857727bd0d992d5ecbc40ad940cf1f3b4fbfd83c60fca0595d11ede
# (With 37 inputs this last one proves not tractable on my machine (40GB).
# The power set of inputs is of size 2^39 which isn't very easy to handle.)
tx_data_list = [
([100000000,
200000000,
300000000,
400000000,
500000000,
600000000,
700000000],
[1050000000,
350000000,
649999999],
3,
250000000,
1),
([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030),
([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197),
([17501103, 5277511, 3000000, 1308769, 97290000, 19408302,
213206329, 96726994, 1797090977, 2986222, 16040755,
49937999, 206354034], [109627040, 116481366, 565037,
1234015, 17460219, 1700365531],
6, 96726994, 33823),
([9392989,21898505,382810465,87690000,10143340,257092100,257092100,
9233256,172030383,13453138,20157334,193677471,17646058,8473024],
[5929832,85091950,210813255,21680433,85091450,20140033], 6, 172000650, 39310),
([385528339,426871087,362729339,217656296,542591231,254333081,574248535,
256197495,151160752,246701132,249017703,190614994, 1884197, 1299920545,
104680590,499793847,248613107,480556856,210415858,97290000,392872777,
871817754,104000000,242219464,171236016,372773157,1805067232,96651122,
362729339,118018888,96726994,163505485,499700000,104680590,97290000,
98340000,1173951,23972532,3034468], [340347513,65129613,205371416,
314505417,209880426,89542239,
3699410], 6, 15.99729410, 32849)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment