Last active
January 24, 2025 15:13
-
-
Save AdamISZ/ded01ea20703f5325cbb02ef2da8a13d to your computer and use it in GitHub Desktop.
Script for disambiguating taker vs maker in Joinmarket transactions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" Tool to take sets of Joinmarket coinjoin transactions, | |
and do subset sum iteration to find possible sets of taker | |
inputs, based on an assumed max possible value of maker fees | |
(see tolerance). | |
""" | |
from itertools import combinations, chain, product | |
from jmbitcoin import CTransaction, is_jm_tx, CScript | |
from jmbase import hextobin, bintohex | |
from jmclient import JsonRpc, JsonRpcError | |
import sys | |
import signal | |
import time | |
class TimeoutException(Exception): | |
pass | |
def timeout_handler(signum, frame): | |
raise TimeoutException | |
def get_transaction(jrpc: JsonRpc, txid: str): | |
try: | |
res = jrpc.call("getrawtransaction", [txid]) | |
except JsonRpcError as e: | |
print("Failed gettransaction call; JsonRpcError: " + repr(e)) | |
return None | |
except Exception as e: | |
print("Failed gettransaction call; unexpected error:") | |
print(str(e)) | |
return None | |
if res is None: | |
# happens in case of rpc connection failure: | |
return None | |
return CTransaction.deserialize(hextobin(res)) | |
def get_input_values_and_fee(jrpc: JsonRpc, txid, max_ins=15): | |
""" | |
Given a transaction ID (txid), returns a list of the values of each input. | |
Parameters: | |
txid (str): The transaction ID to query. | |
max_ins (int): The maximum size of the vector of inputs allowed. | |
(list of inputs, fee) or (None, fee) if input set is too large, | |
or None if the transaction inputs have the wrong script type. | |
""" | |
tx1 = get_transaction(jrpc, txid) | |
if len(tx1.vin) > max_ins: | |
return (None, None) | |
input_values = [] | |
fee_total = -sum(x.nValue for x in tx1.vout) | |
for vin in tx1.vin: | |
input_txid = vin.prevout.hash[::-1] | |
vout_index = vin.prevout.n | |
input_tx = get_transaction(jrpc, bintohex(input_txid)) | |
output_value = input_tx.vout[vout_index].nValue | |
if not input_tx.vout[vout_index].scriptPubKey.is_witness_v0_keyhash(): | |
print("rejecting {} because it has a non p2wpkh input".format(txid)) | |
return None | |
input_values.append(output_value) | |
fee_total += output_value # which is actually an input .. | |
return (input_values, fee_total) | |
def read_txs_from_file(fn): | |
lines = [] | |
txs = [] | |
with open(fn, "rb") as f: | |
lines = f.readlines() | |
for l in lines: | |
txs.append(CTransaction.deserialize( | |
hextobin(l.strip().decode()))) | |
return txs | |
def get_tx_info(jrpc: JsonRpc, tx: CTransaction, max_ins=15): | |
hextxid = bintohex(tx.GetTxid()[::-1]) | |
print("Starting analysis of ", hextxid) | |
# Check script types | |
if not all([y.scriptPubKey.is_witness_v0_keyhash() for y in tx.vout]): | |
print("rejecting a tx: {} because its outputs were not all p2wpkh".format(hextxid)) | |
return [None]*5 | |
cj_amt, num = is_jm_tx(tx) #note, a sanity check if your input already passed this filter | |
res = get_input_values_and_fee(jrpc, txid=hextxid, max_ins=max_ins) | |
if res is None: | |
print("Transaction {} has inputs of wrong script type.".format(hextxid)) | |
return [None]*5 | |
if res[0] is None: | |
print("Transaction's input set is greater than 20, ignoring: {}".format(hextxid)) | |
return [None]*5 | |
in_vals, txfee = res | |
all_outs = [x.nValue for x in tx.vout] | |
change_outs = [x.nValue for x in tx.vout if x.nValue != cj_amt] | |
if len(change_outs) not in [num, num -1]: | |
print("Not a valid JM pattern transaction: {}, ignoring".format(hextxid)) | |
return [None]*5 | |
return (in_vals, change_outs, num, cj_amt, txfee) | |
def power_set_indices(l): | |
"""Generate all subsets of the indices of the input list lazily.""" | |
iil = range(len(l)) | |
for r in range(1, len(iil) + 1): # For subset sizes from 0 to len(l) | |
for subset in combinations(iil, r): # Yield each combination of size r | |
yield subset | |
def power_set2(l): | |
"""Generate all subsets of the input list lazily.""" | |
for r in range(1, len(l) + 1): # For subset sizes from 1 to len(l) | |
for subset in combinations(l, r): # Yield each combination of size r | |
yield subset | |
def check_no_overlap(S): | |
x_values = set() | |
y_values = set() | |
for x, y in S: | |
# Check for overlap in y | |
if y in y_values: | |
return False | |
# Check for overlap in x | |
if x_values.intersection(x): | |
return False | |
x_values.update(x) | |
y_values.add(y) | |
return True | |
# strategy: | |
# first iterate over all subsets, then over all change outs, check if they are valid | |
# taker or maker combinations. make a taker and maker list of such subsets. | |
# then, iterate over each valid taker subset+change, and iterate over all | |
# sets of N maker subsets where N is cjouts -1, then look for any overlap in ins or change outs | |
# if there are none, the total set is valid. | |
def jm_sudoku(tx_ins, tx_change_outs, num_cjouts, cjout, txfee, tolerance_factor, tolerance_min): | |
#if num_cjouts and tx_change_outs and num_cjouts > len(tx_change_outs): | |
# return "sweep" | |
if tx_ins is None: | |
return | |
tolerance = int(tolerance_factor * cjout) | |
if tolerance < tolerance_min: | |
tolerance = tolerance_min | |
def is_valid_taker(ins_sum, change): | |
delta = ins_sum - cjout - change | |
if delta > 0 and delta < txfee + tolerance*(num_cjouts-1): | |
return True | |
return False | |
def is_valid_maker(ins_sum, change): | |
delta = ins_sum - cjout - change | |
if delta <= 0 and delta > - tolerance: | |
return True | |
return False | |
valid_taker_sets = [] | |
valid_maker_sets = [] | |
combos_found = 0 | |
for x in power_set_indices(tx_ins): | |
sumx = sum([tx_ins[i] for i in x]) | |
for co in tx_change_outs: | |
if is_valid_maker(sumx, co): | |
valid_maker_sets.append((x, co)) | |
if is_valid_taker(sumx, co): | |
valid_taker_sets.append((x, co)) | |
if is_valid_taker(sumx, 0): | |
valid_taker_sets.append((x, 0)) | |
# we now have a much filtered set of subsets that fit maker or taker | |
# pattern. | |
print("Found {} valid maker sets and {} valid taker sets".format( | |
len(valid_maker_sets), len(valid_taker_sets))) | |
for ts in valid_taker_sets: | |
nmakers = num_cjouts - 1 | |
for s in combinations(valid_maker_sets, nmakers): | |
s_with_taker = list(s) | |
s_with_taker.append(ts) | |
# check if there is any overlap in choices of inputs or change outs; | |
# if none, it is valid | |
if check_no_overlap(s_with_taker): | |
print("found one valid combination: ", s_with_taker, ts) | |
combos_found += 1 | |
if combos_found > 4: | |
return combos_found | |
print("End of loop for input data: ", (tx_ins, tx_change_outs, num_cjouts, cjout, txfee)) | |
return combos_found | |
if __name__ == "__main__": | |
# for testing algo, some fixed data: | |
#for x in tx_data_list: | |
# jm_sudoku(*x) | |
# arguments: the file containing one hex tx per line, and then the limit | |
# on how many inputs we will try to analyze (maybe 15-20; too many becomes too | |
# computationally hard to process) | |
filename = sys.argv[1] | |
max_ins = int(sys.argv[2]) | |
rpc_host = sys.argv[3] | |
rpc_port = int(sys.argv[4]) | |
rpc_username = sys.argv[5] | |
rpc_password = sys.argv[6] | |
jrpc = JsonRpc(rpc_host, rpc_port, rpc_username, rpc_password) | |
# btc value assumed to be the max realistic size of the fee to an | |
# # individual maker, in the join; expressed as a fraction of the coinjoin amount: | |
tolerance_factor = 0.004 | |
# But if that is too low, we can go with a more realistic maximum sats: | |
# # (TODO need to better model ranges of fees for different scenarios) | |
tolerance_min = 1000 | |
txs = read_txs_from_file(filename) | |
ress = {"sweep": 0, "one": 0, "more": 0, "none": 0} | |
for tx in txs: | |
signal.signal(signal.SIGALRM, timeout_handler) | |
signal.alarm(360) | |
# this outputs "found one valid combination..." as per above | |
# when it finds any completely valid solution. So if only one, | |
# the taker set is uniquely determined (modulo the tolerance | |
# limits being valid), if it outputs more than one, check | |
# whether the final entry (which is the taker set) is different | |
# across the multiple solutions. | |
try: | |
cf = jm_sudoku(*get_tx_info(jrpc, tx, max_ins=max_ins), | |
tolerance_factor, tolerance_min) | |
if cf is None: | |
continue | |
if cf == "sweep": | |
ress["sweep"] += 1 | |
elif cf == 1: | |
ress["one"] += 1 | |
elif cf > 1: | |
ress["more"] += 1 | |
elif cf == 0: | |
ress["none"] += 1 | |
except TimeoutException: | |
print(f"Transaction {tx} timed out after 360 seconds.") | |
finally: | |
# Disable the alarm | |
signal.alarm(0) | |
print("result of full run: ", ress) | |
# earlier data gathered for testing: | |
# data used: | |
# first a silly example with round numbers for sanity cheking the algo, then: | |
# ptodd's example, 4 party: | |
#([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030) | |
#ptodd's example, 5 party: | |
# ([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197) | |
# example data from block 800K onwards, three transactions: | |
# 71ccddbacdf18fafdf5aec9941bcc2fd608ce41925b2aff1c6d634ef38a5b5b7 | |
# c9215bf1fdad0ebbef21957abd4483ae4edb56bfcfdd0345c875e16e62159498 | |
# bdc48bc1a857727bd0d992d5ecbc40ad940cf1f3b4fbfd83c60fca0595d11ede | |
# (With 37 inputs this last one proves not tractable on my machine (40GB). | |
# The power set of inputs is of size 2^39 which isn't very easy to handle.) | |
tx_data_list = [ | |
([100000000, | |
200000000, | |
300000000, | |
400000000, | |
500000000, | |
600000000, | |
700000000], | |
[1050000000, | |
350000000, | |
649999999], | |
3, | |
250000000, | |
1), | |
([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030), | |
([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197), | |
([17501103, 5277511, 3000000, 1308769, 97290000, 19408302, | |
213206329, 96726994, 1797090977, 2986222, 16040755, | |
49937999, 206354034], [109627040, 116481366, 565037, | |
1234015, 17460219, 1700365531], | |
6, 96726994, 33823), | |
([9392989,21898505,382810465,87690000,10143340,257092100,257092100, | |
9233256,172030383,13453138,20157334,193677471,17646058,8473024], | |
[5929832,85091950,210813255,21680433,85091450,20140033], 6, 172000650, 39310), | |
([385528339,426871087,362729339,217656296,542591231,254333081,574248535, | |
256197495,151160752,246701132,249017703,190614994, 1884197, 1299920545, | |
104680590,499793847,248613107,480556856,210415858,97290000,392872777, | |
871817754,104000000,242219464,171236016,372773157,1805067232,96651122, | |
362729339,118018888,96726994,163505485,499700000,104680590,97290000, | |
98340000,1173951,23972532,3034468], [340347513,65129613,205371416, | |
314505417,209880426,89542239, | |
3699410], 6, 15.99729410, 32849)] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment