AdamISZ · January 24, 2025 15:13
diff --git a/jmsudokuer.py b/jmsudokuer.py
 #!/usr/bin/env python3

 """ Tool to take sets of Joinmarket coinjoin transactions,
    and do subset sum iteration to find possible sets of taker
    inputs, based on an assumed max possible value of maker fees
    (see tolerance).
 """

 from itertools import combinations, chain, product
 from jmbitcoin import CTransaction, is_jm_tx, CScript
 from jmbase import hextobin, bintohex
 from jmclient import JsonRpc, JsonRpcError
 import sys
 import signal

 import time

 class TimeoutException(Exception):
    pass

 def timeout_handler(signum, frame):
    raise TimeoutException

 def get_transaction(jrpc: JsonRpc, txid: str):
    try:
        res = jrpc.call("getrawtransaction", [txid])
    except JsonRpcError as e:
        print("Failed gettransaction call; JsonRpcError: " + repr(e))
        return None
    except Exception as e:
        print("Failed gettransaction call; unexpected error:")
        print(str(e))
        return None
    if res is None:
        # happens in case of rpc connection failure:
        return None
    return CTransaction.deserialize(hextobin(res))
        
 def get_input_values_and_fee(jrpc: JsonRpc, txid, max_ins=15):
    """
    Given a transaction ID (txid), returns a list of the values of each input.
    Parameters:
        txid (str): The transaction ID to query.
        max_ins (int): The maximum size of the vector of inputs allowed.
        (list of inputs, fee) or (None, fee) if input set is too large,
        or None if the transaction inputs have the wrong script type.
    """
    tx1 = get_transaction(jrpc, txid)

    if len(tx1.vin) > max_ins:
        return (None, None)
    input_values = []
    fee_total = -sum(x.nValue for x in tx1.vout)
    for vin in tx1.vin:
        input_txid = vin.prevout.hash[::-1]
        vout_index = vin.prevout.n
        input_tx = get_transaction(jrpc, bintohex(input_txid))
        output_value = input_tx.vout[vout_index].nValue
        if not input_tx.vout[vout_index].scriptPubKey.is_witness_v0_keyhash():
            print("rejecting {} because it has a non p2wpkh input".format(txid))
            return None
        input_values.append(output_value)
        fee_total += output_value # which is actually an input ..
    
    return (input_values, fee_total)

 def read_txs_from_file(fn):
    lines = []
    txs = []
    with open(fn, "rb") as f:
        lines = f.readlines()
    for l in lines:
        txs.append(CTransaction.deserialize(
            hextobin(l.strip().decode())))
    return txs

 def get_tx_info(jrpc: JsonRpc, tx: CTransaction, max_ins=15):
    hextxid = bintohex(tx.GetTxid()[::-1])
    print("Starting analysis of ", hextxid)
    # Check script types
    if not all([y.scriptPubKey.is_witness_v0_keyhash() for y in tx.vout]):
        print("rejecting a tx: {} because its outputs were not all p2wpkh".format(hextxid))
        return [None]*5
    cj_amt, num = is_jm_tx(tx) #note, a sanity check if your input already passed this filter
    res = get_input_values_and_fee(jrpc, txid=hextxid, max_ins=max_ins)
    if res is None:
        print("Transaction {} has inputs of wrong script type.".format(hextxid))
        return [None]*5
    if res[0] is None:
        print("Transaction's input set is greater than 20, ignoring: {}".format(hextxid))
        return [None]*5
    in_vals, txfee = res
    all_outs = [x.nValue for x in tx.vout]
    change_outs = [x.nValue for x in tx.vout if x.nValue != cj_amt]
    if len(change_outs) not in [num, num -1]:
        print("Not a valid JM pattern transaction: {}, ignoring".format(hextxid))
        return [None]*5
    return (in_vals, change_outs, num, cj_amt, txfee)


 def power_set_indices(l):
    """Generate all subsets of the indices of the input list lazily."""
    iil = range(len(l))
    for r in range(1, len(iil) + 1):  # For subset sizes from 0 to len(l)
        for subset in combinations(iil, r):  # Yield each combination of size r
            yield subset

 def power_set2(l):
    """Generate all subsets of the input list lazily."""
    for r in range(1, len(l) + 1):  # For subset sizes from 1 to len(l)
        for subset in combinations(l, r):  # Yield each combination of size r
            yield subset

 def check_no_overlap(S):
    x_values = set()
    y_values = set()
    for x, y in S:
        # Check for overlap in y
        if y in y_values:
            return False
        # Check for overlap in x
        if x_values.intersection(x):
            return False
        x_values.update(x)
        y_values.add(y)

    return True

 # strategy:
 # first iterate over all subsets, then over all change outs, check if they are valid
 # taker or maker combinations. make a taker and maker list of such subsets.
 # then, iterate over each valid taker subset+change, and iterate over all
 # sets of N maker subsets where N is cjouts -1, then look for any overlap in ins or change outs
 # if there are none, the total set is valid.
 def jm_sudoku(tx_ins, tx_change_outs, num_cjouts, cjout, txfee, tolerance_factor, tolerance_min):
    #if num_cjouts and tx_change_outs and num_cjouts > len(tx_change_outs):
    #    return "sweep"
    if tx_ins is None:
        return
    tolerance = int(tolerance_factor * cjout)
    if tolerance < tolerance_min:
        tolerance = tolerance_min
    def is_valid_taker(ins_sum, change):
        delta = ins_sum - cjout - change
        if delta > 0 and delta < txfee + tolerance*(num_cjouts-1):
            return True
        return False
    def is_valid_maker(ins_sum, change):
        delta = ins_sum - cjout - change
        if delta <= 0 and delta > - tolerance:
            return True
        return False

    valid_taker_sets = []
    valid_maker_sets = []
    combos_found = 0
    for x in power_set_indices(tx_ins):
        sumx = sum([tx_ins[i] for i in x])
        for co in tx_change_outs:
            if is_valid_maker(sumx, co):
                valid_maker_sets.append((x, co))
            if is_valid_taker(sumx, co):
                valid_taker_sets.append((x, co))
        if is_valid_taker(sumx, 0):
            valid_taker_sets.append((x, 0))
    # we now have a much filtered set of subsets that fit maker or taker
    # pattern.
    print("Found {} valid maker sets and {} valid taker sets".format(
        len(valid_maker_sets), len(valid_taker_sets)))
    for ts in valid_taker_sets:
            nmakers = num_cjouts - 1
            for s in combinations(valid_maker_sets, nmakers):
                s_with_taker = list(s)
                s_with_taker.append(ts)
                # check if there is any overlap in choices of inputs or change outs;
                # if none, it is valid
                if check_no_overlap(s_with_taker):
                    print("found one valid combination: ", s_with_taker, ts)
                    combos_found += 1
                if combos_found > 4:
                    return combos_found
    print("End of loop for input data: ", (tx_ins, tx_change_outs, num_cjouts, cjout, txfee))
    return combos_found 

 if __name__ == "__main__":
    # for testing algo, some fixed data:
    #for x in tx_data_list:
    #    jm_sudoku(*x)
    # arguments: the file containing one hex tx per line, and then the limit
    # on how many inputs we will try to analyze (maybe 15-20; too many becomes too
    # computationally hard to process)
    filename = sys.argv[1]
    max_ins = int(sys.argv[2])
    rpc_host = sys.argv[3]
    rpc_port = int(sys.argv[4])
    rpc_username = sys.argv[5]
    rpc_password = sys.argv[6]
    jrpc = JsonRpc(rpc_host, rpc_port, rpc_username, rpc_password)
    # btc value assumed to be the max realistic size of the fee to an
    # # individual maker, in the join; expressed as a fraction of the coinjoin amount:
    tolerance_factor = 0.004
    # But if that is too low, we can go with a more realistic maximum sats:
    # # (TODO need to better model ranges of fees for different scenarios)
    tolerance_min = 1000
    txs = read_txs_from_file(filename)
    ress = {"sweep": 0, "one": 0, "more": 0, "none": 0}
    for tx in txs:
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(360)
        # this outputs "found one valid combination..." as per above
        # when it finds any completely valid solution. So if only one,
        # the taker set is uniquely determined (modulo the tolerance
        # limits being valid), if it outputs more than one, check
        # whether the final entry (which is the taker set) is different
        # across the multiple solutions.
        try:
            cf = jm_sudoku(*get_tx_info(jrpc, tx, max_ins=max_ins),
                           tolerance_factor, tolerance_min)
            if cf is None:
                continue
            if cf == "sweep":
                ress["sweep"] += 1
            elif cf == 1:
                ress["one"] += 1
            elif cf > 1:
                ress["more"] += 1
            elif cf  == 0:
                ress["none"] += 1
        except TimeoutException:
            print(f"Transaction {tx} timed out after 360 seconds.")
        finally:
            # Disable the alarm
            signal.alarm(0)
    print("result of full run: ", ress)




 # earlier data gathered for testing:

 # data used:
 # first a silly example with round numbers for sanity cheking the algo, then:

 # ptodd's example, 4 party:
 #([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030)

 #ptodd's example, 5 party:
 # ([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197)

 # example data from block 800K onwards, three transactions:
 # 71ccddbacdf18fafdf5aec9941bcc2fd608ce41925b2aff1c6d634ef38a5b5b7
 # c9215bf1fdad0ebbef21957abd4483ae4edb56bfcfdd0345c875e16e62159498
 # bdc48bc1a857727bd0d992d5ecbc40ad940cf1f3b4fbfd83c60fca0595d11ede
 # (With 37 inputs this last one proves not tractable on my machine (40GB).
 # The power set of inputs is of size 2^39 which isn't very easy to handle.)

 tx_data_list = [
    ([100000000,
      200000000,
      300000000,
      400000000,
      500000000,
      600000000,
      700000000],
     [1050000000,
      350000000,
      649999999],
      3,
      250000000,
      1),
    ([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030),
    ([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197),
    ([17501103, 5277511, 3000000, 1308769, 97290000, 19408302,
                 213206329, 96726994, 1797090977, 2986222, 16040755,
                 49937999, 206354034], [109627040, 116481366, 565037,
                                           1234015, 17460219, 1700365531],
                                           6, 96726994, 33823),
                ([9392989,21898505,382810465,87690000,10143340,257092100,257092100,
                  9233256,172030383,13453138,20157334,193677471,17646058,8473024],
                  [5929832,85091950,210813255,21680433,85091450,20140033], 6, 172000650, 39310),
                ([385528339,426871087,362729339,217656296,542591231,254333081,574248535,
                  256197495,151160752,246701132,249017703,190614994, 1884197, 1299920545,
                  104680590,499793847,248613107,480556856,210415858,97290000,392872777,
                  871817754,104000000,242219464,171236016,372773157,1805067232,96651122,
                  362729339,118018888,96726994,163505485,499700000,104680590,97290000,
                  98340000,1173951,23972532,3034468], [340347513,65129613,205371416,
                                                                 314505417,209880426,89542239,
                                                                 3699410], 6, 15.99729410, 32849)]
	#!/usr/bin/env python3

	""" Tool to take sets of Joinmarket coinjoin transactions,
	and do subset sum iteration to find possible sets of taker
	inputs, based on an assumed max possible value of maker fees
	(see tolerance).
	"""

	from itertools import combinations, chain, product
	from jmbitcoin import CTransaction, is_jm_tx, CScript
	from jmbase import hextobin, bintohex
	from jmclient import JsonRpc, JsonRpcError
	import sys
	import signal

	import time

	class TimeoutException(Exception):
	pass

	def timeout_handler(signum, frame):
	raise TimeoutException

	def get_transaction(jrpc: JsonRpc, txid: str):
	try:
	res = jrpc.call("getrawtransaction", [txid])
	except JsonRpcError as e:
	print("Failed gettransaction call; JsonRpcError: " + repr(e))
	return None
	except Exception as e:
	print("Failed gettransaction call; unexpected error:")
	print(str(e))
	return None
	if res is None:
	# happens in case of rpc connection failure:
	return None
	return CTransaction.deserialize(hextobin(res))

	def get_input_values_and_fee(jrpc: JsonRpc, txid, max_ins=15):
	"""
	Given a transaction ID (txid), returns a list of the values of each input.
	Parameters:
	txid (str): The transaction ID to query.
	max_ins (int): The maximum size of the vector of inputs allowed.
	(list of inputs, fee) or (None, fee) if input set is too large,
	or None if the transaction inputs have the wrong script type.
	"""
	tx1 = get_transaction(jrpc, txid)

	if len(tx1.vin) > max_ins:
	return (None, None)
	input_values = []
	fee_total = -sum(x.nValue for x in tx1.vout)
	for vin in tx1.vin:
	input_txid = vin.prevout.hash[::-1]
	vout_index = vin.prevout.n
	input_tx = get_transaction(jrpc, bintohex(input_txid))
	output_value = input_tx.vout[vout_index].nValue
	if not input_tx.vout[vout_index].scriptPubKey.is_witness_v0_keyhash():
	print("rejecting {} because it has a non p2wpkh input".format(txid))
	return None
	input_values.append(output_value)
	fee_total += output_value # which is actually an input ..

	return (input_values, fee_total)

	def read_txs_from_file(fn):
	lines = []
	txs = []
	with open(fn, "rb") as f:
	lines = f.readlines()
	for l in lines:
	txs.append(CTransaction.deserialize(
	hextobin(l.strip().decode())))
	return txs

	def get_tx_info(jrpc: JsonRpc, tx: CTransaction, max_ins=15):
	hextxid = bintohex(tx.GetTxid()[::-1])
	print("Starting analysis of ", hextxid)
	# Check script types
	if not all([y.scriptPubKey.is_witness_v0_keyhash() for y in tx.vout]):
	print("rejecting a tx: {} because its outputs were not all p2wpkh".format(hextxid))
	return [None]*5
	cj_amt, num = is_jm_tx(tx) #note, a sanity check if your input already passed this filter
	res = get_input_values_and_fee(jrpc, txid=hextxid, max_ins=max_ins)
	if res is None:
	print("Transaction {} has inputs of wrong script type.".format(hextxid))
	return [None]*5
	if res[0] is None:
	print("Transaction's input set is greater than 20, ignoring: {}".format(hextxid))
	return [None]*5
	in_vals, txfee = res
	all_outs = [x.nValue for x in tx.vout]
	change_outs = [x.nValue for x in tx.vout if x.nValue != cj_amt]
	if len(change_outs) not in [num, num -1]:
	print("Not a valid JM pattern transaction: {}, ignoring".format(hextxid))
	return [None]*5
	return (in_vals, change_outs, num, cj_amt, txfee)


	def power_set_indices(l):
	"""Generate all subsets of the indices of the input list lazily."""
	iil = range(len(l))
	for r in range(1, len(iil) + 1): # For subset sizes from 0 to len(l)
	for subset in combinations(iil, r): # Yield each combination of size r
	yield subset

	def power_set2(l):
	"""Generate all subsets of the input list lazily."""
	for r in range(1, len(l) + 1): # For subset sizes from 1 to len(l)
	for subset in combinations(l, r): # Yield each combination of size r
	yield subset

	def check_no_overlap(S):
	x_values = set()
	y_values = set()
	for x, y in S:
	# Check for overlap in y
	if y in y_values:
	return False
	# Check for overlap in x
	if x_values.intersection(x):
	return False
	x_values.update(x)
	y_values.add(y)

	return True

	# strategy:
	# first iterate over all subsets, then over all change outs, check if they are valid
	# taker or maker combinations. make a taker and maker list of such subsets.
	# then, iterate over each valid taker subset+change, and iterate over all
	# sets of N maker subsets where N is cjouts -1, then look for any overlap in ins or change outs
	# if there are none, the total set is valid.
	def jm_sudoku(tx_ins, tx_change_outs, num_cjouts, cjout, txfee, tolerance_factor, tolerance_min):
	#if num_cjouts and tx_change_outs and num_cjouts > len(tx_change_outs):
	# return "sweep"
	if tx_ins is None:
	return
	tolerance = int(tolerance_factor * cjout)
	if tolerance < tolerance_min:
	tolerance = tolerance_min
	def is_valid_taker(ins_sum, change):
	delta = ins_sum - cjout - change
	if delta > 0 and delta < txfee + tolerance*(num_cjouts-1):
	return True
	return False
	def is_valid_maker(ins_sum, change):
	delta = ins_sum - cjout - change
	if delta <= 0 and delta > - tolerance:
	return True
	return False

	valid_taker_sets = []
	valid_maker_sets = []
	combos_found = 0
	for x in power_set_indices(tx_ins):
	sumx = sum([tx_ins[i] for i in x])
	for co in tx_change_outs:
	if is_valid_maker(sumx, co):
	valid_maker_sets.append((x, co))
	if is_valid_taker(sumx, co):
	valid_taker_sets.append((x, co))
	if is_valid_taker(sumx, 0):
	valid_taker_sets.append((x, 0))
	# we now have a much filtered set of subsets that fit maker or taker
	# pattern.
	print("Found {} valid maker sets and {} valid taker sets".format(
	len(valid_maker_sets), len(valid_taker_sets)))
	for ts in valid_taker_sets:
	nmakers = num_cjouts - 1
	for s in combinations(valid_maker_sets, nmakers):
	s_with_taker = list(s)
	s_with_taker.append(ts)
	# check if there is any overlap in choices of inputs or change outs;
	# if none, it is valid
	if check_no_overlap(s_with_taker):
	print("found one valid combination: ", s_with_taker, ts)
	combos_found += 1
	if combos_found > 4:
	return combos_found
	print("End of loop for input data: ", (tx_ins, tx_change_outs, num_cjouts, cjout, txfee))
	return combos_found

	if __name__ == "__main__":
	# for testing algo, some fixed data:
	#for x in tx_data_list:
	# jm_sudoku(*x)
	# arguments: the file containing one hex tx per line, and then the limit
	# on how many inputs we will try to analyze (maybe 15-20; too many becomes too
	# computationally hard to process)
	filename = sys.argv[1]
	max_ins = int(sys.argv[2])
	rpc_host = sys.argv[3]
	rpc_port = int(sys.argv[4])
	rpc_username = sys.argv[5]
	rpc_password = sys.argv[6]
	jrpc = JsonRpc(rpc_host, rpc_port, rpc_username, rpc_password)
	# btc value assumed to be the max realistic size of the fee to an
	# # individual maker, in the join; expressed as a fraction of the coinjoin amount:
	tolerance_factor = 0.004
	# But if that is too low, we can go with a more realistic maximum sats:
	# # (TODO need to better model ranges of fees for different scenarios)
	tolerance_min = 1000
	txs = read_txs_from_file(filename)
	ress = {"sweep": 0, "one": 0, "more": 0, "none": 0}
	for tx in txs:
	signal.signal(signal.SIGALRM, timeout_handler)
	signal.alarm(360)
	# this outputs "found one valid combination..." as per above
	# when it finds any completely valid solution. So if only one,
	# the taker set is uniquely determined (modulo the tolerance
	# limits being valid), if it outputs more than one, check
	# whether the final entry (which is the taker set) is different
	# across the multiple solutions.
	try:
	cf = jm_sudoku(*get_tx_info(jrpc, tx, max_ins=max_ins),
	tolerance_factor, tolerance_min)
	if cf is None:
	continue
	if cf == "sweep":
	ress["sweep"] += 1
	elif cf == 1:
	ress["one"] += 1
	elif cf > 1:
	ress["more"] += 1
	elif cf == 0:
	ress["none"] += 1
	except TimeoutException:
	print(f"Transaction {tx} timed out after 360 seconds.")
	finally:
	# Disable the alarm
	signal.alarm(0)
	print("result of full run: ", ress)




	# earlier data gathered for testing:

	# data used:
	# first a silly example with round numbers for sanity cheking the algo, then:

	# ptodd's example, 4 party:
	#([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030)

	#ptodd's example, 5 party:
	# ([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197)

	# example data from block 800K onwards, three transactions:
	# 71ccddbacdf18fafdf5aec9941bcc2fd608ce41925b2aff1c6d634ef38a5b5b7
	# c9215bf1fdad0ebbef21957abd4483ae4edb56bfcfdd0345c875e16e62159498
	# bdc48bc1a857727bd0d992d5ecbc40ad940cf1f3b4fbfd83c60fca0595d11ede
	# (With 37 inputs this last one proves not tractable on my machine (40GB).
	# The power set of inputs is of size 2^39 which isn't very easy to handle.)

	tx_data_list = [
	([100000000,
	200000000,
	300000000,
	400000000,
	500000000,
	600000000,
	700000000],
	[1050000000,
	350000000,
	649999999],
	3,
	250000000,
	1),
	([30933, 100000, 58226, 75420, 1307822],[21103, 1258325, 39617, 50158], 4, 50042, 3030),
	([438789, 90210, 1017628, 79494, 7579144], [388797, 967828,40710,24132,7529601],5, 50000, 4197),
	([17501103, 5277511, 3000000, 1308769, 97290000, 19408302,
	213206329, 96726994, 1797090977, 2986222, 16040755,
	49937999, 206354034], [109627040, 116481366, 565037,
	1234015, 17460219, 1700365531],
	6, 96726994, 33823),
	([9392989,21898505,382810465,87690000,10143340,257092100,257092100,
	9233256,172030383,13453138,20157334,193677471,17646058,8473024],
	[5929832,85091950,210813255,21680433,85091450,20140033], 6, 172000650, 39310),
	([385528339,426871087,362729339,217656296,542591231,254333081,574248535,
	256197495,151160752,246701132,249017703,190614994, 1884197, 1299920545,
	104680590,499793847,248613107,480556856,210415858,97290000,392872777,
	871817754,104000000,242219464,171236016,372773157,1805067232,96651122,
	362729339,118018888,96726994,163505485,499700000,104680590,97290000,
	98340000,1173951,23972532,3034468], [340347513,65129613,205371416,
	314505417,209880426,89542239,
	3699410], 6, 15.99729410, 32849)]