Skip to content

Instantly share code, notes, and snippets.

@josiahcarlson
Created October 3, 2023 03:22
Show Gist options
  • Save josiahcarlson/299c6cfaee33985c2b4aa80a83247cf9 to your computer and use it in GitHub Desktop.
Save josiahcarlson/299c6cfaee33985c2b4aa80a83247cf9 to your computer and use it in GitHub Desktop.
Find all 1-bit differences among provided integers.
"""
one_bit_difference.py
Written September / October 2023 by Josiah Carlson - [email protected]
Released into the public domain. Please include this authorship note / reference
to this GIST if you use / derive this software. Thank you.
Been thinking about this algorithm for about 17 years, haven't seen it around.
I might not know the proper literature search to find relevant references, or if
this was already done.
This algorithm finds all 1-bit differences in provided integers in
O(num_items * num_bits) time. We directly examine every bit approximately once
during our algorithm, may subtract up to num_items of up to num_bits, and may
mask a total of num_items for up to num_bits. Overall, this gives us a
worst-case constant for potentially large-number bit examinations at 3. For
random numbers, and even chosen 1-bit difference groups, we usually see a total
of closer to 2.3-2.5 in practice for non-sequential data, as execution constants
vary depending on the data bits themselves.
Overall Algorithm:
1. Insert all items into a prefix trie
2. From leaf to root, perform the "find_difference_merge()" function defined
below, with leaves having bpos=0, their result having bpos=1, etc.
3. your output should contain all pairs of 1-bit difference values
find_difference_merge:
1. if only one leaf has data, or there is only one leaf, return that one leaf
2. Given that zeroes and ones are already individually sorted, up to bpos bits,
we can zipper-compare the lowest bpos bits
a. if the lower bpos bits are the same, emit the match, advance to the next
item in both sequences
b. if the zeroes item is less, advance to the next zeroes item
c. else the ones item is less, advance to the next ones item
^^^ each step in #2 advances at least 1 item in zeroes, ones, or both lists
-- we count "comparisons" as subtractions in our metrics, as most platforms get
at least == 0 and <0 as easy checks for integer and infinite-precision
integer operations.
Cost of creating trie: O(num_items * num_bits) bit comparisons
Cost of traversing trie from leaf to root: O(num_items * comparisons_per_item)
comparisons_per_item <= num_bits - 1
In random inputs, sequential inputs, and explicitly generated 1-bit-difference
graphs, we see "subtractions_per_bit" never go above .75.
* for low-match scenarios, we will see masks / subtractions approach 1 from above
* for high-match scenarios, we will see masks / subtractions approach 2 + epsilon
from below
"""
from itertools import count
import os
# metrics counters
check_bit = count()
masks = count()
subtractions = count()
control_flow = count()
traverse = count()
def find_difference_merge(output: list, zeroes: list, ones: list, bpos: int) -> list:
"""
Args:
output - list to produce pairs of 1-bit differences
zeroes - list of items with a 0 in the bpos bit position
ones - list of items with a 1 in the bpos bit position
bpos - which bit position we are checking
Given two lists of sorted items with identical prefixes of n-bpos-1 bits, we
are to calculate all pairs of items with 1 total bit of difference.
Side-effects:
Appends (smaller, larger) pairs of values with 1 bit differences to output,
and returns combined list of zeroes + ones.
"""
next(control_flow)
if not zeroes:
return ones
if not ones:
return zeroes
zp = 0
op = 0
lz = len(zeroes)
lo = len(ones)
mask = (1 << bpos) - 1
next(masks)
next(masks)
zer = zeroes[zp] & mask
one = ones[op] & mask
while zp < lz and op < lo:
next(control_flow)
# Really want a 3-way comparison, there's an assembly instruction for that,
# or even just I86 cmov I suspect.
# print("comparing", bin(zer), zer, bin(one),one, bin(mask))
delta = zer - one
next(subtractions)
if delta == 0:
output.append((zeroes[zp], ones[op]))
zp += 1
op += 1
if zp < lz:
zer = zeroes[zp] & mask
next(masks)
if op < lo:
one = ones[op] & mask
next(masks)
next(control_flow) # op < lo and zp < lz
elif delta < 0:
zp += 1
if zp < lz:
zer = zeroes[zp] & mask
next(masks)
next(control_flow) # zp < lz
else:
op += 1
if op < lo:
one = ones[op] & mask
next(masks)
next(control_flow) # op < lo
next(control_flow) # delta == 0 or delta < 0
next(control_flow) # loop check exit
# all zeroes come before all ones
return zeroes + ones
def construct_trie(items: list, bits: int) -> tuple:
"""
Args:
items - list of items to partition into a bitwise trie
bits - how many bits to partition this list of items
Returns:
Recursively-partitioned trie structure.
construct_trie([3,1,2], 2) -> (([], [1]), ([2], [3]))
"""
mask = 1 << bits
zo = [], []
for item in items:
# we are literally examining the bits
next(check_bit)
zo[bool(item & mask)].append(item)
return tuple(
(construct_trie(zo_i, bits-1) if zo_i and bits else zo_i) for zo_i in zo)
def _traverse_trie(output: list, this_part: tuple, bits: int) -> list:
if isinstance(this_part, list):
return this_part
next(traverse)
this_part = tuple(
(_traverse_trie(output, part, bits - 1) if part else part) for part in this_part
)
return find_difference_merge(output, this_part[0], this_part[1], bits)
def find_difference_caller(items: list, bits: int, metrics=True) -> list:
"""
Args:
items - list of items to compare for 1-bit differences
bits - number of bits to compare for differences
Returns:
[(low, high), ...] for all pairs low and high from items differing in 1 bit.
"""
if metrics:
names = 'check_bit masks subtractions control_flow traverse'.split()
gl = globals()
for name in names:
gl[name] = count()
trie = construct_trie(items, bits-1)
output = []
_traverse_trie(output, trie, bits-1)
if metrics:
for name in names:
print(name, "per bit", (next(gl[name]) - 1) / len(items) / bits)
return output
def one_off_seq(count: int, bits: int, step: int=1) -> list:
"""
Args:
count - number of items to return
bits - number of bits in each item
step - how many bits to step (will check every bit, otherwise)
Returns:
list of items with as many entries having <bits> 1-bit differences as
possible.
"""
out = some_random_vals(1, bits)
known = set(out)
i = 0
while len(out) < count:
it = out[i]
for j in range(0, bits, step):
iit = it ^ (1 << ((j + i) % bits))
if iit not in known:
known.add(iit)
out.append(iit)
i += 1
return out
def some_random_vals(count: int, bits: int) -> list:
"""
Args:
count - number of items wanted
bits - number of bits in each item
Returns:
List of <count> unique items generated from os.urandom() of <bits> length.
"""
mask = (2**bits)-1
assert count <= (mask>>1)
read = (7 + bits) >> 3
out = set()
while len(out) < count:
it = int.from_bytes(os.urandom(read), 'big') & mask
out.add(it)
return list(out)
if __name__ == "__main__":
bits = 24
for cnt in (512, 4096, 65536):
# average case
items = some_random_vals(cnt, bits)
print("random", len(find_difference_caller(items, bits, True)))
# best case
# adjacent items are super easy to process, and have a lot of 1-bit
# differences
items = list(range(cnt))
print("sequential", len(find_difference_caller(items, bits, True)))
# these actually have fewer 1-off differences than sequential items, which
# is due to our latter construction of items that are 1 bit different from
# earlier items, but the earliest items have 1-bit differences from <bits>
# items
items = one_off_seq(cnt, bits)
print("explicit", len(find_difference_caller(items, bits, True)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment