Skip to content

Instantly share code, notes, and snippets.

@peterjwest
Last active January 3, 2025 19:10
Show Gist options
  • Save peterjwest/cc7d23201aa2051cd123276aee544074 to your computer and use it in GitHub Desktop.
Save peterjwest/cc7d23201aa2051cd123276aee544074 to your computer and use it in GitHub Desktop.
UUID search
import random
random.seed(42)
UUID_ENTROPY = 122
UUID_COUNT = 1 << UUID_ENTROPY
BIT_MASK_FULL = UUID_COUNT - 1
INDEX_OFFSET = 0
INDEX_OFFSET = random.randrange(UUID_COUNT)
def rank(matrix: list[int]) -> int:
'''
Calculate rank of matrix on GF(2) using Gaussian elimination
'''
matrix = [row for row in matrix if row > 0]
if len(matrix) == 0:
return 0
target_index = next((i for i, row in enumerate(matrix) if row & 1 == 1), None)
if target_index is None:
return rank([row >> 1 for row in matrix])
target_row = matrix[target_index]
return 1 + rank([(row ^ target_row) >> 1 if row & 1 else row >> 1 for row in matrix])
def is_full_rank(matrix: list[int]) -> bool:
'''
Returns if a matrix is full rank on GF(2) using gaussian elimination
'''
return rank(matrix) == len(matrix)
def random_invertible_matrix(size):
'''
Creates a random invertible matrix on GF(2)
'''
while True:
matrix = [random.randrange(1 << size) for _ in range(size)]
if is_full_rank(matrix):
return matrix
ENCRYPT_MATRIX = random_invertible_matrix(UUID_ENTROPY)
def encrypt(value: int) -> int:
encrypted = 0
for i in range(UUID_ENTROPY):
encrypted |= ((ENCRYPT_MATRIX[i] & value).bit_count() % 2) << i
return encrypted
def uuid_to_string(value: int) -> str:
result = (
value >> 74 << 80
| 4 << 76
| (value >> 62 & (1 << 12) - 1) << 64
| 2 << 62
| value & (1 << 62) - 1
)
hex = '{:032x}'.format(result)
return f'{hex[:8]}-{hex[8:12]}-{hex[12:16]}-{hex[16:20]}-{hex[20:]}'
def offset_index(index: int) -> int:
return (index + INDEX_OFFSET) & BIT_MASK_FULL
def deoffset_index(index: int) -> int:
return (index - INDEX_OFFSET) & BIT_MASK_FULL
def uuid_from_index(index: int) -> str:
return uuid_to_string(encrypt(offset_index(index)))
def compare_values(x: int, y: int):
'''
Compares two values, returns 1 if x > y, -1 if x < y, 0 if x == y
'''
return 1 if x > y else -1 if x < y else 0
def back_substitute(adjoint_matrix: list[tuple[int, int]], column: int, value: int):
'''
Back substitutes a value into the adjoint matrix
'''
solution_bit_mask = 1 << column
for i in range(len(adjoint_matrix)):
row, bit = adjoint_matrix[i]
if row & solution_bit_mask > 0:
adjoint_matrix[i] = (row % solution_bit_mask, bit ^ value)
def mask_index(index: int, bit: int):
return index >> (bit + 1) << (bit + 1)
def find_next_index(
adjoint_matrix: list[tuple[int, int]],
start_index: int,
current_bit_index: int,
direction: int,
):
'''
Finds the index for the next uuid which matches the known bits supplied in the specified direction.
Parameters:
adjoint_matrix: Matrix rows corresponding paired with known bits of the UUID
Matrix must in Row Echelon Form without trailing zero rows
start_index: The index of the point to start searching from.
current_bit_index: The index of the current bit to be computed
direction: The direction to search in: 1 or -1.
'''
# Find the solution from MSB to LSB
solution = 0
for i in range(current_bit_index, -1, -1):
solution_bit_mask = 1 << i
# If the last row of the matrix is a single bit in i, this determines a bit of the solution
if len(adjoint_matrix) > 0 and adjoint_matrix[-1][0] == solution_bit_mask:
_, bit = adjoint_matrix.pop()
# Otherwise the bit is not determined, so we can pick intelligently or guess
# We check if the solution has diverged from the start_index,
# if so then we want to pick the lowest/highest value (depending on the direction)
# to minmise the difference between this solution and the start_index.
elif mask_index(start_index, i) != solution:
bit = 0 if direction > 0 else 1
# Otherwise we have no idea what next bit is,
# so we guess the next bit from the start_index,
# recurse into the solver, and check if we were correct
else:
bit = start_index >> i & 1
# Copy the adjoint matrix, since we may backtrack out of this
adjoint_matrix_copy = adjoint_matrix.copy()
back_substitute(adjoint_matrix_copy, i, bit)
potential_solution = find_next_index(adjoint_matrix_copy, start_index % solution_bit_mask, i - 1, direction)
if compare_values(potential_solution, start_index % solution_bit_mask) == direction:
return potential_solution | (start_index >> i << i)
# Otherwise we guessed wrong
bit ^= 1
# Back substitute value into matrix and update the solution
back_substitute(adjoint_matrix, i, bit)
if bit:
solution |= solution_bit_mask
return solution
def hex_to_bits(character: str):
'''
Takes a hex character or asterisk
- For a hex character returns a list of bits LSB first
- For an asterisk returns a list of None (indicating unknown bits)
'''
if character == '*':
return [None, None, None, None]
else:
value = int(character, 16)
return [value >> i & 1 for i in range(3, -1, -1)]
def index_distance(a: int, b: int, direction: int):
'''
Returns the distance between two indexes in a direction
Parameters:
a: The first index
b: The second index
direction: The direction to look in 1 or -1
'''
return ((b - a) * direction) % UUID_COUNT
def pattern_to_bits(pattern: str) -> list[int | None]:
'''
Returns an LSB first list of bits, excluding fixed bits, where unknown bits '*' are None
Parameters:
pattern: UUID pattern
Pattern of the form: ********-****-4***-8***-************
- '*' can be an asterisk or a hex digit
- '-' can be a dash or an asterisk
'''
if len(pattern) != 36:
raise ValueError('Pattern length incorrect')
if any(pattern[i] not in '*-' for i in (8, 13, 18, 23)):
raise ValueError('Pattern incorrect delimiters')
if pattern[14] not in '*4' or pattern[19] not in '*89ab':
raise ValueError('Pattern incorrect fixed values')
no_dash = pattern[:8] + pattern[9:13] + pattern[14:18] + pattern[19:23] + pattern[24:]
try:
pattern_bits = sum((hex_to_bits(ch) for ch in no_dash), [])
except ValueError:
raise ValueError('Pattern contains non-hex value')
pattern_bits = pattern_bits[:48] + pattern_bits[52:64] + pattern_bits[66:]
pattern_bits.reverse()
return pattern_bits
def to_row_echelon_form(adjoint_matrix: list[tuple[int, int]]):
row = 0
column = 0
while row < len(adjoint_matrix):
# Find a row where the column bit is 1
row_index = None
non_zero = False
for i in range(row, len(adjoint_matrix)):
if adjoint_matrix[i][0] > 0:
non_zero = True
if (adjoint_matrix[i][0] >> column & 1) == 1:
row_index = i
break
else:
if non_zero:
column += 1
continue
else:
# Every row from this row is zero, truncate the zero part
while row < len(adjoint_matrix):
adjoint_matrix.pop()
return
# Swap row_index with row
adjoint_matrix[row], adjoint_matrix[row_index] = adjoint_matrix[row_index], adjoint_matrix[row]
# Eliminate column from rows below
for i in range(row + 1, len(adjoint_matrix)):
adjoint_row = adjoint_matrix[i]
if adjoint_row[0] >> column & 1:
adjoint_matrix[i] = (
adjoint_row[0] ^ adjoint_matrix[row][0],
adjoint_row[1] ^ adjoint_matrix[row][1],
)
# Move to next row
row += 1
column += 1
def get_known_adjoint_matrix(matrix, vector):
'''
Returns the adjoint matrix only for rows with known bits
'''
return [(matrix[i], bit) for i, bit in enumerate(vector) if bit is not None]
def find_next_index_for_pattern(pattern_bits: list[int | None], start_index: int, direction: int) -> int:
'''
Finds the index for the next uuid which matches the pattern of bits supplied in the specified direction.
Parameters:
pattern_bits: Sequence of pattern bits. Each bit can be 1, 0 or None. LSB first.
start_index: The index of the point to start searching from.
direction: The direction to search in: 1 or -1.
'''
adjoint_matrix = get_known_adjoint_matrix(ENCRYPT_MATRIX, pattern_bits)
to_row_echelon_form(adjoint_matrix)
return deoffset_index(find_next_index(
adjoint_matrix,
offset_index(start_index),
UUID_ENTROPY - 1,
direction,
))
def find_next_index_for_query(query: str, start_index: int, direction: int) -> int | None:
'''
Finds the index for the next uuid which matches the query supplied in the specified direction.
1. Iterates through each possible position for the query in the UUID string
2. Solves each possibility
3. Returns the solution closest to the start_index
Parameters:
query: Search term string.
start_index: The index of the point to start searching from.
direction: The direction to search in: 1 or -1.
'''
if len(query) <= 3:
return find_next_index_for_small_query(query, start_index, direction)
query = query.lower()
pad_length = len(uuid_from_index(0)) - len(query)
solutions = []
for left in range(pad_length + 1):
pattern = '*' * left + query + '*' * (pad_length - left)
try:
pattern_bits = pattern_to_bits(pattern)
except ValueError:
continue
solutions.append(find_next_index_for_pattern(pattern_bits, start_index, direction))
return sorted(solutions, key=lambda solution: index_distance(start_index, solution, direction))[0]
def find_next_index_for_small_query(query: str, start_index: int, direction: int):
'''
Finds a query by searching through consecutive indexes
For small queries (3 or less hex characters) this is consitently fast
'''
query = query.lower()
index = start_index
while True:
index = (index + direction) % UUID_COUNT
if query in uuid_from_index(index):
return index
i = 15
query = 'abcd'
direction = 1
print(f'Matches for "{query}" starting from {i} {'upwards' if direction == 1 else 'downwards'}')
for _ in range(16):
i = find_next_index_for_query(query, i, direction)
print(i, uuid_from_index(i))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment