Last active
January 3, 2025 19:10
-
-
Save peterjwest/cc7d23201aa2051cd123276aee544074 to your computer and use it in GitHub Desktop.
UUID search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
random.seed(42) | |
UUID_ENTROPY = 122 | |
UUID_COUNT = 1 << UUID_ENTROPY | |
BIT_MASK_FULL = UUID_COUNT - 1 | |
INDEX_OFFSET = 0 | |
INDEX_OFFSET = random.randrange(UUID_COUNT) | |
def rank(matrix: list[int]) -> int: | |
''' | |
Calculate rank of matrix on GF(2) using Gaussian elimination | |
''' | |
matrix = [row for row in matrix if row > 0] | |
if len(matrix) == 0: | |
return 0 | |
target_index = next((i for i, row in enumerate(matrix) if row & 1 == 1), None) | |
if target_index is None: | |
return rank([row >> 1 for row in matrix]) | |
target_row = matrix[target_index] | |
return 1 + rank([(row ^ target_row) >> 1 if row & 1 else row >> 1 for row in matrix]) | |
def is_full_rank(matrix: list[int]) -> bool: | |
''' | |
Returns if a matrix is full rank on GF(2) using gaussian elimination | |
''' | |
return rank(matrix) == len(matrix) | |
def random_invertible_matrix(size): | |
''' | |
Creates a random invertible matrix on GF(2) | |
''' | |
while True: | |
matrix = [random.randrange(1 << size) for _ in range(size)] | |
if is_full_rank(matrix): | |
return matrix | |
ENCRYPT_MATRIX = random_invertible_matrix(UUID_ENTROPY) | |
def encrypt(value: int) -> int: | |
encrypted = 0 | |
for i in range(UUID_ENTROPY): | |
encrypted |= ((ENCRYPT_MATRIX[i] & value).bit_count() % 2) << i | |
return encrypted | |
def uuid_to_string(value: int) -> str: | |
result = ( | |
value >> 74 << 80 | |
| 4 << 76 | |
| (value >> 62 & (1 << 12) - 1) << 64 | |
| 2 << 62 | |
| value & (1 << 62) - 1 | |
) | |
hex = '{:032x}'.format(result) | |
return f'{hex[:8]}-{hex[8:12]}-{hex[12:16]}-{hex[16:20]}-{hex[20:]}' | |
def offset_index(index: int) -> int: | |
return (index + INDEX_OFFSET) & BIT_MASK_FULL | |
def deoffset_index(index: int) -> int: | |
return (index - INDEX_OFFSET) & BIT_MASK_FULL | |
def uuid_from_index(index: int) -> str: | |
return uuid_to_string(encrypt(offset_index(index))) | |
def compare_values(x: int, y: int): | |
''' | |
Compares two values, returns 1 if x > y, -1 if x < y, 0 if x == y | |
''' | |
return 1 if x > y else -1 if x < y else 0 | |
def back_substitute(adjoint_matrix: list[tuple[int, int]], column: int, value: int): | |
''' | |
Back substitutes a value into the adjoint matrix | |
''' | |
solution_bit_mask = 1 << column | |
for i in range(len(adjoint_matrix)): | |
row, bit = adjoint_matrix[i] | |
if row & solution_bit_mask > 0: | |
adjoint_matrix[i] = (row % solution_bit_mask, bit ^ value) | |
def mask_index(index: int, bit: int): | |
return index >> (bit + 1) << (bit + 1) | |
def find_next_index( | |
adjoint_matrix: list[tuple[int, int]], | |
start_index: int, | |
current_bit_index: int, | |
direction: int, | |
): | |
''' | |
Finds the index for the next uuid which matches the known bits supplied in the specified direction. | |
Parameters: | |
adjoint_matrix: Matrix rows corresponding paired with known bits of the UUID | |
Matrix must in Row Echelon Form without trailing zero rows | |
start_index: The index of the point to start searching from. | |
current_bit_index: The index of the current bit to be computed | |
direction: The direction to search in: 1 or -1. | |
''' | |
# Find the solution from MSB to LSB | |
solution = 0 | |
for i in range(current_bit_index, -1, -1): | |
solution_bit_mask = 1 << i | |
# If the last row of the matrix is a single bit in i, this determines a bit of the solution | |
if len(adjoint_matrix) > 0 and adjoint_matrix[-1][0] == solution_bit_mask: | |
_, bit = adjoint_matrix.pop() | |
# Otherwise the bit is not determined, so we can pick intelligently or guess | |
# We check if the solution has diverged from the start_index, | |
# if so then we want to pick the lowest/highest value (depending on the direction) | |
# to minmise the difference between this solution and the start_index. | |
elif mask_index(start_index, i) != solution: | |
bit = 0 if direction > 0 else 1 | |
# Otherwise we have no idea what next bit is, | |
# so we guess the next bit from the start_index, | |
# recurse into the solver, and check if we were correct | |
else: | |
bit = start_index >> i & 1 | |
# Copy the adjoint matrix, since we may backtrack out of this | |
adjoint_matrix_copy = adjoint_matrix.copy() | |
back_substitute(adjoint_matrix_copy, i, bit) | |
potential_solution = find_next_index(adjoint_matrix_copy, start_index % solution_bit_mask, i - 1, direction) | |
if compare_values(potential_solution, start_index % solution_bit_mask) == direction: | |
return potential_solution | (start_index >> i << i) | |
# Otherwise we guessed wrong | |
bit ^= 1 | |
# Back substitute value into matrix and update the solution | |
back_substitute(adjoint_matrix, i, bit) | |
if bit: | |
solution |= solution_bit_mask | |
return solution | |
def hex_to_bits(character: str): | |
''' | |
Takes a hex character or asterisk | |
- For a hex character returns a list of bits LSB first | |
- For an asterisk returns a list of None (indicating unknown bits) | |
''' | |
if character == '*': | |
return [None, None, None, None] | |
else: | |
value = int(character, 16) | |
return [value >> i & 1 for i in range(3, -1, -1)] | |
def index_distance(a: int, b: int, direction: int): | |
''' | |
Returns the distance between two indexes in a direction | |
Parameters: | |
a: The first index | |
b: The second index | |
direction: The direction to look in 1 or -1 | |
''' | |
return ((b - a) * direction) % UUID_COUNT | |
def pattern_to_bits(pattern: str) -> list[int | None]: | |
''' | |
Returns an LSB first list of bits, excluding fixed bits, where unknown bits '*' are None | |
Parameters: | |
pattern: UUID pattern | |
Pattern of the form: ********-****-4***-8***-************ | |
- '*' can be an asterisk or a hex digit | |
- '-' can be a dash or an asterisk | |
''' | |
if len(pattern) != 36: | |
raise ValueError('Pattern length incorrect') | |
if any(pattern[i] not in '*-' for i in (8, 13, 18, 23)): | |
raise ValueError('Pattern incorrect delimiters') | |
if pattern[14] not in '*4' or pattern[19] not in '*89ab': | |
raise ValueError('Pattern incorrect fixed values') | |
no_dash = pattern[:8] + pattern[9:13] + pattern[14:18] + pattern[19:23] + pattern[24:] | |
try: | |
pattern_bits = sum((hex_to_bits(ch) for ch in no_dash), []) | |
except ValueError: | |
raise ValueError('Pattern contains non-hex value') | |
pattern_bits = pattern_bits[:48] + pattern_bits[52:64] + pattern_bits[66:] | |
pattern_bits.reverse() | |
return pattern_bits | |
def to_row_echelon_form(adjoint_matrix: list[tuple[int, int]]): | |
row = 0 | |
column = 0 | |
while row < len(adjoint_matrix): | |
# Find a row where the column bit is 1 | |
row_index = None | |
non_zero = False | |
for i in range(row, len(adjoint_matrix)): | |
if adjoint_matrix[i][0] > 0: | |
non_zero = True | |
if (adjoint_matrix[i][0] >> column & 1) == 1: | |
row_index = i | |
break | |
else: | |
if non_zero: | |
column += 1 | |
continue | |
else: | |
# Every row from this row is zero, truncate the zero part | |
while row < len(adjoint_matrix): | |
adjoint_matrix.pop() | |
return | |
# Swap row_index with row | |
adjoint_matrix[row], adjoint_matrix[row_index] = adjoint_matrix[row_index], adjoint_matrix[row] | |
# Eliminate column from rows below | |
for i in range(row + 1, len(adjoint_matrix)): | |
adjoint_row = adjoint_matrix[i] | |
if adjoint_row[0] >> column & 1: | |
adjoint_matrix[i] = ( | |
adjoint_row[0] ^ adjoint_matrix[row][0], | |
adjoint_row[1] ^ adjoint_matrix[row][1], | |
) | |
# Move to next row | |
row += 1 | |
column += 1 | |
def get_known_adjoint_matrix(matrix, vector): | |
''' | |
Returns the adjoint matrix only for rows with known bits | |
''' | |
return [(matrix[i], bit) for i, bit in enumerate(vector) if bit is not None] | |
def find_next_index_for_pattern(pattern_bits: list[int | None], start_index: int, direction: int) -> int: | |
''' | |
Finds the index for the next uuid which matches the pattern of bits supplied in the specified direction. | |
Parameters: | |
pattern_bits: Sequence of pattern bits. Each bit can be 1, 0 or None. LSB first. | |
start_index: The index of the point to start searching from. | |
direction: The direction to search in: 1 or -1. | |
''' | |
adjoint_matrix = get_known_adjoint_matrix(ENCRYPT_MATRIX, pattern_bits) | |
to_row_echelon_form(adjoint_matrix) | |
return deoffset_index(find_next_index( | |
adjoint_matrix, | |
offset_index(start_index), | |
UUID_ENTROPY - 1, | |
direction, | |
)) | |
def find_next_index_for_query(query: str, start_index: int, direction: int) -> int | None: | |
''' | |
Finds the index for the next uuid which matches the query supplied in the specified direction. | |
1. Iterates through each possible position for the query in the UUID string | |
2. Solves each possibility | |
3. Returns the solution closest to the start_index | |
Parameters: | |
query: Search term string. | |
start_index: The index of the point to start searching from. | |
direction: The direction to search in: 1 or -1. | |
''' | |
if len(query) <= 3: | |
return find_next_index_for_small_query(query, start_index, direction) | |
query = query.lower() | |
pad_length = len(uuid_from_index(0)) - len(query) | |
solutions = [] | |
for left in range(pad_length + 1): | |
pattern = '*' * left + query + '*' * (pad_length - left) | |
try: | |
pattern_bits = pattern_to_bits(pattern) | |
except ValueError: | |
continue | |
solutions.append(find_next_index_for_pattern(pattern_bits, start_index, direction)) | |
return sorted(solutions, key=lambda solution: index_distance(start_index, solution, direction))[0] | |
def find_next_index_for_small_query(query: str, start_index: int, direction: int): | |
''' | |
Finds a query by searching through consecutive indexes | |
For small queries (3 or less hex characters) this is consitently fast | |
''' | |
query = query.lower() | |
index = start_index | |
while True: | |
index = (index + direction) % UUID_COUNT | |
if query in uuid_from_index(index): | |
return index | |
i = 15 | |
query = 'abcd' | |
direction = 1 | |
print(f'Matches for "{query}" starting from {i} {'upwards' if direction == 1 else 'downwards'}') | |
for _ in range(16): | |
i = find_next_index_for_query(query, i, direction) | |
print(i, uuid_from_index(i)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment