Skip to content

Instantly share code, notes, and snippets.

@simonLeary42
Last active March 17, 2025 17:52
Show Gist options
  • Save simonLeary42/68b76f4f49109c2e41971eb95bcd3f70 to your computer and use it in GitHub Desktop.
Save simonLeary42/68b76f4f49109c2e41971eb95bcd3f70 to your computer and use it in GitHub Desktop.
def _cluster(sorted_nums_ids: list[tuple[int, str]], max_reduction: int) -> list[tuple[int, str]]:
"cluster integers by reducing them by no more than max_reduction"
# if the entire range can be reduced to equal the lowest number without violating max_reduction
if sorted_nums_ids[-1][0] - sorted_nums_ids[0][0] <= max_reduction:
new_num = sorted_nums_ids[0][0]
return [(new_num, _id) for _, _id in sorted_nums_ids]
# divide and conquer. split the range at the biggest gap
# for each element, the corresponding gap is the distance between it and the previous element
gaps = [-1]
for i, (num, _) in enumerate(sorted_nums_ids[1:], start=1):
gaps.append(num - sorted_nums_ids[i - 1][0])
# https://stackoverflow.com/a/11825864/18696276
biggest_gap_index = max(range(len(gaps)), key=gaps.__getitem__)
# https://stackoverflow.com/a/1724975/18696276
return itertools.chain(
_cluster(sorted_nums_ids[:biggest_gap_index], max_reduction),
_cluster(sorted_nums_ids[biggest_gap_index:], max_reduction),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment