Last active
January 15, 2025 14:14
-
-
Save riga/1a077b9daa4d4c98ac75877466433861 to your computer and use it in GitHub Desktop.
Deterministic seeds per event and per jet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8

"""
Pseudo code for generating deterministic event and jet seeds,
to be used in random number generators with consistent output.
The seeds use the entire uint64 space and are proven to be
uniformly distributed using bijective hashing.

It requires a list of the first 200 primes plus some event
and object level integer (!) quantities. Floats are not used
as any algorithm based on them is intrinsically non-deterministic.
Objects are expected to be sorted by decreasing pt for reproducibility.

Vectorized version in columnflow as a reference:
https://github.com/columnflow/columnflow/blob/master/columnflow/production/cms/seeds.py

C++ interface to Numpy's random number generation:
https://gist.github.com/riga/32ba6c77943eaf34f059663cd11d90df
"""
import hashlib
# first 200 primes, starting at 2 (the last entry, 1223, is the 200th prime);
# the seed calculations below index into this list modulo its length
primes = [
    2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
    101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
    197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307,
    311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421,
    431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547,
    557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659,
    661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797,
    809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929,
    937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039,
    1049, 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153,
    1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
]
# singular hash function converting int to uint64
def create_seed(val: int, n_hex: int = 16) -> int:
    """Hash *val* into a non-negative integer of at most *n_hex* hex digits.

    The decimal string representation of *val* is hashed with SHA-256. The
    last *n_hex* characters of the hex digest are read back to front and
    parsed as a base-16 integer. With the default of 16 hex digits the
    result fits into 64 bits.

    Args:
        val: Value to hash; it is converted with ``str()`` before hashing.
        n_hex: Number of hex digits (4 bits each) to keep. Values above 64
            use the full 64-character digest.

    Returns:
        An integer in ``[0, 16 ** min(n_hex, 64))``.

    Raises:
        ValueError: If *n_hex* is smaller than 1, since no digits would be
            left to build a seed from.
    """
    if n_hex < 1:
        raise ValueError(f"n_hex must be a positive integer, got {n_hex}")

    digest = hashlib.sha256(str(val).encode("utf-8")).hexdigest()

    # reversing first and truncating afterwards selects the same characters,
    # in the same order, as the negative-step slice digest[:-(n_hex + 1):-1]
    return int(digest[::-1][:n_hex], base=16)
#
# event seed calculation
#

# inputs to identify the event (order matters!)
# (get() is a placeholder for a retrieval function)
index_inputs = [
    get("event"), get("run"), get("luminosityBlock"),
]

# event-level inputs, i.e., one number per event (order matters!)
event_inputs = [
    get("Pileup.nPU"),
    get("nJet"), get("nFatJet"), get("nSubJet"),
    get("nMuon"), get("nElectron"), get("nTau"),
    get("nSV"), get("nGenJet"),
]

# object-level inputs, i.e., one number per object (order matters!)
# (here, each get() would return a list of numbers)
object_inputs = [
    get("Electron.jetIdx"), get("Electron.seediPhiOriY"),
    get("Muon.jetIdx"), get("Muon.nStations"),
    get("Tau.jetIdx"), get("Tau.decayMode"),
    get("Jet.nConstituents"), get("Jet.nElectrons"), get("Jet.nMuons"),
]

# start by creating a short seed from index inputs: event, run and
# luminosityBlock are each weighted with a different prime before hashing
# (n_hex=14 keeps 14 hex digits, i.e. 56 bits; presumably this leaves headroom
# for the additions below in fixed-width implementations - TODO confirm)
event_seed = create_seed(
    index_inputs[0] * primes[7] +
    index_inputs[1] * primes[5] +
    index_inputs[2] * primes[3],
    n_hex=14,
)

# fold with event level info
value_offset = 3
prime_offset = 15
for i, inp in enumerate(event_inputs):
    # shift the value by its position in the list plus a constant offset
    inp += i + value_offset
    # the shifted value selects a prime (wrapping around the list) and is
    # multiplied by it before being added to the running seed
    event_seed += primes[(inp + prime_offset) % len(primes)] * inp

# fold with object level info
for i, inps in enumerate(object_inputs):
    # per-collection accumulator, starting from the number of objects
    inp_sum = len(inps)
    for j, inp in enumerate(inps):
        # same positional shift as above, using the index of the collection
        inp += i + value_offset
        # linear and quadratic terms, both weighted by the 1-based object
        # position, so the order of the objects affects the result
        inp_sum += inp * (j + 1) + inp**2 * (j + 1)
    # one contribution per collection, added after all of its objects
    # have been folded into inp_sum
    event_seed += primes[(inp_sum + prime_offset) % len(primes)] * inp_sum

# final seed
event_seed = create_seed(event_seed)  # done
#
# jet seed calculation
#

# one seed per jet, derived from the final event seed and the position of
# the jet in the list (only the index i is used, not the jet itself)
for i, jet in enumerate(jets):
    # the event seed selects a prime (wrapping around the list), which is
    # scaled by the jet index shifted by a fixed prime, primes[50]
    jet_seed = event_seed + primes[event_seed % len(primes)] * (i + primes[50])
    jet_seed = create_seed(jet_seed)  # done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment