Last active
November 5, 2022 16:45
-
-
Save Radagaisus/deb303e032d79a32329e4fe2132b7fe8 to your computer and use it in GitHub Desktop.
Helper module for deterministic PRNG seeding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deterministic Pseudo-Random Number Generator Seeding | |
# ------------------------------------------------------------------------------ | |
# A helper script for setting a consistent deterministic PRNG seed for third- | |
# party libraries, supporting more reproducible experiment execution. | |
# | |
# Setting a deterministic seed: | |
# | |
# - By an environment variable: `PYTHON_SEED=12345 python script.py` | |
# - By calling `randomness.reseed(12345)` | |
# | |
# The seed is read from the `PYTHON_SEED` environment variable and must be an
# integer. It is optional: if it is not set, the module’s interface remains
# the same, but results will be nondeterministic. It’s best to import this
# module early in execution, as it also seeds the PRNG of some global
# functionality.
# | |
# Note that any code using `np.random` should instead import `random` from this | |
# module. Likewise any code using `mimesis`. The API is the same, only using a | |
# fixed seed. | |
# | |
# The following seeds are set: | |
# | |
# - The standard library’s `random`. | |
# - Numpy’s global `np.random` (which is used by scikit-learn). | |
# - PyTorch’s seed (also configures settings for deterministic results). | |
# | |
# The following objects are exported: | |
# | |
# - `mimesis`, a `mimesis.Generic()` initialized with the given seed. | |
# - `random`, with the same interface as `np.random`, using Numpy’s PCG64 | |
# generator. | |
# | |
# PyTorch and Mimesis configuration is done only if their respective packages | |
# are already installed. | |
# | |
# See: https://numpy.org/doc/stable/reference/random/index.html | |
# See: https://pytorch.org/docs/stable/notes/randomness.html | |
# See: https://mimesis.name/api.html#baseprovider | |
# | |
import os | |
from importlib.util import find_spec | |
import random as stdlib_random | |
import numpy as np | |
from numpy.random import Generator, PCG64 | |
def is_package_installed(package: str) -> bool:
    """Return whether `package` can be located by the import system.

    Args:
        package: An absolute module name. Dotted names (`'pkg.submodule'`)
            are accepted as well.

    Returns:
        `True` if a module spec is found for `package`, `False` otherwise.
    """
    try:
        return find_spec(package) is not None
    except ModuleNotFoundError:
        # For dotted names `find_spec` imports the parent package first and
        # raises if that parent is missing; report that as "not installed".
        return False
# Optional integrations: each flag records whether the corresponding
# third-party package can be found, so seeding for it can be skipped when
# the package is absent.
use_torch = is_package_installed('torch')
use_mimesis = is_package_installed('mimesis')

# Pull in the optional packages only when they were detected above.
if use_torch:
    import torch
if use_mimesis:
    from mimesis import Generic
def reseed(seed: int) -> None:
    """
    Reseeds randomness services.

    - Reseeds the standard library’s `random`.
    - Reseeds the global `np.random`.
    - Rebinds this module’s `random` to a new PCG64 `Generator` using `seed`.
    - Rebinds this module’s `mimesis` to a new `Generic(seed=seed)`, if
      Mimesis is installed.
    - Reseeds PyTorch and sets the cuDNN flags for deterministic results, if
      PyTorch is installed.

    Note that `random` and `mimesis` are rebound to new objects: a reference
    taken earlier with `from <this module> import random` keeps pointing at
    the previous generator. Access them through the module to pick up a
    reseed.

    Args:
        seed: the seed to use for reseeding. Integer seeds must lie in
            `[0, 2**32 - 1]`, the range accepted by `np.random.seed`.

    Raises:
        ValueError: if `seed` is an integer outside `[0, 2**32 - 1]`. The
            check runs before any generator is touched, so a rejected seed
            leaves every PRNG in its previous state.
    """
    global random, mimesis

    # Validate up front: `np.random.seed` would reject an out-of-range seed
    # anyway, but only after the standard library PRNG was already reseeded.
    if isinstance(seed, int) and not 0 <= seed <= 2**32 - 1:
        raise ValueError(
            'Seed must be between 0 and 2**32 - 1, got {!r}'.format(seed)
        )

    # Seed Python’s built-in PRNG
    stdlib_random.seed(seed)

    # Seed the global numpy PRNG, which is used by scikit-learn, among others.
    np.random.seed(seed)

    # Reseed the local numpy PRNG
    random = Generator(PCG64(seed))

    # Reseed the Mimesis generic instance used for fake data generation
    if use_mimesis:
        mimesis = Generic(seed=seed)

    # PyTorch deterministic seed settings
    if use_torch:
        # Set PyTorch’s seed
        torch.manual_seed(seed)
        # Settings for deterministic reproducible PyTorch results
        # See: https://pytorch.org/docs/stable/notes/randomness.html
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
# Retrieve the PRNG seed from the environment. `None` if missing.
seed = os.environ.get('PYTHON_SEED')

# If the seed exists, convert it to an integer and reseed randomness in
# all the relevant services. Otherwise, expose an equivalent interface,
# though a nondeterministic one.
if seed is not None:
    try:
        seed = int(seed)
    except ValueError as error:
        # Re-raise with a message that names the offending variable; the
        # bare `int()` error does not say where the bad value came from.
        raise ValueError(
            'PYTHON_SEED must be an integer, got {!r}'.format(seed)
        ) from error
    reseed(seed)
else:
    # Expose a PCG64 PRNG generator using a nondeterministic seed.
    random = Generator(PCG64())
    # Expose a Mimesis instance using a nondeterministic seed.
    if use_mimesis:
        mimesis = Generic()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment