Created
June 12, 2023 19:30
-
-
Save derrickburns/a1d90312765a1c7612b9a79e929fdfcd to your computer and use it in GitHub Desktop.
Probabilistic analysis of failures
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import yaml | |
import sys | |
from decimal import Decimal, getcontext | |
from collections import defaultdict | |
# Set the precision of the decimal context to 100 significant digits.
# With the default of 28 digits, 1 - (1/M)**N rounds to exactly 1 as soon as
# (1/M)**N falls below roughly 1e-28, which would erase the small
# probabilities this script is trying to measure.
getcontext().prec = 100
def calculate_probability(N: int, M: int) -> Decimal:
    """Return 1 - (1/M)**N as a Decimal.

    Read as a probability: if each of N replicas is assumed to sit
    independently and uniformly at random on one of M nodes, (1/M)**N is the
    chance that all N replicas sit on one particular node, and the returned
    value is the chance that they do not.

    Args:
        N: Number of replicas (used as the exponent).
        M: Number of nodes (used as the divisor).

    Returns:
        The value 1 - (1/M)**N, computed with Decimal arithmetic at the
        precision of the currently active decimal context.

    Raises:
        decimal.DivisionByZero: If M is 0 (with the default context traps).
    """
    # Decimal rather than float: for large N the subtracted term is far
    # smaller than float epsilon and the result would collapse to 1.0.
    return 1 - (Decimal(1) / Decimal(M)) ** N
# Read YAML input from stdin. The script expects a mapping with a top-level
# 'items' list whose entries have 'kind', 'spec' and 'metadata' keys.
input_yaml = yaml.safe_load(sys.stdin)
if not isinstance(input_yaml, dict) or 'items' not in input_yaml:
    # Empty or unexpected input would otherwise fail below with an opaque
    # TypeError/KeyError; stop with an explicit message instead.
    sys.exit("Expected a YAML document with a top-level 'items' list on stdin")

# Specify the number of nodes
M = 429

# Create a defaultdict of lists to store the Deployment groups by number of replicas
deployments_by_replicas = defaultdict(list)

# Group the Deployments by the number of replicas and store them in the defaultdict
for deployment in input_yaml['items']:
    if deployment['kind'] == "Deployment":
        # Kubernetes treats an omitted .spec.replicas as 1, so a missing or
        # null value is counted as a single replica rather than raising.
        replicas = deployment['spec'].get('replicas')
        if replicas is None:
            replicas = 1
        # Ignore Deployments with zero replicas
        if replicas > 0:
            deployments_by_replicas[replicas].append(deployment['metadata']['name'])

# Print the header line of the histogram
print("Number of Replicas,Number of Services,Probability,Joint Probability No Failure")

# Calculate the no-failure probability for each group of Deployments, sorted by the number of replicas.
# The running product multiplies the per-group values together, i.e. the
# groups (and the services within a group) are treated as independent.
total_no_failure_prob = Decimal(1)
for replicas in sorted(deployments_by_replicas):
    number_of_services = len(deployments_by_replicas[replicas])
    probability = calculate_probability(replicas, M)
    # Every service in the group has the same per-service value, so the
    # group's joint value is that value raised to the group size.
    joint_probability_no_failure = probability ** number_of_services
    # Values are converted to float only for display; the accumulation
    # itself stays in Decimal.
    print(f"{replicas},{number_of_services},{float(probability)},{float(joint_probability_no_failure)}")
    total_no_failure_prob *= joint_probability_no_failure

# Calculate the total probability that a single node failure causes the loss of availability of at least one service
total_prob = 1 - total_no_failure_prob
print(f"Total probability that a single node failure causes the loss of availability of at least one service is {float(total_prob)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment