@maqp
Last active January 5, 2021 01:35
Pythagorean factorization in the world of real RSA keys.
import math
import time
from cryptography.hazmat.primitives.asymmetric import rsa
def main():
    key_sizes_to_test = [512, 640, 768, 896]
    number_of_keys_per_size_to_generate = 10000

    previous_smallest_distance_between_two_x = None
    previous_key_size = None

    for key_size in key_sizes_to_test:
        list_of_x_values = []
        start_time = time.monotonic()

        for i in range(number_of_keys_per_size_to_generate):
            private_key = rsa.generate_private_key(public_exponent=65537, key_size=key_size)
            prime_factor_1 = private_key.private_numbers().p
            prime_factor_2 = private_key.private_numbers().q
            semiprime = private_key.private_numbers().public_numbers.n

            # Fermat geometry: side C is the midpoint (p+q)/2 of the factors, side B is sqrt(N), and
            # x = C - B is roughly the number of candidates Fermat's method has to try. Float precision
            # is good enough here since we only care about orders of magnitude.
            side_C = (prime_factor_1 + prime_factor_2) / 2
            side_B = math.sqrt(semiprime)
            x = side_C - side_B

            # We collect all values of x for later analysis.
            list_of_x_values.append(x)

        duration = time.monotonic() - start_time
        # Get the smallest and greatest values of x.
        smallest_x = f"{min(list_of_x_values):.0f}"
        greatest_x = f"{max(list_of_x_values):.0f}"

        smallest_distance_between_x = None
        for i, first_value in enumerate(list_of_x_values):
            # Compare each x only against the values after it: distance is symmetric, so this covers
            # every pair once, and it skips comparing a value against itself (which is always zero).
            # Note that two different keys that happen to produce the same x still yield distance 0.
            for second_value in list_of_x_values[i + 1:]:
                distance_between_values = abs(first_value - second_value)
                if smallest_distance_between_x is None:
                    smallest_distance_between_x = distance_between_values
                else:
                    smallest_distance_between_x = min(distance_between_values, smallest_distance_between_x)
        smallest_distance_between_x_str = f"{smallest_distance_between_x:.0f}"

        # Whitespace for aligning the decimal points of each value.
        length_of_longest_value = len(max(smallest_x, greatest_x, smallest_distance_between_x_str, key=len))
        whitespace1 = ' ' * (length_of_longest_value - len(smallest_x))
        whitespace2 = ' ' * (length_of_longest_value - len(greatest_x))
        whitespace3 = ' ' * (length_of_longest_value - len(smallest_distance_between_x_str))

        print(f"\nGenerated {number_of_keys_per_size_to_generate} {key_size}-bit keys in {duration:.1f} seconds.\n"
              f"The smallest x was {whitespace1} {smallest_x}\n"
              f"The greatest x was {whitespace2} {greatest_x}\n"
              f"The smallest distance between two x values was {whitespace3} {smallest_distance_between_x_str}")

        if previous_smallest_distance_between_two_x is not None and previous_key_size is not None:
            problem_size = smallest_distance_between_x / previous_smallest_distance_between_two_x
            print(f"\nIncreasing key size linearly from {previous_key_size} to {key_size} made the smallest "
                  f"distance between x:s in the sample {problem_size:.0f} times larger.")

        previous_smallest_distance_between_two_x = smallest_distance_between_x
        previous_key_size = key_size


if __name__ == '__main__':
    main()
# Output when I ran the program:
# Generated 10000 512-bit keys in 62.5 seconds.
# The smallest x was 12019896571057247261053876450711896266866078393495290408054292480000
# The greatest x was 1009223972550147275717101238160743268768383705107733611013898955977893347328
# The smallest distance between two x values was 1113672342193250620561601408476119330051987562811226746577265623040
#
# Generated 10000 640-bit keys in 71.0 seconds.
# The smallest x was 21986876100048913151778421649542160516144701231635518536667851486981475794322035048448
# The greatest x was 18675834825171408407759973623876278536724568807543318811379579247292871367999119300836187963392
# The smallest distance between two x values was 834029113031968889456023867956337401692058697869430504912429717413540816780605259776
#
# Increasing key size linearly from 512 to 640 made the smallest distance between x:s in the sample 748899906582436736 times larger.
#
# Generated 10000 768-bit keys in 101.7 seconds.
# The smallest x was 1596443682508973441432548160541972187872163423236923252373060004830134954773635695097917138724648663384064
# The greatest x was 350565251279761667930362658206232530455526033878367674381360158638699293357613475168646552181512859306400061325312
# The smallest distance between two x values was 379155538639685551066124516066385428441241214506476589429534483079952532505213417538949562210592334282752
#
# Increasing key size linearly from 640 to 768 made the smallest distance between x:s in the sample 454607078716150579200 times larger.
#
# Generated 10000 896-bit keys in 142.2 seconds.
# The smallest x was 53464918858628610837571819224003485199149048817386891440515370270439865747191380750795481804744223769729748571876867811835904
# The greatest x was 6446610185364092605972281272641218049742659728012226654868106589696897120263776775183201080556852464575219028976564006853482026893312
# The smallest distance between two x values was 2886309801231642076087903654506366522532028649934864581466469495773308879509324245297286093628874061087480770472538510721024
#
# Increasing key size linearly from 768 to 896 made the smallest distance between x:s in the sample 7612469045255131136 times larger.
#
# Process finished with exit code 0
# So it's obvious x is almost never going to be 0, or 1, or 2, or even
# 100000000000000000000000000000000000000000000.
#
# Grant claims that the number of semiprimes where x is small grows when the key size grows. This is technically
# true, but meaningless in practice.
#
# To prove this, let's simplify a bit and say any two integers p and q are valid factors of a product N (N = p*q).
# Let's start by comparing two-digit decimal values.
#
# Both numbers can take the values 00, 01, 02, ..., 99, so that's 100 different values. In how many cases is the
# distance between p and q equal to 0 (and x thus very small)? It turns out there are exactly 100 pairs (p, q)
# where both numbers have the same value: (00, 00), (01, 01), ..., (99, 99). All of these have distance |p-q| = 0.
#
# So the question for security is: if we randomly pick p and q, how likely is it that we pick the same value twice,
# leading to distance 0? Calculating the probability of picking a pair with identical p and q requires knowing the
# set of all cases. The list of all pairs goes (00, 00), (00, 01), ..., (00, 99), (01, 00), (01, 01), ..., (99, 99).
# The total number of pairs is 100*100 = 10,000.
#
# So the probability of picking a pair with two identical values is 100 / 10,000 = 0.01 = 1%. One bad pick in 100
# cases is quite bad.
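#
# A quick brute-force sanity check of that 1% figure:
#
# count_identical = sum(1 for p in range(100) for q in range(100) if abs(p - q) == 0)
# total_pairs = 100 * 100
# print(count_identical, total_pairs, count_identical / total_pairs)   # -> 100 10000 0.01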
#
# What we want to know now is whether this probability changes if we increase the key size, because if it does,
# it would help to increase the size of the factors and thus the key size.
#
# So let's increase the length of both factors by one decimal digit, i.e., let's compare three-digit decimal values.
#
# Now the distance is 0 when all three digits in the pair are the same: (000, 000), (001, 001), ..., (999, 999).
# Now there are 1000 different value pairs that have |p-q| = 0! So, like Grant says, the number of factor pairs with
# a very small x grows. Grant's claim is essentially that because the number of pairs with small x grows, the
# probability of x being small remains high, and that his algorithm therefore works in a "massive number of cases".
#
# But wait, let's not give in yet. How many different cases are there in total? 1000*1000 = 1,000,000.
# So a million cases. What is the probability of picking a pair with the same three digits in this case?
# 1,000 / 1,000,000 = 0.001 = 0.1%
#
# So only 0.1 percent of the pairs are bad, a ten times smaller portion than before. That means it's ten times
# less likely to pick a bad pair of factors.
#
# So while there are objectively 10 times more factor pairs that would result in a small x, the relative portion
# of those pairs becomes 10 times smaller every time we add a digit. In other words, when the size of the factors
# grows linearly (i.e., when you add one zero and turn 10 into 100), the probability of picking a bad factor
# becomes 10 times smaller.
#
# This allows us to determine the probability of picking two identical factors (which result in a small x), given
# the number of digits d in the base-10 factors:
#
# P(d) = 10^d / (10^d)^2
#      = 1 / 10^d
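#
# The same as a quick check in code:
#
# def p_bad(d):
#     values = 10 ** d                 # number of possible d-digit values
#     return values / values ** 2      # identical pairs / all possible pairs
#
# for d in (2, 3, 4, 5):
#     print(d, p_bad(d))               # -> 0.01, 0.001, 0.0001, 1e-05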
#
# This shows that picking a bad prime pair only gets more unlikely as the key size increases.
# This applies in all situations, including substrings of the factors.
#
# This also applies when the factors are prime numbers. The requirement of prime factors only means there is no
# efficient way to recover the prime factors from the semiprime. The requirement also means the space of possible
# factors is smaller, but not too small: the size of the gaps between primes grows as the number of digits in the
# prime grows:
# https://en.wikipedia.org/wiki/Prime_gap#/media/File:Wikipedia_primegaps.png
# Since there are roughly n/ln(n) primes smaller than n[1], there are about 10^305 1024-bit primes.
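#
# A back-of-the-envelope check of that estimate with logarithms (this counts every prime below 2**1024,
# but the order of magnitude is the same):
#
# from math import log, log10
# exponent = 1024 * log10(2) - log10(1024 * log(2))   # log10 of 2**1024 / ln(2**1024)
# print(f"about 10^{exponent:.0f} primes below 2**1024")   # -> about 10^305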
#
#
# But what if you get REAAAALLLY unlucky?
#
# Well, you'll be pleased to know that NIST requires FIPS-compliant RSA implementations (read: every modern
# crypto library) to CHECK that |p - q| is at least 2^(nlen/2 - 100), where nlen is the key size in bits.
# For RSA-2048 that bound is 2^924.
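#
# A sketch of what that check looks like (this assumes the cryptography library's key generator already
# enforces the FIPS 186-4 bound, so the assert should never fire):
#
# private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
# p = private_key.private_numbers().p
# q = private_key.private_numbers().q
# assert abs(p - q) > 2 ** (2048 // 2 - 100)   # |p - q| > 2**924 per FIPS 186-4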
#
# [1] https://stackoverflow.com/a/16091676
# So Grant is not TECHNICALLY lying when he states that the space of semiprimes with x = 0 is massive when
# generating random primes, and he's also not TECHNICALLY lying when he states that x gets larger when the key
# size grows. But he fails to mention what we've shown here:
#
# * Sampling RSA-512, RSA-640, RSA-768 and RSA-896 keys showed that x is nowhere near 0.
# * The probability of picking prime factors that result in a small x gets exponentially smaller when the key size
#   grows linearly.
# * RSA implementations check that the distance between p and q is large enough to make the attack impossible.
#   This attack against RSA has been known for decades and it was mitigated long ago; a sketch of it follows below.
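#
# The attack in question is Fermat's factorization method: it recovers p and q almost instantly when they are
# close to each other, and gets nowhere when they are far apart. A minimal sketch:
#
# from math import isqrt
#
# def fermat_factor(n, max_steps=10**6):
#     a = isqrt(n)
#     if a * a < n:
#         a += 1                        # start from ceil(sqrt(n))
#     for _ in range(max_steps):
#         b2 = a * a - n
#         b = isqrt(b2)
#         if b * b == b2:               # a^2 - n is a perfect square
#             return a - b, a + b       # n = (a - b) * (a + b)
#         a += 1
#     return None                       # gave up: the factors are far apart
#
# print(fermat_factor(1000003 * 1000033))   # close primes -> (1000003, 1000033) on the first step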
#
# So as always, Grant hasn't achieved shit. The reason he managed to factor semiprimes is that he was actually
# factoring numbers whose prime factors are close to one another. The reason the factors appear to novice eyes
# to be far apart is that we humans can't comprehend how long the Excel file would have to be. To give you an
# idea, let's create an RSA-2048 key and look at the distance between its factors:
# private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
# prime_factor_1 = private_key.private_numbers().p
# prime_factor_2 = private_key.private_numbers().q
#
# distance = f"{abs(prime_factor_1 - prime_factor_2):.0f}"
# print(distance)
#
# This printed the following value for me:
# 158668351314163897941513021472819439535737161270384929160014826156809873701937156100996679638299361924158643541160
# 363536051484219329528722492947568028265196689575979113059512991254454139565628801812999505564564300856542096983478
# 60599736696003561965602317367013105909087707110614109439636710610778611723010048
# That is the distance between p and q. If we imagine p = 2 and q sitting at that distance from it, then by
# n/ln(n) there are about 10^304 primes between them, so the Excel sheet would need to be at least 10^304 rows
# long. There are only about 10^80 atoms in the observable universe, so it's not possible to build that Excel
# sheet out of all the matter that exists.
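#
# The same estimate with logarithms (the distance above is roughly 10^307):
#
# from math import log, log10
# exponent = 307 - log10(307 * log(10))   # log10 of n/ln(n) for n = 10**307
# print(f"about 10^{exponent:.0f} rows")   # -> about 10^304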