Last active
January 5, 2021 01:35
-
-
Save maqp/c4c86007122e93414436475a328e044f to your computer and use it in GitHub Desktop.
Pythagorean factorization in the world of real RSA keys.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import time | |
from cryptography.hazmat.primitives.asymmetric import rsa | |
def main(): | |
key_sizes_to_test = [512, 640, 768, 896] | |
number_of_keys_per_size_to_generate = 10000 | |
previous_smallest_distance_between_two_x = None | |
previous_key_size = None | |
for key_size in key_sizes_to_test: | |
list_of_x_values = [] | |
start_time = time.monotonic() | |
for i in range(number_of_keys_per_size_to_generate): | |
private_key = rsa.generate_private_key(public_exponent=65537, key_size=key_size) | |
prime_factor_1 = private_key.private_numbers().p | |
prime_factor_2 = private_key.private_numbers().q | |
semiprime = private_key.private_numbers().public_numbers.n | |
side_C = (prime_factor_1 + prime_factor_2) / 2 | |
side_B = math.sqrt(semiprime) | |
x = side_C - side_B | |
# We collect all values of x for later analysis. | |
list_of_x_values.append(x) | |
duration = time.monotonic() - start_time | |
# We get the smallest and greatest values for X. | |
smallest_x = f"{min(list_of_x_values):.0f}" | |
greatest_x = f"{max(list_of_x_values):.0f}" | |
smallest_distance_between_x = None | |
for i, first_value in enumerate(list_of_x_values): | |
for j, second_value in enumerate(list_of_x_values): | |
if i == j: # Ignore comparison of values with same identity. This doesn't prevent distance between | |
continue # two compared x numbers from being 0, it just prevents comparisons against each value of | |
# X against themselves (which is always zero). | |
distance_between_values = abs(first_value-second_value) | |
if smallest_distance_between_x is None: | |
smallest_distance_between_x = distance_between_values | |
else: | |
smallest_distance_between_x = min(distance_between_values, smallest_distance_between_x) | |
smallest_distance_between_x_str = f"{smallest_distance_between_x:.0f}" | |
# Whitespace for aligning the decimal points of each value. | |
length_of_longest_value = len(max(smallest_x, greatest_x, smallest_distance_between_x_str, key=len)) | |
whitespace1 = ' ' * (length_of_longest_value - len(smallest_x)) | |
whitespace2 = ' ' * (length_of_longest_value - len(greatest_x)) | |
whitespace3 = ' ' * (length_of_longest_value - len(smallest_distance_between_x_str)) | |
print(f"\nGenerated {number_of_keys_per_size_to_generate} {key_size}-bit keys in {duration:.1f} seconds.\n" | |
f"The smallest x was {whitespace1} {smallest_x}\n" | |
f"The greatest x was {whitespace2} {greatest_x}\n" | |
f"The smallest distance between two x values was {whitespace3} {smallest_distance_between_x_str}") | |
if previous_smallest_distance_between_two_x is not None and previous_key_size is not None: | |
problem_size = smallest_distance_between_x / previous_smallest_distance_between_two_x | |
print(f"\nIncreasing key size linearly from {previous_key_size} to {key_size} made the smallest distance between x:s in the sample {problem_size:.0f} times larger.") | |
previous_smallest_distance_between_two_x = smallest_distance_between_x | |
previous_key_size = key_size | |
if __name__ == '__main__': | |
main() | |
# Output when I ran the program: | |
# Generated 10000 512-bit keys in 62.5 seconds. | |
# The smallest x was 12019896571057247261053876450711896266866078393495290408054292480000 | |
# The greatest x was 1009223972550147275717101238160743268768383705107733611013898955977893347328 | |
# The smallest distance between two x values was 1113672342193250620561601408476119330051987562811226746577265623040 | |
# | |
# Generated 10000 640-bit keys in 71.0 seconds. | |
# The smallest x was 21986876100048913151778421649542160516144701231635518536667851486981475794322035048448 | |
# The greatest x was 18675834825171408407759973623876278536724568807543318811379579247292871367999119300836187963392 | |
# The smallest distance between two x values was 834029113031968889456023867956337401692058697869430504912429717413540816780605259776 | |
# | |
# Increasing key size linearly from 512 to 640 made the smallest distance between x:s in the sample 748899906582436736 times larger. | |
# | |
# Generated 10000 768-bit keys in 101.7 seconds. | |
# The smallest x was 1596443682508973441432548160541972187872163423236923252373060004830134954773635695097917138724648663384064 | |
# The greatest x was 350565251279761667930362658206232530455526033878367674381360158638699293357613475168646552181512859306400061325312 | |
# The smallest distance between two x values was 379155538639685551066124516066385428441241214506476589429534483079952532505213417538949562210592334282752 | |
# | |
# Increasing key size linearly from 640 to 768 made the smallest distance between x:s in the sample 454607078716150579200 times larger. | |
# | |
# Generated 10000 896-bit keys in 142.2 seconds. | |
# The smallest x was 53464918858628610837571819224003485199149048817386891440515370270439865747191380750795481804744223769729748571876867811835904 | |
# The greatest x was 6446610185364092605972281272641218049742659728012226654868106589696897120263776775183201080556852464575219028976564006853482026893312 | |
# The smallest distance between two x values was 2886309801231642076087903654506366522532028649934864581466469495773308879509324245297286093628874061087480770472538510721024 | |
# | |
# Increasing key size linearly from 768 to 896 made the smallest distance between x:s in the sample 7612469045255131136 times larger. | |
# | |
# Process finished with exit code 0 | |
# So it's obvious X is most likely not going to be 0, or 1, or 2, or 100000000000000000000000000000000000000000000, about ever. | |
# Grant claims that number of semiprimes where X is small grows when key size grows. This is technically true, | |
# but meaningless in practice. | |
# To prove this, let's simplify a bit and say any two integers p and q are valid factors of a product N (N=p*q). | |
# Let's compare two decimal digit values. | |
# | |
# Both numbers can have values between 00, 01, 02, ..., 99 so that's 100 different values. In how many cases are the | |
# distance of p and q 0 (and have thus very small X)? It turns out are also 100 cases of pairs (p, q) where | |
# both numbers have the same exact value (00, 00), (01, 01) ... (99,99). All these values have distance |p-q| = 0. | |
# | |
# So the question for security is, if we randomly pick p and q, how likely is it we pick the same values leading to | |
# 0? Calculating the probability of picking a pair with identical p and q requires us to know the set of all cases. | |
# The list of all pairs go from (00,00) (00,01) ... (00,99), (01, 00), (01, 01), ... (99,99). | |
# The total number of pairs is 100*100 = 10,000. | |
# | |
# So the probability of picking a pair with two identical values is 100 / 10,000 = 0.01 = 1%. One in 100 cases is | |
# quite bad. | |
# | |
# Now we want to know is, if we increase key size, does this probability change, because if it does, it would help | |
# if we would increase the size of the factors and thus the keysize. | |
# | |
# So let's increase the length of both factors by 1 decimal digit, i.e., let's compare three decimal digit values. | |
# | |
# Now the distance is 0 if all three digits in the pair are the same, (000,000) (001, 001), ..., (999, 999). | |
# Now there is 1000 different value pairs that have |p-q| = 0! So like grant says, the number of factor pairs that | |
# have very small X grows. Grant's claim is essentially, "because the number of pairs with small X grows, the | |
# probability of X being small remains likely, and he claims the algorithm works in "massive number of cases". | |
# | |
# But wait, let's not give in yet. How many different cases are there in total? 1000*1000 = 1,000,000. | |
# So a million cases. What is the probability of picking a pair with three same digits in this case? | |
# 10,000 / 1,000,000 = 0.001 = 0.1% | |
# | |
# So 0.1 percent of prime pairs are bad. So ten times smaller portion of pairs were bad. That means it's ten times | |
# less likely to pick a pair of bad factors. | |
# | |
# So while there are objectively 10 times more factors that would result in small X, the relative portion of those | |
# pairs becomes 10 times smaller every time we add a digit. So when the size of factors grows linearly (i.e. when you | |
# add one zero and so make a 10 a 100), the probability that you pick a bad factor becomes 10 times smaller. | |
# | |
# This allows us to determine the probability of picking two identical factors that result in small x, when given | |
# the number of digits in the base10 factor: | |
# | |
# P(N) = 10^N / (10^N)^2 | |
# = 1 / 10^N | |
# | |
# This shows that the probability of picking bad prime pair can only get more unlikely when key size increases. | |
# This applies in all situations, including substrings. | |
# | |
# This also applies if the factors are prime numbers. The requirement of having a prime factors only means there | |
# is not efficient way to figure out prime factors from semi prime. The requirement of having prime factors also | |
# means that the space of possible factors is smaller, but not too small The size of gaps between prime factors | |
# grows when number of digits in prime grows: | |
# https://en.wikipedia.org/wiki/Prime_gap#/media/File:Wikipedia_primegaps.png | |
# Since there are roughly n/ln(n) primes smaller than n[1], that would mean there are about 10^305 1024-bit primes. | |
# | |
# | |
# But what if you get REAAAALLLY unlucky? | |
# | |
# Well, you'll be pleased to know that NIST requires that FIPS-compliant RSA implementations (read: every modern | |
# crypto library) CHECK that the minimum value for p-q is at least 2^(N/2 - 100). So that would be 2^924 for | |
# RSA-2048. | |
# | |
# [1] https://stackoverflow.com/a/16091676 | |
# So Grant is not TECHNICALLY lying when he states that space for X=0 is massive when generating random primes and | |
# he's also not TECHNICALLY lying when he states X gets larger when key size grows. But he fails to mention | |
# what we've shown here: | |
# | |
# * Sampling showed RSA-512, RSA-680, RSA-768 and RSA-960, X is not even near 0. | |
# * Probability of picking prime factors that result in small X gets exponentially smaller when key size grows linearly | |
# * RSA implementation algorithms check that the distance between P and Q is large enough to make the attack impossible. | |
# This attack against RSA has been known for decades and it has been mitigated long since. | |
# | |
# So as always, Grant hasn't achieved shit. The reason he managed to factor semi-primes was he was actually factoring | |
# numbers where the prime factors are actually close to one another. The reason they appear to novice eyes to be far | |
# apart, is because we humans can't comprehend how long the excel file would have to be. To give you an idea | |
# let's create an RSA-2048 key and look at the distance between its factors: | |
# private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) | |
# prime_factor_1 = private_key.private_numbers().p | |
# prime_factor_2 = private_key.private_numbers().q | |
# | |
# distance = f"{abs(prime_factor_1 - prime_factor_2):.0f}" | |
# print(distance) | |
# | |
# This output for me the following value | |
# 158668351314163897941513021472819439535737161270384929160014826156809873701937156100996679638299361924158643541160 | |
# 363536051484219329528722492947568028265196689575979113059512991254454139565628801812999505564564300856542096983478 | |
# 60599736696003561965602317367013105909087707110614109439636710610778611723010048 | |
# That is the distance between p and q. If we imagine p=2, and the distance is the next prime, we see there are | |
# about n/ln(n) = 10^304 primes between them, so the Excel would need to be at least 10^304 lines long. There are | |
# only 10^80 atoms on the observable universe so it's not possible to create excel sheet from all the matter that | |
# exists. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment