Last active
December 29, 2022 12:57
-
-
Save homuler/c1eafcd7cadf254b02feb83b62171747 to your computer and use it in GitHub Desktop.
Sieving Prime Numbers in SQL (PostgreSQL 9.6)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Enumerate prime numbers below N using only SQL | |
-- DDL | |
create table primes( | |
value integer primary key, | |
sieved boolean not null default false | |
); | |
create index prime_sieve_idx on | |
primes(value, sieved); | |
-- Table for wheel factorization | |
create table wheel_mods( | |
r integer primary key -- Integer in the inner-most circle. This and n = \prod_{p \in P} are coprime. | |
); | |
-- Table for atkin sieve. This is used to save particular solutions of ax^2 + by^2 = d | |
-- | |
-- Case 1. 4x^2 + y^2 = d | |
-- enumerate (x, y) pairs such that 0 <= x <= 15, 0 <= y <= 30, (d, 60) = 1, d ≡ 1 (mod 4) | |
-- a = 4, b = 1, x_mod = 15, y_mod = 30 | |
-- | |
-- Case 2. 3x^2 + y^2 = d | |
-- enumerate (x, y) pairs such that 0 <= x <= 10, 0 <= y <= 30, (d, 60) = 1, d ≡ 7 (mod 12) | |
-- a = 3, b = 1, x_mod = 10, y_mod = 30 | |
-- | |
-- Case 3. 3x^2 - y^2 = d | |
-- enumerate (x, y) pairs such that 0 <= x <= 10, 0 <= y <= 30, (d, 60) = 1, d ≡ 11 (mod 12) | |
-- a = 3, b = -1, x_mod = 10, y_mod = 30 | |
create table atkin_ps( | |
x integer, | |
y integer, | |
d integer, | |
a integer, | |
b integer, | |
x_mod integer, | |
y_mod integer, | |
primary key(d, x, y) | |
); | |
create index atkin_ps_b_idx on | |
atkin_ps(b); | |
-- Prime Sieve | |
-- | |
-- All the queries are tested on Mac Book Pro (2015), 2.2 GHz Intel Core i7, 16 GB 1600 MHz DDR3 | |
-- In each case, the order of operations is written, but they are just for reference and might be inaccurate. | |
-- A) Insert simply (O(n (\log n)^2) | |
create or replace function enum_primes(integer) returns int as $$ | |
declare cnt int; | |
begin | |
delete from primes; | |
insert into primes (value) | |
select X.id from generate_series(2, $1) as X(id) | |
except( | |
select (A.id * B.id) as composite | |
from | |
generate_series(2, sqrt($1)::integer) as A(id), | |
generate_series(A.id, $1 / A.id) as B(id)); | |
select count(*) into STRICT cnt from primes; | |
return cnt; | |
end; | |
$$ LANGUAGE plpgsql; | |
-- select * from enum_primes(1000000); | |
-- Execution Time: | |
-- | n | ms | | |
-- | ---------- | ------ | | |
-- | 100,000 | 458 | | |
-- | 1,000,000 | 5,861 | | |
-- | 10,000,000 | 79,000 | | |
-- B) Sieve of Eratosthenes (O(n (\log n + \log n \log \log n))) | |
create or replace function eratosthenes(integer) returns int as $$ | |
declare i int; | |
declare cnt int; | |
begin | |
delete from primes; | |
insert into primes (value) | |
select X.id from generate_series(2, $1) as X(id); | |
i := 2; | |
LOOP | |
update primes | |
set sieved = true | |
where value in ( | |
select * from generate_series(i * i, $1, i) | |
); | |
select value into STRICT i | |
from primes | |
where value > i and sieved = false | |
order by value asc limit 1; | |
if i > sqrt( $1 ) then EXIT; end if; | |
end LOOP; | |
delete from primes where sieved = true; | |
select count(*) into STRICT cnt from primes; | |
return cnt; | |
end; | |
$$ LANGUAGE plpgsql; | |
-- select * from eratosthenes(1000000); | |
-- Execution Time: | |
-- | n | ms | | |
-- | ---------- | ------ | | |
-- | 100,000 | 2,483 | | |
-- | 1,000,000 | 48,958 | | |
-- | 10,000,000 | | | |
-- C) Wheel Factorization (O(48/210 * n (\log n)^2)) | |
create or replace function wheel_factorize(integer) returns int as $$ | |
declare modulo int; | |
declare cnt int; | |
begin | |
modulo := 210; | |
delete from wheel_mods; | |
delete from primes; | |
insert into wheel_mods | |
select S.id from generate_series(1, modulo) as S(id) | |
where 0 not in (MOD(S.id, 2), MOD(S.id, 3), MOD(S.id, 5), MOD(S.id, 7)); | |
insert into primes (value) | |
values (2), (3), (5), (7) | |
union | |
select modulo * V.id + wheel_mods.r | |
from generate_series(0, $1 / modulo) as V(id) | |
cross join wheel_mods | |
where modulo * V.id + wheel_mods.r between 2 and $1; | |
delete from primes | |
using primes p1, primes p2 | |
where p1.value <= sqrt($1) | |
and p2.value <= $1 / p1.value | |
and primes.value = p1.value * p2.value; | |
select count(*) into STRICT cnt from primes; | |
return cnt; | |
end; | |
$$ LANGUAGE plpgsql; | |
-- select * from wheel_factorize(1000000); | |
-- Execution Time: | |
-- | n | ms | | |
-- | ---------- | ------ | | |
-- | 100,000 | 128 | | |
-- | 1,000,000 | 1,883 | | |
-- | 10,000,000 | 27,105 | | |
-- D) Sieve of Atkin (O(n \log n / \log \log n + (n /\log \log n)^3/2)) | |
-- [Prime Sieves using Binary Quadratic Forms](http://www.ams.org/journals/mcom/2004-73-246/S0025-5718-03-01501-1/S0025-5718-03-01501-1.pdf) | |
create or replace function atkin_sieve(integer) returns int as $$ | |
declare cnt int; | |
begin | |
delete from primes; | |
delete from atkin_ps; | |
insert into atkin_ps (x, y, d, a, b, x_mod, y_mod) | |
select X.id, Y.id, MOD(4 * X.id * X.id + Y.id * Y.id, 60), 4, 1, 15, 30 | |
from | |
generate_series(0, 14) as X(id), | |
generate_series(0, 29) as Y(id) | |
where MOD(4 * X.id * X.id + Y.id * Y.id, 60) in (1, 13, 17, 29, 37, 41, 49, 53) | |
union | |
select X.id, Y.id, MOD(3 * X.id * X.id + Y.id * Y.id, 60), 3, 1, 10, 30 | |
from | |
generate_series(0, 9) as X(id), | |
generate_series(0, 29) as Y(id) | |
where MOD(3 * X.id * X.id + Y.id * Y.id, 60) in (7, 19, 31, 43) | |
union | |
select X.id, Y.id, MOD(MOD(3 * X.id * X.id - Y.id * Y.id, 60) + 60, 60), 3, -1, 10, 30 | |
from | |
generate_series(0, 9) as X(id), | |
generate_series(0, 29) as Y(id) | |
where MOD(MOD(3 * X.id * X.id - Y.id * Y.id, 60) + 60, 60) in (11, 23, 47, 59); | |
insert into primes(value) | |
select | |
a * X.id * X.id + b * Y.id * Y.id | |
from atkin_ps | |
cross join | |
generate_series(x, sqrt($1::float/a)::integer, x_mod) as X(id), | |
generate_series(y, sqrt(greatest($1 - a*X.id*X.id, 0))::integer, y_mod) as Y(id) | |
where (a * X.id * X.id + b * Y.id * Y.id) <= $1 and b > 0 and X.id > 0 and Y.id > 0 | |
group by (a * X.id * X.id + b * Y.id * Y.id) | |
having MOD(count(*), 2) = 1 | |
union | |
select | |
a * X.id * X.id + b * Y.id * Y.id | |
from atkin_ps | |
cross join | |
generate_series(x, sqrt($1::float/(a + b))::integer, x_mod) as X(id), | |
generate_series(y, X.id - 1, y_mod) as Y(id) | |
where (a * X.id * X.id + b * Y.id * Y.id) <= $1 and b < 0 and X.id > 0 | |
group by (a * X.id * X.id + b * Y.id * Y.id) | |
having MOD(count(*), 2) = 1; | |
delete from primes | |
using primes p1 | |
where p1.value <= sqrt(primes.value) | |
and p1.value <= sqrt($1) | |
and MOD(primes.value, p1.value) = 0; | |
insert into primes (value) | |
values (2), (3), (5); | |
select count(*) into STRICT cnt from primes; | |
return cnt; | |
end; | |
$$ LANGUAGE plpgsql; | |
-- select * from atkin_sieve(1000000); | |
-- Execution Time: | |
-- | n | ms | | |
-- | ---------- | ------ | | |
-- | 100,000 | 348 | | |
-- | 1,000,000 | 2,180 | | |
-- | 10,000,000 | 37,434 | | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment