Skip to content

Instantly share code, notes, and snippets.

@maltzsama
Last active December 29, 2023 12:12
Show Gist options
  • Save maltzsama/073b040022e9088a291cd91bf66b7911 to your computer and use it in GitHub Desktop.
Save maltzsama/073b040022e9088a291cd91bf66b7911 to your computer and use it in GitHub Desktop.
Geração de CPF válido usando Presto SQL e PySpark SQL
WITH RECURSIVE
cpf(digits, len) AS (
SELECT ARRAY[CAST(FLOOR(RANDOM() * 10) AS INTEGER)], 1
UNION ALL
SELECT digits || CAST(FLOOR(RANDOM() * 10) AS INTEGER), len + 1
FROM cpf
WHERE len < 9
),
cpf_with_first_verifier(digits, len) AS (
SELECT digits || (CASE WHEN 11 - MOD(SUM((10 - i + 1) * v), 11) >= 10 THEN 0 ELSE 11 - MOD(SUM((10 - i + 1) * v), 11) END), len + 1
FROM (
SELECT digits, len, v, i
FROM cpf, UNNEST(digits) WITH ORDINALITY t(v, i)
) AS subquery
WHERE len = 9
GROUP BY digits, len
),
cpf_with_second_verifier(digits, len) AS (
SELECT digits || (CASE WHEN 11 - MOD(SUM((11 - i + 1) * v), 11) >= 10 THEN 0 ELSE 11 - MOD(SUM((11 - i + 1) * v), 11) END), len + 1
FROM (
SELECT digits, len, v, i
FROM cpf_with_first_verifier, UNNEST(digits) WITH ORDINALITY t(v, i)
) AS subquery
WHERE len = 10
GROUP BY digits, len
)
SELECT ARRAY_JOIN(digits, '') FROM cpf_with_second_verifier WHERE len = 11;
WITH sequence_table AS (
SELECT
CAST(seq AS INTEGER) AS id
FROM
(SELECT EXPLODE(SEQUENCE(1, 100)) AS seq) AS t
),
cpf AS (
SELECT
id,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d1,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d2,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d3,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d4,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d5,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d6,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d7,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d8,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d9
FROM sequence_table
),
cpf_with_first_verifier AS (
SELECT
id,
d1, d2, d3, d4, d5, d6, d7, d8, d9,
(CASE WHEN 11 - MOD(d1*10 + d2*9 + d3*8 + d4*7 + d5*6 + d6*5 + d7*4 + d8*3 + d9*2, 11) >= 10 THEN 0 ELSE 11 - MOD(d1*10 + d2*9 + d3*8 + d4*7 + d5*6 + d6*5 + d7*4 + d8*3 + d9*2, 11) END) AS d10
FROM cpf
),
cpf_with_second_verifier AS (
SELECT
id,
d1, d2, d3, d4, d5, d6, d7, d8, d9, d10,
(CASE WHEN 11 - MOD(d1*11 + d2*10 + d3*9 + d4*8 + d5*7 + d6*6 + d7*5 + d8*4 + d9*3 + d10*2, 11) >= 10 THEN 0 ELSE 11 - MOD(d1*11 + d2*10 + d3*9 + d4*8 + d5*7 + d6*6 + d7*5 + d8*4 + d9*3 + d10*2, 11) END) AS d11
FROM cpf_with_first_verifier
) SELECT id, CONCAT(cast(d1 as string), cast(d2 as string), cast(d3 as string), cast(d4 as string), cast(d5 as string), cast(d6 as string), cast(d7 as string), cast(d8 as string), cast(d9 as string), cast(d10 as string), cast(d11 as string)) as cpf FROM cpf_with_second_verifier;
WITH sequence_table AS (
SELECT
CAST(seq AS INTEGER) AS id
FROM
UNNEST(SEQUENCE(1, 100)) AS t(seq)
),
cpf AS (
SELECT
id,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d1,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d2,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d3,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d4,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d5,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d6,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d7,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d8,
CAST(FLOOR(RANDOM() * 10) AS INTEGER) AS d9
FROM sequence_table
),
cpf_with_first_verifier AS (
SELECT
id,
d1, d2, d3, d4, d5, d6, d7, d8, d9,
(CASE WHEN 11 - MOD(d1*10 + d2*9 + d3*8 + d4*7 + d5*6 + d6*5 + d7*4 + d8*3 + d9*2, 11) >= 10 THEN 0 ELSE 11 - MOD(d1*10 + d2*9 + d3*8 + d4*7 + d5*6 + d6*5 + d7*4 + d8*3 + d9*2, 11) END) AS d10
FROM cpf
),
cpf_with_second_verifier AS (
SELECT
id,
d1, d2, d3, d4, d5, d6, d7, d8, d9, d10,
(CASE WHEN 11 - MOD(d1*11 + d2*10 + d3*9 + d4*8 + d5*7 + d6*6 + d7*5 + d8*4 + d9*3 + d10*2, 11) >= 10 THEN 0 ELSE 11 - MOD(d1*11 + d2*10 + d3*9 + d4*8 + d5*7 + d6*6 + d7*5 + d8*4 + d9*3 + d10*2, 11) END) AS d11
FROM cpf_with_first_verifier
)
SELECT id, CONCAT(cast(d1 as varchar), cast(d2 as varchar), cast(d3 as varchar), cast(d4 as varchar), cast(d5 as varchar), cast(d6 as varchar), cast(d7 as varchar), cast(d8 as varchar), cast(d9 as varchar), cast(d10 as varchar), cast(d11 as varchar)) as cpf FROM cpf_with_second_verifier;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment