Skip to content

Instantly share code, notes, and snippets.

@xoelop
Created April 11, 2022 11:09
Show Gist options
  • Save xoelop/ff42ab06406e180b71a92d7753988793 to your computer and use it in GitHub Desktop.
Save xoelop/ff42ab06406e180b71a92d7753988793 to your computer and use it in GitHub Desktop.
-- s1 is not used
WITH s1 AS (
SELECT
linkedin_slug
, count(*) OVER (PARTITION BY 1)
, max(updated_at) max_updated_at
FROM person
GROUP BY linkedin_slug
HAVING count(*) > 1
)
--SELECT * FROM s1;
, s2 AS (
SELECT
max(id) id
, max(full_name) full_name
, max("role") "role"
, max(first_name) first_name
, max(last_name) last_name
, linkedin_slug
, max(linkedin_id) linkedin_id
, max(email) email
, max("domain") "domain"
, max(yc_bio) yc_bio
, min(created_at) created_at
, now() updated_at
, max(extrapolated_email_confidence) extrapolated_email_confidence
, max(email_status) email_status
, max(email_searched_on_apollo::text)::bool email_searched_on_apollo
FROM person
GROUP BY linkedin_slug
HAVING count(*) > 1
-- ORDER BY linkedin_slug
-- LIMIT 5
)
-- update data
, s3 AS (
UPDATE person p
SET
"role" = s2."role",
first_name = s2.first_name,
last_name = s2.last_name,
linkedin_slug = s2.linkedin_slug,
linkedin_id = s2.linkedin_id,
email = s2.email,
"domain" = s2."domain",
yc_bio = s2.yc_bio,
created_at = s2.created_at,
updated_at = s2.updated_at,
extrapolated_email_confidence = s2.extrapolated_email_confidence,
email_status = s2.email_status,
email_searched_on_apollo = s2.email_searched_on_apollo
FROM s2
WHERE p.id = s2.id
)
-- data to be deleted
, s4 AS (
SELECT
p.id
, p.linkedin_slug
, p.updated_at
FROM person p
JOIN s2 ON p.linkedin_slug = s2.linkedin_slug
WHERE p.id < s2.id
)
--SELECT * FROM s4;
, s5 AS (
DELETE FROM person p
WHERE
p.id IN (SELECT id FROM s4)
RETURNING *
)
SELECT count(*) deleted_rows FROM s5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment