Skip to content

Instantly share code, notes, and snippets.

@yvanzo
Last active June 29, 2020 18:21
Show Gist options
  • Save yvanzo/7aefa508cf436c313bbecacf0a1eb8f7 to your computer and use it in GitHub Desktop.
Save yvanzo/7aefa508cf436c313bbecacf0a1eb8f7 to your computer and use it in GitHub Desktop.
Select unrelated recording-of relationship edits in editing history of artists, recordings and releases
WITH
first_edit AS (
SELECT id FROM edit WHERE id > 70000000 AND open_time >= '2020-06-15'::timestamp ORDER BY id ASC LIMIT 1
),
last_edit AS (
SELECT id FROM edit WHERE id > 70000000 AND open_time <= '2020-06-29T16:30'::timestamp ORDER BY id DESC LIMIT 1
),
recording_artist AS (
SELECT r.id recording, acn.artist
FROM recording r
JOIN artist_credit ac ON r.artist_credit = ac.id
JOIN artist_credit_name acn ON ac.id = acn.artist_credit
)
SELECT COUNT(ea.*)
FROM edit_artist ea
JOIN artist a ON ea.artist = a.id
JOIN edit e ON ea.edit = e.id
JOIN edit_data ed ON e.id = ed.edit
JOIN recording r ON jsonb_extract_path_text(ed.data, 'entity0', 'id')::int = r.id
WHERE (e.type >= 90 AND e.type <= 99)
AND (jsonb_extract_path_text(ed.data, 'type0') = 'recording' AND jsonb_extract_path_text(ed.data, 'type1') = 'work')
AND ea.artist NOT IN (SELECT artist FROM recording_artist ra WHERE r.id = ra.recording)
AND e.id >= (SELECT id FROM first_edit) AND e.id <= (SELECT id FROM last_edit);
-- AND e.id >= 70658947 AND e.id < 71006471;
-- AND e.open_time >= '2020-06-15'::timestamp AND e.open_time < '2020-06-29T16:30'::timestamp;
-- 208558
WITH
first_edit AS (
SELECT id FROM edit WHERE id > 70000000 AND open_time >= '2020-06-15'::timestamp ORDER BY id ASC LIMIT 1
),
last_edit AS (
SELECT id FROM edit WHERE id > 70000000 AND open_time <= '2020-06-29T16:30'::timestamp ORDER BY id DESC LIMIT 1
)
SELECT COUNT(er.*)
FROM edit_recording er
JOIN recording r ON er.recording = r.id
JOIN edit e ON er.edit = e.id
JOIN edit_data ed ON e.id = ed.edit
JOIN recording r2 ON jsonb_extract_path_text(ed.data, 'entity0', 'id')::int = r2.id
WHERE (e.type >= 90 AND e.type <= 99)
AND (jsonb_extract_path_text(ed.data, 'type0') = 'recording' AND jsonb_extract_path_text(ed.data, 'type1') = 'work')
AND r.id != r2.id
AND e.id >= (SELECT id FROM first_edit) AND e.id <= (SELECT id FROM last_edit);
-- AND e.id >= 70658947 AND e.id < 71006471;
-- AND e.open_time >= '2020-06-15'::timestamp AND e.open_time < '2020-06-29T16:30'::timestamp;
-- 814133
WITH
first_edit AS (
SELECT id FROM edit WHERE id > 70000000 AND open_time >= '2020-06-15'::timestamp ORDER BY id ASC LIMIT 1
),
last_edit AS (
SELECT id FROM edit WHERE id > 70000000 AND open_time <= '2020-06-29T16:30'::timestamp ORDER BY id DESC LIMIT 1
),
recording_release AS (
SELECT recording.id AS recording, release.id AS release
FROM release
JOIN medium ON release.id = medium.release
JOIN track ON medium.id = track.medium
JOIN recording ON track.recording = recording.id
)
SELECT COUNT(er.*)
FROM edit_release er
JOIN edit e ON er.edit = e.id
JOIN edit_data ed ON e.id = ed.edit
JOIN recording r ON jsonb_extract_path_text(ed.data, 'entity0', 'id')::int = r.id
WHERE (e.type >= 90 AND e.type <= 99)
AND (jsonb_extract_path_text(ed.data, 'type0') = 'recording' AND jsonb_extract_path_text(ed.data, 'type1') = 'work')
AND r.id NOT IN (SELECT recording FROM recording_release rr WHERE er.release = rr.release)
AND e.id >= (SELECT id FROM first_edit) AND e.id <= (SELECT id FROM last_edit);
-- AND e.id >= 70658947 AND e.id < 71006471;
-- AND e.open_time >= '2020-06-15'::timestamp AND e.open_time < '2020-06-29T16:30'::timestamp;
-- 1080204
@yvanzo
Copy link
Author

yvanzo commented Jun 29, 2020

Reran it after deploying MusicBrainz Server update, 2020-06-29 that contains a patch for MBS-10908.

I limited queries to the period of time the faulty MusicBrainz Server v-2020-06-15 was running. I used edit.id to make it faster.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment