Skip to content

Instantly share code, notes, and snippets.

@froemken
Last active February 12, 2020 17:38
Show Gist options
  • Save froemken/956a854d1b0776f7ea9a6a002e2e83cf to your computer and use it in GitHub Desktop.
Save froemken/956a854d1b0776f7ea9a6a002e2e83cf to your computer and use it in GitHub Desktop.
# Delete all duplicate sys_file_records, where one and the same sys_file UID
# is related to the same tt_content record
DELETE t_orig
FROM sys_file_reference t_orig
INNER JOIN sys_file_reference t_copy
WHERE t_orig.tablenames='tt_content'
AND t_orig.fieldname='image'
AND t_orig.deleted=0
AND t_copy.tablenames='tt_content'
AND t_copy.fieldname='image'
AND t_copy.deleted=0
AND t_orig.uid < t_copy.uid
AND t_orig.uid_foreign=t_copy.uid_foreign
AND t_orig.uid_local=t_copy.uid_local
# Quick check, if a tt_content record has sys_file_references
# to the same image, which has duplicates in sys_file.
# sys_file UID differs, but identifier_hash is duplicate
SELECT sfr.uid_foreign, sf.identifier, COUNT(*) as Anzahl
FROM sys_file_reference sfr
LEFT JOIN sys_file sf
ON sfr.uid_local=sf.uid
WHERE sfr.deleted=0
AND sfr.tablenames='tt_content'
AND sfr.fieldname='image'
GROUP BY sfr.uid_foreign, identifier_hash
HAVING Anzahl > 1
==> In my case: 1.521
# Prepare statement from above for deletion. Last check, if amount of records is the same
SELECT *
FROM sys_file_reference t_orig
LEFT JOIN sys_file sf ON t_orig.uid_local = sf.uid
LEFT JOIN (
SELECT MAX(sfr.uid_local) as MaxSfrUidLocal, sf.identifier, sfr.uid_foreign, sfr.deleted, sfr.tablenames, sfr.fieldname, COUNT( * ) AS Anzahl
FROM sys_file_reference sfr
LEFT JOIN sys_file sf ON sfr.uid_local = sf.uid
WHERE sfr.deleted = 0
AND sfr.tablenames = 'tt_content'
AND sfr.fieldname = 'image'
GROUP BY sfr.uid_foreign, identifier_hash
HAVING Anzahl > 1
) t_copy
ON t_orig.deleted = t_copy.deleted
AND t_orig.uid_foreign = t_copy.uid_foreign
AND t_orig.tablenames = t_copy.tablenames
AND t_orig.fieldname = t_copy.fieldname
AND sf.identifier = t_copy.identifier
WHERE t_orig.uid_local < t_copy.MaxSfrUidLocal
==> In my case: 1.521
# Same as above, but with DELETE clause
# We will keep the records with highest uid_local.
# Needed for deletion of duplicates in sys_file below
DELETE t_orig
FROM sys_file_reference t_orig
LEFT JOIN sys_file sf ON t_orig.uid_local = sf.uid
LEFT JOIN (
SELECT MAX(sfr.uid_local) as MaxSfrUidLocal, sf.identifier, sfr.uid_foreign, sfr.deleted, sfr.tablenames, sfr.fieldname, COUNT( * ) AS Anzahl
FROM sys_file_reference sfr
LEFT JOIN sys_file sf ON sfr.uid_local = sf.uid
WHERE sfr.deleted = 0
AND sfr.tablenames = 'tt_content'
AND sfr.fieldname = 'image'
GROUP BY sfr.uid_foreign, identifier_hash
HAVING Anzahl > 1
) t_copy
ON t_orig.deleted = t_copy.deleted
AND t_orig.uid_foreign = t_copy.uid_foreign
AND t_orig.tablenames = t_copy.tablenames
AND t_orig.fieldname = t_copy.fieldname
AND sf.identifier = t_copy.identifier
WHERE t_orig.uid_local < t_copy.MaxSfrUidLocal
==> PhpMyAdmin deletes 1.521 records in my case. Perfect
# Show all duplicate sys_file entries
SELECT sf.identifier, sf.identifier_hash, COUNT(*) as Anzahl
FROM sys_file sf
GROUP BY sf.identifier_hash
HAVING Anzahl > 1
ORDER BY Anzahl DESC
==> 1.593
# Last Statement to check, if amount of records to delete is the same
SELECT t_orig.uid
FROM sys_file t_orig
INNER JOIN sys_file t_copy
WHERE t_orig.identifier_hash=t_copy.identifier_hash
AND t_orig.uid < t_copy.uid
==> 1.593. Perfect
# Same as above, but with DELETE clause
# We delete all duplicates with a lower UID. Remember uid_local from sys_file_reference above.
DELETE t_orig
FROM sys_file t_orig
INNER JOIN sys_file t_copy
WHERE t_orig.identifier_hash=t_copy.identifier_hash
AND t_orig.uid < t_copy.uid
@franzkugelmann
Copy link

franzkugelmann commented Feb 12, 2020

for anybody with that or similar problems:
we created an extension to find and fix sys_file duplicates, which also adapts links in RTE (used and tested in TYPO3 8):
https://github.com/ElementareTeilchen/unduplicator

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment