Last active
February 12, 2020 17:38
-
-
Save froemken/956a854d1b0776f7ea9a6a002e2e83cf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Delete all duplicate sys_file_records, where one and the same sys_file UID | |
# is related to the same tt_content record | |
DELETE t_orig | |
FROM sys_file_reference t_orig | |
INNER JOIN sys_file_reference t_copy | |
WHERE t_orig.tablenames='tt_content' | |
AND t_orig.fieldname='image' | |
AND t_orig.deleted=0 | |
AND t_copy.tablenames='tt_content' | |
AND t_copy.fieldname='image' | |
AND t_copy.deleted=0 | |
AND t_orig.uid < t_copy.uid | |
AND t_orig.uid_foreign=t_copy.uid_foreign | |
AND t_orig.uid_local=t_copy.uid_local | |
# Quick check, if a tt_content record has sys_file_references | |
# to the same image, which has duplicates in sys_file. | |
# sys_file UID differs, but identifier_hash is duplicate | |
SELECT sfr.uid_foreign, sf.identifier, COUNT(*) as Anzahl | |
FROM sys_file_reference sfr | |
LEFT JOIN sys_file sf | |
ON sfr.uid_local=sf.uid | |
WHERE sfr.deleted=0 | |
AND sfr.tablenames='tt_content' | |
AND sfr.fieldname='image' | |
GROUP BY sfr.uid_foreign, identifier_hash | |
HAVING Anzahl > 1 | |
==> In my case: 1.521 | |
# Prepare statement from above for deletion. Last check, if amount of records is the same | |
SELECT * | |
FROM sys_file_reference t_orig | |
LEFT JOIN sys_file sf ON t_orig.uid_local = sf.uid | |
LEFT JOIN ( | |
SELECT MAX(sfr.uid_local) as MaxSfrUidLocal, sf.identifier, sfr.uid_foreign, sfr.deleted, sfr.tablenames, sfr.fieldname, COUNT( * ) AS Anzahl | |
FROM sys_file_reference sfr | |
LEFT JOIN sys_file sf ON sfr.uid_local = sf.uid | |
WHERE sfr.deleted = 0 | |
AND sfr.tablenames = 'tt_content' | |
AND sfr.fieldname = 'image' | |
GROUP BY sfr.uid_foreign, identifier_hash | |
HAVING Anzahl > 1 | |
) t_copy | |
ON t_orig.deleted = t_copy.deleted | |
AND t_orig.uid_foreign = t_copy.uid_foreign | |
AND t_orig.tablenames = t_copy.tablenames | |
AND t_orig.fieldname = t_copy.fieldname | |
AND sf.identifier = t_copy.identifier | |
WHERE t_orig.uid_local < t_copy.MaxSfrUidLocal | |
==> In my case: 1.521 | |
# Same as above, but with DELETE clause | |
# We will keep the records with highest uid_local. | |
# Needed for deletion of duplicates in sys_file below | |
DELETE t_orig | |
FROM sys_file_reference t_orig | |
LEFT JOIN sys_file sf ON t_orig.uid_local = sf.uid | |
LEFT JOIN ( | |
SELECT MAX(sfr.uid_local) as MaxSfrUidLocal, sf.identifier, sfr.uid_foreign, sfr.deleted, sfr.tablenames, sfr.fieldname, COUNT( * ) AS Anzahl | |
FROM sys_file_reference sfr | |
LEFT JOIN sys_file sf ON sfr.uid_local = sf.uid | |
WHERE sfr.deleted = 0 | |
AND sfr.tablenames = 'tt_content' | |
AND sfr.fieldname = 'image' | |
GROUP BY sfr.uid_foreign, identifier_hash | |
HAVING Anzahl > 1 | |
) t_copy | |
ON t_orig.deleted = t_copy.deleted | |
AND t_orig.uid_foreign = t_copy.uid_foreign | |
AND t_orig.tablenames = t_copy.tablenames | |
AND t_orig.fieldname = t_copy.fieldname | |
AND sf.identifier = t_copy.identifier | |
WHERE t_orig.uid_local < t_copy.MaxSfrUidLocal | |
==> PhpMyAdmin deletes 1.521 records in my case. Perfect | |
# Show all duplicate sys_file entries | |
SELECT sf.identifier, sf.identifier_hash, COUNT(*) as Anzahl | |
FROM sys_file sf | |
GROUP BY sf.identifier_hash | |
HAVING Anzahl > 1 | |
ORDER BY Anzahl DESC | |
==> 1.593 | |
# Last Statement to check, if amount of records to delete is the same | |
SELECT t_orig.uid | |
FROM sys_file t_orig | |
INNER JOIN sys_file t_copy | |
WHERE t_orig.identifier_hash=t_copy.identifier_hash | |
AND t_orig.uid < t_copy.uid | |
==> 1.593. Perfect | |
# Same as above, but with DELETE clause | |
# We delete all duplicates with a lower UID. Remember uid_local from sys_file_reference above. | |
DELETE t_orig | |
FROM sys_file t_orig | |
INNER JOIN sys_file t_copy | |
WHERE t_orig.identifier_hash=t_copy.identifier_hash | |
AND t_orig.uid < t_copy.uid |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
for anybody with that or similar problems:
we created an extension to find and fix sys_file duplicates, which also adapts links in RTE (used and tested in TYPO3 8):
https://github.com/ElementareTeilchen/unduplicator