Last active
October 8, 2020 15:41
-
-
Save philbirnie/71e709ccc53cd9bbeed2fb0f2022ffb8 to your computer and use it in GitHub Desktop.
Wordpress Image Deduper (Beta)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Bootstrap for the image de-duper.
 *
 * Pulls in wp-config.php (expected to provide the DB_* constants and
 * $table_prefix used below), defines the run settings and opens the PDO
 * connection that the rest of the script works with.
 */
require_once 'wp-config.php';

echo 'Starting Image De-duper', "\n";

global $table_prefix;

/**
 * Configurable Settings
 *
 * $batch_size:       number of attachments handled between pauses.
 * $starting_post_id: only attachments with a post ID above this are examined.
 */
$batch_size       = 100;
$starting_post_id = 10172;

/** Connection details, taken from the constants defined by wp-config.php. */
$host     = DB_HOST;
$db       = DB_NAME;
$user     = DB_USER;
$password = DB_PASSWORD;
$charset  = 'utf8mb4';

$dsn = sprintf( 'mysql:host=%s;dbname=%s;charset=%s', $host, $db, $charset );

/** Throw on errors, fetch associative rows, use native prepared statements. */
$options                                 = [];
$options[ PDO::ATTR_ERRMODE ]            = PDO::ERRMODE_EXCEPTION;
$options[ PDO::ATTR_DEFAULT_FETCH_MODE ] = PDO::FETCH_ASSOC;
$options[ PDO::ATTR_EMULATE_PREPARES ]   = false;

try {
    $pdo = new PDO( $dsn, $user, $password, $options );
} catch ( \PDOException $connection_error ) {
    // Re-raised as a fresh exception that carries only the message and code
    // (presumably so the connection arguments stay out of the stack trace).
    throw new \PDOException( $connection_error->getMessage(), (int) $connection_error->getCode() );
}
print 'Getting Attachments' . "\n";

/**
 * Get the Attachment Post IDs and Image Sources.
 *
 * Builds $image_sources as a map of attachment post ID => value of the
 * `_wp_attached_file` meta entry, limited to posts above $starting_post_id
 * and ordered by ascending post ID.
 */
$sql = "SELECT meta_value, post_id
          FROM {$table_prefix}postmeta
         WHERE meta_key = '_wp_attached_file'
           AND post_id > ?
         ORDER BY post_id ASC";

$stmt = $pdo->prepare( $sql );
$stmt->execute( [ $starting_post_id ] );

$image_sources_results = $stmt->fetchAll();

// Re-index the rows by post ID; if a post ID appears twice the last row wins,
// exactly as it did with the previous hand-written loop.
$image_sources = array_column( $image_sources_results, 'meta_value', 'post_id' );

$attachment_count = count( $image_sources );
print sprintf( '%d attachment%s found', $attachment_count, 1 !== $attachment_count ? 's' : '' ) . "\n";
/**
 * Main de-duplication pass.
 *
 * For every attachment in $image_sources that looks like a numbered copy
 * ("name-1.jpg") of another attachment in the same map ("name.jpg"):
 *   1. repoint `_thumbnail_id` meta rows from the copy to the root image,
 *   2. repoint `_product_image_gallery` meta rows (comma separated ID lists),
 *   3. delete the copy through WP-CLI and remove its leftover size variants.
 *
 * Statements are prepared once and reused for every attachment.
 */
$update_thumbnail_stmt = $pdo->prepare(
    "UPDATE {$table_prefix}postmeta
        SET meta_value = ?
      WHERE meta_value = ?
        AND meta_key = '_thumbnail_id'"
);

$find_galleries_stmt = $pdo->prepare(
    "SELECT meta_id, meta_value
       FROM {$table_prefix}postmeta
      WHERE meta_key = '_product_image_gallery'
        AND meta_value LIKE ?"
);

$update_gallery_stmt = $pdo->prepare(
    "UPDATE {$table_prefix}postmeta
        SET meta_value = ?
      WHERE meta_id = ?
      LIMIT 1"
);

$count = 0;

foreach ( $image_sources as $image_post_id => $image_source ) {
    // Pause before starting the next batch so that every batch holds exactly
    // $batch_size attachments (skipped attachments count towards the batch).
    if ( $count >= $batch_size ) {
        print 'Batch Complete; Pausing' . "\n";
        sleep( 5 );
        print 'Resuming...' . "\n";
        $count = 0;
    }
    $count++;

    /** If not an image, skip. */
    if ( ! is_image( $image_source ) ) {
        print sprintf( '%d: Not an Image. Skipping: %s', $image_post_id, $image_source ) . "\n";
        continue;
    }

    /** If not a duplicate candidate, skip. */
    if ( ! is_duplicate_candidate( $image_source ) ) {
        print sprintf( '%d: Not a Duplicate Candidate. Skipping: %s', $image_post_id, $image_source ) . "\n";
        continue;
    }

    /** @var string $root_image Root Image that can be used for comparison. */
    $root_image = get_root( $image_source );

    /** Look up the root image's post ID (strict comparison: exact path match only). */
    $root_image_post_id = array_search( $root_image, $image_sources, true );

    /**
     * If there is no root image or the root image is the current image,
     * don't delete it.
     */
    if ( false === $root_image_post_id || $root_image_post_id === $image_post_id ) {
        print sprintf( '%d: Original. Skipping: %s', $image_post_id, $image_source ) . "\n";
        continue;
    }

    $duplicate_id = (string) $image_post_id;
    $root_id      = (string) $root_image_post_id;

    /**
     * We have a successful duplicate image candidate;
     * first, update the thumbnail.
     */
    $update_thumbnail_stmt->execute( [ $root_id, $duplicate_id ] );

    /**
     * Update gallery images. The LIKE filter only narrows the rows down; it
     * also matches "partials" (e.g. 28 matches 5228), so each list is split
     * and compared element by element before anything is written back.
     */
    $find_galleries_stmt->execute( [ '%' . $duplicate_id . '%' ] );

    foreach ( $find_galleries_stmt->fetchAll() as $gallery_row ) {
        $gallery_image_ids = explode( ',', (string) $gallery_row['meta_value'] );

        // Replace whole IDs only. str_replace() on the array would rewrite
        // substrings inside other IDs and corrupt them.
        $updated_image_ids = array_map(
            static function ( $gallery_image_id ) use ( $duplicate_id, $root_id ) {
                return trim( $gallery_image_id ) === $duplicate_id ? $root_id : $gallery_image_id;
            },
            $gallery_image_ids
        );

        if ( $updated_image_ids === $gallery_image_ids ) {
            // The duplicate ID was only a partial match for this row.
            continue;
        }

        $meta_id = $gallery_row['meta_id'];
        $update_gallery_stmt->execute( [ implode( ',', $updated_image_ids ), $meta_id ] );
        print sprintf( 'Updating gallery images for %d, from: %d, to: %d', $meta_id, $image_post_id, $root_image_post_id ) . "\n";
    }

    print sprintf( 'Deleting %d: %s', $image_post_id, $image_source ) . "\n";

    $command = sprintf(
        'wp post delete --force %s --allow-root --skip-plugins --skip-themes',
        escapeshellarg( $duplicate_id )
    );

    $command_output = [];
    $exit_code      = 0;
    exec( $command, $command_output, $exit_code );

    if ( 0 !== $exit_code ) {
        // The attachment still exists, so its files must stay on disk.
        print sprintf( '%d: Delete failed (exit code %d). Keeping files: %s', $image_post_id, $exit_code, $image_source ) . "\n";
        continue;
    }

    remove_media_images( $image_source );

    // Forget the deleted attachment so that a later copy (e.g. "name-1-1.jpg")
    // is never repointed at a post that no longer exists. foreach iterates
    // over its own snapshot of the array, so this does not disturb the loop.
    unset( $image_sources[ $image_post_id ] );
}

print 'De-Deuper Process Complete' . "\n";
exit( 0 );
/**
 * Determines whether a path refers to a supported image, judged by its
 * file extension (case-insensitive).
 *
 * Only a real extension counts: "clip.mpeg" or a dot-less name such as
 * "foojpg" is not an image, even though it ends in the same letters.
 *
 * @param string $source Attachment path or file name, e.g. "2020/10/photo-1.jpg".
 *
 * @return bool True for jpg, jpeg, png and svg files; false otherwise,
 *              including for empty or non-string input.
 */
function is_image( $source ): bool {
    if ( ! is_string( $source ) || '' === $source ) {
        return false;
    }

    $valid_image_extensions = [ 'jpg', 'jpeg', 'png', 'svg' ];
    $extension              = strtolower( pathinfo( $source, PATHINFO_EXTENSION ) );

    return in_array( $extension, $valid_image_extensions, true );
}
/**
 * Tells whether a file name looks like a numbered copy of another file.
 *
 * A candidate ends in "-N.ext", where N is a number that does not start
 * with zero (so "-1.jpg" or "-12.png", but never "-0.jpg" or "-01.jpg") and
 * ext is three or four letters taken from j, p, g, n, s, v, e in any case.
 *
 * @param string $source Image source (path or file name).
 *
 * @return bool True when the name matches the numbered-copy pattern.
 */
function is_duplicate_candidate( $source ): bool {
    $match_count = preg_match( "/-[1-9](\d+)?\.([jJpPgGnNsSvVeE]{3,4})$/", $source );

    // preg_match() yields 1 on a match, 0 on no match and false on error.
    return 1 === $match_count;
}
/**
 * Derives the likely root image name by dropping the trailing "-<digits>"
 * that sits directly before the extension ("photo-12.jpg" => "photo.jpg").
 *
 * Names that do not end in that pattern are returned unchanged.
 *
 * @param string $source Image source (path or file name).
 *
 * @return string|string[]|null Result of preg_replace(): the rewritten
 *                              source, or null if the regex engine fails.
 */
function get_root( $source ) {
    // Group 1 is the copy number (discarded); group 2 is the extension (kept).
    return preg_replace( "/-(\d+)\.([jJpPgGnNsSvVeE]{1,4})$/", '.$2', $source );
}
/**
 * Deletes leftover size variants ("<name>-<width>x<height>.<ext>") that sit
 * in the same directory as an attachment file.
 *
 * Only files whose name STARTS with the attachment's base name are touched;
 * the attachment file itself is left alone. Prints a one-line summary when
 * at least one variant was actually removed. Does nothing when the directory
 * does not exist or cannot be read.
 *
 * @param string      $source      Attachment path relative to the uploads directory,
 *                                 e.g. "2020/10/photo-1.jpg".
 * @param string|null $uploads_dir Optional uploads root. Defaults to the
 *                                 "wp-content/uploads" directory next to this script.
 *
 * @return void
 */
function remove_media_images( $source, $uploads_dir = null ) {
    if ( null === $uploads_dir ) {
        $uploads_dir = __DIR__ . '/wp-content/uploads';
    }

    $fullpath  = rtrim( $uploads_dir, '/' ) . '/' . $source;
    $directory = dirname( $fullpath );

    // Checked first so that scandir() does not emit a warning for a missing directory.
    if ( ! is_dir( $directory ) ) {
        return;
    }

    $files = scandir( $directory );
    if ( ! $files ) {
        return;
    }

    $file_base = (string) preg_replace( "/\.([jJpPgGnNsSvVeE]{1,4})$/", '', basename( $source ) );

    // Anchor the base name at the start of the file name; stripping it with
    // str_replace() would also match files that merely contain it somewhere.
    $variant_pattern = '/^' . preg_quote( $file_base, '/' ) . '-\d+x\d+\.[A-Za-z]{3,4}$/';

    $removed_count = 0;

    foreach ( $files as $file ) {
        if ( 1 !== preg_match( $variant_pattern, $file ) ) {
            continue;
        }

        $variant_path = $directory . '/' . $file;

        // Count only the files that were really deleted.
        if ( is_file( $variant_path ) && unlink( $variant_path ) ) {
            $removed_count++;
        }
    }

    if ( $removed_count > 0 ) {
        print sprintf( 'Removed %d rogue media variants', $removed_count ) . "\n";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment