Skip to content

Instantly share code, notes, and snippets.

@acataluddi
Last active December 8, 2022 14:58
Show Gist options
  • Save acataluddi/e5887529981f67e03eb3ffd53a577bef to your computer and use it in GitHub Desktop.
Save acataluddi/e5887529981f67e03eb3ffd53a577bef to your computer and use it in GitHub Desktop.
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>Remove Magento's orphan images web console</title>
<link href='https://fonts.googleapis.com/css?family=Open+Sans+Condensed:300,700' rel='stylesheet' type='text/css'>
<link href='https://fonts.googleapis.com/css?family=Inconsolata:400,700&subset=latin,latin-ext' rel='stylesheet'
type='text/css'>
<style type="text/css">
body {
font-family: "Open Sans", Arial, sans-serif;
margin-left: 25px;
margin-right: 30px;
padding: 0 0 0 0;
}
pre {
font-family: Inconsolata, "Courier New", monospace;
font-size: 15px;
clear: both;
margin: 0 -0px 0 0;
background: #000;
border: 1px groove #ccc;
color: #ccc;
display: block;
width: 100%;
min-height: 600px;
padding: 5px 5px 5px 5px;
}
.logo {
float: left;
margin-right: 25px;;
}
.scriptInfo {
float: left;
}
h1 {
font-size: 16px;
margin-top: 0;
line-height: 49px;
margin-bottom: 0;
}
h1 span {
font-family: Inconsolata, "Courier New", monospace;
}
.header {
width: 100%;
height: auto;
display: table;
margin-top: 25px;
margin-bottom: 25px;;
}
</style>
</head>
<body>
<div class="header">
<div class="logo">
<img src="skin/frontend/default/modern/images/logo.gif" alt="Magento's Logo" width="173" height="49">
</div>
<div class="scriptInfo">
<h1>Delete Catalog Orphan Images <span>1.1</span></h1>
</div>
</div>
<pre>
<?php
require 'app/Mage.php';
/**
* NOTE: Set this to false if you want to actually delete files.
*/
$dryRun = true;
$job = new OrphanImagesCleaner();
$job->run($dryRun);
/**
* Class OrphanImagesCleaner
*
* @author Jeroen Boersma <[email protected]>
* @author Adriano Cataluddi <[email protected]>
*/
class OrphanImagesCleaner
{
/**
* @var bool
*/
protected $dryRun = true;
/**
* Executes the tools
* @param bool $dryRun
*/
public function run($dryRun = true)
{
$this->dryRun = $dryRun;
$duplicateCount = 0;
$duplicateTotalSize = 0;
$orphansCount = 0;
$orphansTotalSize = 0;
if (!Mage::isInstalled()) {
$this->wl('Application is not installed yet, please complete install wizard first.');
exit;
}
set_time_limit(0);
session_write_close();
umask(0);
Mage::app('admin')->setUseSessionInUrl(false);
$mediaPath = $this->getRootPath() . '/media/catalog/product';
if ($this->dryRun)
$this->wl('DRY RUN mode: the script will NOT modify any file or record.');
$this->wl();
$this->wl(' Magento release: ' . Mage::getVersion());
$this->wl(' Media path: ' . $mediaPath);
$this->wl();
$this->wl();
$this->wl('[Phase 1/2] Looking for duplicate products images...');
$this->wl(str_repeat('-', 80));
$connection = Mage::getSingleton('core/resource')
->getConnection('core_write');
$sql = 'select distinct '
. 'cp.entity_id, '
. 'cpg.value_id, '
. 'cpv.value as default_value, '
. 'cpg.value '
. 'from catalog_product_entity as cp '
. 'join catalog_product_entity_varchar as cpv on cp.entity_id = cpv.entity_id '
. 'join catalog_product_entity_media_gallery as cpg on cp.entity_id = cpg.entity_id '
. 'WHERE '
. 'cpv.attribute_id in(select attribute_id from eav_attribute where frontend_input = \'media_image\') '
. 'and '
. 'cpv.value != cpg.value;';
$results = $connection->fetchAll($sql);
$this->wl(sprintf('Found %s items to process.', sizeof($results)), true);
$lastEntityId = null;
$origSums = array();
foreach ($results as $row) {
if ($row['entity_id'] != $lastEntityId) {
$lastEntityId = $row['entity_id'];
$origSums = array();
}
$origFile = $mediaPath . $row['default_value'];
if (!file_exists($origFile)) {
continue;
}
$file = $mediaPath . $row['value'];
if (file_exists($file)) {
if (!isset($origSums[$origFile])) {
$origSums[$origFile] = md5_file($origFile);
}
$sum = md5_file($file);
if (!in_array($sum, $origSums)) {
$origSums[$file] = $sum;
}
else {
$this->wl(sprintf('Deleting image "%s" (#%s)', $file, $row['entity_id']), true);
$duplicateCount++;
$duplicateTotalSize += filesize($file);
if (!$this->dryRun) unlink($file);
}
}
if (!file_exists($file)) {
$this->wl(sprintf('Deleting record for "%s" (#%s)', $file, $row['entity_id']), true);
$deleteSql = 'delete from catalog_product_entity_media_gallery where value_id = ' . $row['value_id'] . ';';
if (!$this->dryRun) $connection->query($deleteSql);
}
}
// Find files on filesystem which aren't listed in the database
$this->wl();
$this->wl('[Phase 1/2] Finding files on filesystem which aren\'t listed in the database...');
$this->wl(str_repeat('-', 80));
$files = glob($mediaPath . '/[A-z0-9]/*/*');
foreach ($files as $file) {
$searchFile = str_replace($mediaPath, '', $file);
// Lookup
$mediaSql = "select count(*) as records from catalog_product_entity_media_gallery where value = '{$searchFile}'";
$mediaCount = $connection->fetchOne($mediaSql);
if ($mediaCount < 1) {
$orphansCount++;
$orphansTotalSize += filesize($file);
$this->wl(sprintf('Deleting image "%s"', $file), true);
if (!$this->dryRun) unlink($file);
}
}
$this->wl();
$this->wl('Done.');
$this->wl(str_repeat('-', 80));
$this->wl(sprintf(' Total duplicate images: %s (%s)', $duplicateCount, $this->formatBytes($duplicateTotalSize)));
$this->wl(sprintf(' Total orphan images: %s (%s)', $orphansCount, $this->formatBytes($orphansTotalSize)));
$this->wl(str_repeat('-', 80));
}
/**
* @param boolean $dryRun
*/
public function setDryRunEnabled($dryRun)
{
$this->dryRun = $dryRun;
}
/**
* @return boolean
*/
public function isDryRunEnabled()
{
return $this->dryRun;
}
/**
* Writes a line in console.
* @param $line
* @param bool $notifyDryRun
*/
protected function wl($line = null, $notifyDryRun = false)
{
($notifyDryRun && $this->dryRun && ($line !== null)) ?
$dryLabel = 'DRY RUN | ' :
$dryLabel = '';
print $dryLabel . $line . "\n";
}
/**
* Returns the script root path
* @return string
*/
protected function getRootPath()
{
return (dirname(__FILE__));
}
/**
* Format bytes
* @author MrCaspan (https://github.com/MrCaspan)
* @param $bytes
* @return string
*/
protected function formatBytes($bytes)
{
$i = floor(log($bytes, 1024));
return round($bytes / pow(1024, $i), [0, 0, 2, 2, 3][$i]) . ['B', 'kB', 'MB', 'GB', 'TB'][$i];
}
}
?>
</pre>
</body>
</html>
@webdreamsnc
Copy link

webdreamsnc commented Apr 13, 2018

@acataluddi
Hi, we improved your script to get table's name from magento core (including tables prefix options).
Thi is run function edited:

public function run()
{
$duplicateCount = 0;
$orphansCount = 0;
if (!Mage::isInstalled()) {
$this->wl('Application is not installed yet, please complete install wizard first.');
exit;
}
set_time_limit(0);
session_write_close();
umask(0);
Mage::app('admin')->setUseSessionInUrl(false);
$mediaPath = $this->getRootPath() . '/media/catalog/product';
if ($this->dryRun)
$this->wl('DRY RUN mode: the script will NOT modify any file or record.');
$this->wl();
$this->wl(' Magento release: ' . Mage::getVersion());
$this->wl(' Media path: ' . $mediaPath);
$this->wl();
$this->wl();
$this->wl('[Phase 1/2] Looking for duplicate products images...');
$this->wl(str_repeat('-', 80));
$connection = Mage::getSingleton('core/resource')
->getConnection('core_write');
$sql = 'select distinct '
. 'cp.entity_id, '
. 'cpg.value_id, '
. 'cpv.value as default_value, '
. 'cpg.value '
. 'from '.Mage::getSingleton('core/resource')->getTableName('catalog_product_entity').' as cp '
. 'join '.Mage::getSingleton('core/resource')->getTableName('catalog_product_entity_varchar').' as cpv on cp.entity_id = cpv.entity_id '
. 'join '.Mage::getSingleton('core/resource')->getTableName('catalog_product_entity_media_gallery').' as cpg on cp.entity_id = cpg.entity_id '
. 'WHERE '
. 'cpv.attribute_id in(select attribute_id from '.Mage::getSingleton('core/resource')->getTableName('eav_attribute').' where frontend_input = 'media_image') '
. 'and '
. 'cpv.value != cpg.value;';
$results = $connection->fetchAll($sql);
$this->wl(sprintf('Found %s items to process.', sizeof($results)), true);
$lastEntityId = null;
$origSums = array();
foreach ($results as $row) {
if ($row['entity_id'] != $lastEntityId) {
$lastEntityId = $row['entity_id'];
$origSums = array();
}
$origFile = $mediaPath . $row['default_value'];
if (!file_exists($origFile)) {
continue;
}
$file = $mediaPath . $row['value'];
if (file_exists($file)) {
if (!isset($origSums[$origFile])) {
$origSums[$origFile] = md5_file($origFile);
}
$sum = md5_file($file);
if (!in_array($sum, $origSums)) {
$origSums[$file] = $sum;
}
else {
$this->wl(sprintf('Deleting image "$s" (#%s)', $file, $row['entity_id']), true);
$duplicateCount++;
if (!$this->dryRun) unlink($file);
}
}
if (!file_exists($file)) {
$this->wl(sprintf('Deleting record for "%s" (#%s)', $file, $row['entity_id']), true);
$deleteSql = 'delete from '.Mage::getSingleton('core/resource')->getTableName('catalog_product_entity_media_gallery').' where value_id = ' . $row['value_id'] . ';';
if (!$this->dryRun) $connection->query($deleteSql);
}
}
// Find files on filesystem which aren't listed in the database
$this->wl();
$this->wl('[Phase 1/2] Finding files on filesystem which aren't listed in the database...');
$this->wl(str_repeat('-', 80));
$files = glob($mediaPath . '/[A-z0-9]//');
foreach ($files as $file) {
$searchFile = str_replace($mediaPath, '', $file);
// Lookup
$mediaSql = "select count(*) as records from ".Mage::getSingleton('core/resource')->getTableName('catalog_product_entity_media_gallery')." where value = '{$searchFile}'";
$mediaCount = $connection->fetchOne($mediaSql);
if ($mediaCount < 1) {
$orphansCount++;
$this->wl(sprintf('Deleting image "%s"', $file), true);
if (!$this->dryRun) unlink($file);
}
}
$this->wl();
$this->wl('Done.');
$this->wl(str_repeat('-', 80));
$this->wl(sprintf(' Total duplicate images: %s', $duplicateCount));
$this->wl(sprintf(' Total orphan images: %s', $orphansCount));
$this->wl(str_repeat('-', 80));
}

@Dogleg
Copy link

Dogleg commented May 11, 2018

Fix above worked for me but there is a single vs double quote mark issue in the db query string; needs to be:

. 'cpv.attribute_id in(select attribute_id from '.Mage::getSingleton('core/resource')->getTableName('eav_attribute').' where frontend_input = \'media_image\') '

…the single quotes around 'media_image' need to be slashed out.

@LiamKarlMitchell
Copy link

Thanks for this, It would be neat to have an option to get a total disk space in-use by orphaned images when doing the dry-run.
http://php.net/manual/en/function.filesize.php

@PeterBrain
Copy link

There is a little typo at line 179 ($ instead of %):

change
$this->wl(sprintf('Deleting image "$s" (#%s)', $file, $row['entity_id']), true);
to
$this->wl(sprintf('Deleting image "%s" (#%s)', $file, $row['entity_id']), true);

@Ang90
Copy link

Ang90 commented Apr 14, 2020

Beautiful! If I understand correctly, this script will delete the images that are no longer linked to no product ... That is, if I have only "disabled" products, in this case the images will not be deleted? It is important to keep them for me ... Also how do I run the script? Load in the foot and what do I type in the URL? Thank you

@acataluddi
Copy link
Author

There is a little typo at line 179 ($ instead of %):

Thanks @PeterBrain, fixed.

@acataluddi
Copy link
Author

hat is, if I have only "disabled" products, in this case the images will not be deleted?

Thanks @Ang90, much appreciated. Yes, correct. It should only remove the orphan images (not bound to any product anymore).
Anyway, as a general suggestion, I presume you have a test enviroment to run the script before: it's always the right think to do.

Thanks,
Adriano

@acataluddi
Copy link
Author

Thanks for this, It would be neat to have an option to get a total disk space in-use by orphaned images when doing the dry-run.

Hi @LiamKarlMitchell,
I've added the suggested feature.

Regards,
Adriano

@Ang90
Copy link

Ang90 commented Apr 16, 2020

Thanks a lot! But how do I run the script? I need to possibly run it via the URL browser, otherwise from ssh what are the exact commands? Thank you

@acataluddi
Copy link
Author

Hi @Ang90,

just upload it to your Magento root path and call the URL https://www.your-magento-store.com/removeOrphanImages.php.

Please Note
By default, it runs in "dry-mode" (does not actually deletes files). To disable the dry mode you need to change the following line from:

/**
 * NOTE: Set this to false if you want to actually delete files.
 */
$dryRun = true;

to:

/**
 * NOTE: Set this to false if you want to actually delete files.
 */
$dryRun = false;

Regards,
Adriano

@Ang90
Copy link

Ang90 commented Apr 16, 2020 via email

@tmotyl
Copy link

tmotyl commented Dec 11, 2020

You can also try https://github.com/magento-hackathon/EAVCleaner which is a module for n98-magerun

@LiamKarlMitchell
Copy link

This is still a great script, run it on a cron every month.

Something else I was doing back on older Magento 2 versions (<2.2) was using a tool named fdupes to symlink images that were exactly the same binary wise for a further space saving, as it had a bug of creating many duplicate images.

Note if moving website between servers that when rsyncing symlinks you will want the -L modifier.
(Which was fine for 1 image per product, but if the same image was uploaded to multiple products and the main one happened to get disabled it would be a bad time thankfully this did not happen in normal usage)

@LuizSantos1
Copy link

Tried to use it but it does not pass by
[Phase 1/2] Looking for duplicate products images...

My installation is:
OpenMage LTS 1.9.4.11
PHP: 7.4
Cpanel

I use db table prefix

@acataluddi
Copy link
Author

Hi @LuizSantos1,

please enable the PHP and Magento logging and check the logs. I'm sure you might get a clue of what's going on.

Regards,
Adriano

@LuizSantos1
Copy link

I am using a similar extension now for tests, and I am getting this error:
catalog_product_entity_media_gallery doesn't exist
But I have checked the db and the table is there...
I use table prefix but I had the plugin that worked with it but now it does not work neither...

@acataluddi
Copy link
Author

Hi Luiz,

why don't you try modifying the following lines by appending your table prefix?

        $sql = 'select distinct '
            . 'cp.entity_id, '
            . 'cpg.value_id, '
            . 'cpv.value as default_value, '
            . 'cpg.value '
            . 'from catalog_product_entity as cp '
            . 'join catalog_product_entity_varchar as cpv on cp.entity_id = cpv.entity_id '
            . 'join catalog_product_entity_media_gallery as cpg on cp.entity_id = cpg.entity_id '
            . 'WHERE '
            . 'cpv.attribute_id in(select attribute_id from eav_attribute where frontend_input = \'media_image\') '
            . 'and '
            . 'cpv.value != cpg.value;';

If it works, you might want to make the prefix parametric. I hope it will help.

Regards,
Adriano

@LuizSantos1
Copy link

Hi Luiz,

why don't you try modifying the following lines by appending your table prefix?

        $sql = 'select distinct '
            . 'cp.entity_id, '
            . 'cpg.value_id, '
            . 'cpv.value as default_value, '
            . 'cpg.value '
            . 'from catalog_product_entity as cp '
            . 'join catalog_product_entity_varchar as cpv on cp.entity_id = cpv.entity_id '
            . 'join catalog_product_entity_media_gallery as cpg on cp.entity_id = cpg.entity_id '
            . 'WHERE '
            . 'cpv.attribute_id in(select attribute_id from eav_attribute where frontend_input = \'media_image\') '
            . 'and '
            . 'cpv.value != cpg.value;';

If it works, you might want to make the prefix parametric. I hope it will help.

Regards,
Adriano

Thanks, I have tried but it did not work.
Same issue, does not pass by
"Phase 1/2] Looking for duplicate products images..."

Will try enabling system log latter on as we are still in commercial time here...

@kbpcit
Copy link

kbpcit commented Dec 16, 2021

Hi,

In my case the script didn't run as I am using a prefix for the database. Could I suggest adding that as an option? I've amended the applicable section of the code in my copy as follows:
`<?php

require 'app/Mage.php';

/**

  • NOTE: Set this to false if you want to actually delete files.
    */
    $dryRun = true;

/**

  • NOTE: Set here the prefix for your Magento tables
    */
    $db_prefix = 'bgb_';

$job = new OrphanImagesCleaner();
$job->run($dryRun, $db_prefix);

/**

  • Class OrphanImagesCleaner

  • @author Jeroen Boersma [email protected]

  • @author Adriano Cataluddi [email protected]
    /
    class OrphanImagesCleaner
    {
    /
    *

    • @var bool
      */
      protected $dryRun = true;

    /**

    • Executes the tools

    • @param bool $dryRun
      */
      public function run($dryRun = true, $db_prefix = '')
      {
      $this->dryRun = $dryRun;
      $duplicateCount = 0;
      $duplicateTotalSize = 0;
      $orphansCount = 0;
      $orphansTotalSize = 0;
      if (!Mage::isInstalled()) {
      $this->wl('Application is not installed yet, please complete install wizard first.');
      exit;
      }

      set_time_limit(0);
      session_write_close();
      umask(0);

      Mage::app('admin')->setUseSessionInUrl(false);
      $mediaPath = $this->getRootPath() . '/media/catalog/product';

      if ($this->dryRun)
      $this->wl('DRY RUN mode: the script will NOT modify any file or record.');

      $this->wl();
      $this->wl(' Magento release: ' . Mage::getVersion());
      $this->wl(' Media path: ' . $mediaPath);
      $this->wl();

      $this->wl();
      $this->wl('[Phase 1/2] Looking for duplicate products images...');
      $this->wl(str_repeat('-', 80));

      $connection = Mage::getSingleton('core/resource')
      ->getConnection('core_write');

      $sql = 'select distinct '
      . 'cp.entity_id, '
      . 'cpg.value_id, '
      . 'cpv.value as default_value, '
      . 'cpg.value '
      . 'from ' . $db_prefix . 'catalog_product_entity as cp '
      . 'join ' . $db_prefix . 'catalog_product_entity_varchar as cpv on cp.entity_id = cpv.entity_id '
      . 'join ' . $db_prefix . 'catalog_product_entity_media_gallery as cpg on cp.entity_id = cpg.entity_id '
      . 'WHERE '
      . 'cpv.attribute_id in(select attribute_id from ' . $db_prefix . 'eav_attribute where frontend_input = 'media_image') '
      . 'and '
      . 'cpv.value != cpg.value;';

      $results = $connection->fetchAll($sql);
      $this->wl(sprintf('Found %s items to process.', sizeof($results)), true);

      $lastEntityId = null;
      $origSums = array();
      foreach ($results as $row) {

       if ($row['entity_id'] != $lastEntityId) {
           $lastEntityId = $row['entity_id'];
           $origSums = array();
       }
      
       $origFile = $mediaPath . $row['default_value'];
       if (!file_exists($origFile)) {
           continue;
       }
      
       $file = $mediaPath . $row['value'];
       if (file_exists($file)) {
           if (!isset($origSums[$origFile])) {
               $origSums[$origFile] = md5_file($origFile);
           }
           $sum = md5_file($file);
      
           if (!in_array($sum, $origSums)) {
               $origSums[$file] = $sum;
           }
           else {
               $this->wl(sprintf('Deleting image "%s" (#%s)', $file, $row['entity_id']), true);
               $duplicateCount++;
               $duplicateTotalSize += filesize($file);
               if (!$this->dryRun) unlink($file);
           }
       }
      
       if (!file_exists($file)) {
           $this->wl(sprintf('Deleting record for "%s" (#%s)', $file, $row['entity_id']), true);
           $deleteSql = 'delete from ' . $db_prefix . 'catalog_product_entity_media_gallery where value_id = ' . $row['value_id'] . ';';
           if (!$this->dryRun) $connection->query($deleteSql);
       }
      

      }

      // Find files on filesystem which aren't listed in the database
      $this->wl();
      $this->wl('[Phase 1/2] Finding files on filesystem which aren't listed in the database...');
      $this->wl(str_repeat('-', 80));

      $files = glob($mediaPath . '/[A-z0-9]//');
      foreach ($files as $file) {
      $searchFile = str_replace($mediaPath, '', $file);

       // Lookup
       $mediaSql = "select count(*) as records from " . $db_prefix . "catalog_product_entity_media_gallery where value = '{$searchFile}'";
       $mediaCount = $connection->fetchOne($mediaSql);
      
       if ($mediaCount < 1) {
           $orphansCount++;
           $orphansTotalSize += filesize($file);
           $this->wl(sprintf('Deleting image "%s"', $file), true);
           if (!$this->dryRun) unlink($file);
       }
      

      }

      $this->wl();
      $this->wl('Done.');
      $this->wl(str_repeat('-', 80));
      $this->wl(sprintf(' Total duplicate images: %s (%s)', $duplicateCount, $this->formatBytes($duplicateTotalSize)));
      $this->wl(sprintf(' Total orphan images: %s (%s)', $orphansCount, $this->formatBytes($orphansTotalSize)));
      $this->wl(str_repeat('-', 80));
      }

    /**

    • @param boolean $dryRun
      */
      public function setDryRunEnabled($dryRun)
      {
      $this->dryRun = $dryRun;
      }

    /**

    • @return boolean
      */
      public function isDryRunEnabled()
      {
      return $this->dryRun;
      }

    /**

    • Writes a line in console.

    • @param $line

    • @param bool $notifyDryRun
      */
      protected function wl($line = null, $notifyDryRun = false)
      {
      ($notifyDryRun && $this->dryRun && ($line !== null)) ?
      $dryLabel = 'DRY RUN | ' :
      $dryLabel = '';

      print $dryLabel . $line . "\n";
      }

    /**

    • Returns the script root path
    • @return string
      */
      protected function getRootPath()
      {
      return (dirname(FILE));
      }

    /**

    • Format bytes
    • @author MrCaspan (https://github.com/MrCaspan)
    • @param $bytes
    • @return string
      */
      protected function formatBytes($bytes)
      {
      $i = floor(log($bytes, 1024));
      return round($bytes / pow(1024, $i), [0, 0, 2, 2, 3][$i]) . ['B', 'kB', 'MB', 'GB', 'TB'][$i];
      }
      }

?>`

@LuizSantos22
Copy link

LuizSantos22 commented Dec 16, 2021

@kbpcit
In case you want an alternative method which works directly from admin panel, you got this one:
https://github.com/fballiano/magento1-image-cleaner

Regards

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment