Last active
July 7, 2021 10:21
-
-
Save jeroenvermeulen/e1dc727bcaadeade14ea to your computer and use it in GitHub Desktop.
Script to clean up unused Magento images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
/** | |
* mh_remove_unused_product_images.php | |
* | |
* NOTICE OF LICENSE | |
* | |
* This source file is subject to the Open Software License (OSL 3.0) | |
* that is bundled with this package in the file LICENSE.txt. | |
* It is also available through the world-wide-web at this URL: | |
* http://opensource.org/licenses/osl-3.0.php | |
* | |
* @copyright Copyright (c) 2014 Jeroen Vermeulen (http://www.jeroenvermeulen.eu) | |
* @license http://opensource.org/licenses/osl-3.0.php Open Software License (OSL 3.0) | |
*/ | |
error_reporting(E_ALL | E_STRICT); | |
ini_set('display_errors', 1); | |
$magePaths = [ dirname(__FILE__).'/..', dirname(__FILE__), dirname(__FILE__).'/../..' ]; | |
$tool = new Tool($magePaths); | |
$tool->run(); | |
exit; | |
/** | |
* Class Tool | |
* | |
* Tool to remove unused Magento images from disk. | |
* | |
* Looks for usage in: | |
* - Product media gallery | |
* - Product EAV attributes: small_image, thumbnail, image | |
* - These product attributes in flat tables | |
* | |
*/ | |
class Tool | |
{ | |
/** @var string - Path to the Magento root */ | |
protected $magePath; | |
/** @var string - Path to the product images root */ | |
protected $imagesPath; | |
/** @var mysqli - MySQLi database adapter object */ | |
protected $mysqli; | |
/** @var string - Hostname of MySQL server */ | |
protected $dbHost; | |
/** @var string - Username for MySQL server */ | |
protected $dbUser; | |
/** @var string - Password for MySQL server */ | |
protected $dbWord; | |
/** @var string - Database name for MySQL server */ | |
protected $dbName; | |
/** @var string - Table prefix for database */ | |
protected $dbPrefix; | |
/** @var array - Magento product attribute names which contain image paths */ | |
protected $imageFields = array('small_image', 'thumbnail', 'image'); | |
/** @var bool - True if script is executed using CLI */ | |
protected $cliRun = true; | |
/** @var bool - True if script is executed using a browser, not recommended. */ | |
protected $webRun = false; | |
/** | |
* Constructor. Reads config and checks images path. | |
* | |
* @param $magePaths | |
* @throws Exception | |
*/ | |
public function __construct($magePaths) | |
{ | |
$this->beNice(); | |
$this->cliRun = ( isset( $_SERVER['TERM'] ) && 'dumb' != $_SERVER['TERM'] ); | |
$this->webRun = isset( $_SERVER['HTTP_HOST'] ); | |
if ( $this->webRun ) { | |
header( "Content-Type: text/plain" ); | |
} | |
foreach ($magePaths as $magePath) { | |
$xmlFile = $magePath . '/app/etc/local.xml'; | |
if (is_readable($xmlFile)) { | |
$this->magePath = $magePath; | |
$this->imagesPath = realpath($this->magePath . '/media/catalog/product'); | |
printf("Using config: %s\n", $xmlFile); | |
$xml = simplexml_load_file($xmlFile, 'SimpleXMLElement', LIBXML_NOCDATA); | |
$this->dbHost = strval($xml->global->resources->default_setup->connection->host); | |
$this->dbUser = strval($xml->global->resources->default_setup->connection->username); | |
$this->dbName = strval($xml->global->resources->default_setup->connection->dbname); | |
$this->dbWord = strval($xml->global->resources->default_setup->connection->password); | |
$this->dbPrefix = strval($xml->global->resources->db->table_prefix); | |
unset($xml); | |
break; | |
} | |
$configFile = $magePath . '/app/etc/env.php'; | |
if (is_readable($configFile)) { | |
$this->magePath = $magePath; | |
$this->imagesPath = realpath($this->magePath . '/pub/media/catalog/product'); | |
printf("Using config: %s\n", $configFile); | |
$mage2Config = include($configFile); | |
$this->dbHost = $mage2Config['db']['connection']['default']['host']; | |
$this->dbUser = $mage2Config['db']['connection']['default']['username']; | |
$this->dbName = $mage2Config['db']['connection']['default']['dbname']; | |
$this->dbWord = $mage2Config['db']['connection']['default']['password']; | |
$this->dbPrefix = $mage2Config['db']['table_prefix']; | |
unset($mage2Config); | |
} | |
} | |
if (empty($this->magePath)) { | |
throw new Exception("ERROR: Magento not found.\n"); | |
} | |
// Check images path | |
if (!is_dir($this->imagesPath)) { | |
throw new Exception(sprintf("ERROR: Product images directory '%s' does not exist.\n", $this->imagesPath)); | |
} | |
} | |
/** | |
* Runs the actual tool. | |
* | |
* @return bool | |
* @throws Exception | |
*/ | |
public function run() | |
{ | |
// Open database connection | |
$this->mysqli = new mysqli($this->dbHost, $this->dbUser, $this->dbWord, $this->dbName); | |
if ($this->mysqli->connect_errno) { | |
throw new Exception(sprintf("ERROR: Failed to connect to MySQL: %s\n", $this->mysqli->connect_error)); | |
} | |
// Add media gallery imags | |
$dbImages = $this->getQueryColumn( | |
sprintf("SELECT `value` FROM `%scatalog_product_entity_media_gallery`", | |
$this->dbPrefix)); | |
// Add images from product attributes | |
$attrIds = $this->getQueryColumn( | |
sprintf("SELECT `attribute_id` | |
FROM `%seav_attribute` INNER JOIN `%seav_entity_type` USING (`entity_type_id`) | |
WHERE `entity_type_code` = 'catalog_product' | |
AND `attribute_code` IN (%s)", | |
$this->dbPrefix, $this->dbPrefix, "'" . implode("','", $this->imageFields) . "'")); | |
$eavImages = $this->getQueryColumn( | |
sprintf("SELECT `value` FROM `%scatalog_product_entity_varchar` WHERE `attribute_id` IN (%s)", | |
$this->dbPrefix, implode(',', $attrIds))); | |
$dbImages = array_merge($dbImages, $eavImages); | |
// Add images from flat tables, to be sure we don't break the current site | |
$flatTables = $this->getQueryColumn( | |
sprintf("SHOW TABLES LIKE '%scatalog_product_flat_%%'", $this->dbPrefix)); | |
foreach ($flatTables as $flatTable) { | |
foreach ($this->imageFields as $field) { | |
if ($this->fieldExists($flatTable, $field)) { | |
$fieldImages = $this->getQueryColumn(sprintf("SELECT `%s` FROM `%s`", $field, $flatTable)); | |
$dbImages = array_merge($dbImages, $fieldImages); | |
} | |
} | |
} | |
// Make array unique, convert to real filename | |
$dbImages = array_unique($dbImages); | |
$dbInvalidCount = 0; | |
$keys = array_keys($dbImages); // Save keys because we will manipulate array | |
foreach ($keys as $key) { | |
$dbImages[$key] = trim($dbImages[$key]); | |
if (empty($dbImages[$key])) { | |
unset($dbImages[$key]); | |
} else { | |
$fullPath = realpath($this->imagesPath . $dbImages[$key]); | |
if (false === $fullPath) { | |
// Finding path failed, filename from database does not exist on filesystem, ignore | |
unset($dbImages[$key]); | |
$dbInvalidCount++; | |
} elseif (0 !== strpos($fullPath, $this->imagesPath)) { | |
// Something wrong, file is outside product images root, for example contains '../' | |
printf("Warning: Image path outside image root used: '%s'.\n", $fullPath); | |
unset($dbImages[$key]); | |
$dbInvalidCount++; | |
} else { | |
$dbImages[$key] = $fullPath; | |
} | |
} | |
} | |
$dbImages = array_unique($dbImages); // After realpath some paths may be the same | |
printf("Found %d invalid images in database.\n", $dbInvalidCount); | |
printf("Found %d valid images in database.\n", count($dbImages)); | |
// Close database connection, we don't need it anymore | |
$this->mysqli->close(); | |
// Find images on filesystem using shell command "find" | |
$imageFiles = array(); | |
chdir($this->imagesPath); | |
// Look for images in all subdirs 1 that are 1 char lang | |
exec('find ./? -type f', $imageFiles); | |
printf("Found %d image files on disk.\n", count($imageFiles)); | |
// Compare and build list of images to delete | |
$dbImageFlip = array_flip($dbImages); | |
$deleteList = array(); | |
$deleteSize = 0; | |
foreach ($imageFiles as $imageFile) { | |
if ('./' == substr($imageFile, 0, 2)) { | |
$imageFile = substr($imageFile, 2); | |
} | |
$fullFilePath = $this->imagesPath . '/' . $imageFile; | |
if (!isset($dbImageFlip[$fullFilePath])) { | |
if (is_writable($fullFilePath)) { | |
$deleteList[] = $fullFilePath; | |
$deleteSize += filesize($fullFilePath) / 1024 / 1024; // Add in Mb | |
} else { | |
printf("Warning: File '%s' is not writable, skipping.\n", $fullFilePath); | |
} | |
} | |
} | |
// Let the user descide what to do | |
if (empty($deleteList)) { | |
echo "Found no images to clean up.\n"; | |
} else { | |
printf("Found %d image files to be deleted, %.02f Mb. ", count($deleteList), $deleteSize ); | |
if ($this->cliRun) { | |
echo "Are you sure (y/N)? "; | |
$handle = fopen("php://stdin", "r"); | |
$userInput = fgets($handle); | |
if (strtolower(substr(trim($userInput), 0, 1)) != 'y') { | |
echo "ABORTING!\n"; | |
return false; | |
} | |
} | |
echo "\n"; | |
// The actual deletion of the files | |
foreach ($deleteList as $deleteFile) { | |
unlink($deleteFile); | |
} | |
// Remove empty dirs | |
shell_exec(sprintf('find %s -depth -type d -empty -exec rmdir {} \;', $this->imagesPath)); | |
} | |
echo "Done.\n"; | |
return true; | |
} | |
/** | |
* Execute a query and return the MySQLi result object | |
* | |
* @param string $query | |
* @return mysqli_result | |
* @throws Exception | |
*/ | |
protected function sqlQuery($query) | |
{ | |
$queryResult = $this->mysqli->query($query); | |
if (!$queryResult) { | |
throw new Exception(sprintf("ERROR: Error '%s' running query '%s'.\n", $this->mysqli->error, $query)); | |
} | |
return $queryResult; | |
} | |
/** | |
* Execute a query, and return the first field of each resulting row, in one array. | |
* | |
* @param $query | |
* @return array | |
* @throws Exception | |
*/ | |
protected function getQueryColumn($query) | |
{ | |
$result = array(); | |
$queryResult = $this->sqlQuery($query); | |
while ($row = $queryResult->fetch_row()) { | |
$result[] = strval($row[0]); | |
} | |
return $result; | |
} | |
/** | |
* Check if a field exists in a table in the database | |
* | |
* @param $table | |
* @param $column | |
* @return bool | |
* @throws Exception | |
*/ | |
protected function fieldExists($table, $column) | |
{ | |
$queryResult = $this->sqlQuery(sprintf("SHOW COLUMNS FROM `%s` LIKE '%s'", $table, $column)); | |
return (0 < $queryResult->num_rows); | |
} | |
/** | |
* Be nice to other processes in disk and CPU usage. | |
* We give this script low priority so we don't bother more important processes. | |
*/ | |
protected function beNice() | |
{ | |
proc_nice(19); | |
if (file_exists('/usr/bin/ionice')) { | |
shell_exec(sprintf('/usr/bin/ionice -c 3 -p %d', getmypid())); | |
} | |
} | |
} |
@Ang90 The Gist is the page you are looking at. I have updated the script so you can put it in the webroot (httpdocs
or public_html
etc) and execute it from a web browser. Then you don't need SSH. I am sorry but I can't give any further support. Please ask a fellow developer to help you if needed.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you for answering! Where's Gist? Can't it also run from browser, Please? What are the commands via ssh? Sorry for this question. But I'm not ssh practice. Thank you!