Last active
April 8, 2019 07:02
-
-
Save plutocrat/7636841e480f483a97c0d17568ac0ef1 to your computer and use it in GitHub Desktop.
WordPress Uploads Directory Cleanout
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# wget https://gist.githubusercontent.com/plutocrat/7636841e480f483a97c0d17568ac0ef1/raw/upload_clean.sh
#
# WordPress uploads directory clean-out.
#
# Dumps your database out, extracts image paths from it and compares it against
# a list of files from the filesystem. Files that exist under wp-content/uploads
# but are never mentioned in the database dump are copied into $IMGBK with rsync
# and then removed from wp-content/uploads.
#
# Usage: run from the WordPress root (the directory containing wp-config.php).
#
# Needs wp-cli to dump the database.
# Only works on Linuxy servers (uses GNU grep -P and rsync).
# It will give you a chance to bail before it deletes anything.
# If it saves you a few hours, show your appreciation with bitcoin: 35Ws8qgKUCiVaca7vg5NWqkfQozPfzb4my

# Treat use of an unset variable as an error instead of expanding it to "".
set -u

# Working Dir
WKDIR=zz_temp
# Image Storage Dir
IMGBK=image_backup
# Where is wp-cli
WPCLI=/usr/local/bin/wp-cli

# Print the arguments to stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the human-readable total size of directory $1.
dir_size() {
  du -sh -- "$1" | awk '{ print $1 }'
}

# Print the number of lines in file $1.
count_lines() {
  wc -l < "$1" | awk '{ print $1 }'
}

# Make sure we're in the right dir
if [[ -e wp-config.php ]]; then
  echo "Found wp-config.php. You're in the correct directory."
else
  die "wp-config not found. Please move this script to the correct directory and try again. "
fi

if [[ -e "$WPCLI" ]]; then
  echo "Found wp-cli"
else
  die "wp-cli not found. Please install and/or update the path in this script. "
fi

# -p so that a re-run with a left-over working directory still works.
mkdir -p -- "$WKDIR" || die "Could not create working directory $WKDIR"

# The dump contains sensitive data: make sure it is removed on every exit
# path, including CTRL-C and the error exits below.
trap 'rm -f -- "$WKDIR/dump.sql"' EXIT

# Get list of files currently on System.
# LC_ALL=C gives sort and comm the same byte-wise ordering regardless of the
# caller's locale; comm requires both inputs sorted in the collation it uses.
find wp-content/uploads/ -type f | LC_ALL=C sort -u > "$WKDIR/filelist.txt"

# Dump Database. If this fails the database list would come out empty and
# every upload would be flagged as unused, so stop here.
"$WPCLI" db export "$WKDIR/dump.sql" --skip-themes \
  || die "Database export failed; no uploads have been touched."

# Search database for any mention of files in wp-content/uploads.
# -a forces text mode so a dump containing NUL or invalid bytes still yields
# matches instead of a "Binary file ... matches" notice.
LC_ALL=C grep -a -i -o -P "wp-content/uploads/[0-9][0-9][0-9][0-9]/[0-9][0-9]/.*?\.(jpg|png|jpeg|pdf|gif)" "$WKDIR/dump.sql" \
  | LC_ALL=C sort -u > "$WKDIR/dblist.txt"
pipe_status=("${PIPESTATUS[@]}")

# Delete the dump file as it contains sensitive stuff
rm -f -- "$WKDIR/dump.sql"

# grep exits 1 when nothing matched (allowed) and >1 on a real error.
if (( pipe_status[0] > 1 || pipe_status[1] != 0 )); then
  die "Could not extract file paths from the database dump; no uploads have been touched."
fi

DBCOUNT=$(count_lines "$WKDIR/dblist.txt")
FILECOUNT=$(count_lines "$WKDIR/filelist.txt")
echo "There are $DBCOUNT files in the database and $FILECOUNT files on the filesystem"
echo "Details of these can be found in $WKDIR/dblist.txt and $WKDIR/filelist.txt"
echo

# Split the two sorted lists into: in both / only in DB / only on disk.
LC_ALL=C comm -12 "$WKDIR/dblist.txt" "$WKDIR/filelist.txt" > "$WKDIR/images_in_both.txt"
LC_ALL=C comm -23 "$WKDIR/dblist.txt" "$WKDIR/filelist.txt" > "$WKDIR/images_only_in_db.txt"
LC_ALL=C comm -13 "$WKDIR/dblist.txt" "$WKDIR/filelist.txt" > "$WKDIR/images_only_in_filesystem.txt"

COMMONCOUNT=$(count_lines "$WKDIR/images_in_both.txt")
DBONLYCOUNT=$(count_lines "$WKDIR/images_only_in_db.txt")
FILEONLYCOUNT=$(count_lines "$WKDIR/images_only_in_filesystem.txt")

echo "Comparing the files, there are ..."
echo " - $COMMONCOUNT files in BOTH the database and filesystem"
echo " - $DBONLYCOUNT files are ONLY found in the database. You may want to check your 404 logs for these"
echo " - $FILEONLYCOUNT files are ONLY found in the filesystem, and can probably be removed."
echo "Look in the following files for details of which files are in which group."
ls -hal -- "$WKDIR"/images_*
echo
echo "WARNING"
echo "The next step will move all files in $WKDIR/images_only_in_filesystem.txt to $IMGBK"
echo "If you want to do this manually, pres CTRL-C to end this script. Remember to clean up $WKDIR"
echo

# If no confirmation can be read (EOF / non-interactive stdin), do not go on
# to the destructive step.
read -r -p "Press RETURN to continue" \
  || { echo; die "No confirmation received; aborting before any files are moved."; }

mkdir -p -- "$IMGBK" || die "Could not create backup directory $IMGBK"

# --files-from keeps each file's relative path, so the tree is recreated
# under $IMGBK. Never delete originals unless the backup copy succeeded.
rsync -a --files-from="$WKDIR/images_only_in_filesystem.txt" ./ "./$IMGBK/" \
  || die "rsync into $IMGBK failed; originals have NOT been deleted."

IMAGESIZE=$(dir_size "$IMGBK")
echo
echo "$IMAGESIZE of images synced into $IMGBK"
echo "Now deleting originals"
echo

# Delete listed files
echo "Size of uploads folder before is $(dir_size wp-content/uploads/)"
# IFS= and -r keep leading/trailing whitespace and backslashes in file names.
while IFS= read -r FILE; do
  rm -- "$FILE"
done < "$WKDIR/images_only_in_filesystem.txt"
echo "Size of uploads folder after is $(dir_size wp-content/uploads/)"

# Optional: have wp-cli regenerate only the thumbnails that are now missing.
echo "If you'd like to regenerate image thumbnails, hit Enter"
echo "The process may take a while. You could run it later yourself with "
echo " wp-cli media regenerate --only-missing --skip-themes"
read -r -p "If you don't want to do this now, hit CTRL-C to end"
"$WPCLI" media regenerate --only-missing --skip-themes
echo "Size of uploads folder after thumbnail rebuild is $(dir_size wp-content/uploads/)"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment