Skip to content

Instantly share code, notes, and snippets.

@plutocrat
Last active April 8, 2019 07:02
Show Gist options
  • Save plutocrat/7636841e480f483a97c0d17568ac0ef1 to your computer and use it in GitHub Desktop.
Save plutocrat/7636841e480f483a97c0d17568ac0ef1 to your computer and use it in GitHub Desktop.
Wordpress Uploads Directory Cleanout
#!/bin/bash
# wget https://gist.githubusercontent.com/plutocrat/7636841e480f483a97c0d17568ac0ef1/raw/upload_clean.sh
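# Dumps your database, extracts the upload paths mentioned in it and compares them against a list of the files on the filesystem.
# Needs wp-cli to dump the database, plus rsync to back up files before they are deleted.
# Only works on Linux-like servers (it relies on GNU options such as grep -P and du --max-depth).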
# It will give you a chance to bail before it deletes anything.
# If it saves you a few hours, show your appreciation with bitcoin: 35Ws8qgKUCiVaca7vg5NWqkfQozPfzb4my
# Working directory for the generated file lists (relative to the current
# directory, which the check below requires to contain wp-config.php).
WKDIR=zz_temp
# Directory that files found only on the filesystem are copied into before
# the originals are removed.
IMGBK=image_backup
# Path to the wp-cli executable.
WPCLI=/usr/local/bin/wp-cli

# Pre-flight check 1: all later paths (wp-content/uploads/, $WKDIR, $IMGBK)
# are relative, so refuse to run anywhere but next to wp-config.php.
if [[ -e wp-config.php ]]; then
  echo "Found wp-config.php. You're in the correct directory."
else
  echo "wp-config not found. Please move this script to the correct directory and try again. " >&2
  exit 1
fi

# Pre-flight check 2: wp-cli must be a regular, executable file.
# The expansion is quoted on purpose: an unquoted, empty $WPCLI would turn the
# test into `[ -e ]`, which is always true, and a path containing spaces would
# make the test itself error out.
if [[ -f "$WPCLI" && -x "$WPCLI" ]]; then
  echo "Found wp-cli"
else
  echo "wp-cli not found or not executable at '$WPCLI'. Please install and/or update the path in this script. " >&2
  exit 1
fi
# Create the working directory. -p makes a re-run with a leftover directory
# succeed silently; any other failure is fatal because every later step
# writes into this directory.
if ! mkdir -p -- "$WKDIR"; then
  echo "Could not create working directory $WKDIR" >&2
  exit 1
fi
# Get list of files currently on System
find wp-content/uploads/ -type f | sort | uniq > "$WKDIR/filelist.txt"
# Dump Database.
# The exit status MUST be checked: if the export fails, the list of files
# referenced by the database would be empty and every upload would later be
# classified as unused and queued for removal.
if ! "$WPCLI" db export "$WKDIR/dump.sql" --skip-themes; then
  # Do not leave a partial dump (it may contain sensitive data) behind.
  rm -f -- "$WKDIR/dump.sql"
  echo "Database export failed; aborting so that no upload is wrongly flagged as unused." >&2
  exit 1
fi
# Search database for any mention of files in wp-content/uploads
# (paths of the form wp-content/uploads/YYYY/MM/<name>.<ext>, case-insensitive,
# shortest match up to one of the listed extensions).
grep -i -o -P "wp-content/uploads/[0-9][0-9][0-9][0-9]/[0-9][0-9]/.*?\.(jpg|png|jpeg|pdf|gif)" "$WKDIR/dump.sql" | sort | uniq > "$WKDIR/dblist.txt"
# Delete the dump file as it contains sensitive stuff
rm -f -- "$WKDIR/dump.sql"
# Print the line count of the file given as $1, i.e. the first field of the
# `wc -l` output. Prints an empty line when wc cannot read the file.
count_lines() {
  local total _rest
  read -r total _rest < <(wc -l "$1")
  printf '%s\n' "$total"
}

# The two sorted input lists: paths mentioned in the database dump and paths
# found on the filesystem.
db_list="$WKDIR/dblist.txt"
fs_list="$WKDIR/filelist.txt"

DBCOUNT=$(count_lines "$db_list")
FILECOUNT=$(count_lines "$fs_list")
printf '%s\n' \
  "There are $DBCOUNT files in the database and $FILECOUNT files on the filesystem" \
  "Details of these can be found in $WKDIR/dblist.txt and $WKDIR/filelist.txt" \
  ""

# Split the two lists into three groups with comm:
#   -12 keeps lines common to both, -23 lines unique to the first file,
#   -13 lines unique to the second file.
comm -12 "$db_list" "$fs_list" > "$WKDIR/images_in_both.txt"
comm -23 "$db_list" "$fs_list" > "$WKDIR/images_only_in_db.txt"
comm -13 "$db_list" "$fs_list" > "$WKDIR/images_only_in_filesystem.txt"

COMMONCOUNT=$(count_lines "$WKDIR/images_in_both.txt")
DBONLYCOUNT=$(count_lines "$WKDIR/images_only_in_db.txt")
FILEONLYCOUNT=$(count_lines "$WKDIR/images_only_in_filesystem.txt")

# Summary for the operator, followed by a listing of the three group files.
printf '%s\n' \
  "Comparing the files, there are ..." \
  " - $COMMONCOUNT files in BOTH the database and filesystem" \
  " - $DBONLYCOUNT files are ONLY found in the database. You may want to check your 404 logs for these" \
  " - $FILEONLYCOUNT files are ONLY found in the filesystem, and can probably be removed." \
  "Look in the following files for details of which files are in which group."
ls -hal "$WKDIR"/images_*
echo
# Last chance for the operator to stop before anything is moved or removed.
echo "WARNING"
echo "The next step will move all files in $WKDIR/images_only_in_filesystem.txt to $IMGBK"
echo "If you want to do this manually, pres CTRL-C to end this script. Remember to clean up $WKDIR"
echo
# The typed input is discarded; -r only stops backslashes being interpreted.
read -r -p "Press RETURN to continue"

# Back up first. -p tolerates a backup directory left over from an earlier run.
if ! mkdir -p -- "$IMGBK"; then
  echo "Could not create backup directory $IMGBK; nothing was deleted." >&2
  exit 1
fi
# Copy every listed file into the backup directory. With --files-from the
# listed relative paths are recreated below the destination.
# The originals must only be removed when this copy succeeded.
if ! rsync -a --files-from="$WKDIR/images_only_in_filesystem.txt" ./ "./$IMGBK/"; then
  echo "rsync reported an error while backing up to $IMGBK; originals were NOT deleted." >&2
  exit 1
fi
# Report the size of the configured backup directory (not a hard-coded name).
IMAGESIZE=$(du -h --max-depth=0 "$IMGBK/" | awk '{ print $1}')
echo
echo "$IMAGESIZE of images synced into $IMGBK"
echo "Now deleting originals"
echo
# Delete listed files
echo "Size of uploads folder before is $( du -h wp-content/uploads/ --max-depth=0 | awk '{ print $1}' )"
# IFS= and -r keep each line exactly as written (leading/trailing whitespace
# and backslashes), so the name removed is the name that was listed.
while IFS= read -r FILE; do
  # Ignore blank lines rather than calling rm with an empty name.
  [[ -n "$FILE" ]] || continue
  # Belt and braces: only remove an original whose backup copy is present.
  if [[ -e "$IMGBK/$FILE" ]]; then
    rm -- "$FILE"
  else
    echo "Skipping $FILE: no backup copy found in $IMGBK" >&2
  fi
done < "$WKDIR/images_only_in_filesystem.txt"
echo "Size of uploads folder after is $( du -h wp-content/uploads/ --max-depth=0 | awk '{ print $1}' )"
# Optional last step: have wp-cli regenerate the thumbnails that are missing.
# The operator can still stop the script with CTRL-C at the prompt below.

# Print the human-readable total size of wp-content/uploads/ (first field of
# the du summary line).
_uploads_du() {
  du -h wp-content/uploads/ --max-depth=0 | awk '{ print $1}'
}

printf '%s\n' \
  "If you'd like to regenerate image thumbnails, hit Enter" \
  "The process may take a while. You could run it later yourself with " \
  " wp-cli media regenerate --only-missing --skip-themes"
# Whatever is typed here is discarded; the prompt only pauses the script.
read -p "If you don't want to do this now, hit CTRL-C to end"
"$WPCLI" media regenerate --only-missing --skip-themes
echo "Size of uploads folder after thumbnail rebuild is $(_uploads_du)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment