Created
February 1, 2019 14:09
-
-
Save srvanderplas/f4551edaf2f9d2e256b809c40964bde9 to your computer and use it in GitHub Desktop.
Bash script to slice images up into smaller images (with an offset) in parallel, save each image, and remove all blank images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Slice images up into smaller images (with an offset) in parallel, save each
# slice, and move blank / non-square slices out of the way.
#
# This uses GNU parallel: cite:
# @article{Tange2011a,
# title = {GNU Parallel - The Command-Line Power Tool},
# author = {O. Tange},
# address = {Frederiksberg, Denmark},
# journal = {;login: The USENIX Magazine},
# month = {Feb},
# number = {1},
# volume = {36},
# url = {http://www.gnu.org/s/parallel},
# year = {2011},
# pages = {42-47}
# }

# Working directories (all keep their trailing slash; paths are built by plain
# concatenation). Any changes must be propagated through format_picture(),
# which carries its own copy of these values.
ORIG_DIR="./photos/"
PNG_DIR="./pngs/"
CROP_DIR="./cropped/"
EDGE_DIR="./edges/"
SLICE_DIR_Color="./colorslice64/"
SLICE_DIR_Color2="./colorslice128/"
SLICE_DIR="./slice64/"
SLICE_DIR2="./slice128/"
SMALL_DIR="./rejects/"
# NOTE(review): not referenced by the code in this script (filter_images sets
# its own whiteThr); kept in case something else sources this file.
whiteTHR=253

# Create every output directory. ORIG_DIR is the input and is deliberately not
# created here. `mkdir -p` is a no-op when the directory already exists.
# (A NORM_DIR for colour-normalised images used to be created here as well;
# that step is currently disabled.)
for work_dir in "$PNG_DIR" "$CROP_DIR" "$EDGE_DIR" \
  "$SLICE_DIR" "$SLICE_DIR2" \
  "$SLICE_DIR_Color" "$SLICE_DIR_Color2" "$SMALL_DIR"; do
  mkdir -p -- "$work_dir" || {
    printf 'cannot create directory %s\n' "$work_dir" >&2
    exit 1
  }
done
unset work_dir

# Set up manifest files
usefulfile='useful_files.csv'
unusefulfile='not_useful_files.csv'

# Write the CSV header to manifest $1, but only when the file does not exist
# yet, so rows recorded by an earlier run are kept.
# (The previous test used -d, which is never true for a regular file, so the
# manifest was truncated on every run.)
init_manifest() {
  local manifest=$1
  if [[ ! -f "$manifest" ]]; then
    printf '%s\n' "file, size, mean, discard" > "$manifest"
  fi
}

init_manifest "$usefulfile"
init_manifest "$unusefulfile"
##### Slice picture up and format appropriately ################################ | |
#######################################
# Pad an image with a white border so that a second tiling is shifted
# relative to the un-padded one. Skipped when the output already exists.
# Arguments: $1 - source image, $2 - output image, $3 - pixels to add to
#            each dimension
# Returns:   0 on success or skip, non-zero if identify/convert fails
#######################################
_format_picture_pad() {
  local src=$1 dst=$2 pad=$3
  local geometry

  if [[ -f "$dst" ]]; then
    return 0
  fi
  geometry=$(identify -format "%[fx:W+${pad}]x%[fx:H+${pad}]" "$src") || return 1
  convert "$src" \
    -gravity northeast \
    -background white \
    -extent "$geometry" "$dst"
}

#######################################
# Cut an image and its padded twin into square tiles.
# This is a crude way to avoid repeating slicing for images that have already
# been processed: if the output directory already holds any file whose name
# starts with the image stem, nothing is done.
# Arguments: $1 - un-padded image, $2 - padded image, $3 - tile edge length,
#            $4 - output directory (with trailing slash),
#            $5 - image stem (file name without extension),
#            $6 - tag inserted between stem and tile index (e.g. 64, color128)
# Returns:   0 on success or skip, non-zero if convert fails
#######################################
_format_picture_slice() {
  local plain=$1 padded=$2 tile=$3 out_dir=$4 stem=$5 tag=$6
  local existing

  existing=$(find "$out_dir" -maxdepth 1 -type f -name "$stem*" | wc -l)
  if (( existing >= 1 )); then
    return 0
  fi
  convert "$plain" -quiet -gravity Center -crop "${tile}x${tile}" \
    "${out_dir}${stem}_${tag}_%03d.png" || return 1
  # Tiles from the padded image get a ".5" suffix on their index.
  convert "$padded" -quiet -gravity Center -crop "${tile}x${tile}" \
    "${out_dir}${stem}_${tag}_%03d.5.png" || return 1
}

#######################################
# Convert one photo to PNG, trim it, edge-detect it, and slice both the edge
# image and the colour image into 64x64 and 128x128 tiles (plus half-tile
# shifted tilings). Each intermediate file is only generated when it does not
# exist yet.
# Arguments: $1 - photo file name or path; only its basename is used and the
#            file is read from ./photos/
# Returns:   0 on success, non-zero as soon as one ImageMagick step fails
#######################################
format_picture () {
  local origfile basenoext basefile
  local offset64="offset64_"
  local offset32="offset32_"
  # Directory layout, duplicated from the top of the script: this function is
  # exported and run by GNU parallel in a separate shell, where the script's
  # unexported globals are not visible.
  local ORIG_DIR="./photos/"
  local PNG_DIR="./pngs/"
  local CROP_DIR="./cropped/"
  local EDGE_DIR="./edges/"
  local SLICE_DIR="./slice64/"
  local SLICE_DIR2="./slice128/"
  local SLICE_DIR_Color="./colorslice64/"
  local SLICE_DIR_Color2="./colorslice128/"

  origfile=$(basename -- "$1")
  basenoext=${origfile%.*}
  basefile="$basenoext.png"

  # Convert to PNG
  if [[ ! -f "$PNG_DIR$basefile" ]]; then
    convert "$ORIG_DIR$origfile" "$PNG_DIR$basefile" || return 1
  fi

  # Crop
  if [[ ! -f "$CROP_DIR$basefile" ]]; then
    convert "$PNG_DIR$basefile" -trim "$CROP_DIR$basefile" || return 1
  fi

  # Normalize colors (disabled)
  # ./redist -s Normal $CROP_DIR$basefile $NORM_DIR$basefile

  # Edge Detect
  if [[ ! -f "$EDGE_DIR$basefile" ]]; then
    convert "$CROP_DIR$basefile" -canny 0x1+3%+3% -negate -colorspace Gray \
      "$EDGE_DIR$basefile" || return 1
  fi

  # Edge images: offset by 32 pixels (for 64px tiles) and by 64 pixels (for
  # 128px tiles), i.e. half a tile each.
  _format_picture_pad "$EDGE_DIR$basefile" "$EDGE_DIR$offset32$basefile" 32 || return 1
  _format_picture_pad "$EDGE_DIR$basefile" "$EDGE_DIR$offset64$basefile" 64 || return 1

  # Color images: same two offsets.
  _format_picture_pad "$CROP_DIR$basefile" "$CROP_DIR$offset32$basefile" 32 || return 1
  _format_picture_pad "$CROP_DIR$basefile" "$CROP_DIR$offset64$basefile" 64 || return 1

  # Actually slice images
  _format_picture_slice "$EDGE_DIR$basefile" "$EDGE_DIR$offset32$basefile" \
    64 "$SLICE_DIR" "$basenoext" 64 || return 1
  _format_picture_slice "$EDGE_DIR$basefile" "$EDGE_DIR$offset64$basefile" \
    128 "$SLICE_DIR2" "$basenoext" 128 || return 1
  _format_picture_slice "$CROP_DIR$basefile" "$CROP_DIR$offset32$basefile" \
    64 "$SLICE_DIR_Color" "$basenoext" color64 || return 1
  _format_picture_slice "$CROP_DIR$basefile" "$CROP_DIR$offset64$basefile" \
    128 "$SLICE_DIR_Color2" "$basenoext" color128 || return 1
}
# The helpers must be exported too, otherwise the shells started by GNU
# parallel cannot find them.
export -f format_picture _format_picture_pad _format_picture_slice
##### Remove Useless Images #################################################### | |
#######################################
# Classify one slice and record it in a manifest.
# A slice is discarded when its mean pixel value (0-255 scale, rounded) is
# above the white threshold, or when it is not square. Discarded slices are
# moved to ./rejects/ and logged in not_useful_files.csv; all others are
# logged in useful_files.csv.
# Arguments: $1 - path of the slice image
# Outputs:   progress messages on stdout; one CSV row
#            "name, W x H, mean, discard" appended to a manifest
# Returns:   0 on success, non-zero if ImageMagick fails or the move fails
#######################################
filter_images() {
  local usefulfile='useful_files.csv'
  local unusefulfile='not_useful_files.csv'
  local whiteThr=253
  local imgval imgvalint imgsize filename imw imh toowhite removefile savestr

  imgval=$(convert "$1" -format "%[fx:mean*255]" info:) || return 1
  if [[ -z "$imgval" ]]; then
    # Without this guard an unreadable image would be rounded to 0 and
    # silently logged as useful.
    printf 'could not measure %s\n' "$1" >&2
    return 1
  fi
  # LC_ALL=C so the decimal point is parsed regardless of the user's locale.
  imgvalint=$(LC_ALL=C printf '%.0f' "$imgval") || return 1

  # 1 when width and height differ (partial tile at an image border).
  imgsize=$(identify -format "%[fx:w!=h]" "$1") || return 1
  filename="./rejects/$(basename -- "$1")"
  imw=$(identify -format "%w" "$1")
  imh=$(identify -format "%h" "$1")

  toowhite=$(( imgvalint > whiteThr ))
  if (( toowhite == 1 )); then
    echo "removing $1: mean value $imgval"
  fi
  if (( imgsize == 1 )); then
    echo "moving $1 to small pics folder $filename"
  fi

  removefile=$(( toowhite + imgsize > 0 ))
  savestr="$(basename -- "$1"), $imw x $imh, $imgval, $removefile"
  if (( removefile == 1 )); then
    mv -- "$1" "$filename" || return 1
    printf '%s\n' "$savestr" >> "$unusefulfile"
  else
    printf '%s\n' "$savestr" >> "$usefulfile"
  fi
}
export -f filter_images
##### Actually do stuff ######################################################## | |
# Stage 1: build intermediates and slices for every photo. The file list is
# NUL-delimited (instead of parsing `ls`) so unusual file names survive;
# hidden files are skipped, as `ls` did.
find ./photos -maxdepth 1 -type f ! -name '.*' -print0 \
  | parallel -0 format_picture {}

# Stage 2: weed out blank / non-square slices in each slice directory.
# The variables are quoted so that an empty value makes find fail instead of
# silently scanning the current directory.
for slice_dir in "$SLICE_DIR" "$SLICE_DIR2" "$SLICE_DIR_Color" "$SLICE_DIR_Color2"; do
  find "$slice_dir" -type f -print0 | parallel -0 filter_images
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# This uses GNU parallel: cite: | |
# @article{Tange2011a, | |
# title = {GNU Parallel - The Command-Line Power Tool}, | |
# author = {O. Tange}, | |
# address = {Frederiksberg, Denmark}, | |
# journal = {;login: The USENIX Magazine}, | |
# month = {Feb}, | |
# number = {1}, | |
# volume = {36}, | |
# url = {http://www.gnu.org/s/parallel}, | |
# year = {2011}, | |
# pages = {42-47} | |
# } | |
# Exit function | |
# Print all arguments as one message on stderr and terminate the shell with
# status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}
#######################################
# Delete a slice that is (almost) entirely white.
# The mean pixel value (0-255 scale, rounded) is compared against a threshold:
# 250 when the path contains "color", 253 otherwise.
# Arguments: $1 - path of the slice image
# Outputs:   a "removing ..." line on stdout for every deleted file
# Returns:   0 when the file was kept or deleted, non-zero if the image could
#            not be measured or removed
#######################################
filter_images() {
  local whiteThr imgval imgvalint

  if [[ $1 =~ color ]]; then
    whiteThr=250
  else
    whiteThr=253
  fi

  imgval=$(convert "$1" -format "%[fx:mean*255]" info:) || return 1
  if [[ -z "$imgval" ]]; then
    printf 'could not measure %s\n' "$1" >&2
    return 1
  fi
  # LC_ALL=C so the decimal point is parsed regardless of the user's locale.
  imgvalint=$(LC_ALL=C printf '%.0f' "$imgval") || return 1

  if (( imgvalint > whiteThr )); then
    echo "removing $1: mean value $imgval"
    rm -- "$1"
  fi
}
export -f filter_images
# Function to process a single shoe | |
#######################################
# Prepare a single shoe photo: convert it to PNG, trim it, and optionally
# mirror it and/or run edge detection. Intermediate files are reused unless
# --overwrite is given. The slicing stage itself is currently disabled (see
# the commented-out section at the end), so -x and -o are parsed but have no
# effect yet.
# Arguments: [-h] [-e] [-m] [-x n] [-o [n]] [--overwrite] [--out <outdir>] file
# Outputs:   usage text on stdout for -h; diagnostics on stderr
# Returns:   0 on success; 1 on a missing file argument, a missing option
#            value, or a failing mkdir/convert
#######################################
process_shoe() {
  local usage
  usage="$(basename "$0") [-h] [-e] [-m] [-x n] [-o n] [--overwrite] [--out <outdir>] file.jpg
where:
-h prints help
-e uses canny edge detection on the image
-m flips the image in x and y
-x n sets the image chunk size to n x n
-o n offsets the image by n x n pixels.
--overwrite writes over previously generated intermediate images
--out specifies the directory to store the intermediate and processed images.
Defaults to ./processed/. Directory will be created if it does not already exist."

  #--- Read in arguments and set flag variables --------------------------------
  local -a POSITIONAL=()
  # Initialize option variables
  local SIZE=64
  local EDGE=0
  local OFFSET=0
  local MIRROR=0
  local OUTPATH="./processed"
  local OVERWRITE=0
  local offset_re='^[0-9]+$'

  # Assumes space separated, e.g. ./myscript.sh -e conf -s /etc -l /usr/lib /etc/hosts
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h)
        printf '%s\n' "$usage"
        # return, not exit: exit would terminate a shell that calls this
        # function directly.
        return 0
        ;;
      -x|--size)
        if (( $# < 2 )); then
          printf '%s requires a value\n' "$1" >&2
          return 1
        fi
        SIZE=$2
        shift 2 # past argument and value
        ;;
      -e|--edge)
        EDGE=1
        shift # past argument
        ;;
      -o|--offset)
        # The value is optional: when the next word is not a number, fall
        # back to 32 and leave that word for the next loop iteration.
        if [[ ${2-} =~ $offset_re ]]; then
          OFFSET=$2
          shift 2 # past argument and value
        else
          OFFSET=32
          shift # past argument
        fi
        ;;
      -m|--mirror)
        MIRROR=1
        shift
        ;;
      --out)
        if (( $# < 2 )); then
          printf '%s requires a value\n' "$1" >&2
          return 1
        fi
        OUTPATH=$2
        shift 2 # past argument and value
        ;;
      --overwrite)
        OVERWRITE=1
        shift # past argument
        ;;
      *) # unknown option or the file path
        POSITIONAL+=("$1") # save it in an array for later
        shift # past argument
        ;;
    esac
  done
  set -- "${POSITIONAL[@]}" # restore positional parameters

  if (( $# < 1 )); then
    printf '%s\n' "$usage" >&2
    return 1
  fi
  if (( $# > 1 )); then
    echo "Too many left over arguments. Assuming $1 is the file path." >&2
  fi

  # --- Set Script variables ---------------------------------------------------
  local origfile basenoext basefile wkfile wkfileprev
  local ORIG_DIR PNG_DIR TMP_DIR SLICE_DIR
  origfile=$(basename -- "$1")
  basenoext=${origfile%.*}
  basefile="$basenoext.png"
  wkfile="$basenoext"

  # Ensure folders all exist. mkdir -p also creates $OUTPATH and does not fail
  # when several parallel jobs create the same directory at once.
  ORIG_DIR=$(dirname -- "$1")
  PNG_DIR="$OUTPATH/pngs"
  TMP_DIR="$OUTPATH/toslice"
  SLICE_DIR="$OUTPATH/slices" # only used by the disabled slicing stage below
  mkdir -p -- "$PNG_DIR" "$TMP_DIR" || return 1
  # mkdir -p -- "$SLICE_DIR"

  # Ensure the simple operations are done.
  # Each step runs when its output is missing or when --overwrite was given.
  # (OVERWRITE is tested arithmetically: the string "0" is non-empty, so a
  # plain [ "$OVERWRITE" ] is always true.)

  # Convert to PNG
  if (( OVERWRITE )) || [[ ! -f "$PNG_DIR/$basefile" ]]; then
    convert "$ORIG_DIR/$origfile" "$PNG_DIR/$basefile" || return 1
  fi

  # Crop
  if (( OVERWRITE )) || [[ ! -f "$TMP_DIR/$basefile" ]]; then
    convert "$PNG_DIR/$basefile" -trim "$TMP_DIR/$basefile" || return 1
  fi

  # Mirror: each enabled step appends a suffix to the working file name and
  # reads the previous step's output.
  wkfileprev=$wkfile
  if (( MIRROR == 1 )); then
    wkfile="${wkfileprev}_flip"
    if (( OVERWRITE )) || [[ ! -f "$TMP_DIR/$wkfile.png" ]]; then
      convert "$TMP_DIR/$wkfileprev.png" -flip -flop "$TMP_DIR/$wkfile.png" || return 1
    fi
  fi

  # Edge detect
  wkfileprev=$wkfile
  if (( EDGE == 1 )); then
    wkfile="${wkfile}_edge"
    if (( OVERWRITE )) || [[ ! -f "$TMP_DIR/$wkfile.png" ]]; then
      convert "$TMP_DIR/$wkfileprev.png" -canny 0x1+3%+3% -negate -colorspace Gray \
        "$TMP_DIR/$wkfile.png" || return 1
    fi
  fi

  # --- Disabled: offset, crop to whole tiles, slice, filter --------------------
  # NOTE(review): before re-enabling, the single-quoted identify format below
  # will not expand $OFFSET, and the [ "$OVERWRITE" ] test is always true.
  #
  # wkfileprev=$wkfile
  # if [ "$OFFSET" -gt "0" ]; then
  # wkfile=$wkfile'_offset'$OFFSET
  # if [ ! -f $TMP_DIR/$wkfile.png ] || [ "$OVERWRITE" ]; then
  # convert $TMP_DIR/$wkfileprev.png \
  # -gravity northeast \
  # -background white \
  # -extent $(identify -format '%[fx:W+$OFFSET]x%[fx:H+$OFFSET]' $TMP_DIR/$wkfileprev'.png' ) \
  # $TMP_DIR/$wkfile.png
  # fi
  # fi
  #
  # # Prepare for cropping image
  #
  # imw=$(identify -format "%w" $TMP_DIR/$wkfileprev.png)
  # imh=$(identify -format "%h" $TMP_DIR/$wkfileprev.png)
  #
  # full_tile_w=$(( $imw / $SIZE ))
  # full_tile_h=$(( $imh / $SIZE ))
  #
  # vcanv_w=$(( ($full_tile_w) * $SIZE ))
  # vcanv_h=$(( ($full_tile_h) * $SIZE ))
  #
  # cx=$(( ($vcanv_w - $imw ) / 2 ))
  # cy=$(( ($vcanv_h - $imh ) / 2 ))
  #
  # if [ "$cy" -ge "0" ]; then
  # cy='+'$cy
  # fi
  # if [ "$cx" -ge "0" ]; then
  # cx='+'$cx
  # fi
  #
  # vcanvszstr=$vcanv_w'x'$vcanv_h
  #
  # wkfileprev=$wkfile
  # wkfile=$wkfile'_crop'$vcanvszstr
  #
  # # echo "Full tiles: $full_tile_w x $full_tile_h, cropping image to $vcanvszstr from $cx, $cy"
  # # echo "convert $TMP_DIR/$wkfileprev.png -repage $vcanvszstr$cx$cy -crop $vcanvszstr $TMP_DIR/$wkfile.png"
  # # convert $TMP_DIR/$wkfileprev.png -repage $vcanvszstr$cx$cy -crop $vcanvszstr $TMP_DIR/$wkfile.png
  # convert $TMP_DIR/$wkfileprev.png -repage $vcanvszstr+0+0 -crop $vcanvszstr $TMP_DIR/$wkfile.png
  #
  #
  # wkfileprev=$wkfile
  # wkfile=$wkfile'_sz'$SIZE
  # szstr=$SIZE'x'$SIZE
  #
  # convert $TMP_DIR/$wkfileprev.png -quiet -crop $szstr $SLICE_DIR/$wkfile'_%03d.png'
  #
  # for i in $SLICE_DIR/$wkfile*.png; do
  # filter_images $i
  # done

  return 0
}
export -f process_shoe
# Test with single shoe
# process_shoe photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg
# process_shoe -e photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg
# process_shoe -m photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg
# process_shoe -e -m photos/adidas-originals-gazelle-tactile-yellow-black-gold_product_8894439_color_695418.jpg

# Process every photo with the default options, 40 jobs at a time. The file
# list is NUL-delimited so file names containing newlines or quotes are passed
# through intact; per-job results are recorded in the joblog.
find ./photos -type f -print0 \
  | parallel -0 -j40 --joblog /tmp/log64 process_shoe {}

# # 64x64 chunks
# find ./photos -type f | parallel -j40 --joblog /tmp/log64e process_shoe -e -x 64 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log64 process_shoe -x 64 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log64em process_shoe -m -e -x 64 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log64m process_shoe -m -x 64 {}
#
# # 128x128 chunks
# find ./photos -type f | parallel -j40 --joblog /tmp/log128e process_shoe -e -x 128 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log128 process_shoe -x 128 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log128em process_shoe -m -e -x 128 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log128m process_shoe -m -x 128 {}
#
# # 256x256 chunks
# find ./photos -type f | parallel -j40 --joblog /tmp/log256e process_shoe -e -x 256 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log256 process_shoe -x 256 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log256em process_shoe -m -e -x 256 {}
# find ./photos -type f | parallel -j40 --joblog /tmp/log256m process_shoe -m -x 256 {}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment