Created
December 6, 2013 14:41
-
-
Save goooooouwa/7825820 to your computer and use it in GitHub Desktop.
This is a Google Image batch download tool: it takes a search query as input and produces a folder containing the Google Image search results (usually a couple of images).
This is a prototype; the real product will be created later on.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# DESCRIPTION
#   Google Image batch download tool. Takes a search query as input and
#   produces a folder containing the Google Image search results (usually a
#   couple of images).
# SYNOPSIS
#   ./GoogleImageSearch.sh QUERY
# EXAMPLE
#   ./GoogleImageSearch.sh 'Linkin Park'
# OUTPUT (all created in the current directory)
#   GoogleImageSearch.html   source of the search result page, as dumped by w3m
#   urlList                  extracted image URLs, one per line
#   QUERY with ' ' -> '+'    folder the images are downloaded into
# REQUIRES
#   w3m, wget, grep, sed, awk

set -euo pipefail

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the synopsis to stderr and abort with status 2.
usage() {
  printf 'Usage: %s QUERY\n' "${0##*/}" >&2
  exit 2
}

#######################################
# Encode a string for use as a URL query value.
# Arguments: $1 - raw text
# Outputs:   encoded text on stdout; spaces become '+', unreserved
#            characters pass through, every other byte becomes %XX
#######################################
urlencode() {
  local LC_ALL=C # iterate byte by byte so multi-byte characters encode correctly
  local raw=$1 encoded='' ch hex
  local -i i code
  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-]) encoded+=$ch ;;
      ' ') encoded+='+' ;;
      *)
        printf -v code '%d' "'$ch"
        # Mask to one byte in case printf reports a sign-extended value.
        printf -v hex '%%%02X' "$((code & 0xFF))"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

#######################################
# Extract image URLs from a search result page.
# Reads page source on stdin and writes each distinct URL that follows an
# "imgurl=" marker and ends in .jpg, .jpeg or .png to stdout, one per line,
# in order of first appearance.
#######################################
extract_image_urls() {
  grep -oE 'imgurl=https?://[[:alnum:]_.~%+/:-]*\.(jpe?g|png)' \
    | sed 's/^imgurl=//' \
    | awk '!seen[$0]++'
}

main() {
  if (($# < 1)) || [[ -z "$1" ]]; then
    usage
  fi

  local tool
  for tool in w3m wget; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  local search_terms=$1
  printf 'searching Google Image for %s ...\n' "$search_terms"

  # The download folder keeps the historical naming: spaces replaced with '+',
  # e.g. "Linkin Park" -> "Linkin+Park".
  local out_dir=${search_terms// /+}

  local query
  query=$(urlencode "$search_terms")
  local url="http://www.google.com.hk/search?q=${query}&tbm=isch&sout=1&tbs=isz:ex,iszw:600,iszh:600"
  printf '%s\n' "$url"

  # Step 1: use w3m to download the web page source.
  w3m -dump_source "$url" >GoogleImageSearch.html \
    || die "failed to download the search result page"

  # Step 2: extract the image URLs from the page source.
  # grep exits non-zero when nothing matches; the emptiness check below reports
  # that case with a clearer message, so the pipeline status is ignored here.
  extract_image_urls <GoogleImageSearch.html >urlList || true
  [[ -s urlList ]] || die "no image URLs found in GoogleImageSearch.html"

  # To examine the URL list, open urlList in an editor at this point.

  # Step 3: download the image files (input: urlList; tries: 2; output folder: $out_dir).
  wget -i urlList -t 2 -P "$out_dir"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment