Created
August 1, 2013 15:11
-
-
Save lmandel/6132291 to your computer and use it in GitHub Desktop.
Scrape ratings data from Google Play
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Scrape ratings data for one app from its Google Play details page.
#
# Usage: <this-script> PRODUCT
#   PRODUCT  application id appended to PLAY_URL, e.g. org.mozilla.firefox
#            or org.mozilla.firefox_beta. (Short aliases such as "firefox"
#            and "beta" were sketched in an earlier revision but were left
#            disabled; pass the full id.)
#
# Environment (both optional):
#   DEBUG        set to "true" to print every parsed value to stdout
#   OUTPUT_PATH  directory that receives <PRODUCT>-<YYYYMMDD>.json
#
# Requires: wget, mktemp, sed, and a grep that supports -m and -o.
#
# NOTE(review): the patterns below target the Play Store markup as it was
# when this script was written; confirm they still match the live page.

set -u

DEBUG=${DEBUG:-false}
OUTPUT_PATH=${OUTPUT_PATH:-/home/lmandel/public_html/reviews/}
PLAY_URL='https://play.google.com/store/apps/details?id='
PRODUCT=${1:-}

# Basic regular expressions shared by grep (to locate) and sed (to extract).
# \(...\) marks the value that sed keeps.
RATING_RE='<div class="ratings goog-inline-block" title="Rating: \([0-9\.]\{1,3\}\) stars'
STAR_RE='<div class="goog-inline-block star SPRITE_star_o\{0,1\}n\{0,1\}f\{0,2\}h\{0,1\}a\{0,1\}l\{0,1\}f\{0,1\}_dark'
# The filler inside the bar span is the literal HTML entity "&nbsp;". A copy
# of this script that shows a plain space there has had the entity decoded
# by an HTML renderer, which makes the grep and sed patterns disagree.
BAR_RE='<span class="bar bar[1-5]" style="width:[0-9]*px">&nbsp;</span> <span>\([0-9,]*\)</span>'

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print line $2 of the multi-line string $1 (empty output if there is none).
nth_line() {
  printf '%s\n' "$1" | sed -n "${2}p"
}

[ -n "$PRODUCT" ] || die "Usage: ${0##*/} PRODUCT   (e.g. org.mozilla.firefox)"
case $PRODUCT in
  */*) die "invalid product id (must not contain '/'): $PRODUCT" ;;
esac

out_dir=${OUTPUT_PATH%/}
[ -d "$out_dir" ] || die "output directory does not exist: $out_dir"
json_file="${out_dir}/${PRODUCT}-$(date +%Y%m%d).json"

# Download into a private temp file that is removed on every exit path.
page=''
cleanup() {
  [ -z "$page" ] || rm -f -- "$page"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM
page=$(mktemp) || die "mktemp failed"

# The URL is quoted: it contains '?', which is a glob character.
wget -O "$page" "${PLAY_URL}${PRODUCT}" ||
  die "download failed: ${PLAY_URL}${PRODUCT}"

# grep -m 1 stops at the first matching line and -o prints each match on
# that line on its own output line, so every pipeline below works on the
# matches found on that single line of the page.

# Average rating: first match only.
rating=$(grep -m 1 -o "$RATING_RE" "$page" |
  sed -e '2,$d' -e "s|${RATING_RE}|\1|")

# Star sprites: only the first five matches are considered; they are then
# counted per state. grep -c prints 0 (and exits 1) when nothing matches,
# which is fine because errexit is not enabled.
star_divs=$(grep -m 1 -o "$STAR_RE" "$page" | sed '6,$d')
on_stars=$(printf '%s\n' "$star_divs" | grep -c 'SPRITE_star_on_dark')
half_stars=$(printf '%s\n' "$star_divs" | grep -c 'SPRITE_star_half_dark')
off_stars=$(printf '%s\n' "$star_divs" | grep -c 'SPRITE_star_off_dark')

# Histogram bars: the first five matches are read in page order as the
# 5-star down to 1-star vote counts (thousands separators are kept).
vote_counts=$(grep -m 1 -o "$BAR_RE" "$page" |
  sed -e '6,$d' -e "s|${BAR_RE}|\1|")
five_star_votes=$(nth_line "$vote_counts" 1)
four_star_votes=$(nth_line "$vote_counts" 2)
three_star_votes=$(nth_line "$vote_counts" 3)
two_star_votes=$(nth_line "$vote_counts" 4)
one_star_votes=$(nth_line "$vote_counts" 5)

if [ -z "$rating" ]; then
  printf '%s\n' "warning: no rating found for ${PRODUCT}; page layout may have changed" >&2
fi

# POSIX test compares strings with '='; '==' is a bashism that breaks
# under a strict /bin/sh.
if [ "$DEBUG" = 'true' ]; then
  printf '%s\n' "Rating $rating"
  printf '%s\n' "ON stars $on_stars"
  printf '%s\n' "HALF stars $half_stars"
  printf '%s\n' "OFF stars $off_stars"
  printf '%s\n' "5 star votes $five_star_votes"
  printf '%s\n' "4 star votes $four_star_votes"
  printf '%s\n' "3 star votes $three_star_votes"
  printf '%s\n' "2 star votes $two_star_votes"
  printf '%s\n' "1 star votes $one_star_votes"
fi

# NOTE(review): the payload uses single-quoted keys and strings, which is
# not strict JSON. The format is kept unchanged because existing consumers
# of these files may rely on it; confirm before switching to double quotes.
printf '%s\n' "{'rating':${rating},'stars':{'on':${on_stars},'half':${half_stars},'off':${off_stars}},'votes':{'five':'${five_star_votes}','four':'${four_star_votes}','three':'${three_star_votes}','two':'${two_star_votes}','one':'${one_star_votes}'}}" \
  > "$json_file" || die "cannot write $json_file"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment