Last active
October 6, 2015 03:17
-
-
Save elmimmo/2926332 to your computer and use it in GitHub Desktop.
Script to schedule automated batch-downloading of Apple's App Store and iBookStore sales reports using Apple's AutoIngestion tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# `autoingestion.sh` downloads Apple iOS App Store sales data. It will
# download it next to the script, into `<vendorid>/<report type>/`
# directories (report types: Daily, Weekly, Monthly, Yearly).
#
# It will only attempt to download if new sales data should be
# available.
#
# As skipping already downloaded files depends on the filename pattern
# `S_${REPORTING_RANGE:0:1}_${VENDOR_ID}_${THIS_DATE}` (e.g.
# `S_D_11111111_20120502`), the script should be updated should Apple
# change the filename format.
#
# Requires `Autoingestion.class` by Apple. Instructions for downloading, at
# <http://www.apple.com/itunesnews/docs/AppStoreReportingInstructions.pdf>.
# This script expects it to be located in the same folder as the script.
#
# Usage: autoingestion.sh [vendorid username password [startdate]]
#   startdate is given as YYYYMMDD and defaults to 20080710.
#
# Author: Jorge Hernández Valiñani
# https://gist.github.com/2926332

# Exit if anything breaks.
set -e

# Run with lower priority. Lowering the priority is only a courtesy: if
# `renice` fails, carry on rather than let `set -e` abort the whole run.
renice 5 -p $$ || true

# Uncomment the CHECK_* lines to make the script wait for an internet
# connection and/or for Dropbox to be running before downloading.
#CHECK_INTERNET_CONNECTION=true
#CHECK_DROPBOX=true
ATTEMPTS=10                          # How many times each check is tried.
RETRY_DELAY=30                       # Seconds between two tries.
DROPBOX_INITIAL_DELAY=$(( 5 * 60 ))  # Seconds granted to Dropbox to connect.

# User account details (substitute with your own account details).
# Command-line arguments, when given, take precedence over these defaults.
VENDOR_ID=${1:-"vendorid"}
USER_ID=${2:-"username"}
PASSWORD=${3:-"password"}
# First day you had something up in the App Store.
# Used to limit yearly reports, as Apple imposes no limit.
START_DATE=${4:-20080710}
# Find in what directory this script resides.
# If `$0` has no directory part the `cd` fails silently and the current
# directory is used instead.
SCRIPT_PATH="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"

# If the script was started through a symbolic link, follow it (one level).
# A relative link target is relative to the directory holding the link, not
# to the current directory, so anchor it there.
if LINK_TARGET="$(readlink "$SCRIPT_PATH")" && [ -n "$LINK_TARGET" ]; then
  case "$LINK_TARGET" in
    /*) SCRIPT_PATH="$LINK_TARGET" ;;
    *)  SCRIPT_PATH="$(dirname "$SCRIPT_PATH")/$LINK_TARGET" ;;
  esac
fi
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")/" # Path of this script

# Paths and filenames.
OUT_DIR="$SCRIPT_DIR"
LOG="${SCRIPT_DIR}${0##*/}.log"
CLASS_PATH="$SCRIPT_DIR"

# `TMPDIR` is concatenated directly with file names (here and for the lock
# file), so make sure it is set and ends with exactly one slash.
TMPDIR="${TMPDIR:-/tmp/}"
TMPDIR="${TMPDIR%/}/"
WORK_DIR="$(mktemp -d "${TMPDIR}${0##*/}.XXXXXXXXXX")/"
# Test for Java and remember which binary to run in `JAVABIN`.
# The probe is used directly as the `if` condition: run as a separate
# command, a failing probe would make `set -e` end the script before the
# fallback and the error message below could ever be reached.
if /usr/libexec/java_home > /dev/null 2>&1; then
  # Use Apple's Java
  JAVABIN="/usr/bin/java"
elif [ -x "/Library/Internet Plug-Ins/JavaAppletPlugin.plugin/Contents/Home/bin/java" ]; then
  # Use Oracle's Java
  JAVABIN="/Library/Internet Plug-Ins/JavaAppletPlugin.plugin/Contents/Home/bin/java"
else
  echo "No Java runtime present." 1>&2
  echo "Download and install from http://java.com" 1>&2
  exit 1
fi
if [ "$CHECK_INTERNET_CONNECTION" == true ]; then
  # Check for internet connection: ping up to $ATTEMPTS times, waiting
  # $RETRY_DELAY seconds between tries, and give up after the last one.
  for (( i = 1; i <= ATTEMPTS; i++ )); do
    if ping -c 1 google.com > /dev/null; then
      break
    fi
    echo "Internet not reachable." 1>&2
    if (( i >= ATTEMPTS )); then
      echo "Nothing was downloaded." 1>&2
      exit 1
    fi
    echo "Will try again in $RETRY_DELAY seconds, $(( ATTEMPTS - i )) time(s) again." 1>&2
    sleep "$RETRY_DELAY"
  done
fi
if [ "$CHECK_DROPBOX" == true ]; then
  # Check if Dropbox is running: look up to $ATTEMPTS times, waiting
  # $RETRY_DELAY seconds between tries, and give up after the last one.
  for (( i = 1; i <= ATTEMPTS; i++ )); do
    # Match the process *name* exactly. Searching whole command lines for
    # "Dropbox" also matches this very script whenever it is stored inside
    # a Dropbox folder, which would make the check always succeed.
    if pgrep -x Dropbox > /dev/null; then
      # even if it is, give it some time to connect
      sleep "$DROPBOX_INITIAL_DELAY"
      break
    fi
    echo "Dropbox is not running." 1>&2
    if (( i >= ATTEMPTS )); then
      echo "Nothing was downloaded." 1>&2
      exit 1
    fi
    echo "Will check again in $RETRY_DELAY seconds, $(( ATTEMPTS - i )) time(s) again." 1>&2
    sleep "$RETRY_DELAY"
  done
fi
# Make sure only one instance of the script is running.
# The lock file lives in `$TMPDIR` (which must end with a slash, as the name
# is appended directly) and is deleted again by the exit trap.
# NOTE(review): `lockfile` is expected to wait until the lock can be
# created, so a stale lock left by a killed run must be removed by hand.
lockfile "${TMPDIR}autoingestion.lock"
# Cleanup temp files on exit.
# Globals: WORK_DIR (read) - temporary download directory to delete
#          TMPDIR   (read) - directory holding `autoingestion.lock`
cleanup() {
  # `-f` keeps `rm` from failing when the directory is already gone: this
  # runs from the EXIT trap with `set -e` active, and a failure here would
  # skip the next line and leave a stale lock file behind.
  if [ -n "$WORK_DIR" ]; then
    rm -rf -- "$WORK_DIR"
  fi
  rm -f -- "${TMPDIR}autoingestion.lock"
}
trap cleanup EXIT
# Log stdout & stderr (comment out when editing, as hijacks exit codes).
# Everything printed from here on is shown as usual and also appended to
# `$LOG` through `tee`.
exec > >(tee -a "$LOG")
exec 2>&1

# Opening banner of this run: date, user@host, script name[PID], then how
# the script was invoked and its first argument.
echo "***** $(date -j "+%c") ${USER}@${HOSTNAME} ${0##*/}[$$]: $0 $1 *****"
# Apple's `Autoingestion.class` must be present in `$CLASS_PATH` (which ends
# with a slash, as the file name is appended directly).
if [ ! -e "${CLASS_PATH}Autoingestion.class" ]; then
  echo "Autoingestion.class is missing. For instructions to download it, see" 1>&2
  echo "http://www.apple.com/itunesnews/docs/AppStoreReportingInstructions.pdf" 1>&2
  exit 1
fi
# Dates
# -----
# Test `date` is BSD's: all date arithmetic below relies on its `-v` flag.
# The probe is the `if` condition itself so that `set -e` cannot end the
# script before the explanation is printed; the script then stops, as none
# of the report dates could be computed.
if ! date -v+0d > /dev/null 2>&1; then
  echo "Requires BSD's \"date\" command." 1>&2
  exit 1
fi
# All dates are computed in US Pacific time, the zone the report generation
# time below is expressed in. `TZ` is unset again at the end of the script.
export TZ="US/Pacific"
TODAY="$(date -j +"%s")"
MIDNIGHT="$(date -j -v0H -v0M -v0S +"%s")"

# Reports are purportedly generated at 8AM Pacific.
# If it's not yet that time, consider `$TODAY` is still yesterday.
# Start from an empty value so nothing is inherited from the environment.
DATE_MOD=""
if [ $(( TODAY - MIDNIGHT )) -lt $(( 8 * 60 * 60 )) ]; then
  DATE_MOD="-v-1d"
fi

# Per report type:
#   *_MOST_RECENT  `date -v` adjustments leading to the newest report
#   *_LIMIT        how many intervals to go back from the newest report
#   *_INTERVAL     `date -v` unit of one step (d, w, m, y)
#   *_FORMAT       `date` format of the date part of the report name
# The `*_MOST_RECENT` values hold several flags in one string and are
# therefore expanded unquoted on purpose.
Daily_MOST_RECENT="$DATE_MOD -v-1d"
Daily_LIMIT=29 # 30 days including yesterday
Daily_INTERVAL="d"
Daily_FORMAT="%Y%m%d"

Weekly_MOST_RECENT="$Daily_MOST_RECENT -v-sun"
Weekly_LIMIT=25 # 26 weeks including last Sunday
Weekly_INTERVAL="w"
Weekly_FORMAT="%Y%m%d"

Monthly_MOST_RECENT="$Daily_MOST_RECENT -v-1m -v-4d" # five days after the month's end date
Monthly_LIMIT=11 # 12 months including last month
Monthly_INTERVAL="m"
Monthly_FORMAT="%Y%m"

Yearly_MOST_RECENT="$Daily_MOST_RECENT -v-1y -v-5d" # six days after the year's end date
# All years since `$START_DATE`
# shellcheck disable=SC2086
Yearly_LIMIT=$(( $(date -j $Yearly_MOST_RECENT "+%Y") - $(date -j -f "%Y%m%d" "${START_DATE}" "+%Y") ))
Yearly_INTERVAL="y"
Yearly_FORMAT="%Y"
# Start
# -----
# Work from the temporary directory: the `*.txt.gz` files found in the
# current directory after each batch are the ones moved to `$OUT_DIR`.
cd "$WORK_DIR"

# Loop for all report types.
# You can append new values of `REPORTING_RANGE` that Autoingestion may accept.
for REPORTING_RANGE in Daily Weekly Monthly Yearly; do
  # Look up this report type's settings (`Daily_LIMIT`, `Weekly_FORMAT`, ...)
  # by variable name.
  SETTING="${REPORTING_RANGE}_MOST_RECENT"; THIS_MOST_RECENT="${!SETTING}"
  SETTING="${REPORTING_RANGE}_LIMIT";       THIS_LIMIT="${!SETTING}"
  SETTING="${REPORTING_RANGE}_INTERVAL";    THIS_INTERVAL="${!SETTING}"
  SETTING="${REPORTING_RANGE}_FORMAT";      THIS_FORMAT="${!SETTING}"

  REPORT_DIR="${OUT_DIR}${VENDOR_ID}/${REPORTING_RANGE}/"
  NO_SALES=() # Reports Apple answered "nothing available" for.

  # Loop for all dates since `${REPORTING_RANGE}_LIMIT`, oldest first.
  while [ "$THIS_LIMIT" -ge 0 ]; do
    # `$THIS_MOST_RECENT` holds several `-v` flags and must be word-split.
    # shellcheck disable=SC2086
    THIS_DATE="$(date -j $THIS_MOST_RECENT "-v-${THIS_LIMIT}${THIS_INTERVAL}" +"$THIS_FORMAT")"
    THIS_LIMIT=$(( THIS_LIMIT - 1 ))
    REPORT_NAME="S_${REPORTING_RANGE:0:1}_${VENDOR_ID}_${THIS_DATE}"

    # If a particular report was already downloaded, skip it.
    # The glob is expanded into an array so that several matching files
    # cannot break the `-f` test; an unmatched glob stays literal and fails
    # the test.
    EXISTING=( "${REPORT_DIR}${REPORT_NAME}"* )
    if [ -f "${EXISTING[0]}" ]; then
      # Only skip when the archive is intact; a corrupt one is fetched again.
      if gzip -t "${REPORT_DIR}${REPORT_NAME}"*.gz 2>/dev/null; then
        continue
      fi
    elif grep -qF -- "$REPORT_NAME" "${REPORT_DIR}NO_SALES" 2>/dev/null; then
      # Already known to have no report for that date.
      continue
    fi

    # Download report.
    # NOTE(review): the password is passed as a command-line argument and
    # may therefore be visible in process listings while Java runs.
    echo "Requesting $VENDOR_ID $REPORTING_RANGE ${THIS_DATE}…"
    while read -r LINE || [ -n "$LINE" ]; do
      if [ "$LINE" == "There are no reports available to download for this selection." ]; then
        NO_SALES+=( "$REPORT_NAME" )
      fi
      printf '%s\n' "$LINE"
    done < <("$JAVABIN" -cp "$CLASS_PATH" Autoingestion "$USER_ID" \
                                                       "$PASSWORD" \
                                                       "$VENDOR_ID" \
                                                       Sales \
                                                       "$REPORTING_RANGE" \
                                                       Summary \
                                                       "$THIS_DATE")
  done

  # Move downloaded files to subdirectory `${REPORTING_RANGE}`
  for FILE in *.txt.gz; do
    # An unmatched glob is left as the literal pattern: nothing to move.
    [ -e "$FILE" ] || continue
    mkdir -p "$REPORT_DIR"
    mv -- "$FILE" "$REPORT_DIR"
  done

  # Remember the dates without reports so they are not requested again.
  if [ ${#NO_SALES[@]} -gt 0 ]; then
    # The directory may not exist yet if nothing was downloaded for this
    # report type; without it the append fails and `set -e` ends the script.
    mkdir -p "$REPORT_DIR"
    printf '%s\n' "${NO_SALES[@]}" >> "${REPORT_DIR}NO_SALES"
    unset NO_SALES
  fi
done
unset TZ

# Closing banner of this run, followed by a blank separator line.
echo "/**** $(date -j "+%c") ${USER}@${HOSTNAME} ${0##*/}[$$]: Done ****"
echo ""

# Return to the previous directory. `cd -` prints that directory; discard the
# output instead of closing stdout, which would make it write to a closed
# descriptor.
cd - > /dev/null
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you use Mac OS X 10.7 and cannot see your Library folder, get to it with the Finder’s menu Go > Go to folder…, and type
~/Library
. Once inside the Library folder, search for the folder LaunchAgents. If it does not exist, create one with that name, and copy said file into it.
2. Open the file with a text editor, such as TextEdit, and change the value of the following line
so that it points to the path where autoingestion.sh resides.
3. Open Terminal and type:
The script will run every 8 hours, and silently quit if there is no data to be downloaded. Each run will be logged in a log file next to autoingestion.sh.
If you need to download sales data for several accounts (iOS, iBookStore, MAS) just duplicate the file, give it a proper name, open it in a text editor and change its
Label
accordingly, and load it as an additional job. Account details in the launchd job plist are optional if they were already set in the script, and mandatory if not. If set in the launchd job, those will be the ones used, regardless of the ones hardcoded in the script.