-
-
Save btobolaski/4002787 to your computer and use it in GitHub Desktop.
Bash script to parse an Apache log for a count of RSS subscribers and append it to a history file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Count yesterday's RSS subscribers from an Apache access log and append
# the totals to a history file.
#
# A modification of Marco Arment's script at
#
#   https://gist.github.com/3783146
#
# It's intended to be run once a day as a cron job. The main
# differences between it and Marco's script are:
#
# 1. It can check several feeds instead of just one: FEED_LIST is a
#    whitespace-separated list of request paths.
# 2. It combines the non-Google Reader counts into a single number.
# 3. It doesn't write anything to stdout or send email.
# 4. It adds a line to a history file with the date and counts.
#
# Each history line looks like
#
#   YYYY-MM-DD: <reader> <non-reader> [<reader> <non-reader> ...]
#
# with one pair of right-aligned, 5-wide counts per feed in FEED_LIST.
#
# Uses `date -d` and `tac`, which are GNU tools.

# Required variables. Edit these for your server.
FEED_LIST="/feed"
LOG_FILE="/path/to/apache/access/log/file"
HISTORY_FILE="subscribers.txt"

# Date expression for yesterday
DATE="-1 day"

# Date format in Apache log. Apache always writes English month
# abbreviations, so force the C locale for %b.
LOG_FDATE=$(LC_ALL=C date -d "$DATE" '+%d/%b/%Y')

# Date format for history file.
HISTORY_FDATE=$(date -d "$DATE" '+%Y-%m-%d')

#######################################
# Print the log lines for the target day that request one feed.
# Globals:   LOG_FDATE, LOG_FILE (read)
# Arguments: $1 - feed request path, e.g. /feed
# Outputs:   matching log lines on stdout
#######################################
feed_requests() {
  grep -F -- "$LOG_FDATE" "$LOG_FILE" | grep -F -- " $1 "
}

#######################################
# Sum the leading number of every line read from stdin.
# Outputs:   the total on stdout; 0 when stdin is empty, so callers can
#            always use the result in arithmetic.
#######################################
sum_counts() {
  awk '{ total += $1 } END { print total + 0 }'
}

#######################################
# Count unique client IPs requesting a feed, ignoring requests whose
# log line reports "<n> subscribers".
# Arguments: $1 - feed request path
# Outputs:   the count on stdout
#######################################
ip_subscribers() {
  feed_requests "$1" \
    | grep -Ev '[0-9]+ subscribers' \
    | cut -d' ' -f1 \
    | sort -u \
    | wc -l
}

#######################################
# Sum subscribers reported by Google Reader and other user-agents that
# send "<n> subscribers; feed-id=<id>". Each feed-id is counted once,
# using the report that appears last in the log.
# Arguments: $1 - feed request path
# Outputs:   the total on stdout
#######################################
reader_subscribers() {
  # The stable sort groups reports by feed-id while keeping log order;
  # tac then puts the latest report first so uniq keeps that one.
  feed_requests "$1" \
    | grep -Eo '[0-9]+ subscribers; feed-id=[0-9]+' \
    | sort -t= -k2 -s \
    | tac \
    | uniq -f2 \
    | sum_counts
}

#######################################
# Sum subscribers reported by user-agents that send "<n> subscribers"
# without a feed-id. Uniqueness is keyed on the final quoted field of
# the log line (the user-agent in the combined log format) up to its
# first "("; the report that appears last in the log wins.
# Arguments: $1 - feed request path
# Outputs:   the total on stdout
#######################################
other_subscribers() {
  feed_requests "$1" \
    | grep -Fv 'subscribers; feed-id=' \
    | grep -E '[0-9]+ subscribers' \
    | grep -Eo '"[^"]+"$' \
    | tac \
    | awk -F'(' '!seen[$1]++' \
    | grep -Eo '[0-9]+ subscribers' \
    | sum_counts
}

#######################################
# Collect the counts for every feed and append one line to the history
# file.
# Globals:   FEED_LIST, HISTORY_FDATE, HISTORY_FILE (read)
# Outputs:   appends a line to HISTORY_FILE; nothing on stdout
#######################################
main() {
  local dayline feed ip_subs gr_subs other_subs non_gr_subs

  # Start the line with yesterday's date.
  dayline="${HISTORY_FDATE}: "

  # FEED_LIST is deliberately unquoted: it is a whitespace-separated list.
  for feed in $FEED_LIST; do
    ip_subs=$(ip_subscribers "$feed")
    gr_subs=$(reader_subscribers "$feed")
    other_subs=$(other_subscribers "$feed")

    # Add the non-Google Reader subscribers.
    non_gr_subs=$(( ip_subs + other_subs ))

    dayline+=$(printf '%5d %5d ' "$gr_subs" "$non_gr_subs")
  done

  # Append yesterday's info to the history file.
  printf '%s\n' "$dayline" >> "$HISTORY_FILE"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment