Created
February 23, 2010 14:56
-
-
Save ebot/312255 to your computer and use it in GitHub Desktop.
Reads the Apache server log to gather statistics on the website.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'rubygems' | |
require 'RedCloth' | |
require 'net/ssh' | |
require 'net/scp' | |
# Connection settings for the host whose log archive is processed.
# username and password are placeholders reserved for the ssh download
# step below, which has not been written yet.
host_name = 'host_name'
username = 'user_name'
password = 'password'

# The archive is named after the host and the current month,
# e.g. "host_name-Feb-2010.gz".
download_file = "#{host_name}-#{Time.now.strftime('%b-%Y')}.gz"
download_path = "./logs/"
# File.join builds the target path without mutating download_path, so the
# directory string stays usable on its own.
download_me = File.join(download_path, download_file)

#TODO Insert the ssh code here

# Deflate the gz file.
# The argv form of system bypasses the shell, so an unusual host name cannot
# be split or interpreted as shell syntax. system returns true only when the
# command ran and exited with status 0.
success = system('nice', '-n', '5', 'gzip', '-d', download_file)
# abort exits with a non-zero status so callers (cron, shell scripts) can
# see that the run failed.
abort "Could not deflate #{download_file}" unless success
# Now we can build the reports
#
# Parse the deflated log line by line and collect two tables:
#   episodes     - episode => { :downloads => Integer, :ip_addresses => { ip => '' } }
#   ip_addresses - ip => { :count, :audio_count, :video_count, :log => [paths] }
#
# Each line is split on whitespace. Field 0 is used as the client IP,
# field 5 must be '"GET', field 6 is the requested path and field 8 must be
# '200'. NOTE(review): these positions presumably match the Apache
# common/combined log format -- confirm against the real log.
log_name = File.basename(download_file, '.gz')
ip_addresses = {}
episodes = {}

# File.foreach closes the file when iteration finishes (or raises).
File.foreach(log_name) do |line|
  fields = line.split
  next unless fields[8] == '200' && fields[5] == '"GET'

  ip = fields[0]
  path = fields[6]
  next unless path.include?('/audio/dle')

  # Third '/'-separated segment of the path identifies the episode.
  episode = path.split('/')[2]

  # count the episode downloads
  episode_stats = (episodes[episode] ||= { :downloads => 0, :ip_addresses => {} })
  episode_stats[:downloads] += 1

  # Add unique ip addresses to get a unique count
  episode_stats[:ip_addresses][ip] ||= ''

  # Gather information about this unique ip address
  visitor = (ip_addresses[ip] ||= { :count => 0,
                                    :audio_count => 0,
                                    :video_count => 0,
                                    :log => [] })
  visitor[:count] += 1
  visitor[:audio_count] += 1 if path.include?('mp3')
  visitor[:video_count] += 1 if path.include?('m4v')
  visitor[:log] << path
end
# Now that we have parsed the data, let's build the report
#
# Writes "<YYYY_MM>_podcast_stats.textile" from the episodes and ip_addresses
# tables. The time is sampled once so the file name and the heading always
# refer to the same month.
report_time = Time.now

# Add some styling
css = <<EOF
<style type="text/css">
html {
font-family: Verdana;
font-size: .9em
}
h1 {
font-size: 175%;
}
h2 {
font-size: 150%;
}
h3 {
font-size: 125%;
}
h4 {
font-size: 115%;
}
</style>
EOF

# Both episode sections list the same episodes in the same order.
sorted_episodes = episodes.sort

# The block form closes the file even if a write raises part-way through.
File.open("#{report_time.strftime('%Y_%m')}_podcast_stats.textile", 'w') do |output|
  output << css
  output << "\nh1. DLE Podcast Statistics for #{report_time.strftime('%b %Y')}\n\n"
  output << "\"Home\":index.html\n\n"

  # Add Listener information to the report
  output << "h2. Listener Statistics (how many unique IPs downloaded each episode)\n\n"
  sorted_episodes.each do |episode, stats|
    output << "* Episode #{episode} had #{stats[:ip_addresses].size} listeners or viewers.\n"
  end

  # Add Episode information to the report
  output << "\nh2. Episode Statistics (how many times each episode was downloaded)\n\n"
  sorted_episodes.each do |episode, stats|
    output << "* Episode #{episode} was downloaded #{stats[:downloads]} times.\n"
  end

  # Add ip user information
  output << "\nh2. User Statistics\n\n"
  output << "The server had #{ip_addresses.size} unique visitors.\n\n"
  ip_addresses.sort.each do |ip, stats|
    output << "* *#{ip} hit the server #{stats[:count]} time(s), video => #{stats[:video_count]}, mp3 => #{stats[:audio_count]}.*\n"
    # Every path this IP requested, as a nested bullet list.
    stats[:log].sort.each { |element| output << "** #{element}\n" }
  end

  output << "\n\"Home\":index.html"
end
# generate html version of the report
#
# Reads this month's textile report and writes the RedCloth-rendered HTML
# next to it. The base name is computed once so the input and output names
# cannot disagree.
report_base = "#{Time.now.strftime('%Y_%m')}_podcast_stats"
# File.read opens, reads and closes the file in one call.
textile_source = File.read("#{report_base}.textile")
File.open("#{report_base}.html", 'w') do |html|
  html << RedCloth.new(textile_source).to_html
end
# Build index.html: a links page pointing at every generated
# "*stats.html" report in the current directory.
#
# Work on a copy so appending to the page does not modify css itself.
index = css.dup
index << "h1. Podcast Statistics\n\n"

# Dir.glob does not guarantee an order on older Rubies, so sort explicitly.
# Report names start with YYYY_MM, so reverse order lists the newest first.
Dir.glob('*stats.html').sort.reverse_each do |report|
  index << "* \"#{report}\":#{report}\n"
end

# The block form closes the file even if rendering or writing raises.
File.open('index.html', 'w') do |links_page|
  links_page << RedCloth.new(index).to_html
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment