Created
September 18, 2012 22:00
-
-
Save bkonkle/3746198 to your computer and use it in GitHub Desktop.
A script to create URLs for JMeter testing from your Apache access logs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Requires apachelog. `pip install apachelog` | |
""" | |
from __future__ import with_statement | |
import apachelog | |
import csv | |
import re | |
import sys | |
from optparse import OptionParser | |
# Apache LogFormat directives, used as the lookup keys into each parsed log
# record inside create_urls().
STATUS_CODE = '%>s'
REQUEST = '%r'
USER_AGENT = '%{User-Agent}i'
# URLs matching either pattern below are left out of the output. Both patterns
# are unanchored and applied with .search(), so they match anywhere in the URL
# (for example, r'\.js' also matches a URL containing ".json").
MEDIA_RE = re.compile(r'\.png|\.jpg|\.jpeg|\.gif|\.tif|\.tiff|\.bmp|\.js|\.css|\.ico|\.swf|\.xml')
SPECIAL_RE = re.compile(r'xd_receiver|\.htj|\.htc|/admin')
def main():
    """Command-line entry point.

    Reads the options and the LOGFILE positional argument from the command
    line, then hands them to create_urls(). When no LOGFILE is given, a
    message is written to stderr and the process exits with status 1.
    """
    # (flags, keyword settings) for every supported option, in help order.
    option_specs = (
        (
            ("-o", "--outfile"),
            dict(
                dest="outfile",
                action="store",
                default="urls.csv",
                help="The output file to write urls to",
                metavar="OUTFILE",
            ),
        ),
        (
            ("-f", "--format"),
            dict(
                dest="logformat",
                action="store",
                default=r'%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"',
                help="The Apache log format, copied and pasted from the Apache conf",
                metavar="FORMAT",
            ),
        ),
        (
            ("-g", "--grep"),
            dict(
                dest="grep",
                action="store",
                help="Simple, plain text filtering of the log lines. No regexes. This "
                     "is useful for things like date filtering - DD/Mmm/YYYY.",
                metavar="TEXT",
            ),
        ),
    )

    cli = OptionParser(usage="usage: %prog [options] LOGFILE")
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    opts, positional = cli.parse_args()

    if positional:
        # Only the first positional argument is used as the log file.
        create_urls(positional[0], opts.outfile, opts.logformat, opts.grep)
        return

    sys.stderr.write('Please provide an Apache log to read from.\n')
    sys.exit(1)
def _open_csv_output(path):
    """Open *path* for writing in the mode csv.writer needs on this Python."""
    if sys.version_info[0] < 3:
        # The Python 2 csv module writes bytes and expects a binary file.
        return open(path, 'wb')
    # The Python 3 csv module writes text and manages line endings itself.
    return open(path, 'w', newline='')


def create_urls(logfile, outfile, logformat, grep=None):
    """Extract load-test URLs from an Apache access log into a CSV file.

    Each log line is parsed with apachelog using ``logformat``. A row of
    ``[url, user agent]`` is written to ``outfile`` for every line that:

    * contains ``grep`` (when a non-empty ``grep`` is given),
    * can be parsed by apachelog,
    * has a request field of the form ``METHOD URL PROTOCOL``,
    * is a ``GET`` request with status ``200``, and
    * does not match MEDIA_RE or SPECIAL_RE.

    Lines failing any of these checks are skipped. A progress spinner is
    written to stdout while the log is read.

    Args:
        logfile: Path of the Apache access log to read.
        outfile: Path of the CSV file to create (overwritten if present).
        logformat: Apache LogFormat string describing the log lines.
        grep: Optional plain-text substring a line must contain.
    """
    parser = apachelog.parser(logformat)

    with open(logfile) as f, _open_csv_output(outfile) as o:
        writer = csv.writer(o)

        # Status spinner
        spinner = "|/-\\"
        pos = 0

        for i, line in enumerate(f):
            # Advance the spinner once every 10000 lines.
            if i % 10000 == 0:
                sys.stdout.write("\r" + spinner[pos])
                sys.stdout.flush()
                pos = (pos + 1) % len(spinner)

            # If a filter was specified, filter by it
            if grep and grep not in line:
                continue

            try:
                data = parser.parse(line)
            except apachelog.ApacheLogParserError:
                continue

            # The request field is not always "METHOD URL PROTOCOL" (Apache
            # logs "-" for requests it could not read, for instance). Skip
            # such lines instead of crashing on the unpack below.
            request_parts = data[REQUEST].split()
            if len(request_parts) != 3:
                continue
            method, url, _protocol = request_parts

            # Check for GET requests with a status of 200
            if method != 'GET' or data[STATUS_CODE] != '200':
                continue

            # Exclude media requests and special urls
            if MEDIA_RE.search(url) or SPECIAL_RE.search(url):
                continue

            # This is a good record that we want to write
            writer.writerow([url, data[USER_AGENT]])

    # Same output as the Python 2 statement ``print ' done!'``, but valid
    # syntax on both Python 2 and Python 3.
    sys.stdout.write(' done!\n')
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/test/url/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/test/url/2/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/other/test/url/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/test/url/3/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /the/best/url/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/test/url/2/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/test/url/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/other/test/url/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/test/url/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" | |
123.45.67.89 - - [18/Sep/2012:09:57:41 -0500] "GET /my/test/url/ HTTP/1.1" 200 26080 "http://my.referrer.com/sweet/referral/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment