Skip to content

Instantly share code, notes, and snippets.

@seratch
Created October 19, 2011 11:36
Show Gist options
  • Save seratch/1298039 to your computer and use it in GitHub Desktop.
Save seratch/1298039 to your computer and use it in GitHub Desktop.
#daimonscala 19-2 "Apache access_log(combined) parser"
object Main {
import java.net._
import java.util.Date
/**
 * One entry of an Apache "combined" format access log, as produced by
 * `AccessLogParser.line`.
 *
 * @param ipAddress client address, resolved from the first token of the line
 * @param ident     second token of the line (`-` in the sample data)
 * @param user      third token of the line (`-` in the sample data)
 * @param time      timestamp taken from the bracketed `[...]` field
 * @param method    request method from the quoted request line (e.g. `GET`)
 * @param uri       request target from the quoted request line
 * @param version   protocol token from the quoted request line (e.g. `HTTP/1.1`)
 * @param status    three-digit response status code
 * @param bytes     response size; the parser stores 0 when the log shows `-`
 * @param referrer  content of the quoted referrer field
 * @param userAgent content of the quoted user-agent field
 */
case class Access(
  ipAddress: InetAddress,
  ident: String,
  user: String,
  time: Date,
  method: String,
  uri: URI,
  version: String,
  status: Int,
  bytes: Int,
  referrer: String,
  userAgent: String
)
import util.parsing.combinator._
/**
 * Parser-combinator grammar for Apache "combined" format access logs.
 *
 * Each log line is split into its fields by [[lineMatch]] and converted into an
 * `Access` value by [[line]]; [[parse]] runs the grammar over a whole log text.
 *
 * Field conversions (`InetAddress.getByName`, `new URI`, `toInt`) happen inside
 * the semantic action of [[line]], so a value that matches the grammar but cannot
 * be converted (for example a URI rejected by `java.net.URI`) surfaces as a
 * thrown exception, not as a parse failure.
 */
object AccessLogParser extends JavaTokenParsers {
  import java.text.SimpleDateFormat
  import java.util.Locale
  import scala.util.matching.Regex

  /**
   * Format of the bracketed timestamp, e.g. `03/Oct/2011:01:22:54 +0900`.
   *
   * The locale is pinned to English so that the abbreviated month names used in
   * the log ("Oct") parse regardless of the JVM's default locale.
   * NOTE: `SimpleDateFormat` instances are not thread-safe; this single shared
   * instance must not be used from several threads at once.
   */
  val timeFormat: SimpleDateFormat =
    new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH)

  /** Only spaces and tabs are skipped between tokens, so line breaks stay significant for [[eol]]. */
  override val whiteSpace = "[ \t]+".r

  /** End of line: an optional carriage return followed by a line feed. */
  def eol = opt('\r') <~ '\n'

  /** The double-quote character that delimits the request, referrer and user-agent fields. */
  def q: String = "\""

  /**
   * Zero or more entries separated by [[eol]].
   *
   * The empty-string alternative lets a blank line (such as the one after a
   * trailing newline) succeed; such lines show up in the result list as `""`.
   */
  def lines = repsep(line | "", eol)

  /** Raw field sequence of one log line, before conversion into an `Access`. */
  def lineMatch = {
    ipAddress ~
      id ~
      user ~
      "[" ~ time ~ "]" ~
      q ~ method ~ uri ~ version ~ q ~
      status ~
      bytes ~
      q ~ referrer ~ q ~
      q ~ ua ~ q
  }

  /** One log line converted into an `Access`; delimiters (brackets, quotes) are discarded. */
  def line: Parser[Access] = lineMatch ^^ {
    case (
      ip ~
      identName ~
      userName ~
      _ ~ timestamp ~ _ ~
      _ ~ verb ~ target ~ ver ~ _ ~
      code ~
      size ~
      _ ~ ref ~ _ ~
      _ ~ agent ~ _
    ) =>
      Access(
        InetAddress.getByName(ip),
        identName,
        userName,
        timeFormat.parse(timestamp),
        verb,
        new URI(target),
        ver,
        code.toInt,
        // "-" in the size column means no body was sent; it is stored as 0.
        if (size == "-") 0 else size.toInt,
        ref,
        agent
      )
  }

  /** One or more non-whitespace characters. */
  def notSpaceAtLeastOne: Regex = "[^\\s]+".r

  /** One or more characters other than a double quote (kept for compatibility; no longer used by the grammar). */
  def notQuoteAtLeastOne: Regex = "[^\"]+".r

  /**
   * Content of a double-quoted field: any run of characters up to the next
   * unescaped double quote. Backslash escapes (such as `\"`) are consumed as
   * part of the content and kept verbatim; the run may be empty so that `""`
   * is accepted.
   */
  def quotedContent: Regex = """[^"\\]*(?:\\.[^"\\]*)*""".r

  /** Regex fragment for a single decimal digit. */
  def num: String = "\\d"

  /** Regex quantifier fragment meaning "one or more". */
  def atLeastOne: String = "+"

  /** Dotted quad of four 1-3 digit groups (the digit groups are not range-checked). */
  def ipAddress: Regex = ((num + "{1,3}" + "\\.") * 3 + num + "{1,3}").r

  def id: Regex = notSpaceAtLeastOne

  def user: Regex = notSpaceAtLeastOne

  /** Three-letter month abbreviation. */
  def month: Regex = "[a-zA-Z]{3}".r

  /**
   * Timestamp text such as `03/Oct/2011:01:22:54 +0900`.
   * The zone offset may carry either sign, so logs written west of UTC
   * (e.g. `-0700`) are accepted as well.
   */
  def time: Regex =
    (num + "{2}" + "/" + month + "/" + num + "{4}" + (":" + num + "{2}") * 3 + " [+-]" + num + "{4}").r

  /** Request method: one or more upper-case ASCII letters. */
  def method: Regex = "[A-Z]+".r

  def uri: Regex = notSpaceAtLeastOne

  /** Protocol token `HTTP/<digit>.<digit>`, covering e.g. `HTTP/1.0`, `HTTP/1.1` and `HTTP/2.0`. */
  def version: Regex = "HTTP/\\d\\.\\d".r

  /** Three-digit status code. */
  def status: Regex = (num + "{3}").r

  /** Response size: either a literal `-` or a run of digits. */
  def bytes: Parser[String] = "-" | (num + atLeastOne).r

  def referrer: Regex = quotedContent

  def ua: Regex = quotedContent

  /**
   * Parses a complete access-log text.
   *
   * @param json the raw log text (the parameter name is historical and kept
   *             for source compatibility; the input is not JSON)
   * @return the result of running [[lines]] over the whole input; the input
   *         must be consumed completely for the result to be successful
   */
  def parse(json: String): ParseResult[Any] = parseAll(lines, json)
}
/**
 * Demo entry point: parses three sample combined-format log lines with
 * `AccessLogParser.parse` and prints the parsed result.
 *
 * If parsing fails, the failure description is written to standard error and
 * the process exits with status 1 instead of dying on an unchecked `get`.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  // The continuation lines of this literal intentionally start at column 0:
  // any leading characters would become part of the sample log text.
  val accessLog = """66.249.69.220 - - [03/Oct/2011:01:22:54 +0900] "GET /blog/23/ HTTP/1.1" 200 22716 "-" "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)"
64.233.172.34 - - [16/Oct/2011:04:03:27 +0900] "GET /api/search/?format=atom&q=testtest HTTP/1.1" 200 20 "-" "Rome Client (http://tinyurl.com/64t5n) Ver: UNKNOWN AppEngine-Google; (+http://code.google.com/appengine; appid: xxxxx)"
64.233.172.36 - - [19/Oct/2011:05:18:52 +0900] "GET / HTTP/1.1" 304 - "http://twitter.com/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1"
"""
  val result = AccessLogParser.parse(accessLog)
  if (result.successful) {
    println(result.get)
  } else {
    // The textual form of an unsuccessful result carries the message and input position.
    System.err.println(result)
    sys.exit(1)
  }
}
}
// vim: set ts=4 sw=4 et:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment