Created
October 19, 2011 11:36
-
-
Save seratch/1298039 to your computer and use it in GitHub Desktop.
#daimonscala 19-2 "Apache access_log(combined) parser"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Parser for Apache `access_log` files written in the "combined" log format,
 * together with a small demo entry point.
 */
object Main {

  import java.net._
  import java.util.Date

  /**
   * One request line of a combined-format access log.
   *
   * @param ipAddress client address (first field of the line)
   * @param ident     second field, copied verbatim (`-` in the sample data)
   * @param user      third field, copied verbatim (`-` in the sample data)
   * @param time      timestamp taken from the bracketed field
   * @param method    request method, e.g. `GET`
   * @param uri       request target
   * @param version   protocol token, e.g. `HTTP/1.1`
   * @param status    three-digit response status
   * @param bytes     response size; a logged `-` is stored as 0
   * @param referrer  content of the first quoted header field, without the quotes
   * @param userAgent content of the second quoted header field, without the quotes
   */
  case class Access(
    ipAddress: InetAddress,
    ident: String,
    user: String,
    time: Date,
    method: String,
    uri: URI,
    version: String,
    status: Int,
    bytes: Int,
    referrer: String,
    userAgent: String
  )

  import scala.util.parsing.combinator._

  /**
   * Combinator grammar for combined-format access logs.
   *
   * Field conversions happen inside the semantic action of [[line]]; they may
   * throw (`URISyntaxException` from `new URI`, `NumberFormatException` from
   * `toInt` on a size above `Int.MaxValue`, `ParseException` from the date
   * format) instead of producing a parse failure.
   */
  object AccessLogParser extends JavaTokenParsers {

    import java.text.SimpleDateFormat
    import java.util.Locale
    import scala.util.matching.Regex

    // Month names in the log are English abbreviations ("Oct"), so the locale
    // is pinned; without it parsing depends on the JVM default locale.
    // NOTE: SimpleDateFormat is not synchronized; do not share this parser
    // between threads without external locking.
    val timeFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH)

    // Only blanks and tabs are skipped between tokens; line breaks are
    // significant because they separate log entries.
    override val whiteSpace = "[ \t]+".r

    /** End of line: `\n`, optionally preceded by `\r`. */
    def eol = opt('\r') <~ '\n'

    /** The double-quote delimiter around the request and header fields. */
    def q: String = "\""

    /**
     * All entries of a log, in order. Blank lines (including the empty tail
     * after a final newline) are accepted and dropped from the result.
     */
    def lines: Parser[List[Access]] =
      repsep(entry, eol) ^^ { entries => entries.flatten }

    // One physical line: either a log entry or nothing but skipped whitespace.
    private def entry: Parser[Option[Access]] =
      line ^^ { access => Option(access) } | literal("") ^^^ Option.empty[Access]

    /** Raw token sequence of one log line, before conversion to [[Access]]. */
    def lineMatch =
      (ipAddress ~
        id ~
        user ~
        "[" ~ time ~ "]" ~
        q ~ method ~ uri ~ version ~ q ~
        status ~
        bytes ~
        q ~ referrer ~ q ~
        q ~ ua ~ q)

    /** One log line converted to an [[Access]]. */
    def line: Parser[Access] = lineMatch ^^ {
      case (
        ip ~
        ident ~
        userName ~
        _ ~ timestamp ~ _ ~
        _ ~ httpMethod ~ target ~ ver ~ _ ~
        statusCode ~
        size ~
        _ ~ ref ~ _ ~
        _ ~ agent ~ _
      ) =>
        Access(
          InetAddress.getByName(ip),
          ident,
          userName,
          timeFormat.parse(timestamp),
          httpMethod,
          new URI(target),
          ver,
          statusCode.toInt,
          if (size == "-") 0 else size.toInt, // "-" means no body was sent
          ref,
          agent
        )
    }

    // ---- token definitions -------------------------------------------------

    def notSpaceAtLeastOne: Regex = "[^\\s]+".r
    def notQuoteAtLeastOne: Regex = "[^\"]+".r
    def num: String = "\\d"
    def atLeastOne: String = "+"

    // Content of a quoted field: possibly empty, and a backslash escapes the
    // following character, so an embedded \" does not end the field. The
    // value is returned as logged (escapes are not decoded).
    private def quotedContent: Regex = """(?:[^"\\]|\\.)*""".r

    /** Dotted-quad IPv4 address. */
    def ipAddress: Regex = """(?:\d{1,3}\.){3}\d{1,3}""".r
    def id: Regex = notSpaceAtLeastOne
    def user: Regex = notSpaceAtLeastOne
    def month: Regex = "[a-zA-Z]{3}".r

    /** `dd/MMM/yyyy:HH:mm:ss` followed by a `+hhmm` or `-hhmm` zone offset. */
    def time: Regex = """\d{2}/[a-zA-Z]{3}/\d{4}(?::\d{2}){3} [+-]\d{4}""".r
    def method: Regex = "[A-Z]+".r
    def uri: Regex = notSpaceAtLeastOne

    /** Protocol token such as `HTTP/1.0`, `HTTP/1.1` or `HTTP/2.0`. */
    def version: Regex = """HTTP/\d(?:\.\d)?""".r
    def status: Regex = """\d{3}""".r

    /** Response size: a decimal number, or `-` when nothing was sent. */
    def bytes: Parser[String] = "-" | """\d+""".r
    def referrer: Regex = quotedContent
    def ua: Regex = quotedContent

    /**
     * Parses a complete access log.
     *
     * @param json the log text (the parameter name is historical; the input
     *             is plain access-log text, not JSON)
     * @return a successful result holding one [[Access]] per non-blank line,
     *         or a failure describing the first position that did not match
     */
    def parse(json: String): ParseResult[List[Access]] = parseAll(lines, json)
  }

  /** Demo: parses three sample lines and prints the outcome. */
  def main(args: Array[String]): Unit = {
    val accessLog =
      """66.249.69.220 - - [03/Oct/2011:01:22:54 +0900] "GET /blog/23/ HTTP/1.1" 200 22716 "-" "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)"
        |64.233.172.34 - - [16/Oct/2011:04:03:27 +0900] "GET /api/search/?format=atom&q=testtest HTTP/1.1" 200 20 "-" "Rome Client (http://tinyurl.com/64t5n) Ver: UNKNOWN AppEngine-Google; (+http://code.google.com/appengine; appid: xxxxx)"
        |64.233.172.36 - - [19/Oct/2011:05:18:52 +0900] "GET / HTTP/1.1" 304 - "http://twitter.com/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1"
        |""".stripMargin

    AccessLogParser.parse(accessLog) match {
      case AccessLogParser.Success(accesses, _) => println(accesses)
      // Report the failure position instead of throwing from `.get`.
      case failure => Console.err.println(failure)
    }
  }
}
| // vim: set ts=4 sw=4 et: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment