Created
August 29, 2014 22:43
-
-
Save shrkw/5e39c59cf0391783fd55 to your computer and use it in GitHub Desktop.
Scalaのパーザコンビネータでアクセスログをパースしてみる ref: http://qiita.com/shrkw/items/e40f04a74840608fe62d
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example.service | |
import scala.io.Source | |
import scala.util.parsing.combinator.RegexParsers | |
/**
 * Example service that parses CloudFlare-style access log lines with Scala's
 * parser combinators ([[scala.util.parsing.combinator.RegexParsers]]).
 */
object CloudFlareLogParserService {

  /**
   * Parses the embedded sample access log one line at a time and prints each result.
   *
   * Blank lines are skipped. A line that cannot be parsed does not stop the run: the
   * `IllegalArgumentException` raised by the parser is printed instead.
   */
  def resolve: Unit = {
    val accessLog =
      """
        |www.cloudflare.com 1.1.1.1 1383426540 "GET / HTTP/1.1" 200 11022 "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)" "CLEAN.HUMAN 1383426470.808 off" "http://blog.cloudflare.com/"
        |www.cloudflare.com 1.1.1.1 1383426540 "GET / HTTP/1.1" 200 11022 "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)" "CLEAN.HUMAN 1383426470.808 off" "http://blog.cloudflare.com/"
      """.stripMargin

    accessLog
      .split('\n')
      .filter(_.trim.nonEmpty)
      .foreach { rawLine =>
        try println(CloudFlareLogParser.parseLine(rawLine))
        catch {
          case e: IllegalArgumentException => println(e)
        }
      }
  }

  /**
   * The fields kept from one access log entry.
   *
   * @param host      first token of the entry
   * @param ipAddress second token of the entry
   * @param size      numeric token that follows the three-digit status code
   * @param referrer  content of the last quoted field
   */
  case class CloudFlareAccessLog(host: String, ipAddress: String, size: Int, referrer: String)

  /**
   * Grammar for access log entries.
   *
   * Whitespace handling is inherited from `RegexParsers`, which by default skips
   * whitespace before every string literal and regex parser. As a consequence,
   * leading whitespace inside a quoted field is not part of the captured value.
   */
  object CloudFlareLogParser extends RegexParsers {

    /** Zero or more entries separated by a line break. */
    def lines: Parser[List[CloudFlareAccessLog]] = repsep(line, nl)

    /** One entry; only host, IP address, size and referrer are retained. */
    def line: Parser[CloudFlareAccessLog] =
      host ~ ipAddress ~ p2 ~ p3 ^^ {
        case h ~ ip ~ bytes ~ ref => CloudFlareAccessLog(h, ip, bytes.toInt, ref)
      }

    /** Line break: an optional carriage return followed by a line feed. */
    def nl: Parser[Option[Char]] = opt('\r') <~ '\n'

    /** Timestamp, request and status code are consumed; only the size is returned. */
    def p2: Parser[String] = logWindow ~> request ~> statusCode ~> size

    /** User agent and CloudFlare info are consumed; only the referrer is returned. */
    def p3: Parser[String] = userAgent ~> cloudFlareInfo ~> referrer

    def host: Parser[String] = """[\w.]+""".r

    def ipAddress: Parser[String] = """[\d.]+""".r

    def logWindow: Parser[String] = """\d+""".r

    def request: Parser[String] = quoted("[^\"]*".r)

    def statusCode: Parser[String] = """\d{3}""".r

    def size: Parser[String] = """\d+""".r

    def userAgent: Parser[String] = quoted("[^\"]*".r)

    def cloudFlareInfo: Parser[String] = quoted("[\\w. ]*".r)

    def referrer: Parser[String] = quoted("[^\"]*".r)

    /** Matches `content` between double quotes and returns it without the quotes. */
    private def quoted(content: scala.util.matching.Regex): Parser[String] =
      "\"" ~> content <~ "\""

    /**
     * Parses every entry contained in `input`.
     *
     * @param input one or more log entries separated by line breaks
     * @return the parsed entries; empty when `input` contains no entry
     * @throws IllegalArgumentException if `input` does not match the grammar
     */
    def parseLines(input: String): List[CloudFlareAccessLog] =
      parseAll(lines, input).getOrElse {
        throw new IllegalArgumentException("Failed to parse: " + input)
      }

    /**
     * Parses `input` and returns its first entry.
     *
     * @param input text holding at least one log entry
     * @return the first parsed entry
     * @throws IllegalArgumentException if `input` does not match the grammar or
     *                                  contains no entry at all
     */
    def parseLine(input: String): CloudFlareAccessLog =
      // `lines` accepts empty input and yields Nil, so `.head` would escape with a
      // NoSuchElementException; report it like every other parse failure instead.
      parseLines(input).headOption.getOrElse {
        throw new IllegalArgumentException("Failed to parse: " + input)
      }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment