Last active
February 9, 2023 23:16
-
-
Save ruxo/4515c0e5ad3fa11435e68c033fbdaed7 to your computer and use it in GitHub Desktop.
Parse IIS Log
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| open System | |
| open System.Net | |
| open System.IO | |
| type LogLine = | |
| { Timestamp: DateTime | |
| Method: string | |
| Path: string | |
| Query: string | |
| HttpVersion: string | |
| UserAgent: string | |
| Referer: string | |
| HttpStatus: HttpStatusCode | |
| HttpSubStatus: int | |
| SystemStatus: int | |
| ByteSent: int | |
| ByteReceived: int | |
| TimeProcessed: TimeSpan | |
| } | |
| module LogLine = | |
| let Query log = log.Query | |
| let UserAgent log = log.UserAgent | |
| let Timestamp log = log.Timestamp | |
| let Path log = log.Path | |
| let TimeProcessed log = log.TimeProcessed | |
| let requestTime log = log.Timestamp - log.TimeProcessed | |
| type AggregatedStats = | |
| { Count: int | |
| AverageTime: float | |
| } | |
| module AggregatedStats = | |
| let Count stat = stat.Count | |
| let TimeConsumed stat = float(stat.Count) * stat.AverageTime | |
| module W3CLogFields = | |
| [<Literal>] | |
| let Date = "date" | |
| [<Literal>] | |
| let Time = "time" | |
| [<Literal>] | |
| let Method = "cs-method" | |
| [<Literal>] | |
| let Path = "cs-uri-stem" | |
| [<Literal>] | |
| let Query = "cs-uri-query" | |
| [<Literal>] | |
| let Version = "cs-version" | |
| [<Literal>] | |
| let UserAgent = "cs(User-Agent)" | |
| [<Literal>] | |
| let Referer = "cs(Referer)" | |
| [<Literal>] | |
| let Status = "sc-status" | |
| [<Literal>] | |
| let Substatus = "sc-substatus" | |
| [<Literal>] | |
| let SystemStatus = "sc-win32-status" | |
| [<Literal>] | |
| let ByteSent = "sc-bytes" | |
| [<Literal>] | |
| let ByteReceived = "cs-bytes" | |
| [<Literal>] | |
| let TimeTaken = "time-taken" | |
| let all = | |
| [ Date | |
| Time | |
| Method | |
| Path | |
| Query | |
| Version | |
| UserAgent | |
| Referer | |
| Status | |
| Substatus | |
| SystemStatus | |
| ByteSent | |
| ByteReceived | |
| TimeTaken ] | |
| type LogFieldIndexer = Map<string,int> | |
| let logField :LogFieldIndexer -> string[] -> string -> string = fun index s key -> s.[index.[key]] | |
| let toLogLine :LogFieldIndexer -> string -> LogLine = fun index s -> | |
| let dashBlank line = if line = "-" then String.Empty else line | |
| let ss = s.Split(' ') | |
| let field = logField index ss | |
| { Timestamp = DateTime.Parse(field W3CLogFields.Date + "T" + field W3CLogFields.Time) | |
| Method = field W3CLogFields.Method | |
| Path = field W3CLogFields.Path | |
| Query = dashBlank <| field W3CLogFields.Query | |
| HttpVersion = field W3CLogFields.Version | |
| UserAgent = field W3CLogFields.UserAgent | |
| Referer = dashBlank <| field W3CLogFields.Referer | |
| HttpStatus = Enum.Parse(typeof<HttpStatusCode>, field W3CLogFields.Status) :?> HttpStatusCode | |
| HttpSubStatus = Int32.Parse(field W3CLogFields.Substatus) | |
| SystemStatus = Int32.Parse(field W3CLogFields.SystemStatus) | |
| ByteSent = Int32.Parse(field W3CLogFields.ByteSent) | |
| ByteReceived = Int32.Parse(field W3CLogFields.ByteReceived) | |
| TimeProcessed = field W3CLogFields.TimeTaken |> Double.Parse |> TimeSpan.FromMilliseconds | |
| } | |
| let isComment :string -> bool = fun s -> s.Length = 0 || s.[0] = '#' | |
| let loadLogFile path = | |
| let (comments, data) = path |> File.ReadAllLines |> Array.partition isComment | |
| let fieldDef = comments |> Seq.find (fun s -> s.StartsWith("#Fields:")) | |
| let fields = fieldDef.Split(':').[1].Trim().Split(' ') | |
| let indexer = | |
| W3CLogFields.all | |
| |> Seq.map (fun s -> s, fields |> Array.findIndex ((=)s)) | |
| |> Map.ofSeq | |
| data |> Seq.map (toLogLine indexer) | |
| let loadAllLogs :string * string -> LogLine seq = Directory.EnumerateFiles >> Seq.collect loadLogFile | |
| let findTop<'any,'sortkey when 'sortkey: comparison> | |
| :int -> ('any -> 'sortkey) -> 'any seq -> 'any seq = | |
| fun N getter -> Seq.sortByDescending getter >> Seq.truncate N | |
| let findTop10<'any,'sortkey when 'sortkey: comparison> | |
| :('any -> 'sortkey) -> 'any seq -> 'any seq = | |
| findTop 10 | |
| let trValue :('oldv -> 'newv) -> 'key * 'oldv -> 'key * 'newv = | |
| fun f (k,v) -> (k, f(v)) | |
| let aggregateStats :LogLine[] -> AggregatedStats = fun logs -> | |
| { Count = logs.Length | |
| AverageTime = logs |> Seq.averageBy (fun ll -> ll.TimeProcessed.TotalSeconds) | |
| } | |
| let spatialFrequency :LogLine[] -> (int * int)[] = fun logs -> | |
| let secDiff(t1: DateTime, t2: DateTime) = (t2 - t1).TotalSeconds | |
| let bin2 x = x / 2 | |
| match logs with | |
| | [||] | |
| | [|_|] -> [| Int32.MaxValue, 0 |] | |
| | logs -> | |
| logs |> Seq.map LogLine.requestTime | |
| |> Seq.sort | |
| |> Seq.pairwise | |
| |> Seq.map (int << ceil << secDiff) | |
| |> Seq.groupBy bin2 | |
| |> Seq.map (trValue Seq.length) | |
| |> Seq.sortBy fst | |
| |> Seq.toArray | |
| //-------- MAIN ------- | |
| let path = Environment.GetCommandLineArgs().[2] | |
| let logs = loadAllLogs (path, "*.log") |> Seq.toArray | |
| printfn "%d lines has been loaded." logs.Length | |
| let pathLogs = logs |> Seq.groupBy LogLine.Path | |
| |> Seq.map (trValue Seq.toArray) | |
| |> Seq.toArray | |
| printfn "%d paths have been logged." pathLogs.Length | |
| let startDate = logs |> Seq.map LogLine.requestTime |> Seq.min | |
| let endDate = logs |> Seq.map LogLine.Timestamp |> Seq.max | |
| let samplingTime = endDate - startDate | |
| let toFrequency x = x / samplingTime.TotalMinutes | |
| printfn "Data range: %d minutes started from %A" samplingTime.Minutes startDate | |
| let simpleLogs = pathLogs |> Array.map (trValue aggregateStats) | |
| printfn "Top 10 calls" | |
| printfn " %9s %7s %7s %s" "RPM" "Average" "% calls" "path" | |
| simpleLogs |> findTop10 (snd >> AggregatedStats.Count) | |
| |> Seq.iter (fun (uri,stats) -> | |
| printfn " %9.3f %7.3f %7.2f %s" | |
| (toFrequency <| float stats.Count) | |
| stats.AverageTime | |
| (100. * float stats.Count / float logs.Length) | |
| uri) | |
| printfn "" | |
| printfn "Top 10 most time consumed" | |
| printfn " %13s %s" "Time consumed" "path" | |
| simpleLogs |> findTop10 (snd >> AggregatedStats.TimeConsumed) | |
| |> Seq.iter (fun (uri,stats) -> printfn " %13.3f %s" (AggregatedStats.TimeConsumed stats) uri) | |
| printfn "" | |
| type ConsecutiveStats = | |
| { Diff: int | |
| Frequency: int | |
| ParamVariation: int | |
| TotalCalls: int | |
| } | |
| module ConsecutiveStats = | |
| let frequency stat = stat.Frequency | |
| let variationPercent stat = | |
| if stat.TotalCalls = 1 then | |
| 0. | |
| else | |
| float(stat.ParamVariation - 1) / float(stat.TotalCalls - 1) | |
| let cachability stat = 1. - (stat |> variationPercent) | |
| let collectStats :LogLine[] -> ConsecutiveStats = fun ll -> | |
| let maxFreq = ll |> spatialFrequency |> Array.maxBy snd | |
| { Diff = fst maxFreq | |
| Frequency = snd maxFreq | |
| ParamVariation = ll |> Seq.map LogLine.Query |> Seq.distinct |> Seq.length | |
| TotalCalls = ll.Length | |
| } | |
| printfn "Top consecutive calls" | |
| printfn " %4s %9s %16s %s" "Diff" "Frequency" "Cacheability (%)" "path" | |
| pathLogs |> Seq.map (trValue collectStats) | |
| |> findTop 20 (snd >> ConsecutiveStats.frequency) | |
| |> Seq.iter (fun (uri,stats) -> printfn " %4d %9d %16.2f %s" stats.Diff stats.Frequency (100. * ConsecutiveStats.cachability stats) uri) | |
| let agents = | |
| logs |> Seq.map LogLine.UserAgent | |
| |> Seq.groupBy id | |
| |> Seq.map (trValue Seq.length) | |
| |> findTop10 snd | |
| |> Seq.toArray | |
| if agents.Length > 1 then | |
| printfn "Top 10 agents" | |
| printfn " %6s %s" "Agents" "Count" | |
| agents |> Seq.iter (fun (r, count) -> printfn " %6d %s" count r) | |
| else | |
| printfn "No agent detected" | |
| 0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment