Skip to content

Instantly share code, notes, and snippets.

Last active August 13, 2020 09:25
Show Gist options
  • Save truekonrads/d4446a9febde096c7351f49e8772b7c7 to your computer and use it in GitHub Desktop.
Save truekonrads/d4446a9febde096c7351f49e8772b7c7 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import urllib.parse
import ujson as json
except ImportError:
import json
import datetime
import sys
import re
import pytz
# Compiled regex for the current "#Fields:" layout; stays None until the
# first "#Fields:" directive has been seen in the input.
current_pattern = None

# Human-readable descriptions of IIS HTTP status codes, keyed by "status" or
# "status.substatus" exactly as they are assembled from the sc-status and
# sc-substatus log columns.
status_desc = {
    "201": "Created",
    "202": "Accepted",
    "203": "Nonauthoritative information",
    "204": "No content",
    "205": "Reset content",
    "206": "Partial content",
    "100": "Continue",
    "101": "Switching protocols",
    "200": "OK",
    "301": "Moved permanently",
    "302": "Object moved",
    "304": "Not modified",
    "307": "Temporary redirect",
    "400": "Bad request",
    "400.1": "Invalid XFF header",
    "400.11": "Invalid WebSocket request",
    "400.601": "Bad client request (ARR)",
    "400.602": "Invalid time format (ARR)",
    "400.603": "Parse range error (ARR)",
    "400.604": "Client gone (ARR)",
    "400.605": "Maximum number of forwards (ARR)",
    "400.606": "Asynchronous competition error (ARR)",
    "400.2": "Invalid Depth Header",
    "400.3": "Invalid If Header",
    "400.4": "Invalid Overwrite Header",
    "400.5": "Invalid Translate Header",
    "400.6": "Invalid Request Body",
    "400.7": "Invalid Content Length",
    "400.8": "Invalid Timeout",
    "400.9": "Invalid Lock Token",
    "401.1": "Logon failed",
    "401.2": "Logon failed due to server configuration",
    "401.3": "Unauthorized due to ACL on resource",
    "401.4": "Authorization failed by filter",
    "401.5": "Authorization failed by ISAPI/CGI application",
    "403.1": "Execute access forbidden",
    "403.12": "Mapper denied access",
    "403.13": "Client certificate revoked",
    "403.14": "Directory listing denied",
    "403.16": "Client certificate is untrusted or invalid",
    "403.17": "Client certificate has expired or is not yet valid",
    "403.18": "Cannot execute requested URL in the current application pool",
    "403.19": "Cannot execute CGI applications for the client browser in this application pool",
    "403.2": "Read access forbidden",
    "403.3": "Write access forbidden",
    "403.4": "SSL required",
    "403.5": "SSL 128 required",
    "403.6": "IP address rejected",
    "403.7": "Client certificate required",
    "403.8": "Site access denied",
    "403.20": "Forbidden: Passport logon failed",
    "403.21": "Forbidden: Source access denied",
    "403.22": "Forbidden: Infinite depth is denied",
    "403.502": "Forbidden: Too many requests from the same client IP; Dynamic IP Restriction limit reached",
    "404": "Not found",
    "404.1": "Request header too long",
    "404.11": "Request contains double escape sequence",
    "404.12": "Request contains high-bit characters",
    "404.13": "Content length too large",
    "404.14": "Request URL too long",
    "404.15": "Query string too long",
    "404.17": "Dynamic content mapped to the static file handler",
    "404.2": "ISAPI or CGI restriction",
    "404.3": "MIME type restriction",
    "404.4": "No handler configured",
    "404.5": "Denied by request filtering configuration",
    "404.6": "Verb denied",
    "404.7": "File extension denied",
    "404.8": "Hidden namespace",
    "404.9": "File attribute hidden",
    "405": "Method not allowed",
    "406": "Invalid MIME type",
    "412": "Precondition failed",
    "500": "Internal server error",
    "500.1": "Internal ASP error",
    "500.11": "Application is shutting down on the web server",
    "500.12": "Application is busy restarting on the web server",
    "500.13": "Web server is too busy",
    "500.15": "Direct requests for Global.asax are not allowed",
    "500.19": "Configuration data is invalid",
    "501": "Not implemented",
    "502.1": "CGI application timeout",
    # The table previously listed "502.2" twice, so the first description was
    # silently overwritten by the second; both meanings are kept in one entry.
    "502.2": "Bad gateway: Premature Exit / Map request failure (ARR)",
    "502.3": "WinHTTP asynchronous completion failure (ARR)",
    "502.4": "No server (ARR)",
    "502.5": "WebSocket failure (ARR)",
    "502.6": "Forwarded request failure (ARR)",
    "502.7": "Execute request failure (ARR)",
    "503": "Service unavailable",
    "503.2": "Concurrent request limit exceeded",
}
class IST(datetime.tzinfo):
    """Fixed-offset time zone at UTC+05:30, reported under the name "IST".

    The zone never observes daylight saving time, so ``dst`` is always zero
    and ``utcoffset`` is constant.
    """

    def utcoffset(self, dt):
        """Return the offset from UTC (5 h 30 min plus the zero DST delta)."""
        return datetime.timedelta(hours=5, minutes=30) + self.dst(dt)

    def dst(self, dt):
        """Return the DST adjustment, which is always zero for this zone."""
        return datetime.timedelta(0)

    def tzname(self, dt):
        """Return the zone abbreviation."""
        return "IST"
class HKT(datetime.tzinfo):
    """Fixed-offset time zone at UTC+08:00, reported under the name "HKT".

    The zone never observes daylight saving time, so ``dst`` is always zero
    and ``utcoffset`` is constant.
    """

    def utcoffset(self, dt):
        """Return the offset from UTC (8 h plus the zero DST delta)."""
        return datetime.timedelta(hours=8) + self.dst(dt)

    def dst(self, dt):
        """Return the DST adjustment, which is always zero for this zone."""
        return datetime.timedelta(0)

    def tzname(self, dt):
        """Return the zone abbreviation."""
        return "HKT"
# Main script body: reads a W3C-style log (with "#Fields:" directives), turns
# each matching entry into a dict of named fields, enriches it with
# timestamps and a status description, and writes one JSON object per line
# to stdout.
# NOTE(review): this copy of the script has lost its indentation and several
# statements; the gaps are flagged individually below.

# Time zone attached to every parsed timestamp.
tz = HKT()
# The first command-line argument selects the input; "-" is special-cased.
# NOTE(review): the statements that bind `f` are missing here — presumably
# stdin for "-" and an opened file otherwise; confirm against the original.
if sys.argv[1]=="-":
for line in f.readlines():
l = line.strip()
# print(l)
# Lines beginning with "#" are directives, not log entries.
if line[0] == "#":
# A "#Fields: " directive names the columns. Build a regex with one named
# group per column, separated by single spaces; "-" becomes "_" and
# parentheses are removed so that each column name is a valid group name
# (e.g. "cs(User-Agent)" becomes "csUser_Agent").
if line.startswith("#Fields: "):
current_pattern = re.compile(" ".join(["(?P<{}>[^ ]+)".format(x.replace(
"-", "_").replace("(", "").replace(")", "")) for x in l.split(" ")[1:]]) )
# NOTE(review): the call below is truncated in this copy (the format argument
# and closing parentheses are missing), and the directive branch presumably
# ended by skipping to the next line — confirm.
sys.stderr.write("The new regex pattern is: {}\n".format(
elif current_pattern is None:
raise Exception("Encountered log line but pattern not built")
m = current_pattern.match(l)
# Entries that do not match the current field layout are ignored.
if m:
d = m.groupdict()
# Combine the date and time columns into one datetime tagged with `tz`.
end_time = datetime.datetime.strptime(
d['date']+" "+d['time'], "%Y-%m-%d %H:%M:%S").replace(tzinfo=tz)
# adjust time
# Moves the wall-clock value forward by the zone's UTC offset; tzinfo is
# left unchanged. NOTE(review): presumably the logged time is UTC and this
# converts it to local time — confirm.
end_time = end_time+tz.utcoffset(end_time)
_timestamp = end_time
d['end_time'] = end_time.isoformat()
# NOTE(review): the right-hand operand of the subtraction below is missing
# from this copy (presumably a timedelta derived from d['time_taken'], as the
# debug comment further down suggests), as is the `try:` that pairs with the
# `except` that follows.
start_time = end_time - \
_timestamp = start_time
d['start_time'] = start_time.isoformat()
except Exception as e:
# _timestamp holds the start time when it was computed, else the end time.
d['_timestamp'] = _timestamp.isoformat()
# Keep the raw (stripped) log line alongside the parsed fields.
d['message'] = l
# Look up a description for the status: by plain status when the substatus
# is "0", otherwise by "status.substatus".
# NOTE(review): the `else:` and the try/except that would make "Undefined"
# a fallback for unknown codes are missing from this copy — confirm.
if d['sc_substatus'] == "0":
d['status_desc'] = status_desc[d['sc_status']]
d['status_desc'] = status_desc["{}.{}".format(
d['sc_status'], d['sc_substatus'])]
d['status_desc'] = "Undefined"
# "-" marks an empty field in the log; represent it as None (JSON null).
for k, v in d.items():
if v == '-':
d[k] = None
# print >>sys.stderr, d['start_time'], d['end_time'], int(d['time_taken'])
# The user-agent column has "+" where spaces would be; restore the spaces.
if "csUser_Agent" in d and d["csUser_Agent"] is not None:
d["csUser_Agent"] = d["csUser_Agent"].replace("+", " ")
# Percent-decode the query string for readability.
if 'cs_uri_query' in d and d['cs_uri_query'] is not None:
d['cs_uri_query'] = urllib.parse.unquote(d['cs_uri_query'])
# print json.dumps(d, ensure_ascii=False)
# Emit the record as a single line of ASCII-only JSON.
sys.stdout.write(json.dumps(d, ensure_ascii=True)+"\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment