Created
March 16, 2021 22:23
-
-
Save canimus/b79d920e7d95c26d5b08dd9f03c2ab6f to your computer and use it in GitHub Desktop.
IBM Web Server Log Parse
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Pseudonymize an IBM web server access log.

Reads ``logs.csv`` line by line, replaces the client IP with its MD5 hex
digest, reduces the cookie column to the JSESSIONID value (or ``EMPTY``),
and writes the resulting rows to the file named by the ``FILE`` environment
variable (default ``hashed.csv``).

Environment variables:
    SEPARATOR: decimal Unicode code point of the column delimiter
        (default 449, i.e. U+01C1).
    FILE: path of the output file (default ``hashed.csv``).
"""
from hashlib import md5 as xx
from collections import namedtuple
import os
import re

# Environment parameters
# os.getenv always returns a string when the variable is set, so the value
# must go through int() before chr(); passing the raw string raises TypeError.
COLUMN_SEPARATOR = chr(int(os.getenv('SEPARATOR', '449')))
FILE_NAME = os.getenv('FILE', 'hashed.csv')

# Regular expression to capture JSESSIONID
JSESSION_PATTERN = r'JSESSIONID=([a-zA-Z0-9_\-]+)?[;\s]?'
# Compiled once so the loop below does not look the pattern up per line.
_JSESSION_RE = re.compile(JSESSION_PATTERN)

# Data Structure for log entries
AccessLogRow = namedtuple('AccessLogRow', 'ip date method code url referer agent cookie version id')

with open("logs.csv") as reader, open(FILE_NAME, "w") as writer:
    for line in reader:
        # Convert log entry to data structure.
        # A line that does not split into exactly ten fields raises TypeError.
        # The trailing newline of the line stays attached to the last field
        # (id), which is what terminates each row in the output file.
        log_entry = AccessLogRow(*line.split(COLUMN_SEPARATOR))

        # Hashing Fields
        # NOTE(review): MD5 over a bare IP address is a weak pseudonym
        # (the IPv4 space is small enough to enumerate) - confirm this is
        # acceptable for the intended use.
        haship = xx(log_entry.ip.encode('utf-8')).hexdigest()

        # Default value for jsession or pattern matched.
        # The capture group is optional, so a cookie such as "JSESSIONID=;"
        # matches with group(1) == None, and a cookie without "JSESSIONID="
        # does not match at all; both cases fall back to 'EMPTY'.
        jsessionid = 'EMPTY'
        found = _JSESSION_RE.search(log_entry.cookie)
        if found is not None and found.group(1) is not None:
            jsessionid = found.group(1)

        # Output columns keep the input order; only ip and cookie are replaced.
        writer.write(COLUMN_SEPARATOR.join(
            log_entry._replace(ip=haship, cookie=jsessionid)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment