Skip to content

Instantly share code, notes, and snippets.

@DavideWiest
Last active April 13, 2024 15:42
Show Gist options
  • Save DavideWiest/3dbb8f48307bd0810cabe824834a3500 to your computer and use it in GitHub Desktop.
"""
Parses apache2 access log entries into json, adding ip details with the ipinfo.io API
Example entry:
95.90.223.124 - - [02/Jan/2023:18:38:29 +0000] "GET / HTTP/1.1" 500 6024
"""
import sys
import traceback
import json
import time
from urllib.request import urlopen
from urllib.error import HTTPError
from tqdm import tqdm
# --- Interactive configuration ---------------------------------------------
# Path of the Apache access log to parse.
log_loc = input("Log path: ")

# Default output path: same directory as the log, with everything from the
# first "." of the file name replaced by ".json". rpartition keeps the "/"
# separator, so "/var/log/access.log" yields "/var/log/access.json" (joining
# the directory parts and appending the stem directly dropped the separator
# and produced "/var/logaccess.json").
_log_dir, _log_sep, _log_name = log_loc.rpartition("/")
# Fall back to the full name for dotfiles (".foo") so the stem is never empty.
_log_stem = _log_name.split(".", 1)[0] or _log_name
filename = _log_dir + _log_sep + _log_stem + ".json"

# An empty answer selects the derived default.
out_loc = input(f"Output path (json) (default {filename}): ")
out_loc = out_loc or filename

# An empty answer counts as "y"; anything other than "y"/"Y"/"" disables lookups.
make_api_calls = input("Use ipinfo.io to extract details about ips? (1k free requests/d) (y/n) (default y): ")
make_api_calls = make_api_calls.lower() in ("y", "")
# Load the whole log and append "." to every line. That trailing "." is the
# terminator the last entry of FORMAT_SPLITPOINTS splits on. Blank lines
# (including the empty piece after a trailing newline) become "." and are
# skipped by the parsing loop.
try:
    with open(log_loc, "r", encoding="utf-8") as f:
        acclog = [line + "." for line in f.read().split("\n")]
except Exception:
    print(f"Input file {log_loc} cannot be opened")
    print(traceback.format_exc())
    # Non-zero status so a calling shell/script can tell the run failed.
    sys.exit(1)
# Each field of a log line paired with the delimiter that ends it. Fields are
# consumed left to right; the final "." is the terminator appended to every
# line when the log is loaded.
_FIELD_LAYOUT = [
    ("ip", " - - ["),
    ("datetime", "] \""),
    ("request_type", " "),
    ("rel_url", " "),
    ("http_type", "\" "),
    ("response_type", " "),
    ("delay", "."),
]
FORMAT_SPLITPOINTS = [delimiter for _, delimiter in _FIELD_LAYOUT]
FORMAT_KEYS = [key for key, _ in _FIELD_LAYOUT]

# Parsed entries, appended in the order the lines are processed.
acc_json = []
# ipinfo.io responses keyed by ip address.
ip_details_memo = {}
def mk_geoloc_request(ip, sleep=0):
    """Return the ipinfo.io details for ``ip``, caching them in ``ip_details_memo``.

    Args:
        ip: Address to look up; interpolated into ``https://ipinfo.io/{ip}/json``.
        sleep: Seconds to wait before the first request. After every HTTP 429
            response the wait grows by 10 seconds and the request is retried.

    Returns:
        The decoded JSON response for ``ip``.

    Raises:
        HTTPError: For any HTTP error status other than 429.
        Exception: If the request is still rate limited once the wait would
            exceed 100 seconds.
    """
    # Answer repeated addresses from the cache so each unique ip costs at most
    # one API request.
    if ip in ip_details_memo:
        return ip_details_memo[ip]

    url = f"https://ipinfo.io/{ip}/json"
    while True:
        if sleep != 0:
            print(f"sleeping for {sleep}")
            time.sleep(sleep)
        try:
            # The context manager closes the HTTP response once it is parsed.
            with urlopen(url) as response:
                ip_details_memo[ip] = json.load(response)
        except HTTPError as e:
            if e.code != 429:
                # Not a rate limit: surface the real error instead of failing
                # later with a KeyError on the missing cache entry.
                raise
            sleep += 10
            if sleep > 100:
                raise Exception("Waiting for 429 to expire didn't work") from e
            continue
        return ip_details_memo[ip]
def save_data():
    """Write the parsed entries to ``out_loc`` as JSON and print a summary.

    The output document has the shape ``{"summary": {}, "entries": acc_json}``.
    The summary line reports the number of entries actually parsed and, when
    any ip lookups were cached, the number of unique addresses.

    If the output cannot be written, the traceback is printed and the process
    exits with status 1.
    """
    try:
        with open(out_loc, "w", encoding="utf-8") as f:
            json.dump({"summary": {}, "entries": acc_json}, f, indent=4)
    except Exception:
        print(f"Output file {out_loc} cannot be opened")
        print(traceback.format_exc())
        # Non-zero status so a calling shell/script can tell the run failed.
        sys.exit(1)

    msg2 = f"({len(ip_details_memo)} unique ip addresses)" if ip_details_memo else ""
    # Count parsed entries, not raw lines: blank lines are skipped and a run
    # may stop early, so len(acclog) would overstate the result.
    print(f"\nSuccessfully parsed {len(acc_json)} access log entries {msg2}\n")
# Parse every log line into a dict keyed by FORMAT_KEYS and, if enabled,
# attach the ipinfo.io details. Whatever was parsed is saved even when the
# run is cut short by an error (e.g. a rate limit that never clears).
try:
    for i, entry in enumerate(tqdm(acclog)):
        # "." is what an empty line looks like after loading.
        if entry == ".":
            continue
        # Keep a direct reference to the record: once a line has been skipped,
        # the line index `i` no longer matches the record's position in
        # acc_json, so indexing acc_json[i] would fail.
        record = {}
        acc_json.append(record)
        for key, splitpoint in zip(FORMAT_KEYS, FORMAT_SPLITPOINTS):
            try:
                record[key], entry = entry.split(splitpoint, 1)
            except ValueError as e:
                # Delimiter not found: report it and try the remaining fields
                # against the still-unconsumed text.
                print(f"Error occured at entry {i} for {key}: {e}\nsplitpoint: {splitpoint}\nentry: {entry}")
        if "ip" in record and make_api_calls:
            record["ip_details"] = mk_geoloc_request(record["ip"])
except Exception:
    # Say why parsing stopped instead of ending silently with partial output.
    print("\nParsing stopped early because of an error; saving the entries parsed so far")
    print(traceback.format_exc())
save_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment