Last active
April 13, 2024 15:42
-
-
Save DavideWiest/3dbb8f48307bd0810cabe824834a3500 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Parse Apache2 access-log entries into JSON, optionally adding IP details from the ipinfo.io API.

Example entry:
95.90.223.124 - - [02/Jan/2023:18:38:29 +0000] "GET / HTTP/1.1" 500 6024
""" | |
import sys | |
import traceback | |
import json | |
import time | |
from urllib.request import urlopen | |
from urllib.error import HTTPError | |
from tqdm import tqdm | |
# --- Interactive configuration ---------------------------------------------
# Path of the access log to parse.
log_loc = input("Log path: ")

# Default output path: the log's file name with everything from its first "."
# replaced by ".json" (or ".json" appended when the name has no "."), placed
# in the same directory as the log.  rpartition keeps the directory part and
# its "/" separator intact, so "/var/log/access.log" -> "/var/log/access.json".
_log_dir, _log_sep, _log_name = log_loc.rpartition("/")
filename = _log_dir + _log_sep + _log_name.split(".", 1)[0] + ".json"

# An empty answer selects the default output path.
out_loc = input(f"Output path (json) (default {filename}): ")
out_loc = filename if out_loc == "" else out_loc

# An empty answer counts as "yes"; anything other than "y"/"Y"/"" is "no".
make_api_calls = input("Use ipinfo.io to extract details about ips? (1k free requests/d) (y/n) (default y): ")
make_api_calls = make_api_calls.lower() in ("y", "")
# Load the whole log into memory as a list of lines, each terminated with ".".
# The "." serves as the split point for the last field of an entry; a blank
# line therefore becomes a lone "." which the parsing loop skips.
try:
    with open(log_loc, "r", encoding="utf-8") as f:
        acclog = [line + "." for line in f.read().split("\n")]
except Exception:
    print(f"Input file {log_loc} cannot be opened")
    print(traceback.format_exc())
    # Exit with a non-zero status so shells and calling scripts see the failure.
    sys.exit(1)
# Layout of one log entry as (output key, delimiter that ends the field).
# Fields are cut off the front of the entry in exactly this order.
# NOTE(review): in the example entry the last two fields look like the HTTP
# status code and the response size; the key names "response_type" and
# "delay" are kept as-is because they are part of the JSON output.
_ENTRY_FIELDS = [
    ("ip", " - - ["),
    ("datetime", '] "'),
    ("request_type", " "),
    ("rel_url", " "),
    ("http_type", '" '),
    ("response_type", " "),
    ("delay", "."),
]

# Parallel lists consumed by the parsing loop.
FORMAT_SPLITPOINTS = [delimiter for _key, delimiter in _ENTRY_FIELDS]
FORMAT_KEYS = [key for key, _delimiter in _ENTRY_FIELDS]

# Parsed entries, one dict per non-blank log line.
acc_json = []
# ip -> details dict returned by ipinfo.io.
ip_details_memo = {}
def mk_geoloc_request(ip, sleep=0):
    """Return the ipinfo.io details for ``ip``, memoized in ``ip_details_memo``.

    An address that is already in ``ip_details_memo`` is answered from the
    memo without any network traffic, so each unique IP costs at most one
    successful API request.

    Args:
        ip: The IP address to look up.
        sleep: Seconds to wait before the first request (0 = no wait).  On an
            HTTP 429 response the wait is increased by 10 seconds and the
            request is retried, until the wait would exceed 100 seconds.

    Returns:
        The decoded JSON document returned by ``https://ipinfo.io/<ip>/json``.

    Raises:
        HTTPError: For any HTTP error status other than 429.
        Exception: When the 429 responses persist through all retries.
    """
    # Serve repeated addresses from the memo instead of spending API quota.
    if ip in ip_details_memo:
        return ip_details_memo[ip]

    url = f"https://ipinfo.io/{ip}/json"
    while True:
        if sleep != 0:
            print(f"sleeping for {sleep}")
            time.sleep(sleep)
        try:
            # The context manager closes the HTTP response once it is decoded.
            with urlopen(url) as response:
                ip_details_memo[ip] = json.load(response)
        except HTTPError as e:
            if e.code != 429:
                # Nothing was stored for this ip; surface the real HTTP error
                # rather than failing later with a KeyError on the memo.
                raise
            sleep += 10
            if sleep > 100:
                raise Exception("Waiting for 429 to expire didn't work") from e
            continue
        return ip_details_memo[ip]
def save_data():
    """Write the parsed entries to ``out_loc`` as JSON and print a summary.

    The output document has the shape ``{"summary": {}, "entries": [...]}``
    where ``entries`` is the module-level ``acc_json`` list.  The summary line
    reports the number of entries actually parsed (``len(acc_json)``) and,
    when any IP lookups were stored, the number of unique addresses.

    Exits the process with status 1 if the output file cannot be written.
    """
    try:
        with open(out_loc, "w", encoding="utf-8") as f:
            json.dump({"summary": {}, "entries": acc_json}, f, indent=4)
    except Exception:
        print(f"Output file {out_loc} cannot be opened")
        print(traceback.format_exc())
        # Non-zero status: the data was not saved.
        sys.exit(1)

    msg2 = f"({len(ip_details_memo)} unique ip addresses)" if ip_details_memo else ""
    # Count the entries that were parsed, not the raw lines of the input
    # (which include blank lines and lines never reached after an abort).
    print(f"\nSuccessfully parsed {len(acc_json)} access log entries {msg2}\n")
# Parse every log line into a dict appended to acc_json.  Whatever happens
# (normal completion, an unexpected error, or Ctrl+C), the entries parsed so
# far are written out by save_data().
try:
    for i, entry in enumerate(tqdm(acclog)):
        # A lone "." is a blank input line (every line gets a "." appended
        # when the log is loaded).
        if entry == ".":
            continue

        # Keep a direct reference to the new record: its position in acc_json
        # differs from the line index i as soon as one line has been skipped.
        record = {}
        acc_json.append(record)

        # Cut one field after another off the front of the entry.
        for key, splitpoint in zip(FORMAT_KEYS, FORMAT_SPLITPOINTS):
            try:
                record[key], entry = entry.split(splitpoint, 1)
            except ValueError as e:
                # Delimiter not found: the line does not match the expected
                # format.  Keep the fields parsed so far and stop, because
                # the remaining delimiters would cut the wrong text.
                print(f"Error occured at entry {i} for {key}: {e}\nsplitpoint: {splitpoint}\nentry: {entry}")
                break

        if "ip" in record and make_api_calls:
            record["ip_details"] = mk_geoloc_request(record["ip"])
except Exception:
    # Report why processing stopped early instead of failing silently.
    print(traceback.format_exc())
finally:
    save_data()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment