Created
November 16, 2022 11:10
-
-
Save joswr1ght/a7e605168d291fedfaa6e3669c7b8307 to your computer and use it in GitHub Desktop.
Consistently replace the IP addresses in the first column of a log file with a random IP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Anonymize the IPv4 address in the first (space-delimited) column of an ASCII file,
# replacing it with a consistent random IP address that excludes RFC 1918 and other
# internal network IP addresses.
# You can use this to take an Apache (or Nginx, and probably other) log file
# and change each of the source IP addresses to another value that is preserved consistently
# throughout the output log file.
# Changes to the log file are written to STDOUT, so run this as
# `apacheanon.py access.log > new-access.log`.
#
# 2022-11-16 Joshua Wright
import sys | |
import random | |
from collections import defaultdict | |
# First-octet values that are never handed out because each one contains a
# private, loopback, or link-local range (10/8, 127/8, 169.254/16,
# 172.16/12, 192.168/16). Dropping the whole first octet eliminates more
# than just the reserved portions of these networks, which is an accepted
# simplification.
_excluded_first_octets = {10, 127, 169, 172, 192}

# Candidate first octets are limited to 1-223: 0 is "this network",
# 224-239 is multicast and 240-255 is reserved, so none of those can appear
# as a real client source address in a log.
octet1list = [
    octet for octet in range(1, 224) if octet not in _excluded_first_octets
]

# The two middle octets are drawn from 0-254; both names share one list.
octet2list = list(range(0, 255))
octet3list = octet2list

# The last octet is drawn from 1-254, skipping .0 and .255.
octet4list = list(range(1, 255))
def randip():
    """Return a random dotted-quad IPv4 address as a string.

    One value is picked with random.choice() from each of the module-level
    octet lists, in order from first octet to last, and the four values are
    joined with dots.
    """
    pools = (octet1list, octet2list, octet3list, octet4list)
    # Build the list eagerly so the draws happen in first-to-last octet order.
    picked = [str(random.choice(pool)) for pool in pools]
    return '.'.join(picked)
# Require exactly one argument: the path of the log file to anonymize.
if len(sys.argv) != 2:
    sys.stderr.write('Consistently replace the IP addresses in the first column of a log file with a random IP.\n')
    sys.stderr.write('Writes new log data on STDOUT.\n\n')
    sys.stderr.write(f'Usage: {sys.argv[0]} <access.log>\n')
    # A wrong invocation is an error, so report it with a non-zero status.
    sys.exit(1)

# Replacement addresses that have already been assigned to some source IP.
usedips = set()


def _uniquerandip():
    """Return a random IP that is not yet assigned to another source IP.

    randip() alone can return the same address twice, which would merge two
    distinct clients into one in the anonymized log; redraw until the
    address is unused.
    """
    while True:
        candidate = randip()
        if candidate not in usedips:
            usedips.add(candidate)
            return candidate


# Maps each original first-column value to its replacement; a replacement is
# generated on first lookup and reused for every later occurrence.
substituteips = defaultdict(_uniquerandip)

# Number of log lines whose first column was replaced.
cnt = 0
with open(sys.argv[1], 'r') as logfile:
    for logline in logfile:
        ip, sep, restoflog = logline.partition(' ')
        if not sep:
            # No space on this line: the whole line (minus its line ending)
            # is the first column.
            ip = logline.rstrip('\r\n')
            restoflog = logline[len(ip):]
            if not ip:
                # Blank line: nothing to anonymize, pass it through.
                sys.stdout.write(logline)
                continue
        sys.stdout.write(f'{substituteips[ip]}{sep}{restoflog}')
        cnt += 1

sys.stderr.write(f'Substituted {cnt} IP addresses.\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment