Skip to content

Instantly share code, notes, and snippets.

@joswr1ght
Created November 16, 2022 11:10
Show Gist options
  • Save joswr1ght/a7e605168d291fedfaa6e3669c7b8307 to your computer and use it in GitHub Desktop.
Save joswr1ght/a7e605168d291fedfaa6e3669c7b8307 to your computer and use it in GitHub Desktop.
Consistently replace the IP addresses in the first column of a log file with a random IP
#!/usr/bin/env python3
# Anonymize the IPv4 address in the first space-delimited column of an ASCII file,
# replacing it with a consistent random IP address that excludes RFC1918 and other
# internal network ranges.
# You can use this to take an Apache (or Nginx or probably other log files as well) file
# and change each of the source IP addresses to another value that is preserved consistently
# throughout the output log file.
# Changes to the log file are written to STDOUT, so run this as
# `apacheanon.py access.log > new-access.log`.
#
# 2022-11-16 Joshua Wright
import sys
import random
from collections import defaultdict
# Candidate values for each octet of a generated replacement address.
#
# First octet: 1-223 only. 0 is "this network", 224-239 is multicast and
# 240-255 is reserved, so none of those can be a real client source address.
# The first-octet values of the private, loopback and link-local networks
# (10, 127, 169, 172, 192) are dropped as well. That removes the whole /8 in
# each case, which is more than the reserved portion, but keeps this simple.
octet1list = [
    octet for octet in range(1, 224)
    if octet not in (10, 127, 169, 172, 192)
]
# Middle octets may take any value, including 255.
octet2list = list(range(0, 256))
# The third octet shares the second octet's pool (same list object).
octet3list = octet2list
# Last octet skips 0 and 255 so the result never looks like a network or
# broadcast address.
octet4list = list(range(1, 255))
def randip():
    """Build one random dotted-quad IPv4 address string.

    Each octet is drawn independently with random.choice from its own
    module-level pool (octet1list through octet4list), first octet first.
    """
    pools = (octet1list, octet2list, octet3list, octet4list)
    return '.'.join(str(random.choice(pool)) for pool in pools)
# Command-line entry point: expects exactly one argument, the log file path.
if len(sys.argv) != 2:
    sys.stderr.write('Consistently replace the IP addresses in the first column of a log file with a random IP.\n')
    sys.stderr.write('Writes new log data on STDOUT.\n\n')
    sys.stderr.write(f'Usage: {sys.argv[0]} <access.log>\n')
    # A usage error is a failure; exit non-zero so shells and scripts notice.
    sys.exit(1)

# Replacement addresses already handed out, so that two different original
# addresses are never merged into the same replacement.
issuedips = set()


def uniquerandip():
    """Return a randip() address that has not been issued before."""
    candidate = randip()
    while candidate in issuedips:
        candidate = randip()
    issuedips.add(candidate)
    return candidate


# Maps each original first-column value to its replacement; a new replacement
# is generated the first time a value is looked up and reused afterwards.
substituteips = defaultdict(uniquerandip)
cnt = 0
with open(sys.argv[1], 'r') as logfile:
    for logline in logfile:
        ip, sep, restoflog = logline.partition(' ')
        if not sep:
            # No space on this line: the first column is the whole line, so
            # split off only the trailing newline (if any).
            ip = logline.rstrip('\n')
            restoflog = logline[len(ip):]
        if not ip:
            # Blank line, or a line starting with a space: there is no
            # first-column value to replace, so copy the line through as-is.
            sys.stdout.write(logline)
            continue
        sys.stdout.write(f'{substituteips[ip]}{sep}{restoflog}')
        cnt += 1
sys.stderr.write(f'Substituted {cnt} IP addresses.\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment