-
-
Save erikng/c7dd964ccd3f9afeea0f to your computer and use it in GitHub Desktop.
Playing with parsing Server caching service logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tempfile, os.path, shutil, glob, os, subprocess, re | |
# Master switch for the diagnostic output produced by log(); set to False
# to silence it.
debug = True

# It should take the logs from tmp and clone them somewhere
# It can then bunzip and combine them


def log(s):
    """Print *s* to stdout, but only while the module-level ``debug`` flag is truthy.

    Args:
        s: The value to print (normally an already formatted string).

    Returns:
        None.
    """
    # ``debug`` is only read here, so no ``global`` declaration is required.
    if debug:
        # The parenthesised single-argument form is valid both as a
        # Python 2 print statement and as a Python 3 print() call.
        print(s)
# Directory whose contents are cloned into a private temp area before parsing.
logPath = "/Users/mike/Desktop/CacheLogs"

# Only log lines whose first space-separated field equals this string are
# processed.
targetDate = "2015-04-13"

# Factor that converts a "<number> <unit>" figure from the log into bytes.
# Are we sure the logs aren't in decimal bytes? Oh well, binary it is..
sizeMultiplier = {
    'GB': 1073741824.,
    'MB': 1048576.,
    'bytes': 1.,
}
# Stage a private copy of the logs, expand the compressed ones and
# concatenate every .log file into ``rawLog``.
#
# Make a temporary directory to work with, per python docs:
# "The directory is readable, writable, and searchable only by the creating user ID."
tmpDir = tempfile.mkdtemp()
log("tmpDir: %s" % tmpDir)
try:
    # Clone the contents of logPath over into the 'logs' subdirectory
    # (shutil.copytree doesn't want the directory to pre-exist)
    tmpLogs = os.path.join(tmpDir, 'logs')
    log("tmpLogs: %s" % tmpLogs)
    shutil.copytree(logPath, tmpLogs)

    # Expand any .bz2 files in the directory (Server 4.1+).
    # Every path handed to bzip2 is absolute, so the working directory is
    # left alone; changing into tmpLogs would strand the process in a
    # directory that is deleted below.
    for bzLog in sorted(glob.glob(os.path.join(tmpLogs, '*.bz2'))):
        subprocess.check_call(["bzip2", "-d", bzLog])

    # Now combine all .log files in the destination into a temp file that's
    # removed when it is closed or python exits.
    rawLog = tempfile.TemporaryFile()
    # glob returns names in arbitrary order; sorting only makes the
    # concatenation order repeatable, it does not guarantee chronology.
    for anyLog in sorted(glob.glob(os.path.join(tmpLogs, '*.log'))):
        with open(anyLog, 'rb') as f:
            shutil.copyfileobj(f, rawLog)

    # skip back to the beginning of our newly concatenated log
    rawLog.seek(0)
finally:
    # Reap tmpDir whether or not staging succeeded: on success everything we
    # need now lives in the self-disposing file, on failure nothing in the
    # directory is of further use.
    shutil.rmtree(tmpDir)
# Now the real work can begin

# Patterns are compiled once here instead of on every matching log line.
# "<number> <unit>" pairs preceded by a space or "(" and followed by a space.
_SIZE_RE = re.compile(r'(?<= |\()([0-9.]{1,}) (GB|MB|bytes)(?= )')
# Dotted numeric address that follows "Request from ".
_IP_RE = re.compile(r'[^ ]+ Request from (([0-9]+\.?){4})')
# OS family followed by "/" or " " and a dotted version number.
_OS_RE = re.compile(r'.+? ((iOS|Darwin|OS X)[/ ](([0-9]+\.?){1,}))')
# "model/<name><digits[,digit]>", e.g. the product family and model number.
_IOS_MODEL_RE = re.compile(r'.+? model/([^ ]+?)([0-9]+,?[0-9]?)')


def process_log(lines, target_date=None):
    """Run through the log lines once and collect the data found on one date.

    Each line is split on its first two spaces into a date field, a time
    field and the remaining message. Only lines whose date field equals
    *target_date* are examined.

    Args:
        lines: Iterable of log lines (a list or an open file both work).
            Byte strings are decoded as UTF-8 with replacement characters.
        target_date: Date string to select. When omitted, the module-level
            ``targetDate`` is used.

    Returns:
        A tuple ``(sizeLog, IPLog, OSLog, iOSModelLog)``:
          * sizeLog     - one list of byte counts per "Since server start:" line
          * IPLog       - the address captured from each request line
          * OSLog       - ``(family, version)`` tuples from request lines
          * iOSModelLog - ``(model, number)`` tuples from iOS request lines

    Raises:
        Exception: "Funky line - check it out" when a selected line cannot be
            parsed; the offending message is printed first.
    """
    if target_date is None:
        target_date = targetDate

    sizeLog = []
    IPLog = []
    OSLog = []
    iOSModelLog = []
    # ...etc.

    # Right now just debug log()ing all the parsed/captured data straight back out to stdout
    for line in lines:
        # Lines read from a binary-mode file are bytes on Python 3; on
        # Python 2 ``bytes`` is ``str`` so this branch is never taken.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8', 'replace')

        # If there aren't at least 3 pieces somehow, they'll get filled in with blanks
        datestr, _timestr, logmsg = (line.split(' ', 2) + ['', '', ''])[:3]

        # Only do work if the string is on the date we care about
        if datestr != target_date:
            continue

        try:
            if 'Since server start:' in logmsg:
                # Get the size values, in order, as bytes (the original
                # author expected four of them per line).
                sizes = [int(float(amount) * sizeMultiplier[unit])
                         for amount, unit in _SIZE_RE.findall(logmsg)]
                sizeLog.append(sizes)
                log('transferred:' + repr(sizes))
            elif 'Request from ' in logmsg:
                # It's a request, log the IP - Apple should only be sending IPs to our server
                # if they're supposed to be served by it - why filter? We still served it.
                # Found interesting item that had a null IP:
                if 'Request from [(null)]' not in logmsg:
                    ip = _IP_RE.match(logmsg).group(1)
                    IPLog.append(ip)
                    log('ip:' + ip)

                # Also need to log the OS
                # NOTE(review): the source's indentation was lost; this
                # assumes the OS/model parsing applies to every request line,
                # not only those with a non-null IP - confirm.
                osInfo = _OS_RE.match(logmsg)
                osFamily = osInfo.group(2)
                osVersion = osInfo.group(3)
                OSLog.append((osFamily, osVersion))
                log('os:%s %s' % (osFamily, osVersion))

                # If it's iOS, then also log the product family and model
                if osFamily == 'iOS':
                    iOSInfo = _IOS_MODEL_RE.match(logmsg)
                    iOSModel = iOSInfo.group(1)
                    iOSVersion = iOSInfo.group(2)
                    iOSModelLog.append((iOSModel, iOSVersion))
                    log('iOS:%s%s' % (iOSModel, iOSVersion))
            # Do the stage VI stuff here - I need some sleep
        except Exception:
            # ``except Exception`` (not a bare except) so Ctrl-C and
            # SystemExit are not relabelled as a parsing problem.
            print(logmsg)
            raise Exception("Funky line - check it out")

    return sizeLog, IPLog, OSLog, iOSModelLog
# Process the log. The file object is handed over directly so lines are
# streamed rather than all loaded into a list first, and the temp file is
# closed (which disposes of it) even if parsing raises.
try:
    process_log(rawLog)
finally:
    rawLog.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment