Skip to content

Instantly share code, notes, and snippets.

@devnull255
Created April 29, 2017 18:22
Show Gist options
  • Save devnull255/dd6dbf59ebc674646cbb0a5aba7ac437 to your computer and use it in GitHub Desktop.
Save devnull255/dd6dbf59ebc674646cbb0a5aba7ac437 to your computer and use it in GitHub Desktop.
log extract 1
#!/usr/bin/env python
# Write a script which parses /var/log/messages and generates a CSV with two columns: minute, number_of_messages in sorted time order.
import re
from collections import defaultdict
# Month abbreviations in calendar order, as they appear in classic syslog
# timestamps ("Apr 29 18:22:01").  Used to sort minutes chronologically.
_MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')


def _minute_sort_key(minute):
    """Return a chronologically sortable key for a 'Mon D HH:MM' string.

    Plain string sorting would put "Apr 10" before "Apr 9" and "Apr" before
    "Mar", so the month is mapped to its index and the day to an int.
    Values that do not look like 'Mon D HH:MM' sort after all valid ones.
    The timestamp carries no year, so logs spanning a year boundary cannot
    be ordered correctly from this information alone.
    """
    try:
        month, day, clock = minute.split()
        return (_MONTHS.index(month), int(day), clock)
    except ValueError:
        # Wrong field count, unknown month, or non-numeric day.
        return (len(_MONTHS), 0, minute)


def count_messages_per_minute(lines):
    """Count log messages per minute.

    lines -- iterable of log lines whose first three whitespace-separated
             fields are month, day and HH:MM:SS.

    Returns a list of (minute, count) tuples in chronological order, where
    minute is 'Mon D HH:MM'.  Lines with fewer than three fields (for
    example blank lines) are ignored.
    """
    counts = defaultdict(int)
    for line in lines:
        fields = line.split()
        if len(fields) < 3:
            continue
        # Join the three timestamp fields and drop the trailing ":SS".
        minute = ' '.join(fields[:3])[:-3]
        counts[minute] += 1
    return sorted(counts.items(),
                  key=lambda item: _minute_sort_key(item[0]))


def main(path='test.log'):
    """Print 'minute,number_of_messages' CSV rows for the log at *path*."""
    with open(path) as log_file:
        rows = count_messages_per_minute(log_file)
    for minute, count in rows:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s,%s" % (minute, count))


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# Extract the program name from the field between the hostname and the log message and output those values in columns.
import re
from collections import defaultdict
# Month abbreviations in calendar order, as they appear in classic syslog
# timestamps ("Apr 29 18:22:01").  Used to sort minutes chronologically.
_MONTHS = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

# Leading run of word characters / hyphens in the tag field, e.g.
# "sshd[123]:" -> "sshd".  Compiled once instead of once per line.
_PROGRAM_RE = re.compile(r'[\w\-]+')


def _minute_sort_key(minute):
    """Return a chronologically sortable key for a 'Mon D HH:MM' string.

    Plain string sorting would put "Apr 10" before "Apr 9" and "Apr" before
    "Mar", so the month is mapped to its index and the day to an int.
    Values that do not look like 'Mon D HH:MM' sort after all valid ones.
    The timestamp carries no year, so logs spanning a year boundary cannot
    be ordered correctly from this information alone.
    """
    try:
        month, day, clock = minute.split()
        return (_MONTHS.index(month), int(day), clock)
    except ValueError:
        # Wrong field count, unknown month, or non-numeric day.
        return (len(_MONTHS), 0, minute)


def build_program_report(lines):
    """Build a per-minute, per-program message count report as CSV lines.

    lines -- iterable of log lines laid out as
             'Mon D HH:MM:SS host program[pid]: message'.

    Returns a list of strings.  The first is the header
    'minute,total_count,<program>,...' with program columns in sorted order
    (so the layout is stable between runs); each following string is one
    minute, in chronological order, with its total and per-program counts.

    Lines with fewer than three fields are ignored.  A line whose fifth
    field is missing or does not start with a program name still counts
    toward the minute's total but toward no program column.
    """
    totals = defaultdict(int)
    # Kept apart from the totals so that a program which happens to be
    # named "total_count" cannot corrupt the total column.
    per_program = defaultdict(lambda: defaultdict(int))
    programs = set()

    for line in lines:
        fields = line.split()
        if len(fields) < 3:
            continue
        # Join the three timestamp fields and drop the trailing ":SS".
        minute = ' '.join(fields[:3])[:-3]
        totals[minute] += 1

        match = _PROGRAM_RE.match(fields[4]) if len(fields) > 4 else None
        if match:
            program = match.group()
            programs.add(program)
            per_program[minute][program] += 1

    columns = sorted(programs)
    report = [','.join(['minute', 'total_count'] + columns)]
    for minute in sorted(totals, key=_minute_sort_key):
        seen = per_program.get(minute, {})
        cells = [minute, '%d' % totals[minute]]
        cells.extend(str(seen.get(name, 0)) for name in columns)
        report.append(','.join(cells))
    return report


def main(path='test.log'):
    """Print the per-minute program report for the log at *path*."""
    with open(path) as log_file:
        report = build_program_report(log_file)
    for row in report:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(row)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment