Skip to content

Instantly share code, notes, and snippets.

@oremj
Created November 3, 2009 22:46
Show Gist options
  • Save oremj/225528 to your computer and use it in GitHub Desktop.
Save oremj/225528 to your computer and use it in GitHub Desktop.
Consolidate log files into one directory.
#!/usr/bin/python
import os
import glob
import re
import shutil
import stat
from datetime import datetime
from time import strptime
def filename_gen(file, host):
    """Yield an endless series of candidate archive names.

    Each name has the form ``<file>.<host>_<n>.gz`` where ``n`` starts at 1
    and grows by one on every iteration. The generator never terminates on
    its own; the consumer decides when to stop pulling names.
    """
    template = "%s.%s_%d.gz"
    sequence = 0
    while True:
        sequence += 1
        yield template % (file, host, sequence)
def link_file(orig_file, output_dir, domain, filename, host):
    """Hard-link ``orig_file`` into ``output_dir/domain`` under a free name.

    Candidate names come from ``filename_gen(filename, host)``; the first
    candidate that does not yet exist in the destination directory is used
    as the link name.

    Args:
        orig_file: Path of the existing file to link.
        output_dir: Root directory under which per-domain directories live.
        domain: Name of the sub-directory of ``output_dir`` to link into.
        filename: Base name handed to ``filename_gen``.
        host: Host label handed to ``filename_gen``.

    Raises:
        AttributeError: If an already existing candidate in the destination
            has the same inode as ``orig_file`` (i.e. it is already linked
            there). The exception type is kept from the original code.
        OSError: If stat, directory creation or linking fails.
    """
    dest_dir = os.path.join(output_dir, domain)
    inode = os.stat(orig_file)[stat.ST_INO]
    if not os.path.isdir(dest_dir):
        # makedirs also creates output_dir itself when it is missing.
        os.makedirs(dest_dir)

    # filename_gen is infinite, so this loop only ends via break or raise
    # and dest_path is always bound afterwards.
    for candidate in filename_gen(filename, host):
        dest_path = os.path.join(dest_dir, candidate)
        if not os.path.exists(dest_path):
            break
        if os.stat(dest_path)[stat.ST_INO] == inode:
            raise AttributeError(
                "%s is already linked as %s" % (orig_file, dest_path)
            )

    os.link(orig_file, dest_path)
def consolidate_dir(dir, domain_file_re, output_dir):
    """Link every matching, not-yet-linked log file found by a glob.

    Args:
        dir: Glob pattern selecting the candidate files. The name of each
            file's parent directory is used as the host label.
        domain_file_re: Compiled regular expression searched against each
            base name; it must capture two groups, ``(domain, filename)``.
        output_dir: Root directory passed on to ``link_file``.

    Files whose link count is already greater than one, and files whose
    base name does not match ``domain_file_re``, are skipped.
    """
    for orig_file in glob.glob(dir):
        host_dir, basename = os.path.split(orig_file)
        host = os.path.basename(host_dir)

        # More than one link means the file was already linked somewhere.
        if os.stat(orig_file)[stat.ST_NLINK] > 1:
            continue

        # Explicit None check instead of catching AttributeError, so that a
        # genuinely broken domain_file_re is not silently ignored.
        match = domain_file_re.search(basename)
        if match is None:
            continue

        domain, filename = match.groups()
        link_file(orig_file, output_dir, domain, filename, host)
def chinacache_consolidate_dir(host, dir, domain, output_dir):
    """Link ChinaCache log files under an ``access_YYYY-MM-DD-HH`` name.

    Args:
        host: Host label passed on to ``link_file``.
        dir: Glob pattern selecting the candidate files.
        domain: Domain sub-directory passed on to ``link_file``.
        output_dir: Root directory passed on to ``link_file``.

    Files whose link count is already greater than one are skipped, as are
    files whose name does not carry a parsable start time in the expected
    position.
    """
    for orig_file in glob.glob(dir):
        basename = os.path.basename(orig_file)

        # More than one link means the file was already linked somewhere.
        if os.stat(orig_file)[stat.ST_NLINK] > 1:
            continue

        # fn example cc_9466.ecclf_S.200911011500-200911011559-20091101-99.50c.gz
        # Third dot-separated field, up to the first '-', first 10 digits:
        # YYYYMMDDHH of the start of the covered interval.
        try:
            stamp = basename.split('.')[2].split('-')[0][:10]
            start_time = datetime.strptime(stamp, "%Y%m%d%H")
        except (IndexError, ValueError):
            # Name does not follow the expected layout; skip it rather than
            # abort the whole run.
            continue

        filename = start_time.strftime("access_%Y-%m-%d-%H")
        link_file(orig_file, output_dir, domain, filename, host)
if __name__ == "__main__":
    # Root directory that receives one sub-directory per domain.
    output_dir = '/data/stats/logs/im-log01'

    # Captures (domain, "access_YYYY-MM-DD-HH") from a log file's base name.
    # Raw string so that \. and \d are not treated as string escapes.
    domain_file_re = re.compile(r"(.*)\.(access_\d{4}(?:-\d\d){3}).*gz")

    # Load-balancer log directories; the parent directory name is the host.
    for log_glob in (
        '/data/stats/logs/z*lb*/*',
        '/data/stats/logs/ams-zlb*/*',
        '/data/stats/logs/lm-zlb*/*',
        '/data/stats/logs/sg-zlb*.mozilla.net/*',
    ):
        consolidate_dir(log_glob, domain_file_re, output_dir)

    # ChinaCache logs are stored per source IP; the host label is the IP
    # with dots replaced by dashes, prefixed with "chinacache".
    for ip in ("58.68.168.133", "58.68.168.142"):
        chinacache_consolidate_dir(
            host="chinacache" + ip.replace(".", "-"),
            dir="/data/stats/logs/chinacache/%s/addons/*/*" % ip,
            domain="addons.mozilla.org",
            output_dir=output_dir
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment