Skip to content

Instantly share code, notes, and snippets.

@metaphox
Created December 21, 2011 13:45
Show Gist options
  • Select an option

  • Save metaphox/1506101 to your computer and use it in GitHub Desktop.

Select an option

Save metaphox/1506101 to your computer and use it in GitHub Desktop.
# Generator pipelines: small composable generators for finding, opening, and filtering files.
import re
import os
import fnmatch
import gzip, bz2
def gen_find(filepat, top):
    """Yield paths of files under ``top`` whose names match ``filepat``.

    Args:
        filepat: Shell-style wildcard pattern (``fnmatch`` syntax). It is
            matched against each bare file name, not against the full path.
        top: Directory at which the recursive ``os.walk`` starts.

    Yields:
        The containing directory path joined with each matching file name.
    """
    for path, _dirlist, filelist in os.walk(top):
        for name in fnmatch.filter(filelist, filepat):
            yield os.path.join(path, name)
def gen_open(filenames):
    """Yield an open text-mode file object for each name in ``filenames``.

    Names ending in ``.gz`` are opened with ``gzip`` and names ending in
    ``.bz2`` with ``bz2``; everything else is opened with the built-in
    ``open``. All three are opened in text mode so that every file yields
    ``str`` lines; the compressed openers default to binary mode, which
    would otherwise mix ``bytes`` and ``str`` lines in one pipeline.

    Args:
        filenames: Iterable of file path strings.

    Yields:
        An open, readable text file object per name. The files are not
        closed here; the consumer is responsible for closing them.
    """
    for name in filenames:
        if name.endswith(".gz"):
            yield gzip.open(name, "rt")
        elif name.endswith(".bz2"):
            yield bz2.open(name, "rt")
        else:
            yield open(name)
def gen_cat(sources):
    """Concatenate several iterables into one flat stream of items.

    Args:
        sources: Iterable of iterables (for example, open file objects).

    Yields:
        Every item of the first source, then every item of the second,
        and so on. Each source is exhausted before the next one is
        requested from ``sources``.
    """
    for source in sources:
        yield from source
def gen_grep(pat, lines):
    """Yield only the lines in which the regular expression ``pat`` is found.

    Args:
        pat: Regular expression pattern, compiled once before iterating.
        lines: Iterable of lines to filter.

    Yields:
        Each line, unchanged, for which ``re.search`` finds a match
        anywhere in the line.
    """
    patc = re.compile(pat)
    for line in lines:
        if patc.search(line):
            yield line
def lines_from_dir(filepat, dirname):
    """Return a lazy stream of all lines from matching files in a directory tree.

    The pipeline is built from the sibling generators: ``gen_find`` locates
    the files, ``gen_open`` opens them, and ``gen_cat`` chains their lines.
    Nothing is read until the returned generator is iterated.

    Args:
        filepat: Wildcard pattern for file names, passed to ``gen_find``.
        dirname: Directory at which the search starts.

    Returns:
        A generator yielding the lines of every matching file in turn.
    """
    names = gen_find(filepat, dirname)
    files = gen_open(names)
    lines = gen_cat(files)
    return lines
# Additionally: a helper for transforming one field of each record.
def field_map(dictseq, name, func):
    """Apply ``func`` to one field of every dict in a sequence.

    Each dict is modified in place and then yielded, so the caller sees
    the same objects that were passed in, not copies.

    Args:
        dictseq: Iterable of dicts that each contain the key ``name``.
        name: Key whose value is replaced.
        func: Callable applied to the current value; its result is stored
            back under ``name``.

    Yields:
        Each dict from ``dictseq`` after its ``name`` entry was replaced.

    Raises:
        KeyError: If a dict has no ``name`` key.
    """
    for record in dictseq:
        record[name] = func(record[name])
        yield record
### Some tests
# Parse every *.access_log file below the current directory and print the
# status field of each line that matches the log pattern.
loglines = lines_from_dir('*.access_log', '.')
logpats = r'(\S+) (\S+) (\S+) \[(.*?)\] "(\S+) (\S+) (\S+)" (\S+) (\S+) (.*)'
logpat = re.compile(logpats)
groups = (logpat.match(line) for line in loglines)
# Lines that do not match the pattern produce None and are skipped.
tuples = (each.groups() for each in groups if each)
# One name per capture group in logpats (ten groups). With only nine names,
# zip() would pair the ninth group with 'rest' and drop the real trailing
# field. NOTE(review): the ninth group is presumably the response size in
# common-log-format lines; confirm against the actual log files.
colnames = ('host', 'referrer', 'user', 'datetime', 'method', 'request',
            'protocol', 'status', 'bytes', 'rest')
log = (dict(zip(colnames, t)) for t in tuples)
for entry in log:
    print(entry['status'])

# Print every file under the filesystem root whose name contains a dot.
# This walks the whole filesystem and can take a long time.
files = gen_find('*.*', '/')
for each in files:
    print(each)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment