Skip to content

Instantly share code, notes, and snippets.

@voidlizard
Created September 2, 2010 14:57
Show Gist options
  • Select an option

  • Save voidlizard/562401 to your computer and use it in GitHub Desktop.

Select an option

Save voidlizard/562401 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys,os,glob,re,itertools
from operator import itemgetter
# File name of the top-level-domain list.  It is resolved relative to the
# directory of the running script; lines starting with "#" are comments.
DOMAINS_LIST_PATH = "tlds-alpha-by-domain.txt"
# Colon-separated list of directories to scan.  The first command-line
# argument, when given, replaces this value.
MAIL_SEARCH_PATHS = "."
# Regular expression a file's base name must match for the file to be
# scanned.  Written as a raw string so that "\d" reaches the regex engine
# untouched instead of being an invalid string escape.
FILE_MASK = r"^\d+$"
def fatal(msg):
    """Print a fatal-error banner to stderr and terminate the process.

    Args:
        msg: text inserted between the braces of the banner.

    Raises:
        SystemExit: always, with exit status -1.
    """
    banner = "*** Fatal error {%s}\n" % msg
    sys.stderr.write(banner)
    sys.exit(-1)
def load_domains():
    """Load the set of known top-level domains.

    The list is read from DOMAINS_LIST_PATH, resolved relative to the
    directory of the running script (taken from sys.argv[0]).  Lines that
    start with "#" are treated as comments and skipped, blank lines are
    ignored, and every remaining line is stripped and upper-cased.

    Returns:
        A set of upper-case TLD strings.

    If the file cannot be opened or read (IOError), the problem is reported
    through fatal(), which terminates the process.
    """
    # Resolve the path before entering the try block so that the error
    # message below can always refer to it.
    script_dir = os.path.dirname(sys.argv[0])
    fname = os.path.realpath(os.path.join(script_dir, DOMAINS_LIST_PATH))
    try:
        # The context manager closes the file as soon as the set is built
        # instead of leaving the handle to the garbage collector.
        with open(fname) as fd:
            entries = (line.strip().upper() for line in fd
                       if not line.startswith("#"))
            # Drop empty entries so blank lines do not add "" to the set.
            return set(entry for entry in entries if entry)
    except IOError:
        fatal("Unable to open/read domains: %s" % fname)
def dirs_to_scan():
    """Return the directories named in MAIL_SEARCH_PATHS as absolute paths.

    MAIL_SEARCH_PATHS is split on ":"; each piece is stripped of
    surrounding whitespace and made absolute.

    Returns:
        A list of absolute directory paths, in the order they were listed.
    """
    resolved = []
    for entry in MAIL_SEARCH_PATHS.split(":"):
        resolved.append(os.path.abspath(entry.strip()))
    return resolved
def enum_files(dirs, filt):
    """Yield every file below the given directories that passes a filter.

    Args:
        dirs: iterable of directory paths; each one is stripped of
            surrounding whitespace and resolved with os.path.realpath
            before being walked recursively.
        filt: callable taking a file path; the path is yielded only when
            the call returns a true value.

    Yields:
        Paths of the accepted files (walk root joined with the file name).
    """
    for top in dirs:
        base = os.path.realpath(top.strip())
        for root, _subdirs, names in os.walk(base):
            for name in names:
                candidate = os.path.join(root, name)
                if filt(candidate):
                    yield candidate
def scan_file(f, domains):
    """Extract IP addresses and domain names from one mail file.

    The file is treated as a mail message: everything up to the first
    blank (whitespace-only) line is the header, the remainder is the body.

    * Header: dotted-quad addresses are collected from "Received:" header
      fields, including their folded continuation lines (lines that start
      with a space or a tab).
    * Body: host names that follow "http://", "http://www." or "@" are
      collected when their last label, upper-cased, is in `domains`.

    Args:
        f: path of the file to scan.
        domains: container of upper-case top-level domains used to
            validate the host names found in the body.

    Yields:
        ("ip", <file base name>, <address>) tuples first, then
        ("domain", <file base name>, <lower-case host name>) tuples.
        Within each group the values are unique and sorted.
    """
    # Raw strings keep the regex escapes intact.  NOTE: the address pattern
    # is loose - each group accepts 1-4 digits and is not range-checked.
    ip = re.compile(r"\d{1,4}\.\d{1,4}\.\d{1,4}\.\d{1,4}")
    # Groups, in order: prefix, "http://..." part, "www.", host name, TLD.
    dom = re.compile(r"((http://(www\.)?)|@)([-.\w]+\.([-\w]+))")
    fname = os.path.basename(f)

    addresses = set()
    hosts = set()
    with open(f) as fd:
        in_received = False
        for line in fd:
            if not line.strip():
                # A blank line ends the header; the body follows.
                break
            if line[0] not in " \t":
                # A new header field starts here.  Folded continuation
                # lines keep the state of the field they belong to, so
                # addresses on wrapped "Received:" lines are not missed.
                in_received = line.startswith("Received:")
            if in_received:
                addresses.update(ip.findall(line))
        # The same file iterator continues right after the blank line.
        for line in fd:
            for groups in dom.findall(line):
                if groups[-1].upper() in domains:
                    # Normalise case *before* de-duplicating so that
                    # "Example.COM" and "example.com" give a single row.
                    hosts.add(groups[-2].lower())

    # The file is closed at this point; results are emitted in sorted
    # order so that repeated runs produce identical output.
    for address in sorted(addresses):
        yield ("ip", fname, address)
    for host in sorted(hosts):
        yield ("domain", fname, host)
if __name__ == "__main__":
    # Command-line entry point: scan the configured directories (or the
    # colon-separated list given as the first argument) and print one
    # ";"-separated row per address or domain found.
    fre = re.compile(FILE_MASK)

    def ffilt(path):
        """Accept a file when its base name matches FILE_MASK."""
        return fre.match(os.path.basename(path))

    top = load_domains()
    if len(sys.argv) > 1:
        MAIL_SEARCH_PATHS = sys.argv[1]
    # The directory list goes to stderr so it does not mix with the report.
    sys.stderr.write("Directories to scan:\n" + "\n".join(dirs_to_scan()) + "\n\n")

    row_format = "%-8s ; %-32s ; %-16s"
    # print(...) with a single pre-formatted string produces the same
    # output under Python 2 and is also valid Python 3 syntax.
    print(row_format % ("type", "file name", "address"))
    for path in enum_files(dirs_to_scan(), ffilt):
        # Distinct names: the inner loop no longer rebinds the variable
        # that holds the path being scanned.
        for (kind, fname, address) in scan_file(path, top):
            print(row_format % (kind, fname, address))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment