Skip to content

Instantly share code, notes, and snippets.

@mbrownnycnyc
Last active December 20, 2015 15:59
Show Gist options
  • Save mbrownnycnyc/6158144 to your computer and use it in GitHub Desktop.
Save mbrownnycnyc/6158144 to your computer and use it in GitHub Desktop.
python script that parses the output of the argus-client radump()'s dns printer and inserts interesting things into a DB. See DB definition at https://gist.github.com/mbrownnycnyc/6083357
#!/usr/bin/python
'''
You must create a config file with:
grep -v ^# /root/.rarc | grep -v ^$ > ~/for_dnsdb.rarc && if grep ^RA_TIME_FORMAT ~/for_dnsdb.rarc > /dev/null ; then sed s/^RA_TIME_FORMAT/#RA_TIME_FORMAT/g -i ~/for_dnsdb.rarc && echo -e "RA_TIME_FORMAT=\"%Y-%m-%d %T.%f\"\nRA_PRINT_LABELS=-1\nRA_FIELD_DELIMITER='^'" >> ~/for_dnsdb.rarc ; fi
expecting the input to be from:
radump -F ~/for_dnsdb.rarc -r argus_10\:00\:00.gz -s seq ltime saddr daddr suser:1024 duser:1024 - port domain
or
radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain
as stdin like...
radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | radump_to_dns_db.py
or to drop to the background without any output:
nohup radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | python -u radump_to_dns_db.py &> /dev/null &
or to drop the background and save all output to a log file (watch out for reportrecordcount boolean):
nohup radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | python -u radump_to_dns_db.py &> /var/log/radump2dnsdb.log &
or to drop to the background saving only stderr to a log file:
nohup radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | python -u radump_to_dns_db.py 1> /dev/null 2> /var/log/radump2dnsdb.log &
considers:
- no labels (aka `-L -1` or `RA_PRINT_LABELS=-1`)
- field separator is '^' (`-c ^` or `RA_FIELD_DELIMITER='^'`)
- time format is %Y-%m-%d %T.%f (as RA_TIME_FORMAT="%Y-%m-%d %T.%f")
with support for -vv will come support for additional field:
nsnames [this will operate on N index]: varchar(256)... will be a comma-separated list
To handle the case when the DNS question and the DNS answers are split between flushed userdata, the seq is inserted so that the user can reference this to locate the question and answer pair.
- If you see 0 as the value for atype_id or aname_host_id, then the question was missing from the `suser` (truly, there was 0 bytes of suser data).
- If you see 0 as the value for qtype_id or qname_host_id, then the answer was missing from the `duser` (truly, there was 0 bytes of duser data).
'''
# Runtime configuration flags. Set booleans to either True or False
# (explicit case, without quotes).
reportrecordcount = True #this controls whether or not a running count of processed records is printed to stdout (written by FlowRecordProcessor)
recordcount = 0 #global int tracking how many records have been processed; incremented in FlowRecordProcessor
debug = False #this controls whether or not some verbose info is printed throughout processing
onerecord = False #this controls whether or not the script stops after processing a single record (useful for debugging)
import sys, traceback, types
import string, re
import time
from datetime import datetime
def SQLQuery(query, fetchtype):
import MySQLdb
#http://www.tutorialspoint.com/python/python_database_access.htm
#http://www.packtpub.com/article/exception-handling-mysql-python
sqlserver = "localhost"
if debug: print 'Connecting to %s' %sqlserver
db = MySQLdb.connect(sqlserver,"root","passwordhere","argus_dnsdb" )
if debug: print 'Connected... creating db cursor'
cursor = db.cursor()
try:
if debug: print 'Executing query: ' + query
cursor.execute(query)
if debug: print 'Execution successful'
if fetchtype == "insert":
db.commit()
results = 0
if fetchtype == "fetchall":
results = cursor.fetchall() #fetchall returns a tuple of tuples of rows of results
if fetchtype == "fetchone":
results = cursor.fetchone() #fetchone returns a tuple of the next row of results (this is probably not the nicest way to do this, but it should work, as our SELECTs rely on there being only one result.
except MySQLdb.Error, e:
print 'Executing query: ' + query
print "Error: %s" %e
db.rollback()
sys.exit()
return 0
db.close()
return results
def FlowRecordProcessor(flow_seq, query_time, nsclient, nsserver, qtype, qname, atypes, anames):
    '''Resolve one parsed radump DNS record to lookup-table ids and insert one
    row per answer into the `main` table.

    flow_seq   -- argus flow sequence number (string)
    query_time -- flow time as a '%Y-%m-%d %H:%M:%S.%f' string (converted to an
                  epoch float below), or an already-numeric value
    nsclient   -- IP of the DNS client (saddr), stored via INET_ATON
    nsserver   -- IP of the DNS server (daddr), stored via INET_ATON
    qtype      -- question record type, or 0 when there was no suser data
    qname      -- question name, or 0 when there was no suser data
    atypes     -- list of answer record types, or [0] when there was no duser data
    anames     -- list of answer values parallel to atypes, or [0]

    NOTE(review): all queries here are built by string concatenation; a
    qname/aname containing a quote would break the SQL (injection risk).
    Consider parameterized queries -- left as-is in this pass.
    '''
    #here we might want to multithread using http://docs.python.org/2/library/multiprocessing.html?highlight=multiprocessing#multiprocessing
    # or we might want to multithread calling FlowRecordProcessor() calls
    #first we'll check if nsclient is contained in nsclients, and if so, get the id
    tresult = SQLQuery('select id, ipv4addr from nsclients where ipv4addr=INET_ATON(\'' + nsclient + '\');', 'fetchone')
    if type(tresult) is not types.NoneType:
        #then the nsclient is already in the DB
        nsclient_id = tresult[0]
    else:
        #then the nsclient isn't in the DB: insert it, then re-select to learn its id
        SQLQuery('insert into nsclients (ipv4addr) values (INET_ATON(\'' + nsclient + '\'));', 'insert')
        nsclient_id = SQLQuery('select id, ipv4addr from nsclients where ipv4addr=INET_ATON(\'' + nsclient + '\');', 'fetchone')
        nsclient_id = nsclient_id[0]
    #next we'll check if nsserver is contained in nsservers, and if so, get the id
    tresult = SQLQuery('select id, ipv4addr from nsservers where ipv4addr=INET_ATON(\'' + nsserver + '\');', 'fetchone')
    if type(tresult) is not types.NoneType:
        #then the nsserver is already in the DB
        nsserver_id = tresult[0]
    else:
        #then the nsserver isn't in the DB: insert it, then re-select to learn its id
        SQLQuery('insert into nsservers (ipv4addr) values (INET_ATON(\'' + nsserver + '\'));', 'insert')
        nsserver_id = SQLQuery('select id, ipv4addr from nsservers where ipv4addr=INET_ATON(\'' + nsserver + '\');', 'fetchone')
        nsserver_id = nsserver_id[0]
    #next we'll get the id for the qtype and qname:
    if (qtype == 0) and (qname == 0):
        #then there was no suser data provided; 0 ids signal "question missing"
        qname_host_id = 0
        qtype_nstype_id = 0
    else:
        #next we'll get the id for the qtype:
        tresults = SQLQuery('select id, nstype from nstypes;', 'fetchall')
        #thanks: http://stackoverflow.com/a/2917399/843000
        # NOTE(review): raises IndexError if qtype is not present in nstypes
        qtype_nstype_id = [x[0] for x in tresults if x[1] == qtype][0]
        #next we'll get the id for the host of the server that was inquired about
        tresults = SQLQuery('select id from hosts where host=\'' + qname + '\';', 'fetchone')
        if (type(tresults) is not types.NoneType) and (len(tresults) > 0):
            #then the host is already in the DB
            qname_host_id = tresults[0]
        else:
            #then the host isn't in the DB: insert it, then re-select to learn its id
            SQLQuery('insert into hosts (host) values (\'' + qname + '\');', 'insert')
            qname_host_id = SQLQuery('select id from hosts where host=\'' + qname + '\';', 'fetchone')
            qname_host_id = qname_host_id[0]
    #next we'll deal with resolving the atypes and anames to their corresponding ids from nstypes and hosts respectively
    atypes_nstype_ids = []
    anames_ids = []
    if (0 in atypes) and (0 in anames):
        #no duser data: 0 ids signal "answer missing"
        atypes_nstype_ids.append(0)
        anames_ids.append(0)
    else:
        if len(atypes) == len(anames):
            #this should always be the case (the parser appends to both in lockstep)
            for i in range(0,len(atypes)):
                tresults = SQLQuery('select id, nstype from nstypes;', 'fetchall')
                atypes_nstype_ids.append([x[0] for x in tresults if x[1] == atypes[i]][0])
            for i in range(0,len(anames)):
                #some times the anames[i] will be an ip address.
                #some times the anames[i] will be a name
                ## depending on the atype[i] as described in RFCs
                tresults = SQLQuery('select id, host from hosts where host=\'' + anames[i] + '\';', 'fetchall')
                if (len(tresults) > 0):
                    #then the host is already in the DB
                    anames_ids.append([x[0] for x in tresults if x[1] == anames[i]][0])
                else:
                    #then the host isn't in the DB: insert it, then re-select for its id
                    SQLQuery('insert into hosts (host) values (\'' + anames[i] + '\');', 'insert')
                    anames_ids.append(SQLQuery('select id, host from hosts where host=\'' + anames[i] + '\';', 'fetchone')[0])
    # here we will INSERT the record into the main DB
    ## each flow_seq has the possibility to have many atypes and anames associated, so we'll be inserting on an atypes basis
    ## we consider flow_seq+query_time to be unique universally... http://thread.gmane.org/gmane.network.argus/9738/focus=9750
    if len(atypes) == len(anames_ids):
        for i in range(0,len(atypes_nstype_ids)):
            if (type(query_time) is types.StringType):
                #convert the radump time string to an epoch float once; later
                #iterations see a float and skip this branch
                if debug: print "query_time is of type " + str(type(query_time)) + " and will be converted"
                query_time = time.mktime(datetime.strptime(query_time,'%Y-%m-%d %H:%M:%S.%f').timetuple())
            else:
                if debug: print "query_time is of another type " + str(type(query_time)) + " and will not be converted."
            if debug:
                print "type(flow_seq): " + str(type(flow_seq)) + ": " + flow_seq
                print "type(query_time): " + str(type(query_time)) + ": " + str(query_time)
                print "type(nsclient_id): " + str(type(nsclient_id)) + ": " + str(nsclient_id)
                print "type(nsserver_id): " + str(type(nsserver_id)) + ": " + str(nsserver_id)
                print "type(qtype_nstype_id): " + str(type(qtype_nstype_id)) + ": " + str(qtype_nstype_id)
                print "type(qname_host_id): " + str(type(qname_host_id)) + ": " + str(qname_host_id)
                print "type(atypes_nstype_ids[" + str(i) + "]): " + str(type(atypes_nstype_ids[i])) + ": " + str(atypes_nstype_ids[i])
                print "type(anames_ids[" + str(i) + "]): " + str(type(anames_ids[i])) + ": " + str(anames_ids[i])
            #should I check to make sure the flow isn't already in the main table? Right now I don't.
            #flow_uid is flow_seq + epoch time, intended to be universally unique (see gmane link above)
            SQLQuery("insert into main (flow_uid,flow_seq,qtime,nsclient_id,nsserver_id,qtype_id,qname_host_id,atype_id,aname_host_id) " + \
                "values ('" + str(float(flow_seq)+query_time) + "','" + str(float(flow_seq)) + "','" + str(query_time) + "','" + str(nsclient_id) + "','" + str(nsserver_id) + "','" + \
                str(qtype_nstype_id) + "','" + str(qname_host_id) + "','" + str(atypes_nstype_ids[i]) + "','" + str(anames_ids[i]) + "');", 'insert')
    global reportrecordcount
    if reportrecordcount:
        #update the single-line progress counter on stdout (\r rewinds the line)
        global recordcount
        recordcount = recordcount + 1
        sys.stdout.write("\r"+"DNS questions and answers stored: " + str(recordcount))
        sys.stdout.flush()
if __name__ == "__main__":
if reportrecordcount:
sys.stdout.write("\r"+"DNS questions and answers stored: " + str(recordcount))
sys.stdout.flush()
#f = open('C:\\Documents and Settings\\mbrown\\Desktop\\radump_dns_to_sql\\radump.out', 'r')
f = sys.stdin
#there is another method of reading data from stdin: http://stackoverflow.com/a/7056462/843000
#I considered building an object to pass to a function, but that just seems latent.
for line in f:
try:
if debug:
print "---begin record---"
print line
#check if the line contains s[0] or d[0]
#that way we can handle missing suser or duser data by providing the fields 0 for ID in main and bypassing the SQL lookups
if re.search(r's\[0\]', line) == None:
if debug: print "suser contains more than 0 data"
suserzero = False
else:
#then we have no suser data
suserzero = True
if debug: print "suser contains 0 data"
if re.search(r'd\[0\]', line) == None:
if debug: print "duser contains more than 0 data"
duserzero = False
else:
#then we have no duserdata
duserzero = True
if debug: print "duser contains 0 data"
#Each field is split by '^'
fields = line.split('^')
#the first field is the sequence number for the flow. This is a unique ID across the time `argus` is running.
flow_seq = fields[0].strip()
#The second field is the last flow time, so this will be the time we'd like to insert into the DB. It will be formatted to be acceptable by strftime().
query_time = fields[1].strip()
#the third field is the source address (the dns client)
nsclient = fields[2].strip()
#the fourth field is the destination address (the dns server)
nsserver = fields[3].strip()
#the fifth field is source user data string:
'''
- we can take the data that's only in between the double quotes
- we can then parse this as follows:
- third space separated string is a TYPE followed by a '?'. This will be the qtype
- fourth space separate string is the qname
'''
if suserzero:
qtype = 0
qname = 0
else:
suserdata = re.search(r'".*"',fields[4].strip())
subsetsuserdata = suserdata.group(0).split()
#declaring a variable is just a pointer
qtypetemp = subsetsuserdata[2]
if (qtypetemp[len(qtypetemp)-1] == "?"):
#then we have a valid question
qtype = qtypetemp[:(len(qtypetemp)-1)]
qnametemp = subsetsuserdata[3]
if (qnametemp[len(qnametemp)-1] == "."):
#then we (still) have a valid question
qname = qnametemp[:(len(qnametemp)-1)]
#the sixth field is the destination user data string:
'''
- we can take the data that's only in between the double quotes
- we can then parse this as follows:
- the third space separated string is the beginning of a command separated list that ends with ' ('
'''
atypes = []
anames = []
if duserzero:
atypes.append(0)
anames.append(0)
else:
#duserdata = re.search(r'".*"',fields[4].strip())
#taken from printer-domain.c
subsetduserdata = re.findall(r'[A|NS|MD|MF|CNAME|SOA|MB|MG|MR|NULL|WKS|PTR|HINFO|MINFO|MX|TXT|RP|AFSDB|X25|ISDN|RT|NSAP|NSAP_PTR|SIG|KEY|PX|GPOS|AAAA|LOC|NXT|EID|NIMLOC|SRV|ATMA|NAPTR|A6|DNAME|OPT|UINFO|UID|GID|UNSPEC|UNSPECA|TKEY|TSIG|IXFR|AXFR|MAILB|MAILA|ANY]\s\d{,3}\.\d{,3}\.\d{,3}\.\d{,3}',fields[5].strip())
if len(subsetduserdata)>0:
for recordset in subsetduserdata:
rssplit = recordset.split()
atypes.append(rssplit[0]) #this is the answer qtype
anames.append(rssplit[1]) #this is the answer qtype parameter
#this would block so we may want to consider a multiprocessor here... or within FlowRecordProcessor()
FlowRecordProcessor(flow_seq,query_time, nsclient, nsserver, qtype, qname, atypes, anames)
except:
exc_type, exc_value, exc_traceback = sys.exc_info()
print "*** print_tb:"
traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
print "*** print_exception:"
traceback.print_exception(exc_type, exc_value, exc_traceback, limit=2, file=sys.stdout)
print "*** print_exc:"
traceback.print_exc()
print "*** format_exc, first and last line:"
formatted_lines = traceback.format_exc().splitlines()
print formatted_lines[0]
print formatted_lines[-1]
print "*** format_exception:"
print repr(traceback.format_exception(exc_type, exc_value, exc_traceback))
print "*** extract_tb:"
print repr(traceback.extract_tb(exc_traceback))
print "*** format_tb:"
print repr(traceback.format_tb(exc_traceback))
print "*** tb_lineno:", exc_traceback.tb_lineno
print 'exiting'
sys.exit()
if onerecord: sys.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment