Last active
December 20, 2015 15:59
-
-
Save mbrownnycnyc/6158144 to your computer and use it in GitHub Desktop.
python script that parses the output of the argus-client radump()'s dns printer and inserts interesting things into a DB. See DB definition at https://gist.github.com/mbrownnycnyc/6083357
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
'''
Parse the output of the argus-client radump()'s dns printer on stdin and
insert interesting things into a MySQL DB.
See the DB definition at https://gist.github.com/mbrownnycnyc/6083357

You must create a config file with:
grep -v ^# /root/.rarc | grep -v ^$ > ~/for_dnsdb.rarc && if grep ^RA_TIME_FORMAT ~/for_dnsdb.rarc > /dev/null ; then sed s/^RA_TIME_FORMAT/#RA_TIME_FORMAT/g -i ~/for_dnsdb.rarc && echo -e "RA_TIME_FORMAT=\"%Y-%m-%d %T.%f\"\nRA_PRINT_LABELS=-1\nRA_FIELD_DELIMITER='^'" >> ~/for_dnsdb.rarc ; fi

expecting the input to be from:
radump -F ~/for_dnsdb.rarc -r argus_10\:00\:00.gz -s seq ltime saddr daddr suser:1024 duser:1024 - port domain
or
radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain

as stdin like...
radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | radump_to_dns_db.py
or to drop to the background without any output:
nohup radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | python -u radump_to_dns_db.py &> /dev/null &
or to drop the background and save all output to a log file (watch out for reportrecordcount boolean):
nohup radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | python -u radump_to_dns_db.py &> /var/log/radump2dnsdb.log &
or to drop to the background saving only stderr to a log file:
nohup radump -F ~/for_dnsdb.rarc -S 127.0.0.1:561 -s seq ltime saddr daddr suser:1024 duser:1024 - port domain | python -u radump_to_dns_db.py 1> /dev/null 2> /var/log/radump2dnsdb.log &

considers:
- no labels (aka `-L -1` or `RA_PRINT_LABELS=-1`)
- field separator is '^' (`-c ^` or `RA_FIELD_DELIMITER='^'`)
- time format is %Y-%m-%d %T.%f (as RA_TIME_FORMAT="%Y-%m-%d %T.%f")

with support for -vv will come support for additional field:
nsnames [this will operate on N index]: varchar(256)... will be a comma-separated list

To handle the case when the DNS question and the DNS answers are split between flushed userdata, the seq is inserted so that the user can reference this to locate the question and answer pair.
- If you see 0 as the value for qtype_id or qname_host_id, then the question was missing from the `suser` (truly, there were 0 bytes of suser data).
- If you see 0 as the value for atype_id or aname_host_id, then the answer was missing from the `duser` (truly, there were 0 bytes of duser data).
'''
# set booleans to either 'True' or 'False' (explicit case, without quotes)
reportrecordcount = True  # whether the running count of processed records is printed to stdout
recordcount = 0  # module-level counter of how many records have been processed
debug = False  # whether verbose diagnostic info is printed throughout processing
onerecord = False  # stop after processing a single record (useful for debugging)

import sys, traceback, types
import string, re
import time
from datetime import datetime
def SQLQuery(query, fetchtype): | |
import MySQLdb | |
#http://www.tutorialspoint.com/python/python_database_access.htm | |
#http://www.packtpub.com/article/exception-handling-mysql-python | |
sqlserver = "localhost" | |
if debug: print 'Connecting to %s' %sqlserver | |
db = MySQLdb.connect(sqlserver,"root","passwordhere","argus_dnsdb" ) | |
if debug: print 'Connected... creating db cursor' | |
cursor = db.cursor() | |
try: | |
if debug: print 'Executing query: ' + query | |
cursor.execute(query) | |
if debug: print 'Execution successful' | |
if fetchtype == "insert": | |
db.commit() | |
results = 0 | |
if fetchtype == "fetchall": | |
results = cursor.fetchall() #fetchall returns a tuple of tuples of rows of results | |
if fetchtype == "fetchone": | |
results = cursor.fetchone() #fetchone returns a tuple of the next row of results (this is probably not the nicest way to do this, but it should work, as our SELECTs rely on there being only one result. | |
except MySQLdb.Error, e: | |
print 'Executing query: ' + query | |
print "Error: %s" %e | |
db.rollback() | |
sys.exit() | |
return 0 | |
db.close() | |
return results | |
def FlowRecordProcessor(flow_seq, query_time, nsclient, nsserver, qtype, qname, atypes, anames):
    '''Resolve lookup-table ids for one parsed flow record and insert one row
    into `main` per answer record.

    flow_seq   -- argus flow sequence number (string)
    query_time -- flow ltime; either a "%Y-%m-%d %H:%M:%S.%f" string (converted
                  to an epoch float below) or an already-converted float
    nsclient   -- source (DNS client) IPv4 address string
    nsserver   -- destination (DNS server) IPv4 address string
    qtype      -- DNS question type string, or the int 0 when suser was empty
    qname      -- DNS question name string, or the int 0 when suser was empty
    atypes     -- list of answer type strings, or [0] when duser was empty
    anames     -- parallel list of answer values, or [0] when duser was empty

    Each lookup table (nsclients, nsservers, hosts) is handled insert-if-missing:
    SELECT first, INSERT then re-SELECT when absent.  Increments the module-level
    recordcount and rewrites the progress line when reportrecordcount is set.
    '''
    #here we might want to multithread using http://docs.python.org/2/library/multiprocessing.html?highlight=multiprocessing#multiprocessing
    # or we might want to multithread calling FlowRecordProcessor() calls
    #first we'll check if nsclient is contained in nsclients, and if so, get the id
    tresult = SQLQuery('select id, ipv4addr from nsclients where ipv4addr=INET_ATON(\'' + nsclient + '\');', 'fetchone')
    if type(tresult) is not types.NoneType:
        #then the nsclient is already in the DB
        nsclient_id = tresult[0]
    else:
        #then the nsclient isn't in the DB; insert it and fetch the new id
        SQLQuery('insert into nsclients (ipv4addr) values (INET_ATON(\'' + nsclient + '\'));', 'insert')
        nsclient_id = SQLQuery('select id, ipv4addr from nsclients where ipv4addr=INET_ATON(\'' + nsclient + '\');', 'fetchone')
        nsclient_id = nsclient_id[0]
    #next we'll check if nsserver is contained in nsservers, and if so, get the id
    tresult = SQLQuery('select id, ipv4addr from nsservers where ipv4addr=INET_ATON(\'' + nsserver + '\');', 'fetchone')
    if type(tresult) is not types.NoneType:
        #then the nsserver is already in the DB
        nsserver_id = tresult[0]
    else:
        #then the nsserver isn't in the DB; insert it and fetch the new id
        SQLQuery('insert into nsservers (ipv4addr) values (INET_ATON(\'' + nsserver + '\'));', 'insert')
        nsserver_id = SQLQuery('select id, ipv4addr from nsservers where ipv4addr=INET_ATON(\'' + nsserver + '\');', 'fetchone')
        nsserver_id = nsserver_id[0]
    #next we'll get the id for the qtype and qname:
    if (qtype == 0) and (qname == 0):
        #then there was no suser data provided; 0 is the sentinel id in `main`
        qname_host_id = 0
        qtype_nstype_id = 0
    else:
        #next we'll get the id for the qtype:
        tresults = SQLQuery('select id, nstype from nstypes;', 'fetchall')
        #thanks: http://stackoverflow.com/a/2917399/843000
        #NOTE(review): raises IndexError if the qtype string isn't present in nstypes
        qtype_nstype_id = [x[0] for x in tresults if x[1] == qtype][0]
        #next we'll get the id for the host of the server that was inquired about
        tresults = SQLQuery('select id from hosts where host=\'' + qname + '\';', 'fetchone')
        if (type(tresults) is not types.NoneType) and (len(tresults) > 0):
            #then the host is already in the DB
            qname_host_id = tresults[0]
        else:
            #then the host isn't in the DB; insert it and fetch the new id
            SQLQuery('insert into hosts (host) values (\'' + qname + '\');', 'insert')
            qname_host_id = SQLQuery('select id from hosts where host=\'' + qname + '\';', 'fetchone')
            qname_host_id = qname_host_id[0]
    #next we'll deal with resolving the atypes and anames to their corresponding ids from nstypes and hosts respectively
    atypes_nstype_ids = []
    anames_ids = []
    if (0 in atypes) and (0 in anames):
        #no duser data: single sentinel answer row
        atypes_nstype_ids.append(0)
        anames_ids.append(0)
    else:
        if len(atypes) == len(anames):
            #this should always be the case (the parser appends to both in lockstep)
            for i in range(0,len(atypes)):
                #NOTE(review): the nstypes table is re-fetched on every iteration;
                #it is invariant and could be fetched once before the loop
                tresults = SQLQuery('select id, nstype from nstypes;', 'fetchall')
                atypes_nstype_ids.append([x[0] for x in tresults if x[1] == atypes[i]][0])
            for i in range(0,len(anames)):
                #some times the anames[i] will be an ip address.
                #some times the anames[i] will be a name
                ## depending on the atype[i] as described in RFCs
                tresults = SQLQuery('select id, host from hosts where host=\'' + anames[i] + '\';', 'fetchall')
                if (len(tresults) > 0):
                    #then the host is already in the DB
                    anames_ids.append([x[0] for x in tresults if x[1] == anames[i]][0])
                else:
                    #then the host isn't in the DB; insert it and fetch the new id
                    SQLQuery('insert into hosts (host) values (\'' + anames[i] + '\');', 'insert')
                    anames_ids.append(SQLQuery('select id, host from hosts where host=\'' + anames[i] + '\';', 'fetchone')[0])
    # here we will INSERT the record into the main DB
    ## each flow_seq has the possibility to have many atypes and anames associated, so we'll be inserting on an atypes basis
    ## we consider flow_seq+query_time to be unique universally... http://thread.gmane.org/gmane.network.argus/9738/focus=9750
    if len(atypes) == len(anames_ids):
        for i in range(0,len(atypes_nstype_ids)):
            #convert query_time from the RA_TIME_FORMAT string to an epoch float once;
            #after the first iteration it is already a float and the else branch runs
            if (type(query_time) is types.StringType):
                if debug: print "query_time is of type " + str(type(query_time)) + " and will be converted"
                query_time = time.mktime(datetime.strptime(query_time,'%Y-%m-%d %H:%M:%S.%f').timetuple())
            else:
                if debug: print "query_time is of another type " + str(type(query_time)) + " and will not be converted."
            if debug:
                print "type(flow_seq): " + str(type(flow_seq)) + ": " + flow_seq
                print "type(query_time): " + str(type(query_time)) + ": " + str(query_time)
                print "type(nsclient_id): " + str(type(nsclient_id)) + ": " + str(nsclient_id)
                print "type(nsserver_id): " + str(type(nsserver_id)) + ": " + str(nsserver_id)
                print "type(qtype_nstype_id): " + str(type(qtype_nstype_id)) + ": " + str(qtype_nstype_id)
                print "type(qname_host_id): " + str(type(qname_host_id)) + ": " + str(qname_host_id)
                print "type(atypes_nstype_ids[" + str(i) + "]): " + str(type(atypes_nstype_ids[i])) + ": " + str(atypes_nstype_ids[i])
                print "type(anames_ids[" + str(i) + "]): " + str(type(anames_ids[i])) + ": " + str(anames_ids[i])
            #should I check to make sure the flow isn't already in the main table? Right now I don't.
            #flow_uid is flow_seq + query_time added as floats (see gmane thread above)
            SQLQuery("insert into main (flow_uid,flow_seq,qtime,nsclient_id,nsserver_id,qtype_id,qname_host_id,atype_id,aname_host_id) " + \
                "values ('" + str(float(flow_seq)+query_time) + "','" + str(float(flow_seq)) + "','" + str(query_time) + "','" + str(nsclient_id) + "','" + str(nsserver_id) + "','" + \
                str(qtype_nstype_id) + "','" + str(qname_host_id) + "','" + str(atypes_nstype_ids[i]) + "','" + str(anames_ids[i]) + "');", 'insert')
    global reportrecordcount
    if reportrecordcount:
        global recordcount
        recordcount = recordcount + 1
        #\r rewrites the progress line in place rather than scrolling
        sys.stdout.write("\r"+"DNS questions and answers stored: " + str(recordcount))
        sys.stdout.flush()
if __name__ == "__main__": | |
if reportrecordcount: | |
sys.stdout.write("\r"+"DNS questions and answers stored: " + str(recordcount)) | |
sys.stdout.flush() | |
#f = open('C:\\Documents and Settings\\mbrown\\Desktop\\radump_dns_to_sql\\radump.out', 'r') | |
f = sys.stdin | |
#there is another method of reading data from stdin: http://stackoverflow.com/a/7056462/843000 | |
#I considered building an object to pass to a function, but that just seems latent. | |
for line in f: | |
try: | |
if debug: | |
print "---begin record---" | |
print line | |
#check if the line contains s[0] or d[0] | |
#that way we can handle missing suser or duser data by providing the fields 0 for ID in main and bypassing the SQL lookups | |
if re.search(r's\[0\]', line) == None: | |
if debug: print "suser contains more than 0 data" | |
suserzero = False | |
else: | |
#then we have no suser data | |
suserzero = True | |
if debug: print "suser contains 0 data" | |
if re.search(r'd\[0\]', line) == None: | |
if debug: print "duser contains more than 0 data" | |
duserzero = False | |
else: | |
#then we have no duserdata | |
duserzero = True | |
if debug: print "duser contains 0 data" | |
#Each field is split by '^' | |
fields = line.split('^') | |
#the first field is the sequence number for the flow. This is a unique ID across the time `argus` is running. | |
flow_seq = fields[0].strip() | |
#The second field is the last flow time, so this will be the time we'd like to insert into the DB. It will be formatted to be acceptable by strftime(). | |
query_time = fields[1].strip() | |
#the third field is the source address (the dns client) | |
nsclient = fields[2].strip() | |
#the fourth field is the destination address (the dns server) | |
nsserver = fields[3].strip() | |
#the fifth field is source user data string: | |
''' | |
- we can take the data that's only in between the double quotes | |
- we can then parse this as follows: | |
- third space separated string is a TYPE followed by a '?'. This will be the qtype | |
- fourth space separate string is the qname | |
''' | |
if suserzero: | |
qtype = 0 | |
qname = 0 | |
else: | |
suserdata = re.search(r'".*"',fields[4].strip()) | |
subsetsuserdata = suserdata.group(0).split() | |
#declaring a variable is just a pointer | |
qtypetemp = subsetsuserdata[2] | |
if (qtypetemp[len(qtypetemp)-1] == "?"): | |
#then we have a valid question | |
qtype = qtypetemp[:(len(qtypetemp)-1)] | |
qnametemp = subsetsuserdata[3] | |
if (qnametemp[len(qnametemp)-1] == "."): | |
#then we (still) have a valid question | |
qname = qnametemp[:(len(qnametemp)-1)] | |
#the sixth field is the destination user data string: | |
''' | |
- we can take the data that's only in between the double quotes | |
- we can then parse this as follows: | |
- the third space separated string is the beginning of a command separated list that ends with ' (' | |
''' | |
atypes = [] | |
anames = [] | |
if duserzero: | |
atypes.append(0) | |
anames.append(0) | |
else: | |
#duserdata = re.search(r'".*"',fields[4].strip()) | |
#taken from printer-domain.c | |
subsetduserdata = re.findall(r'[A|NS|MD|MF|CNAME|SOA|MB|MG|MR|NULL|WKS|PTR|HINFO|MINFO|MX|TXT|RP|AFSDB|X25|ISDN|RT|NSAP|NSAP_PTR|SIG|KEY|PX|GPOS|AAAA|LOC|NXT|EID|NIMLOC|SRV|ATMA|NAPTR|A6|DNAME|OPT|UINFO|UID|GID|UNSPEC|UNSPECA|TKEY|TSIG|IXFR|AXFR|MAILB|MAILA|ANY]\s\d{,3}\.\d{,3}\.\d{,3}\.\d{,3}',fields[5].strip()) | |
if len(subsetduserdata)>0: | |
for recordset in subsetduserdata: | |
rssplit = recordset.split() | |
atypes.append(rssplit[0]) #this is the answer qtype | |
anames.append(rssplit[1]) #this is the answer qtype parameter | |
#this would block so we may want to consider a multiprocessor here... or within FlowRecordProcessor() | |
FlowRecordProcessor(flow_seq,query_time, nsclient, nsserver, qtype, qname, atypes, anames) | |
except: | |
exc_type, exc_value, exc_traceback = sys.exc_info() | |
print "*** print_tb:" | |
traceback.print_tb(exc_traceback, limit=1, file=sys.stdout) | |
print "*** print_exception:" | |
traceback.print_exception(exc_type, exc_value, exc_traceback, limit=2, file=sys.stdout) | |
print "*** print_exc:" | |
traceback.print_exc() | |
print "*** format_exc, first and last line:" | |
formatted_lines = traceback.format_exc().splitlines() | |
print formatted_lines[0] | |
print formatted_lines[-1] | |
print "*** format_exception:" | |
print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) | |
print "*** extract_tb:" | |
print repr(traceback.extract_tb(exc_traceback)) | |
print "*** format_tb:" | |
print repr(traceback.format_tb(exc_traceback)) | |
print "*** tb_lineno:", exc_traceback.tb_lineno | |
print 'exiting' | |
sys.exit() | |
if onerecord: sys.exit() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment