Created
August 14, 2014 06:11
-
-
Save cloudjunky/015cdff8bcd3815004d6 to your computer and use it in GitHub Desktop.
TShark to Python DPI and save in Mongo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = '[email protected]' | |
import sys | |
import hashlib | |
import pymongo | |
""" | |
:usage: tshark -T fields -e ip.src -e tcp.srcport -e ip.dst -e tcp.dstport -e frame.protocols -r capture.pcap | python dpi_parser.py | |
""" | |
def parse_flows(lines):
    """Build one record per TCP flow from tab-separated tshark field output.

    Each line is expected to carry five tab-separated fields, in the order
    produced by the tshark command in the usage note: source IP, source
    port, destination IP, destination port and the colon-separated
    ``frame.protocols`` chain.

    Lines are skipped (never raised on) when they do not have exactly five
    fields, when a port is not an integer, when the protocol chain has no
    ``tcp`` layer, or when the chain has fewer than four layers.  Only the
    first recorded line of each flow is kept.

    :param lines: iterable of text lines (e.g. ``sys.stdin``).
    :returns: dict mapping the SHA-1 hex digest of the flow tuple to a
        record dict with keys ``hash``, ``src``, ``sport``, ``dst``,
        ``dport``, ``flow``, ``proto`` and, when layers exist past index 3,
        ``app``.
    """
    flows = {}
    for line in lines:
        fields = line.strip().split('\t')
        if len(fields) != 5:
            # Missing columns: strip() collapses leading/trailing empty
            # fields, so lines lacking an address or protocol land here.
            continue
        src, sport, dst, dport, proto = fields

        try:
            flow = (src, int(sport)), (dst, int(dport))
        except ValueError:
            # Non-numeric port.
            continue

        layers = proto.split(':')
        # NOTE(review): the record takes its protocol from a fixed position
        # (index 3) in the chain, presumably written for captures shaped
        # like eth:ip:tcp:<app> -- confirm against the tshark version in
        # use, since extra encapsulation layers shift the index.
        if 'tcp' not in layers or len(layers) < 4:
            continue

        # The flow key is directional: (src, sport) -> (dst, dport).
        flow_hash = hashlib.sha1(str(flow).encode('utf-8')).hexdigest()
        if flow_hash in flows:
            continue

        record = dict(hash=flow_hash, src=src, sport=sport,
                      dst=dst, dport=dport, flow=flow, proto=layers[3])
        if layers[4:]:
            record['app'] = layers[4:]
        flows[flow_hash] = record
    return flows


def store_flows(flows, host='localhost', port=27017):
    """Insert every flow record into the ``packetloop.dpis`` collection.

    Uses ``MongoClient`` / ``insert_one``; the older ``pymongo.Connection``
    and ``Collection.insert`` APIs no longer exist in current PyMongo.

    :param flows: dict of flow records as returned by :func:`parse_flows`.
    :param host: MongoDB host name.
    :param port: MongoDB port.
    """
    client = pymongo.MongoClient(host, port)
    try:
        dpis = client.packetloop.dpis
        for record in flows.values():
            dpis.insert_one(record)
    finally:
        client.close()


def main():
    """Read tshark output from stdin, summarise flows, write them to Mongo."""
    print("Processing stdin.....")
    print(sys.argv)
    flows = parse_flows(sys.stdin)
    print("Found application data in {} flows".format(len(flows)))
    print("Writing to Mongo")
    store_flows(flows)
    print("Done!")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment