Last active
September 18, 2018 19:58
-
-
Save fahadysf/466fcdb60a5ccc440b869b67e647d4ea to your computer and use it in GitHub Desktop.
Find strings for APP-ID from PCAP files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python2.7
"""
Copyright (C) 2018
Authors: Fahad Yousuf, Lorenzo Castelletti

This script parses a PCAP file and looks for common data patterns in TCP payloads.
These can then be used as signatures while building a custom App-ID.
It works for captures with multiple sessions of the same unknown-tcp traffic.

Known limitations:
- TCP only.
- Only the first payload in each TCP flow is considered for matching.
- Intended to be used with the context unknown-req-tcp-payload, but strings can always be converted to ASCII.
- Manual reordering is necessary if packets are out of order.
"""
import dpkt | |
import binascii | |
import string | |
# ---------------------------------------------------------------------------
# Module-level state shared by the parsing stage and the matching stage.
# ---------------------------------------------------------------------------

# Capture file to analyse.
filename = 'http.cap'

# Sequence number recorded from the most recent SYN-only segment.
seq = 0

# Counters reported in the final summary.
ipcounter = 0
tcpcounter = 0

# Maps each collected payload (hex string) to its length in bytes.
strings = {}

# Distinct payload lengths collected so far.
lengths = []

# Payloads of the length currently being compared.
tomatch = []

# One signature per payload length; compared against each other at the end.
globalmatch = []

# Stays 1 until some length group fails to yield a usable common substring.
complete = 1
# Functions that compute the longest substring common to a set of strings. Thank you, Stack Overflow :)
def long_substr(data):
    """Return the longest substring shared by every string in ``data``.

    Candidates are slices of ``data[0]``. When several common substrings
    have the maximum length, the one starting earliest in ``data[0]`` wins.

    Args:
        data: Sequence (e.g. list) of strings to compare.

    Returns:
        The longest common substring, or ``''`` when ``data`` holds fewer
        than two strings, ``data[0]`` is empty, or nothing is shared.
    """
    best = ''
    if len(data) < 2 or not data[0]:
        return best

    reference = data[0]
    others = data[1:]
    for start in range(len(reference)):
        # Only slices strictly longer than the current best can improve it.
        for end in range(start + len(best) + 1, len(reference) + 1):
            candidate = reference[start:end]
            if not all(candidate in other for other in others):
                # Every longer slice from this start contains the failing
                # one, so it cannot be common either.
                break
            best = candidate
    return best
def is_substr(find, data):
    """Return True when ``find`` occurs in every string of ``data``.

    Args:
        find: Candidate substring.
        data: Sequence of strings to search.

    Returns:
        False when ``find`` is empty or ``data`` is empty (there is nothing
        meaningful to match); otherwise True only if each element of
        ``data`` contains ``find``.
    """
    # The guard must reject either degenerate input on its own; requiring
    # both to be empty let an empty ``data`` report a match.
    if not find or not data:
        return False
    return all(find in item for item in data)
def hex_to_ascii(hexstr):
    """Render a hex string as printable text for display.

    Args:
        hexstr: String of hexadecimal digits. An odd number of digits is
            left-padded with a single ``"0"``.

    Returns:
        The decoded bytes as text, with every byte that is not in
        ``string.printable`` replaced by ``'.'`` and leading/trailing
        whitespace stripped.
    """
    if len(hexstr) % 2 == 1:
        hexstr = "0" + hexstr
    # binascii.unhexlify works on both Python 2 and 3 (str.decode("hex") is
    # Python 2 only); bytearray yields integers on either version.
    raw = bytearray(binascii.unhexlify(hexstr))
    output = "".join(
        chr(byte) if chr(byte) in string.printable else '.' for byte in raw
    )
    return output.strip()
# PCAP parsing starts here
# Walk every frame of the capture and collect the hex payload of qualifying
# TCP segments into `strings` (payload -> length) and `lengths`.
with open(filename, 'rb') as capture:  # ensure the capture file is closed
    for ts, pkt in dpkt.pcap.Reader(capture):
        try:
            eth = dpkt.ethernet.Ethernet(pkt)
        except Exception:
            print("Could not process frame at timestamp: %s" % str(ts))
            # Skip the frame: without this, `eth` would be undefined (first
            # frame) or still refer to the previously decoded frame.
            continue

        ip = eth.data
        if not (isinstance(ip, dpkt.ip.IP) and isinstance(ip.data, dpkt.tcp.TCP)):
            continue
        tcp = ip.data

        # Save sequence number of the first segment in TCP flow
        # (a SYN without ACK).
        if (tcp.flags & dpkt.tcp.TH_SYN) != 0 and (tcp.flags & dpkt.tcp.TH_ACK) == 0:
            seq = tcp.seq

        payload = binascii.hexlify(tcp.data).decode()
        # Payload size in bytes; same value as half the hex digit count, but
        # stays an integer on Python 3 as well.
        length = len(tcp.data)

        # Grab length and actual payload in HEX for comparison.
        # NOTE(review): the header says only the first payload of a flow is
        # considered, yet this accepts any segment whose sequence number is at
        # least 5 past the last recorded SYN - confirm the intended condition.
        if length > 13 and tcp.seq >= seq + 5:
            if length not in lengths:
                lengths.append(length)
            strings[payload] = length
            tcpcounter += 1
            print("TS %s: HEX: %s | ASCII: %s" % (str(ts), payload[:64], hex_to_ascii(payload[:64])))

        # NOTE(review): counted once per IP/TCP packet; the original nesting
        # of this counter could not be recovered - confirm.
        ipcounter += 1
# HEX strings matching starts here
# For every payload length, look for a substring common to all payloads of
# that length, then look for a substring common to all per-length results.
lengths.sort()
for n in lengths:
    # dict.items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    tomatch = [hexstr for hexstr, size in strings.items() if size == n]
    match = long_substr(tomatch)
    if match != '':
        print('Common substring for segments with payload of %d bytes: %s | ASCII: %s' % (n, match[:64], hex_to_ascii(match[:64])) )
        # Two hex digits per byte; anything under 7 bytes is reported as too
        # short to serve as a signature.
        if len(match) // 2 < 7:
            complete = 0
            print('This substring is shorter than 7 bytes. See below for all payloads')
            for x in tomatch:
                print( "%s | ASCII: %s" % (x[:64], hex_to_ascii(x[:64])) )
        else:
            globalmatch.append(match)
    elif len(tomatch) == 1:
        # long_substr() returns '' for a single string, so a lone payload is
        # used as its own signature.
        print('Common substring for segments with payload of %d bytes: %s | ASCII: %s' % (n, tomatch[0][:64], hex_to_ascii(tomatch[0][:64])))
        globalmatch.append(tomatch[0])
    else:
        complete = 0
        print('No common substring for segments with payload of %d bytes. See below for all payloads' % n)
        for x in tomatch:
            print( "%s | ASCII: %s" % (x[:64], hex_to_ascii(x[:64])) )

if complete == 1:
    if len(globalmatch) == 1:
        # Same single-string case as above: with only one per-length
        # signature, that signature is the global result rather than ''.
        match = globalmatch[0]
    else:
        match = long_substr(globalmatch)
    print('*** Global common substring: %s ***' % match[:64])
    if len(match) // 2 < 7:
        print('This string is shorter than 7 bytes. Use all previously found strings.')
else:
    print('*** No global common substring found ***')
print('Total number of TCP segments analysed for matches: %d' % tcpcounter)
print('Total number of analysed IP packets: %d' % ipcounter)
# Every hex string above is truncated to 64 hex digits, i.e. 32 bytes.
print('Displaying the first 32 bytes only.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment