Skip to content

Instantly share code, notes, and snippets.

@aurorapar
Created May 1, 2019 00:56
Show Gist options
  • Save aurorapar/acbf7a62a58fb2e326a349d305f01b75 to your computer and use it in GitHub Desktop.
Save aurorapar/acbf7a62a58fb2e326a349d305f01b75 to your computer and use it in GitHub Desktop.
from pomegranate import *
import dpkt, traceback, random, time
# Small test, 1 for all data, 0 for none
def log(text):
    """Append ``text`` verbatim to ``HMM-output.txt`` in the working directory.

    The file is opened in append mode on every call, so earlier output is
    preserved; no newline is added.
    """
    with open("HMM-output.txt", "a+") as sink:
        sink.write(text)
def modelThis(data, percent):
    """Build the traffic HMM, fit it to ``data`` and append a report to the log.

    Seven states are created: a uniform "total" state plus one state per
    protocol symbol, each strongly favouring its own symbol
    (K=unknown, P=ICMP, S=SCTP, G=IGMP, U=UDP, T=TCP).

    Args:
        data: Observations handed unchanged to ``hmm.fit``; one element is
            also drawn at random and its probability under the fitted model
            is reported.
        percent: Progress label written into the report header.

    Returns:
        None. The report is written through ``log``.
    """
    if not data:
        # random.choice() raises IndexError on an empty sequence, so there is
        # nothing meaningful to fit or sample; record that and stop.
        log("Model at %s%% of data processed\nNo data to model\n" % percent)
        return

    symbols = ('K', 'P', 'S', 'G', 'U', 'T')

    def peaked(dominant):
        # Emission table: 0.95 for the state's own symbol, 0.01 for the rest.
        return {sym: (0.95 if sym == dominant else 0.01) for sym in symbols}

    # The "total" state emits every symbol with equal probability. These must
    # be probabilities (1/6 each), not percentages.
    uniform = {sym: 1.0 / len(symbols) for sym in symbols}

    states = [State(DiscreteDistribution(uniform), name="total")]
    for state_name, dominant in (
        ("UNKOWN", 'K'),
        ("ICMP", 'P'),
        ("SCTP", 'S'),
        ("IGMP", 'G'),
        ("UDP", 'U'),
        ("TCP", 'T'),
    ):
        states.append(State(DiscreteDistribution(peaked(dominant)), name=state_name))

    hmm = HiddenMarkovModel("Traffic-Predictor")
    hmm.add_states(*states)

    # One row per source state; columns follow the order of ``states``
    # (total, UNKOWN, ICMP, SCTP, IGMP, UDP, TCP).
    transition_rows = (
        (hmm.start, (.05, .19, .19, .19, .19, .19, .19)),
        (states[0], (.05, .19, .19, .19, .19, .19, .19)),  # total (even state)
        (states[1], (.01, .80, .01, .01, .01, .08, .08)),  # UNKOWN
        (states[2], (.01, .05, .50, .01, .01, .21, .21)),  # ICMP
        (states[3], (.01, .05, .01, .80, .02, .05, .06)),  # SCTP
        (states[4], (.01, .05, .01, .01, .30, .31, .31)),  # IGMP
        (states[5], (.01, .05, .01, .01, .01, .86, .05)),  # UDP
        (states[6], (.01, .05, .01, .01, .01, .05, .86)),  # TCP
    )
    for source, weights in transition_rows:
        # The start and "total" rows add up to 1.19, so scale every row to
        # sum to 1 instead of relying on the library to normalise it.
        row_total = sum(weights)
        for target, weight in zip(states, weights):
            hmm.add_transition(source, target, weight / row_total)

    hmm.bake()
    # NOTE(review): the visible caller passes a flat list of one-character
    # symbols; presumably each element is treated as its own sequence, which
    # would leave state-to-state transitions untrained - confirm intent.
    hmm.fit(data, stop_threshold=1e-1)

    sample = random.choice(data)
    report = ["Model at %s%% of data processed\n" % percent]
    report.append("Probability of %s: %s" % (sample, str(hmm.probability(sample))) + "\n")
    # NOTE(review): states are reported by index in hmm.states, which may not
    # match the order they were added in - verify before interpreting.
    for index in range(7):
        try:
            report.append("State %s: " % index)
            for key, value in hmm.states[index].distribution.parameters[0].items():
                report.append("\n\t%s %s" % (key, value))
            report.append("\n")
        except Exception:
            # Keep reporting the remaining states, but let KeyboardInterrupt
            # and SystemExit propagate.
            traceback.print_exc()
    log("".join(report))
def _classify_packet(buf):
    """Return the one-letter protocol symbol for one raw captured frame.

    The frame is decoded as Ethernet and the payload of its payload is
    inspected: "U" for UDP, "T" for TCP, "P" for ICMP, "G" for IGMP,
    "S" for SCTP and "K" for anything else. Decoding errors propagate to
    the caller.
    """
    eth = dpkt.ethernet.Ethernet(buf)
    # No IP-only filter is applied: any decoded layer that exposes ``.data``
    # is inspected, and anything unrecognised is reported as "K".
    transport = eth.data.data
    if isinstance(transport, dpkt.udp.UDP):
        return "U"
    if isinstance(transport, dpkt.tcp.TCP):
        return "T"
    if isinstance(transport, dpkt.icmp.ICMP):
        return "P"
    if isinstance(transport, dpkt.igmp.IGMP):
        return "G"
    if isinstance(transport, dpkt.sctp.SCTP):
        return "S"
    return "K"


if __name__ == '__main__':
    bytesProcessed = 0
    # Hard-coded byte total that the running sum of packet lengths is
    # measured against to obtain a percentage.
    toProcess = 36489425360
    data = []
    samplePoints = 5
    sampleStep = int(100/samplePoints)
    # Percentages (0, 20, ..., 100) at which the model is rebuilt and logged.
    sampleTimes = list(range(0, 100 + sampleStep, sampleStep))
    print(sampleTimes)
    lastProgress = None
    with open("alldata2.pcap", 'rb') as wiresharkData:
        pcap = dpkt.pcap.Reader(wiresharkData)
        for _timestamp, buf in pcap:
            bytesProcessed += len(buf)
            percentDone = bytesProcessed/float(toProcess)*100
            progress = "\r%.2f percent processed" % percentDone
            # Only redraw the progress line when its text actually changes;
            # printing once per packet dominates the run time.
            if progress != lastProgress:
                print(progress, end='\r')
                lastProgress = progress
            try:
                data.append(_classify_packet(buf))
            except Exception:
                # Best effort: frames that cannot be decoded are skipped.
                # Catching Exception (not a bare except) keeps Ctrl-C working.
                pass
            samplePoint = int(percentDone)
            if samplePoint in sampleTimes:
                modelThis(data, samplePoint)
                # Each sample point is modelled once, then retired.
                sampleTimes.remove(samplePoint)
                if not sampleTimes:
                    break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment