Created
May 1, 2019 00:56
-
-
Save aurorapar/acbf7a62a58fb2e326a349d305f01b75 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pomegranate import *

import dpkt
import random
import time
import traceback
# Small test, 1 for all data, 0 for none
def log(text):
    """Append *text* to the ``HMM-output.txt`` report in the working directory.

    The file is created on first use. Nothing is added to *text*; callers
    supply their own newlines.
    """
    fileName = "HMM-output.txt"
    # Append-only ("a") is all this function needs, and an explicit encoding
    # keeps the report byte-identical across platforms instead of depending
    # on the locale default.
    with open(fileName, "a", encoding="utf-8") as output:
        output.write(text)
# One-letter packet symbols, in the order used by every emission table:
# K = unknown, P = ICMP, S = SCTP, G = IGMP, U = UDP, T = TCP.
_SYMBOLS = ("K", "P", "S", "G", "U", "T")

# Hidden-state names.  Index 0 is the uniform state; the remaining names pair
# up with _SYMBOLS in order.  "UNKOWN" keeps the spelling the model has
# always used for that state name.
_STATE_NAMES = ("total", "UNKOWN", "ICMP", "SCTP", "IGMP", "UDP", "TCP")

# Weights for start -> state, in _STATE_NAMES order.
# NOTE(review): this row (and the "total" row below) sums to 1.19, not 1.
# The values are left untouched; this presumably relies on bake()
# normalising outgoing edges -- confirm against the installed pomegranate.
_START_WEIGHTS = (.05, .19, .19, .19, .19, .19, .19)

# Weights for state -> state.  Rows are the source state, columns the target
# state, both in _STATE_NAMES order.
_TRANSITION_WEIGHTS = (
    (.05, .19, .19, .19, .19, .19, .19),  # total (even state)
    (.01, .80, .01, .01, .01, .08, .08),  # UNKOWN
    (.01, .05, .50, .01, .01, .21, .21),  # ICMP
    (.01, .05, .01, .80, .02, .05, .06),  # SCTP
    (.01, .05, .01, .01, .30, .31, .31),  # IGMP
    (.01, .05, .01, .01, .01, .86, .05),  # UDP
    (.01, .05, .01, .01, .01, .05, .86),  # TCP
)


def _peaked_emissions(favoured):
    """Return an emission table giving *favoured* 0.95 and every other symbol 0.01."""
    return {symbol: (0.95 if symbol == favoured else 0.01) for symbol in _SYMBOLS}


def _build_traffic_hmm():
    """Build and bake the seven-state "Traffic-Predictor" model, untrained."""
    # The uniform state spreads probability evenly so the table sums to 1.
    uniform = {symbol: 1.0 / len(_SYMBOLS) for symbol in _SYMBOLS}
    tables = [uniform] + [_peaked_emissions(symbol) for symbol in _SYMBOLS]
    states = [
        State(DiscreteDistribution(table), name=name)
        for table, name in zip(tables, _STATE_NAMES)
    ]

    hmm = HiddenMarkovModel("Traffic-Predictor")
    hmm.add_states(*states)
    for target, weight in zip(states, _START_WEIGHTS):
        hmm.add_transition(hmm.start, target, weight)
    for source, row in zip(states, _TRANSITION_WEIGHTS):
        for target, weight in zip(states, row):
            hmm.add_transition(source, target, weight)
    hmm.bake()
    return hmm


def modelThis(data, percent):
    """Fit the traffic HMM on *data* and append a report to the log file.

    Args:
        data: list of one-letter packet symbols (see ``_SYMBOLS``).
        percent: progress figure written into the report header.

    The report holds the model's probability for one randomly chosen element
    of *data*, followed by the emission table of ``hmm.states[0]`` through
    ``hmm.states[6]``.  When *data* is empty nothing is fitted and a short
    note is logged instead.
    """
    header = "Model at %s%% of data processed\n" % percent
    if not data:
        # random.choice() raises on an empty list and there is nothing to
        # train on, so record the fact and return.
        log(header + "No packets classified yet; model not fitted\n")
        return

    hmm = _build_traffic_hmm()
    # NOTE(review): data is a flat list of one-character strings, so each
    # element is presumably treated as its own length-1 sequence -- confirm
    # that this is the intended training input.
    hmm.fit(data, stop_threshold=1e-1)

    sample = random.choice(data)
    parts = [header, "Probability of %s: %s\n" % (sample, hmm.probability(sample))]
    for index in range(len(_STATE_NAMES)):
        parts.append("State %s: " % index)
        try:
            emissions = hmm.states[index].distribution.parameters[0]
            parts.extend("\n\t%s %s" % (key, value) for key, value in emissions.items())
        except Exception:
            # Best effort: a state whose emission table cannot be read is
            # reported on stderr and left blank in the report.
            traceback.print_exc()
        parts.append("\n")
    log("".join(parts))
def _classify_frame(buf):
    """Map one raw Ethernet frame to its one-letter protocol symbol.

    Returns "U" (UDP), "T" (TCP), "P" (ICMP), "G" (IGMP), "S" (SCTP) or "K"
    for anything else.  Decoding errors propagate to the caller.
    """
    # No IPv4-only filter is applied: whatever payload the frame carries is
    # inspected through its own ``.data``.  A payload without that attribute
    # raises here and is counted as skipped by the caller.
    transport = dpkt.ethernet.Ethernet(buf).data.data
    if isinstance(transport, dpkt.udp.UDP):
        return "U"
    if isinstance(transport, dpkt.tcp.TCP):
        return "T"
    if isinstance(transport, dpkt.icmp.ICMP):
        return "P"
    if isinstance(transport, dpkt.igmp.IGMP):
        return "G"
    if isinstance(transport, dpkt.sctp.SCTP):
        return "S"
    return "K"


def main():
    """Stream ``alldata2.pcap`` and refit the model at fixed progress points.

    Progress is the running sum of frame lengths divided by a fixed expected
    total.  Each time the integer percentage reaches a pending sample point
    (0, 20, ..., 100) ``modelThis`` is called with all symbols collected so
    far; reading stops once every sample point has been used.
    """
    # NOTE(review): presumably the summed frame lengths of alldata2.pcap --
    # confirm before running against a different capture.
    to_process = 36489425360
    sample_points = 5
    step = int(100 / sample_points)
    sample_times = list(range(0, 100 + step, step))
    print(sample_times)

    bytes_processed = 0
    skipped = 0
    data = []
    last_progress = None
    with open("alldata2.pcap", 'rb') as capture:
        for _timestamp, buf in dpkt.pcap.Reader(capture):
            bytes_processed += len(buf)
            percent = bytes_processed / float(to_process) * 100

            # Only redraw when the displayed value changes; printing once
            # per packet is needlessly slow.
            progress = "\r%.2f percent processed" % percent
            if progress != last_progress:
                print(progress, end='\r')
                last_progress = progress

            try:
                data.append(_classify_frame(buf))
            except Exception:
                # Best effort: frames that cannot be decoded are left out of
                # the training data, but counted so the loss is visible.
                skipped += 1

            sample_point = int(percent)
            # Wait for at least one classified packet before sampling.
            if data and sample_point in sample_times:
                modelThis(data, sample_point)
                sample_times.remove(sample_point)
            if not sample_times:
                break

    if skipped:
        print("\n%d frames could not be decoded and were skipped" % skipped)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment