Created
May 25, 2018 19:43
-
-
Save tcotav/6bac4beba550e848bc2cca214a33f2c4 to your computer and use it in GitHub Desktop.
filter ip into time-based buckets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""
Track how many requests a given IP makes within a configurable time interval.
"""
CHECK_INTERVAL_SECONDS=1 # interval length in seconds; change to 300 for 5 minutes
# NOTE: the counting logic assumes entries arrive sorted by time.
#
# Does that break if we have multiple logs? Pretty sure YES.
#
class ipcount(object):
    """Per-IP hit tracker that groups hits into blocks by time gap.

    Consecutive hits belong to the same block while the gap between one hit
    and the next is at most ``interval``. Hits must be fed in
    non-decreasing timestamp order.

    Attributes:
        ip: the address this tracker belongs to.
        lastSeen: timestamp of the most recent hit (0 before any hit).
        maxCount: size of the largest block seen so far.
        numIntervals: number of blocks that were closed by a later hit
            falling outside the interval (the still-open block is not
            included).
        curCount: number of hits in the currently open block.
        interval: maximum gap, in timestamp units, between two hits of the
            same block.
    """

    def __init__(self, ip, interval):
        self.ip = ip
        self.lastSeen = 0
        self.maxCount = 0
        self.numIntervals = 0
        self.curCount = 0
        self.interval = interval

    def hit(self, ts):
        """Record one request from this IP at timestamp ``ts``."""
        if self.curCount == 0:
            # First hit ever. curCount is used as the sentinel instead of
            # lastSeen == 0 so that a legitimate timestamp of 0 works.
            self.curCount = 1
        elif ts - self.lastSeen <= self.interval:
            # Still inside the current block.
            self.curCount += 1
        else:
            # Gap too large: close the previous block and open a new one.
            # The new block starts at 1 because this hit belongs to it.
            self.numIntervals += 1
            self.curCount = 1
        if self.curCount > self.maxCount:
            self.maxCount = self.curCount
        # No matter what, remember when we last saw this IP.
        self.lastSeen = ts

    def __repr__(self):
        return "intervals: %s, maxcount: %s" % (self.numIntervals, self.maxCount)
# Maps an IP address string to the ipcount tracker for that address.
ipdata = {}


def check_entry(idata):
    """Record one log entry in the module-level ``ipdata`` table.

    Args:
        idata: an ``(ip, timestamp)`` pair; index 0 is the IP and index 1
            is the timestamp passed on to the tracker's ``hit``.

    A tracker is created with ``CHECK_INTERVAL_SECONDS`` the first time an
    IP is seen; afterwards the existing tracker is reused.
    """
    ip = idata[0]
    # `in`/`get` instead of dict.has_key(), which no longer exists in Python 3.
    ipobj = ipdata.get(ip)
    if ipobj is None:
        ipobj = ipcount(ip, CHECK_INTERVAL_SECONDS)
        ipdata[ip] = ipobj
    ipobj.hit(idata[1])
if __name__ == "__main__":
    # Demo run: replay a hand-made list of (ip, timestamp) tuples, already
    # ordered by time, and print the resulting per-IP trackers.
    import time

    t = int(time.time())
    ip_a = "172.18.0.20"
    ip_b = "172.18.0.21"
    # (ip, seconds after the start time)
    offsets = [
        (ip_a, 0),
        (ip_b, 0),
        (ip_a, 10),
        (ip_b, 11),
        (ip_a, 20),
        (ip_b, 21),
        (ip_a, 21),
        (ip_a, 22),
        (ip_b, 22),
        (ip_b, 23),
        (ip_b, 24),
        (ip_a, 26),
        (ip_a, 35),
        (ip_b, 55),
        (ip_a, 65),
        (ip_b, 95),
    ]
    dataset = [(addr, t + delta) for addr, delta in offsets]
    for ds in dataset:
        check_entry(ds)
    print(ipdata)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment