Skip to content

Instantly share code, notes, and snippets.

@b1tninja
Last active January 13, 2018 13:37
Show Gist options
  • Save b1tninja/655195cbf6f0534217d8 to your computer and use it in GitHub Desktop.
Save b1tninja/655195cbf6f0534217d8 to your computer and use it in GitHub Desktop.
masscan --readscan
{
"masscan": {
"order": 0,
"template": "masscan-*",
"settings": {
"index": {
"number_of_shards": "3",
"number_of_replicas": "1"
}
},
"mappings": {
"ReasonStatus": {
"properties": {
"reason": {
"index": "not_analyzed",
"type": "string"
}
}
},
"BannerStatus": {
"properties": {
"banner": {
"type": "string",
"fields": {
"raw": {
"index": "not_analyzed",
"type": "string"
}
}
},
"app_proto": {
"index": "not_analyzed",
"type": "string"
}
}
},
"_default_": {
"_all": {
"enabled": false
},
"properties": {
"ip_proto": {
"index": "not_analyzed",
"type": "string"
},
"reason": {
"index": "not_analyzed",
"type": "string"
},
"ip": {
"type": "ip"
},
"origin": {
"index": "no",
"type": "string"
},
"ttl": {
"type": "integer"
},
"status": {
"index": "not_analyzed",
"type": "string"
},
"timestamp": {
"format": "epoch_second",
"type": "date"
}
}
}
},
"aliases": {}
}
}
import logging
import mmap
import struct
from datetime import datetime
from enum import IntEnum
from functools import partial
from glob import glob
from ipaddress import IPv4Address
from argparse import ArgumentParser
import elasticsearch
from elasticsearch.helpers import parallel_bulk
# Configure the root logger at INFO level: the logging.debug() progress
# messages emitted elsewhere in this file stay hidden unless this is lowered.
logging.basicConfig(level=logging.INFO)
# from elasticsearch.serializer import JSONSerializer # TODO: implement as serializer
class Timestamp(int):
    """Integer epoch timestamp whose ``repr()`` is an ISO-8601 date string.

    The numeric value is left untouched, so instances still compare and
    do arithmetic like ordinary integers; only the textual form differs.
    """

    def __repr__(self):
        # fromtimestamp() interprets the value in the host's local time zone,
        # so the rendered text depends on where the code runs.
        moment = datetime.fromtimestamp(self)
        return moment.isoformat()
class CustomIntEnum(IntEnum):
    """IntEnum base class whose ``str()`` is the bare member name."""

    def __str__(self):
        # Just the label: no "ClassName." prefix and no numeric value.
        label = self.name
        return label
class RecordStatus(CustomIntEnum):
    """Record type byte that starts every record in a scan file.

    MasscanReader decodes Open2, Closed2 and Arp2 as ReasonStatus records
    and Banner9 as BannerStatus records; it stops iterating when it meets
    one of the remaining types (Open, Closed, Banner1).
    """

    Open = 1
    Closed = 2
    Banner1 = 5
    Open2 = 6
    Closed2 = 7
    Arp2 = 8
    # Plain integer on purpose: a trailing comma here would declare the
    # tuple (9,), which IntEnum only tolerates by unpacking it into int().
    Banner9 = 9
class InternetProtocol(CustomIntEnum):
    """Value of the ``ip_proto`` field carried by each scan record.

    NOTE(review): ICMP/TCP/UDP/SCTP match the IANA IP protocol numbers;
    ARP = 0 looks like a scanner-specific convention -- confirm against
    the masscan sources.
    """

    ARP = 0
    ICMP = 1
    TCP = 6
    UDP = 17
    SCTP = 132
class ApplicationProtocol(CustomIntEnum):
    """Value of the ``app_proto`` field of a banner record.

    NOTE(review): the names and numbering presumably mirror masscan's own
    application-protocol enum -- verify against the masscan version that
    produced the files before relying on individual values.
    """

    # No / heuristically detected protocol
    PROTO_NONE = 0
    PROTO_HEUR = 1

    # Protocol-specific banners
    PROTO_SSH1 = 2
    PROTO_SSH2 = 3
    PROTO_HTTP = 4
    PROTO_FTP = 5
    PROTO_DNS_VERSIONBIND = 6
    PROTO_SNMP = 7
    PROTO_NBTSTAT = 8
    PROTO_SSL3 = 9
    PROTO_SMTP = 10
    PROTO_POP3 = 11
    PROTO_IMAP4 = 12
    PROTO_UDP_ZEROACCESS = 13
    PROTO_X509_CERT = 14
    PROTO_HTML_TITLE = 15
    PROTO_HTML_FULL = 16
    PROTO_NTP = 17
    PROTO_VULN = 18
    PROTO_HEARTBLEED = 19
    PROTO_VNC_RFB = 20
    PROTO_SAFE = 21
class Bitset(list):
    """List of the names of the bits that are set in an integer.

    Subclasses supply ``labels``, where ``labels[i]`` names bit ``1 << i``.
    A set bit that has no label (index past the end of ``labels``, or a
    falsy entry) is reported as the hex value of that bit instead.
    """

    labels = []

    def __init__(self, value):
        assert isinstance(value, int)
        # TODO: support list of labels -> int
        self.value = value
        # self.value is already set, so iterating self decodes the bits;
        # the decoded names are then stored as the list contents.
        decoded = [name for name in self]
        super().__init__(decoded)

    def __int__(self):
        return self.value

    def __iter__(self):
        """Yield a name for every set bit, lowest bit first."""
        position = 0
        mask = 1
        while mask <= self.value:
            if self.value & mask:
                known = self.labels[position] if position < len(self.labels) else None
                yield known if known else hex(mask)
            position += 1
            mask <<= 1
class ReasonFlags(Bitset):
    """Bitset for the ``reason`` byte of a status record.

    The labels carry the names of the TCP header flags, lowest bit first.
    """

    labels = [
        'FIN',  # 0x01
        'SYN',  # 0x02
        'RST',  # 0x04
        'PSH',  # 0x08
        'ACK',  # 0x10
        'URG',  # 0x20
        'ECE',  # 0x40
        'CWR',  # 0x80
    ]
class MassscanStatus(object):
    """Fields common to every decoded scan record.

    Raw integers are wrapped on construction: ``timestamp`` becomes a
    Timestamp, ``ip`` an IPv4Address and ``ip_proto`` an InternetProtocol
    member.  ``status``, ``port`` and ``ttl`` are stored as given.
    """

    def __init__(self, status, timestamp, ip, ip_proto, port, ttl):
        # Assignment order is kept stable because it determines the key
        # order of __dict__, which __repr__ prints.
        self.status = status  # TODO: make more consistent with other args
        self.timestamp = Timestamp(timestamp)
        self.ip = IPv4Address(ip)
        self.ip_proto = InternetProtocol(ip_proto)
        self.port = port
        self.ttl = ttl

    def __repr__(self):
        return '{}: {}'.format(type(self).__name__, self.__dict__)
class ReasonStatus(MassscanStatus):
    """Status record that additionally carries a ``reason`` flag byte."""

    def __init__(self, status, timestamp, ip, ip_proto, port, reason, ttl):
        super().__init__(
            status=status,
            timestamp=timestamp,
            ip=ip,
            ip_proto=ip_proto,
            port=port,
            ttl=ttl,
        )
        # Decode the raw flag byte into a list of flag names.
        self.reason = ReasonFlags(reason)
class BannerStatus(MassscanStatus):
    """Banner record: the common fields plus application protocol and banner text."""

    def __init__(self, status, timestamp, ip, ip_proto, port, app_proto, ttl, banner):
        super().__init__(
            status=status,
            timestamp=timestamp,
            ip=ip,
            ip_proto=ip_proto,
            port=port,
            ttl=ttl,
        )
        self.app_proto = ApplicationProtocol(app_proto)
        self.banner = banner
class MasscanReader:
compat_version = b'masscan/1.1'
def __init__(self, path):
self.path = path
self.fh = None
self.buffer = None
try:
fh = open(path, 'rb')
buffer = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
except Exception as e:
logging.critical(e)
else:
self.fh = fh
self.buffer = buffer
# Read the header of the file and asses compatability
headers = buffer[0:100].rstrip(b"\x00").split()
assert headers[0].startswith(self.compat_version)
self.version = headers[0]
for header in headers[1:]:
header = header.decode('ascii')
if ':' in header:
(header_type, value) = tuple(header.split(':', 1))
if header_type == 's':
# Not sure if this is intended to be a generic string or indication of start time
self.start_time = datetime.fromtimestamp(int(value))
else:
logging.warning("%s: has unknown header: %s", self, header)
# TODO: parse other headers?
footers = buffer[buffer.size() - 99:].rstrip(b"\x00").split(b"\n")
if footers[0] == self.compat_version:
self.footers = footers
else:
logging.warning("%s: has an invalid footer, incomplete file?", self)
def __del__(self):
if self.buffer:
self.buffer.close()
if self.fh:
self.fh.close()
def __repr__(self):
return "%s: %s, timestamp: %s" % (self.__class__.__name__, self.path, self.start_time)
def __iter__(self):
logging.debug("Parsing results from: %s", self.path)
offset = 99
try:
while offset < self.buffer.size() - 99:
status = RecordStatus(self.buffer[offset])
length_width = 2
length = self.buffer[offset + 1]
if status in [RecordStatus.Open2, RecordStatus.Closed2, RecordStatus.Arp2]:
assert length == 13
(timestamp, ip, ip_proto, port, reason, ttl) = struct.unpack_from('>LLBHBB', self.buffer,
offset + 2)
status = ReasonStatus(status, timestamp, ip, ip_proto, port, reason, ttl)
elif status == RecordStatus.Banner9:
if length >= 128:
length_width = 3
if self.buffer[offset + 2] > 0b01111111:
logging.warning("its happening") # TODO: test this
length = ((self.buffer[offset + 1] & 0b01111111) << 7) | \
(self.buffer[offset + 2] & 0b01111111)
(timestamp, ip, ip_proto, port, app_proto, ttl) = struct.unpack_from('>LLBHHB',
self.buffer,
offset + length_width)
banner = self.buffer[offset + length_width + 14:offset + length + length_width].decode('latin-1')
status = BannerStatus(status, timestamp, ip, ip_proto, port, app_proto, ttl, banner)
else:
break
offset += length + length_width
yield status
except Exception as e:
logging.warning('%s: %s @ ', self, e, offset)
@classmethod
def iter_results_glob(cls, pattern):
scan_files = glob(pattern)
for n, scan_file in enumerate(reversed(sorted(scan_files))):
logging.debug("%d of %d: %s", n, len(scan_files), scan_file)
yield cls(scan_file)
class MasscanIndexer:
    """Bulk-indexes decoded scan records into Elasticsearch.

    Every record goes to the index ``masscan-<INDEX_VERSION>``; its document
    type is the record's class name and ``origin`` is the scan file path.
    """

    INDEX_VERSION = 1

    def __init__(self, dbs):
        self.es = elasticsearch.Elasticsearch(dbs, timeout=10)

    def index_glob(self, pattern):
        """Index every scan file matching the glob *pattern*."""
        for reader in MasscanReader.iter_results_glob(pattern):
            self._index(reader)

    def index(self, path):
        """Index the single scan file at *path*."""
        self._index(MasscanReader(path))

    def _index(self, scan):
        """Stream the records of *scan* through parallel_bulk; failures are logged, not raised."""
        logging.debug("Indexing: %s", scan)

        def as_action(status, **kwargs):
            # Turn one record into a bulk action with JSON-friendly values.
            assert isinstance(status, MassscanStatus)
            action = {}
            for key, value in status.__dict__.items():
                if isinstance(value, CustomIntEnum):
                    value = str(value)
                elif isinstance(value, IPv4Address):
                    value = value.compressed
                elif isinstance(value, Bitset):
                    value = list(value)
                action[key] = value
            action.update(kwargs)
            action['_type'] = status.__class__.__name__
            return action

        try:
            to_action = partial(as_action,
                                _index='masscan-%d' % self.INDEX_VERSION,
                                origin=scan.path)
            for success, info in parallel_bulk(self.es, map(to_action, scan)):
                report = logging.debug if success else logging.warning
                report(info)
        # except elasticsearch.helpers.BulkIndexError as e:
        except Exception as e:
            logging.warning(e)
if __name__ == '__main__':
    # Usage: <script> GLOB host [host ...]
    cli = ArgumentParser()
    cli.add_argument('glob', help='Glob pattern to index')
    cli.add_argument('hosts', nargs='+', type=str, metavar='host', help='Elasticsearch hosts')
    options = cli.parse_args()

    indexer = MasscanIndexer(options.hosts)
    indexer.index_glob(options.glob)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment