masscan --readscan
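An Elasticsearch index template for masscan-* indices, plus a Python script that parses masscan's binary scan output (the format read by masscan --readscan) and bulk-indexes each record with parallel_bulk. Both files follow below.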
{
  "masscan": {
    "order": 0,
    "template": "masscan-*",
    "settings": {
      "index": {
        "number_of_shards": "3",
        "number_of_replicas": "1"
      }
    },
    "mappings": {
      "ReasonStatus": {
        "properties": {
          "reason": {
            "index": "not_analyzed",
            "type": "string"
          }
        }
      },
      "BannerStatus": {
        "properties": {
          "banner": {
            "type": "string",
            "fields": {
              "raw": {
                "index": "not_analyzed",
                "type": "string"
              }
            }
          },
          "app_proto": {
            "index": "not_analyzed",
            "type": "string"
          }
        }
      },
      "_default_": {
        "_all": {
          "enabled": false
        },
        "properties": {
          "ip_proto": {
            "index": "not_analyzed",
            "type": "string"
          },
          "reason": {
            "index": "not_analyzed",
            "type": "string"
          },
          "ip": {
            "type": "ip"
          },
          "origin": {
            "index": "no",
            "type": "string"
          },
          "ttl": {
            "type": "integer"
          },
          "status": {
            "index": "not_analyzed",
            "type": "string"
          },
          "timestamp": {
            "format": "epoch_second",
            "type": "date"
          }
        }
      }
    },
    "aliases": {}
  }
}
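The template above is stored in the shape returned by GET _template/masscan (wrapped in a top-level "masscan" key) and matches indices named masscan-*; the indexer below writes to masscan-1. The string/not_analyzed mappings are from the Elasticsearch 2.x era, so a newer cluster would need an updated mapping. A minimal sketch of installing it with the same Python client the indexer uses; the file name and host are assumptions:

# Sketch: install the index template before running the indexer.
# Assumes the JSON above is saved as masscan-template.json (hypothetical name)
# and an Elasticsearch node is reachable on localhost:9200.
import json
import elasticsearch

es = elasticsearch.Elasticsearch(['localhost:9200'])
with open('masscan-template.json') as fh:
    wrapped = json.load(fh)
# Unwrap the top-level "masscan" key so only the template body is sent.
es.indices.put_template(name='masscan', body=wrapped['masscan'])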
import logging
import mmap
import struct
from datetime import datetime
from enum import IntEnum
from functools import partial
from glob import glob
from ipaddress import IPv4Address
from argparse import ArgumentParser

import elasticsearch
from elasticsearch.helpers import parallel_bulk

logging.basicConfig(level=logging.INFO)


# from elasticsearch.serializer import JSONSerializer  # TODO: implement as serializer


class Timestamp(int):
    def __repr__(self):
        return datetime.fromtimestamp(self).isoformat()


class CustomIntEnum(IntEnum):
    def __str__(self):
        return self.name


class RecordStatus(CustomIntEnum):
    Open = 1
    Closed = 2
    Banner1 = 5
    Open2 = 6
    Closed2 = 7
    Arp2 = 8
    Banner9 = 9


class InternetProtocol(CustomIntEnum):
    ARP = 0
    ICMP = 1
    TCP = 6
    UDP = 17
    SCTP = 132


class ApplicationProtocol(CustomIntEnum):
    PROTO_NONE = 0
    PROTO_HEUR = 1
    PROTO_SSH1 = 2
    PROTO_SSH2 = 3
    PROTO_HTTP = 4
    PROTO_FTP = 5
    PROTO_DNS_VERSIONBIND = 6
    PROTO_SNMP = 7
    PROTO_NBTSTAT = 8
    PROTO_SSL3 = 9
    PROTO_SMTP = 10
    PROTO_POP3 = 11
    PROTO_IMAP4 = 12
    PROTO_UDP_ZEROACCESS = 13
    PROTO_X509_CERT = 14
    PROTO_HTML_TITLE = 15
    PROTO_HTML_FULL = 16
    PROTO_NTP = 17
    PROTO_VULN = 18
    PROTO_HEARTBLEED = 19
    PROTO_VNC_RFB = 20
    PROTO_SAFE = 21


class Bitset(list):
    # Decodes an integer bitmask into a list of labels for the set bits,
    # e.g. list(ReasonFlags(0b00010010)) == ['SYN', 'ACK'].
    labels = []

    def __init__(self, value):
        assert isinstance(value, int)
        # TODO: support list of labels -> int
        self.value = value
        super().__init__(list(self))

    def __int__(self):
        return self.value

    def __iter__(self):
        n = 1
        i = 0
        while n <= self.value:
            if self.value & n:
                yield self.labels[i] if len(self.labels) > i and self.labels[i] else hex(n)
            i += 1
            n = 1 << i


class ReasonFlags(Bitset):
    labels = ['FIN', 'SYN', 'RST', 'PSH', 'ACK', 'URG', 'ECE', 'CWR']


class MassscanStatus(object):
    def __init__(self, status, timestamp, ip, ip_proto, port, ttl):
        self.status = status  # TODO: make more consistent with other args
        self.timestamp = Timestamp(timestamp)
        self.ip = IPv4Address(ip)
        self.ip_proto = InternetProtocol(ip_proto)
        self.port = port
        self.ttl = ttl

    def __repr__(self):
        return '%s: %s' % (self.__class__.__name__, self.__dict__)


class ReasonStatus(MassscanStatus):
    def __init__(self, status, timestamp, ip, ip_proto, port, reason, ttl):
        super(ReasonStatus, self).__init__(status, timestamp, ip, ip_proto, port, ttl)
        self.reason = ReasonFlags(reason)


class BannerStatus(MassscanStatus):
    def __init__(self, status, timestamp, ip, ip_proto, port, app_proto, ttl, banner):
        super(BannerStatus, self).__init__(status, timestamp, ip, ip_proto, port, ttl)
        self.app_proto = ApplicationProtocol(app_proto)
        self.banner = banner


class MasscanReader:
    # The binary files begin with a ~99-byte header and end with a ~99-byte
    # footer, both starting with the version string checked here.
    compat_version = b'masscan/1.1'

    def __init__(self, path):
        self.path = path
        self.fh = None
        self.buffer = None
        self.start_time = None
        try:
            fh = open(path, 'rb')
            buffer = mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ)
        except Exception as e:
            logging.critical(e)
        else:
            self.fh = fh
            self.buffer = buffer
            # Read the header of the file and assess compatibility
            headers = buffer[0:100].rstrip(b"\x00").split()
            assert headers[0].startswith(self.compat_version)
            self.version = headers[0]
            for header in headers[1:]:
                header = header.decode('ascii')
                if ':' in header:
                    (header_type, value) = tuple(header.split(':', 1))
                    if header_type == 's':
                        # Not sure if this is intended to be a generic string or an indication of the start time
                        self.start_time = datetime.fromtimestamp(int(value))
                    else:
                        logging.warning("%s: has unknown header: %s", self, header)
            # TODO: parse other headers?
            footers = buffer[buffer.size() - 99:].rstrip(b"\x00").split(b"\n")
            if footers[0] == self.compat_version:
                self.footers = footers
            else:
                logging.warning("%s: has an invalid footer, incomplete file?", self)

    def __del__(self):
        if self.buffer:
            self.buffer.close()
        if self.fh:
            self.fh.close()

    def __repr__(self):
        return "%s: %s, timestamp: %s" % (self.__class__.__name__, self.path, self.start_time)

    def __iter__(self):
        # Each record: 1 status byte, a 1- or 2-byte length, then the payload.
        logging.debug("Parsing results from: %s", self.path)
        offset = 99
        try:
            while offset < self.buffer.size() - 99:
                status = RecordStatus(self.buffer[offset])
                length_width = 2
                length = self.buffer[offset + 1]
                if status in [RecordStatus.Open2, RecordStatus.Closed2, RecordStatus.Arp2]:
                    assert length == 13
                    (timestamp, ip, ip_proto, port, reason, ttl) = struct.unpack_from('>LLBHBB', self.buffer,
                                                                                      offset + 2)
                    status = ReasonStatus(status, timestamp, ip, ip_proto, port, reason, ttl)
                elif status == RecordStatus.Banner9:
                    if length >= 128:
                        length_width = 3
                        if self.buffer[offset + 2] > 0b01111111:
                            logging.warning("it's happening")  # TODO: test this
                        length = ((self.buffer[offset + 1] & 0b01111111) << 7) | \
                                 (self.buffer[offset + 2] & 0b01111111)
                    (timestamp, ip, ip_proto, port, app_proto, ttl) = struct.unpack_from('>LLBHHB',
                                                                                         self.buffer,
                                                                                         offset + length_width)
                    banner = self.buffer[offset + length_width + 14:offset + length + length_width].decode('latin-1')
                    status = BannerStatus(status, timestamp, ip, ip_proto, port, app_proto, ttl, banner)
                else:
                    break
                offset += length + length_width
                yield status
        except Exception as e:
            logging.warning('%s: %s @ %d', self, e, offset)

    @classmethod
    def iter_results_glob(cls, pattern):
        scan_files = glob(pattern)
        for n, scan_file in enumerate(sorted(scan_files, reverse=True)):
            logging.debug("%d of %d: %s", n, len(scan_files), scan_file)
            yield cls(scan_file)


class MasscanIndexer:
    INDEX_VERSION = 1

    def __init__(self, dbs):
        self.es = elasticsearch.Elasticsearch(dbs, timeout=10)

    def index_glob(self, pattern):
        for scan in MasscanReader.iter_results_glob(pattern):
            self._index(scan)

    def index(self, path):
        scan = MasscanReader(path)
        self._index(scan)

    def _index(self, scan):
        logging.debug("Indexing: %s", scan)
        try:
            def StatusToDict(status, **kwargs):
                assert isinstance(status, MassscanStatus)
                # d = dict([(k, str(v)) if isinstance(v, int) else (k, v) for k, v in status.__dict__.items()])
                d = dict(status.__dict__)
                for k, v in d.items():
                    if isinstance(v, CustomIntEnum):
                        d[k] = str(v)
                    elif isinstance(v, IPv4Address):
                        d[k] = v.compressed
                    elif isinstance(v, Bitset):
                        d[k] = list(v)
                d.update(kwargs)
                d['_type'] = status.__class__.__name__
                return d

            for success, info in parallel_bulk(self.es,
                                               map(partial(StatusToDict,
                                                           _index='masscan-%d' % self.INDEX_VERSION,
                                                           origin=scan.path),
                                                   scan)):
                if success:
                    logging.debug(info)
                else:
                    logging.warning(info)
        # except elasticsearch.helpers.BulkIndexError as e:
        except Exception as e:
            logging.warning(e)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('glob', help='Glob pattern to index')
    parser.add_argument('hosts', type=str, metavar='host', nargs='+', help='Elasticsearch hosts')
    args = parser.parse_args()
    MasscanIndexer(args.hosts).index_glob(args.glob)
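A quick usage sketch, assuming the script above is saved as masscan_indexer.py and the scan files were produced in masscan's binary output format (the one --readscan reads back); the module name, paths, and host below are placeholders:

# Hypothetical module name, paths, and host; adjust to your layout.
from masscan_indexer import MasscanIndexer, MasscanReader

# Inspect parsed records without touching Elasticsearch:
for record in MasscanReader('/var/log/masscan/scan.mass'):
    print(record)

# Bulk-index every matching file (what the CLI entry point does):
MasscanIndexer(['localhost:9200']).index_glob('/var/log/masscan/*.mass')

From the shell, the equivalent invocation would be: python3 masscan_indexer.py '/var/log/masscan/*.mass' localhost:9200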