Last active
March 6, 2018 07:54
-
-
Save kleptog/9a5aa56e8d2532032b6a7b32bf7cc3aa to your computer and use it in GitHub Desktop.
Script to check Docker Swarm fdb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from subprocess import check_output as run | |
import glob | |
# Get nodes
# universal_newlines=True makes check_output return text (not bytes) on
# Python 3 as well, so the str-based parsing below works on both versions.
nodes = run(['docker', 'node', 'ls', '-q'], universal_newlines=True).split()
# ID of the node this script is running on.
self = run(['docker', 'node', 'inspect', 'self', '--format={{.ID}}'],
           universal_newlines=True).strip()
# nodeinfo maps node ID -> {'addr': IP without port, 'hostname': hostname}.
nodeinfo = {}
# Only managers carry a ManagerStatus; dereferencing it unconditionally makes
# the template fail for worker nodes, so fall back to the node's Status.Addr.
node_format = ('{{.ID}} '
               '{{if .ManagerStatus}}{{.ManagerStatus.Addr}}'
               '{{else}}{{.Status.Addr}}{{end}} '
               '{{.Description.Hostname}}')
for line in run(['docker', 'node', 'inspect', '--format', node_format] + nodes,
                universal_newlines=True).splitlines():
    node_id, addr, hostname = line.split()
    # ManagerStatus.Addr has the form "ip:port"; keep only the IP part.
    nodeinfo[node_id] = dict(addr=addr.partition(':')[0], hostname=hostname)
# Get services
services = run(['docker', 'service', 'ls', '-q'],
               universal_newlines=True).split()
# serviceinfo maps service ID -> {'id', 'name', 'addresses'}, where
# 'addresses' maps network ID -> virtual IP of the service on that network.
serviceinfo = {}
# used_ips maps virtual IP -> label of the last service seen using it.
used_ips = {}
# `docker service inspect` exits non-zero when given no IDs, so skip the call
# entirely on a swarm without services.
if services:
    service_format = ('{{.ID}} {{.Spec.Name}} '
                      '{{range $now := .Endpoint.VirtualIPs}} '
                      '{{$now.NetworkID}} {{$now.Addr}} {{end}}')
    for line in run(['docker', 'service', 'inspect', '--format', service_format] + services,
                    universal_newlines=True).splitlines():
        fields = line.split()
        service_id, name = fields[:2]
        # Remaining tokens alternate: network ID, virtual IP, network ID, ...
        addresses = dict(zip(fields[2::2], fields[3::2]))
        label = "srv %s (%s)" % (service_id, name)
        for ip in addresses.values():
            if ip in used_ips:
                print("Duplicate IP %s (%s & %s)" % (ip, label, used_ips[ip]))
            used_ips[ip] = label
        serviceinfo[service_id] = dict(id=service_id, name=name, addresses=addresses)
# Get tasks
# `docker node ps -q` is not available in 17.03, so take the first column of
# the regular table output and drop the header row instead.
tasks = set()
for line in run(['docker', 'node', 'ps', '-f', 'desired-state=running'] + nodes,
                universal_newlines=True).splitlines():
    fields = line.split()
    # Blank lines have no first column; the header row starts with 'ID'.
    if fields and fields[0] != 'ID':
        tasks.add(fields[0])
# taskinfo maps task ID -> {'task_id', 'service_id', 'node_id', 'addresses'},
# where 'addresses' maps network ID -> first address of the task on it.
taskinfo = {}
# `docker inspect` exits non-zero when given no IDs, so skip the call when no
# task is running.
if tasks:
    task_format = ('{{.ID}} {{.ServiceID}} {{.NodeID}} '
                   '{{range $net := .NetworksAttachments}} '
                   '{{$net.Network.ID}} {{index $net.Addresses 0}} {{end}}')
    # sorted() only makes the command line deterministic.
    for line in run(['docker', 'inspect', '--format', task_format] + sorted(tasks),
                    universal_newlines=True).splitlines():
        fields = line.split()
        task_id, service_id, node_id = fields[:3]
        # Remaining tokens alternate: network ID, address, network ID, ...
        addresses = dict(zip(fields[3::2], fields[4::2]))
        taskinfo[task_id] = dict(task_id=task_id, service_id=service_id,
                                 node_id=node_id, addresses=addresses)
# Get networks
networks = run(['docker', 'network', 'ls', '-q', '-f', 'driver=overlay'],
               universal_newlines=True).split()
# netinfo maps network ID -> {'name', 'vlan_id'}, where 'vlan_id' is the
# value of the overlay driver's vxlanid_list option.
netinfo = {}
# `docker network inspect` exits non-zero when given no IDs, so skip the call
# when there is no overlay network.
if networks:
    network_format = ('{{.Id}} {{.Name}} '
                      '{{index .Options "com.docker.network.driver.overlay.vxlanid_list"}}')
    for line in run(['docker', 'network', 'inspect', '--format', network_format] + networks,
                    universal_newlines=True).splitlines():
        net_id, name, vlan_id = line.split()
        netinfo[net_id] = dict(name=name, vlan_id=vlan_id)
# Go over networks and compare with fdb | |
#print nodeinfo | |
#print taskinfo | |
#print netinfo | |
def addr2mac(addr):
    """Return the MAC address this script associates with an IPv4 address.

    The MAC is the fixed prefix ``02:42`` followed by the four octets of the
    address in hexadecimal.  A ``/prefix`` network suffix on *addr* is ignored.

    >>> addr2mac("10.0.0.5/24")
    '02:42:0a:00:00:05'
    """
    host, _, _ = addr.partition('/')  # drop the network suffix, if any
    octets = tuple(int(part) for part in host.split('.'))
    return "%02x:%02x:%02x:%02x:%02x:%02x" % ((0x02, 0x42) + octets)
def check_ns(id, netns):
    """Compare the bridge fdb of one overlay namespace with the swarm state.

    Runs ``bridge fdb`` inside the network namespace *netns*.  For every entry
    whose MAC starts with ``02:42:`` it prints where the entry points (the
    VXLAN destination, or ``local`` for any other device) followed by the
    task(s) whose address on network *id* maps to that MAC via ``addr2mac``.
    When the target expected from the task's node differs from the actual
    one, a ``^^^`` line is printed.  The raw entries are dumped at the end.

    Args:
        id: Network ID; used as the key into each task's ``addresses`` dict.
        netns: Path of the namespace file passed to ``nsenter --net=``.

    Raises:
        subprocess.CalledProcessError: if the ``nsenter`` command fails.
        KeyError: if a matching task refers to a service or node that is not
            present in ``serviceinfo`` / ``nodeinfo``.
    """
    import sys  # local import: only used to write the unterminated line prefix

    # Index the tasks attached to this network by MAC once, instead of
    # rescanning every task (and recomputing its MAC) for every fdb entry.
    tasks_by_mac = {}
    for task in taskinfo.values():
        if id in task['addresses']:
            task_mac = addr2mac(task['addresses'][id])
            tasks_by_mac.setdefault(task_mac, []).append(task)

    # universal_newlines=True yields text on Python 3 as well as Python 2.
    fdb_output = run(["nsenter", "--net=" + netns, "bridge", "fdb"],
                     universal_newlines=True)
    orig_fdb = []
    for entry in sorted(fdb_output.splitlines()):
        if not entry.startswith('02:42:'):
            continue
        orig_fdb.append(entry)
        fields = entry.split()
        # Expect "<mac> dev <device> ..."; anything else is only kept in the
        # raw dump at the end.
        if len(fields) < 3 or fields[1] != 'dev':
            continue
        mac, device = fields[0], fields[2]
        on_vxlan = device.startswith('vxlan')
        if on_vxlan and (fields[3:5] == ['master', 'br0'] or
                         fields[3:7] == ['vlan', '0', 'master', 'br0']):
            continue  # probably ignore these?

        if on_vxlan:
            act_target = fields[4]
            prefix = "%s -> %s" % (mac, act_target)
        else:
            act_target = 'local'
            prefix = "%s -> local (%s)" % (mac, device)
        # Start the output line now; whatever is printed next completes it.
        sys.stdout.write(prefix + " ")

        if act_target == 'br0':  # No idea what this means
            print("???")
            continue

        matches = tasks_by_mac.get(mac, ())
        exp_target = None
        for task in matches:
            print("= srv %s (%s) -> node %s (%s)" % (
                task['task_id'], serviceinfo[task['service_id']]['name'],
                task['node_id'], nodeinfo[task['node_id']]['hostname']))
            # A task on this node should be reached locally, any other task
            # via its node's address.  With several matches the last one wins.
            if task['node_id'] == self:
                exp_target = 'local'
            else:
                exp_target = nodeinfo[task['node_id']]['addr']

        if not matches:
            print("(no service for mac)")
        elif exp_target != act_target:
            if exp_target == 'local' and act_target == nodeinfo[self]['addr']:
                print("^^^ WARN Remote reference to self?")
            else:
                print("^^^ ERROR %s != %s" % (exp_target, act_target))

    print("--- orig fdb")
    print("\n".join(orig_fdb))
# Check every known overlay network that has a namespace on this node.
for net_id in netinfo:
    print("=== Network %s (%s) vlan %s" % (
        net_id, netinfo[net_id]['name'], netinfo[net_id]['vlan_id']))
    # Namespace files are looked up by the first 10 characters of the ID.
    candidates = glob.glob("/var/run/docker/netns/*-" + net_id[:10])
    if not candidates:
        print("(Not on this node)")
        continue
    netns = candidates[0]
    print("--- check %s" % netns)
    try:
        check_ns(net_id, netns)
    except Exception as e:
        # Deliberately broad: a failure in one namespace is reported and the
        # remaining networks are still checked.
        print("Exception: %s" % e)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment