-
-
Save gesellix/cf931d690361920a2aa378f904bef90b to your computer and use it in GitHub Desktop.
Script to check the Docker Swarm overlay-network FDB (bridge forwarding database) against the tasks Swarm reports
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
from subprocess import check_output as run
import glob


def _docker(*args):
    """Run ``docker <args>`` and return its standard output as text.

    ``universal_newlines=True`` makes ``check_output`` return ``str`` on both
    Python 2 and Python 3, so the ``split()``/``partition()`` calls below never
    operate on ``bytes``.  A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    return run(['docker'] + list(args), universal_newlines=True)


def _inspect_rows(subcommand, template, ids):
    """Return the whitespace-split fields of each non-blank inspect output line.

    *subcommand* is the docker sub-command as a list (e.g. ``['node',
    'inspect']``), *template* the ``--format`` Go template and *ids* the
    objects to inspect.  When *ids* is empty no command is run and an empty
    list is returned, because the inspect commands are called here with at
    least one ID (e.g. a swarm without services would otherwise abort the
    whole script).
    """
    if not ids:
        return []
    output = _docker(*(list(subcommand) + ['--format', template] + list(ids)))
    return [row.split() for row in output.splitlines() if row.strip()]


def _pairs(fields):
    """Turn a flat ``[key, value, key, value, ...]`` list into a dict."""
    return dict(zip(fields[0::2], fields[1::2]))


# Get nodes
nodes = _docker('node', 'ls', '-q').split()
self = _docker('node', 'inspect', 'self', '--format={{.ID}}').strip()
nodeinfo = {}
# .ManagerStatus is only set on manager nodes; for workers fall back to
# .Status.Addr so that a swarm containing workers can be inspected too.
# NOTE(review): .Status.Addr is assumed to be the address other nodes use as
# VXLAN destination for a worker -- confirm on a real cluster.
for node_id, addr, hostname in _inspect_rows(
        ['node', 'inspect'],
        '{{.ID}} {{if .ManagerStatus}}{{.ManagerStatus.Addr}}{{else}}{{.Status.Addr}}{{end}} {{.Description.Hostname}}',
        nodes):
    # The manager address has the form "host:port"; keep only the host part.
    nodeinfo[node_id] = dict(addr=addr.partition(':')[0], hostname=hostname)

# Get services
services = _docker('service', 'ls', '-q').split()
serviceinfo = {}
for fields in _inspect_rows(
        ['service', 'inspect'],
        '{{.ID}} {{.Spec.Name}} '
        '{{range $now := .Endpoint.VirtualIPs}} {{$now.NetworkID}} {{$now.Addr}} {{end}}',
        services):
    service_id, name = fields[:2]
    # Remaining fields alternate network ID / virtual IP.
    serviceinfo[service_id] = dict(id=service_id, name=name, addresses=_pairs(fields[2:]))

# Get tasks
# -q not available in 17.03
#tasks = set(run(['docker', 'node', 'ps', '-f', 'desired-state=running', '-q'] + nodes).split())
tasks = set()
for row in _docker(*(['node', 'ps', '-f', 'desired-state=running'] + nodes)).splitlines():
    columns = row.split()
    # Skip blank lines and the table header; the first column is the task ID.
    if columns and columns[0] != 'ID':
        tasks.add(columns[0])
taskinfo = {}
for fields in _inspect_rows(
        ['inspect'],
        '{{.ID}} {{.ServiceID}} {{.NodeID}} '
        '{{range $net := .NetworksAttachments}} {{$net.Network.ID}} {{index $net.Addresses 0}} {{end}}',
        sorted(tasks)):
    task_id, service_id, node_id = fields[:3]
    # Remaining fields alternate network ID / first address on that network.
    taskinfo[task_id] = dict(task_id=task_id, service_id=service_id, node_id=node_id,
                             addresses=_pairs(fields[3:]))

# Get networks
networks = _docker('network', 'ls', '-q', '-f', 'driver=overlay').split()
netinfo = {}
for fields in _inspect_rows(
        ['network', 'inspect'],
        '{{.Id}} {{.Name}} {{index .Options "com.docker.network.driver.overlay.vxlanid_list"}}',
        networks):
    net_id, name = fields[:2]
    # The VXLAN id option may be absent, which leaves only two fields.
    vlan_id = fields[2] if len(fields) > 2 else 'unknown'
    netinfo[net_id] = dict(name=name, vlan_id=vlan_id)

# Go over networks and compare with fdb
#print nodeinfo
#print taskinfo
#print netinfo
def addr2mac(addr):
    """Return the MAC address this script expects for an overlay IPv4 address.

    The result is the fixed prefix ``02:42`` followed by the four octets of
    the address in hexadecimal, e.g. ``"10.0.0.5/24"`` ->
    ``"02:42:0a:00:00:05"``.

    addr -- dotted-quad IPv4 address, optionally followed by ``/<prefix>``
            (the prefix length is ignored).

    Raises ValueError if *addr* is not made of exactly four integer octets in
    the range 0-255.
    """
    host = addr.partition('/')[0]  # Strip network
    octets = [int(part) for part in host.split('.')]  # int() raises ValueError on junk
    if len(octets) != 4 or any(not 0 <= octet <= 255 for octet in octets):
        # Fail with a clear message instead of a formatting TypeError or a
        # silently malformed MAC such as "02:42:100:...".
        raise ValueError("not a dotted-quad IPv4 address: %r" % (addr,))
    return "%02x:%02x:%02x:%02x:%02x:%02x" % tuple([0x02, 0x42] + octets)
# For every overlay network, enter its network namespace on this node, list
# the bridge forwarding database (FDB) and compare each 02:42:* entry with
# the node on which the swarm says the matching task runs.
#
# All output goes through single-argument print(...) calls so that the text
# is identical under Python 2 (print statement) and Python 3 (print function).
for net_id in netinfo:
    print("=== Network %s (%s) vlan %s" % (net_id, netinfo[net_id]['name'], netinfo[net_id]['vlan_id']))
    # Namespace files are matched by the first 10 characters of the network ID.
    netns = glob.glob("/var/run/docker/netns/*-" + net_id[:10])
    if not netns:
        print("(Not on this node)")
        continue
    netns = netns[0]
    print("--- check %s" % netns)

    orig_fdb = []
    # universal_newlines=True yields text on Python 3 as well as Python 2.
    fdb_output = run(["nsenter", "--net=" + netns, "bridge", "fdb"], universal_newlines=True)
    for entry in sorted(fdb_output.splitlines()):
        if not entry.startswith('02:42:'):
            continue
        orig_fdb.append(entry)

        fields = entry.split()
        # Expected shape: "<mac> dev <device> ..."; anything else is ignored
        # (it still shows up in the raw dump printed at the end).
        if len(fields) < 3 or fields[1] != 'dev':
            continue
        mac, device = fields[0], fields[2]
        on_vxlan = device.startswith('vxlan')
        if on_vxlan and (fields[3:5] == ['master', 'br0'] or
                         fields[3:7] == ['vlan', '0', 'master', 'br0']):  # probably ignore these?
            continue

        if on_vxlan:
            if len(fields) < 5:
                # No target field present on this line; nothing to compare.
                continue
            act_target = fields[4]
            prefix = "%s -> %s" % (mac, act_target)
        else:
            act_target = 'local'
            prefix = "%s -> local (%s)" % (mac, device)

        if act_target == 'br0':  # No idea what this means
            print(prefix + " ???")
            continue

        # Find every task whose address on this network maps to this MAC.
        matches = []
        exp_target = None
        for task in taskinfo.values():
            task_addr = task['addresses'].get(net_id)
            if task_addr is None or addr2mac(task_addr) != mac:
                continue
            node = nodeinfo.get(task['node_id'], {})
            service = serviceinfo.get(task['service_id'], {})
            matches.append("= srv %s (%s) -> node %s (%s)" % (
                task['task_id'], service.get('name', '?'),
                task['node_id'], node.get('hostname', '?')))
            # When several tasks match, the last one decides the expectation.
            if task['node_id'] == self:
                exp_target = 'local'
            else:
                exp_target = node.get('addr', '?')

        if not matches:
            print(prefix + " (no service for mac)")
            continue
        # The first match shares the line with the prefix, further matches
        # follow on lines of their own.
        print(prefix + " " + "\n".join(matches))

        if exp_target != act_target:
            if exp_target == 'local' and act_target == nodeinfo.get(self, {}).get('addr'):
                print("^^^ WARN Remote reference to self?")
            else:
                print("^^^ ERROR %s != %s" % (exp_target, act_target))

    print("--- orig fdb")
    print("\n".join(orig_fdb))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Context: hanging/slow requests between Swarm services via overlay network.
See moby/moby#32195 (comment) for details.