#!/bin/bash
# Copyright 2019 Gravitational, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Uncomment to trace every command while debugging this script.
# set -x

# The timestamp and host name are embedded in every artifact name so that
# reports from several runs / several nodes can sit side by side.
now=$(date +"%Y%m%d_%H%M%S")
hostname=$(hostname)

# Every artifact shares one prefix; only the suffix tells them apart.
_prefix="/var/lib/gravity/planet/share/gravity-network-debug_${now}.${hostname}"
_file="${_prefix}.txt"                       # text report
_tcpdump_flannel="${_prefix}.flannel.pcap"   # capture on flannel.1
_tcpdump_vxlan="${_prefix}.vxlan.pcap"       # capture on the vxlan parent device
_tcpdump_any="${_prefix}.any.pcap"           # capture on all interfaces

echo -e "Saving report to $_file"
echo -e "Saving flannel capture to $_tcpdump_flannel"
echo -e "Saving vxlan capture to $_tcpdump_vxlan"
echo -e "Saving any capture to $_tcpdump_any"
echo -e ""

# Deploy a DaemonSet running a long-lived "sleep 99999" container and a
# ClusterIP Service selecting those pods. Later steps look the pods up via
# the service's iptables rules and enter the container's network namespace.
# NOTE(review): nothing in this script removes these objects afterwards -
# confirm whether that is intentional (e.g. so runs on other nodes reuse them).
echo -e "Creating debug pods..."

# The delimiter is quoted: the manifest is literal and needs no expansion.
kubectl apply -f - <<'EOF'
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: network-debug
  namespace: kube-system
  labels:
    k8s-app: network-debug
spec:
  selector:
    matchLabels:
      name: network-debug
  template:
    metadata:
      labels:
        name: network-debug
    spec:
      tolerations:
      - operator: "Exists"
      containers:
      - name: debian
        image: apiserver:5000/gravitational/debian-tall:0.0.1
        command: ["sleep", "99999"]
        ports:
        - containerPort: 53
---
apiVersion: v1
kind: Service
metadata:
  labels:
    name: network-debug
  name: network-debug
  namespace: kube-system
spec:
  ports:
  - port: 53
    protocol: TCP
  selector:
    name: network-debug
  type: ClusterIP
EOF

# Fixed pause before the rest of the script goes looking for the pods.
sleep 5

#
# Collect host information
#
echo -e "Collecting system information..."

# Start the report from scratch exactly once; everything after this appends.
# (Opening the file twice with truncation for stdout and stderr separately
# yields two descriptors with independent offsets that can overwrite each
# other.)
: > "${_file}"

# Each entry is logged under a "--- <command>" header followed by its combined
# stdout/stderr.
for _cmd in \
  "hostname" \
  "uname -a" \
  "sysctl -a" \
  "lsmod"; do
  # Split the simple command string into argv without invoking eval.
  read -r -a _argv <<< "${_cmd}"
  echo -e "\n\n\n--- ${_cmd}" &>> "${_file}"
  "${_argv[@]}" &>> "${_file}"
done

#
# Collect current network configuration
#
echo -e "Collecting flannel / network configuration..."

# flannel's local lease file and the cluster-wide network config in etcd.
for _cmd in \
  "cat /run/flannel/subnet.env" \
  "etcdctl get /coreos.com/network/config"; do
  read -r -a _argv <<< "${_cmd}"
  echo -e "\n\n\n--- ${_cmd}" &>> "${_file}"
  "${_argv[@]}" &>> "${_file}"
done

# Dump every key under the subnets tree as "<key>:<value>". The pipeline is
# grouped so that errors from any stage (not only xargs) land in the report.
echo -e "\n\n\n--- etcdctl ls --recursive -p /coreos.com/network/subnets | grep -v '/$' | xargs -n 1 -I% sh -c 'echo -n %:; etcdctl get %;'" &>> "${_file}"
{
  etcdctl ls --recursive -p /coreos.com/network/subnets \
    | grep -v '/$' \
    | xargs -n 1 -I% sh -c 'echo -n %:; etcdctl get %;'
} &>> "${_file}"

# Kernel networking state: routes, links, addresses, neighbours, policy
# rules, xfrm, bridge forwarding database and the full iptables ruleset.
for _cmd in \
  "ip -d route" \
  "ip -d link show" \
  "ip -d addr" \
  "ip -d neighbor" \
  "ip -d rule" \
  "ip -d ntable" \
  "ip -d maddress" \
  "ip -d xfrm state show" \
  "ip -d xfrm policy show" \
  "ip -d tcp_metrics" \
  "ip -d netconf" \
  "bridge fdb show" \
  "iptables-save"; do
  read -r -a _argv <<< "${_cmd}"
  echo -e "\n\n\n--- ${_cmd}" &>> "${_file}"
  "${_argv[@]}" &>> "${_file}"
done

#
# Collect kubernetes info
#
# Note: will return error on worker nodes; the error text is kept in the
# report and the script carries on.
for _cmd in \
  "kubectl get all -o wide --all-namespaces" \
  "kubectl get endpoints -o yaml --all-namespaces" \
  "kubectl get nodes -o yaml --all-namespaces" \
  "kubectl -n kube-system get po -lk8s-app=kube-dns -o wide"; do
  read -r -a _argv <<< "${_cmd}"
  echo -e "\n\n\n--- ${_cmd}" &>> "${_file}"
  "${_argv[@]}" &>> "${_file}"
done

#
# Test network
#
echo -e "Testing network..."

# Setup packet capture for the test.
# The device flannel.1's vxlan is bound to is read from the "dev <name>" token
# of `ip -d link show`. \S+ (rather than \w+) keeps names containing '.' or
# '-' intact, e.g. bond0.100.
vxlan_dev=$(ip -d link show flannel.1 | grep vxlan | grep -oP '(?<=dev\s)\S+')
echo "external_dev: ${vxlan_dev}" &>> "${_file}"

# Each capture runs as a transient systemd unit so it keeps going in the
# background while the tests execute. The unit names are chosen here instead
# of being scraped from systemd-run's human-readable message, whose wording
# is not a stable interface.
tcpdump_flannel="gravity-network-debug-flannel-${now}.service"
tcpdump_vxlan="gravity-network-debug-vxlan-${now}.service"
tcpdump_any="gravity-network-debug-any-${now}.service"

# DNS and ICMP as seen on the overlay interface.
systemd-run --unit="${tcpdump_flannel}" \
  tcpdump -vni flannel.1 -w "${_tcpdump_flannel}" port 53 or icmp &>> "${_file}"

# Port 8472 (presumably the flannel VXLAN transport - confirm) and ICMP on
# the underlying device.
systemd-run --unit="${tcpdump_vxlan}" \
  tcpdump -vni "${vxlan_dev}" -w "${_tcpdump_vxlan}" port 8472 or icmp &>> "${_file}"

# ICMP on every interface.
systemd-run --unit="${tcpdump_any}" \
  tcpdump -vni any -w "${_tcpdump_any}" icmp &>> "${_file}"

echo -e "\n\n\n--- === Test network (host ns) ===" &>> "${_file}"

# Build the list of "a.b.c." prefixes, one per flannel subnet lease key
# (/coreos.com/network/subnets/<a.b.c.d>-<len>). The loops append 0 and 1 to
# reach the flannel.1 and docker0 addresses on each host.
mapfile -t flannel_subnets < <(
  etcdctl ls --recursive -p /coreos.com/network/subnets \
    | grep -v '/$' \
    | awk -F'/' '{
        split($5, lease, "-")
        split(lease[1], octet, ".")
        print octet[1] "." octet[2] "." octet[3] "."
      }'
)
#echo ${flannel_subnets[*]}

for subnet in "${flannel_subnets[@]}"; do
  for _suffix in 0 1; do
    echo -e "\n\n\n--- ping -c3 ${subnet}${_suffix}" &>> "${_file}"
    ping -c3 "${subnet}${_suffix}" &>> "${_file}"
  done
done

echo -e "\n\n\n--- === Test ping debug pod (host ns) ===" &>> "${_file}"

# Ping the network-debug pod on each host. Pod IPs are taken from the
# --to-destination targets of the iptables rules tagged with the
# kube-system/network-debug service. (The array keeps its historical
# "kubedns" name because later parts of the script refer to it.)
mapfile -t kubedns_ips < <(
  iptables-save \
    | grep kube-system/network-debug: \
    | grep -e '--to-destination' \
    | grep tcp \
    | awk '{print $14}' \
    | awk -F':' '{print $1}'
)
for ip in "${kubedns_ips[@]}"; do
  # Rules with fewer fields than expected produce blank entries; skip them.
  [[ -n "${ip}" ]] || continue
  echo -e "\n\n\n--- ping -c3 ${ip}" &>> "${_file}"
  ping -c3 "${ip}" &>> "${_file}"
done

echo -e "\n\n\n--- === Test ping masters (host ns) ===" &>> "${_file}"

# Ping every master; addresses are the 6th path component of the election
# keys under /planet/cluster/ in etcd.
mapfile -t master_ips < <(
  etcdctl ls --recursive /planet/cluster/ \
    | grep election/ \
    | awk -F'/' '{print $6}'
)
for ip in "${master_ips[@]}"; do
  [[ -n "${ip}" ]] || continue
  echo -e "\n\n\n--- ping -c3 ${ip}" &>> "${_file}"
  ping -c3 "${ip}" &>> "${_file}"
done

# Repeat the pings from inside the debug container's network namespace:
# flannel.1 / docker0 of every subnet, the debug pods, and the masters.
echo -e "\n\n\n--- === Test network (debug ns) ===" &>> "${_file}"

# Locate the debug container by its "sleep 99999" command line. Only the
# first match is kept so that several matches cannot spill extra words into
# the nsenter command line.
nannypid=$(pgrep -f "sleep 99999" | head -n 1)
echo "debug container pid: ${nannypid}" &>> "${_file}"

if [[ -z "${nannypid}" ]]; then
  # Without a target pid nsenter cannot be invoked meaningfully.
  echo "debug container not found on this host; skipping debug ns tests" &>> "${_file}"
else
  for subnet in "${flannel_subnets[@]}"; do
    for _suffix in 0 1; do
      echo -e "\n\n\n--- nsenter -n -t ${nannypid} ping -c3 ${subnet}${_suffix}" &>> "${_file}"
      nsenter -n -t "${nannypid}" ping -c3 "${subnet}${_suffix}" &>> "${_file}"
    done
  done

  echo -e "\n\n\n--- === Test kube-dns (debug ns) ===" &>> "${_file}"
  for ip in "${kubedns_ips[@]}"; do
    [[ -n "${ip}" ]] || continue
    echo -e "\n\n\n--- nsenter -n -t ${nannypid} ping -c3 ${ip}" &>> "${_file}"
    nsenter -n -t "${nannypid}" ping -c3 "${ip}" &>> "${_file}"
  done

  echo -e "\n\n\n--- === Test ping masters (debug ns) ===" &>> "${_file}"
  for ip in "${master_ips[@]}"; do
    [[ -n "${ip}" ]] || continue
    echo -e "\n\n\n--- nsenter -n -t ${nannypid} ping -c3 ${ip}" &>> "${_file}"
    nsenter -n -t "${nannypid}" ping -c3 "${ip}" &>> "${_file}"
  done
fi

# Stop the running tcpdumps. Each variable holds the name of the transient
# systemd unit one capture was started under.
for _unit in "${tcpdump_flannel}" "${tcpdump_vxlan}" "${tcpdump_any}"; do
  echo -e "\n\n\n--- systemctl stop ${_unit}" &>> "${_file}"
  if [[ -n "${_unit}" ]]; then
    systemctl stop "${_unit}" &>> "${_file}"
  else
    # A bare "systemctl stop" would only print a usage error.
    echo "no unit name recorded for this capture; nothing to stop" &>> "${_file}"
  fi
done

#
# Collect flanneld logs
#
# The inner quotes are escaped so the header shows the command exactly as it
# is run (unescaped, they end the string early and vanish from the output).
echo -e "\n\n\n--- journalctl -u flanneld --since=\"7 days ago\" --no-pager" &>> "${_file}"
journalctl -u flanneld --since="7 days ago" --no-pager &>> "${_file}"

echo "Script completed..."