# Run on 10.59.194.141: create the ipip0 tunnel device towards 10.59.194.154.
peer=10.59.194.154
node=10.59.194.141
# IPIP tunnel between the two underlay addresses.
ip tun add ipip0 mode ipip remote "${peer}" local "${node}"
ip link set ipip0 up
# Point-to-point addressing inside the tunnel: local .1, remote .2.
ip add add 192.168.200.1 brd 255.255.255.255 peer 192.168.200.2 dev ipip0
# Route the whole 192.168.200.0/24 range via the local tunnel address.
ip ro add 192.168.200.0/24 via 192.168.200.1
# Run on 10.59.194.154: create the ipip0 tunnel device towards 10.59.194.141.
peer=10.59.194.141
node=10.59.194.154
# IPIP tunnel between the two underlay addresses.
ip tun add ipip0 mode ipip remote "${peer}" local "${node}"
ip link set ipip0 up
# Point-to-point addressing inside the tunnel: local .2, remote .1.
ip add add 192.168.200.2 brd 255.255.255.255 peer 192.168.200.1 dev ipip0
# Route the whole 192.168.200.0/24 range via the local tunnel address.
ip ro add 192.168.200.0/24 via 192.168.200.2
Last active
August 5, 2019 05:24
-
-
Save chenchun/7562a289be0badcf5b07353e472118c6 to your computer and use it in GitHub Desktop.
flannel vxlan ipip
check the reason of flannel network unreachable
- check kernel vxlan port
$ ip -d li show dev flannel.1
$ netstat -apn | grep udp
if the port is unwanted
# 新建vxlan.conf
# cat /etc/modprobe.d/vxlan.conf
#### Set the VXLAN UDP port ####
options vxlan udp_port=4789
# 重新加载vxlan模块
# rmmod vxlan
# modprobe -v vxlan
- arpd: newer kernels removed this config option, so make sure CONFIG_ARPD is either absent or not set to "n".
$ grep ARPD /boot/config-`uname -r`
CONFIG_ARPD=y
- check if forward is enabled and the forward chain of iptables is ACCEPT
$ cat /proc/sys/net/ipv4/ip_forward
1
$ iptables-save
...
:FORWARD ACCEPT [0:0]
- check route is ok
ip route
- check ip is private ip
10.0.0.0/8
172.16.0.0/12
192.168.0.0/16
- check neighbor/fdb is ok
ip neigh show dev flannel.1
bridge fdb show dev flannel.1
If there is a FAILED entry in the neighbor table, try manually configuring the failed ones: ip neigh add $dst_ctn_ip lladdr $dst_vxlan_mac dev vxlan2 nud permanent
Then test whether flannel receives the expected kernel messages:
package main
import (
"flag"
"syscall"
"time"
"git.code.oa.com/gaiastack/galaxy/pkg/network"
log "github.com/golang/glog"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netlink/nl"
)
var flagDevice = flag.String("device", "", "device name to listen")
// main parses the command line, looks up the link named by -device and then
// monitors neighbor messages for it. It exits fatally when no device name is
// given or the link cannot be found.
func main() {
	flag.Parse()

	name := *flagDevice
	if name == "" {
		log.Fatalf("please specify device name")
	}

	link, err := netlink.LinkByName(name)
	if err != nil {
		log.Fatalf("failed to get device %s: %v", name, err)
	}

	// MonitorMisses only returns if the netlink subscription fails.
	(&device{l: link}).MonitorMisses()
}
// device holds the netlink link whose neighbor messages are inspected.
type device struct {
// l is the link looked up by name in main; its index is used to filter messages.
l netlink.Link
}
// MonitorMisses subscribes to netlink RTNLGRP_NEIGH notifications and hands
// every received message to processNeighMsg. It returns only when the
// subscription itself fails; receive errors are logged and retried after a
// one second pause.
func (dev *device) MonitorMisses() {
	nlsock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
	if err != nil {
		// Include the underlying error so the cause (permissions, missing
		// netlink support, ...) is visible in the log.
		log.Errorf("Failed to subscribe to netlink RTNLGRP_NEIGH messages: %v", err)
		return
	}

	for {
		msgs, err := nlsock.Receive()
		if err != nil {
			log.Errorf("Failed to receive from netlink: %v ", err)
			// Back off briefly so a persistent failure does not spin the CPU.
			time.Sleep(1 * time.Second)
			continue
		}

		for _, msg := range msgs {
			dev.processNeighMsg(msg)
		}
	}
}
// processNeighMsg decodes a single netlink neighbor message and logs whether
// it is one of interest. A message is reported as "good" only when it
// deserializes, belongs to this device's link index, has type RTM_GETNEIGH or
// RTM_NEWNEIGH, and network.IsNeighResolving accepts its state; every other
// message is logged as ignored together with the reason.
func (dev *device) processNeighMsg(msg syscall.NetlinkMessage) {
	neigh, err := netlink.NeighDeserialize(msg.Data)
	if err != nil {
		// Errorf (not Error): the message contains a %v verb that must be
		// expanded with err instead of being printed literally.
		log.Errorf("Failed to deserialize netlink ndmsg: %v", err)
		return
	}
	log.V(1).Infof("receiving neigh msg %#v, neigh %#v", msg, neigh)

	ifindex := dev.l.Attrs().Index
	if int(neigh.LinkIndex) != ifindex {
		log.Infof("ignore neigh msg from kernel %#v: not equal device id %d", neigh, ifindex)
		return
	}

	if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH {
		log.Infof("ignore neigh msg from kernel %#v: msg type is wrong %d", neigh, msg.Header.Type)
		return
	}

	if !network.IsNeighResolving(neigh.State) {
		log.Infof("ignore neigh msg from kernel %#v: invalid state %d", neigh, neigh.State)
		return
	}

	log.Infof("receive good neigh msg from kernel %#v", neigh)
}
- flannel 0.5.x does not honor the vxlan dst port
- 腾讯云黑石不支持4789端口,换一个端口试试,比如4790,46354
node | 物理网卡 | ip |
---|---|---|
node1 | enp0s8 | 10.245.1.3 |
node2 | enp0s8 | 10.245.1.4 |
在每台机器上执行,记得修改脚本中的ip地址
#######################################
# Build a hand-made flannel-style VXLAN setup on this host: a vxlan2 device,
# a br0 bridge and a "ctn" network namespace attached to br0 by a veth pair.
# Globals (read):
#   eth          - underlay NIC the vxlan device is bound to
#   vtep_ip      - local address used by the vxlan device
#   vxlan_mac    - MAC address assigned to vxlan2
#   ip_cidr      - address/prefix assigned to vxlan2
#   node_ip_cidr - IPv4 subnet of this node in a.b.c.d/len form; a.b.c.1 is
#                  given to br0 and a.b.c.2 to the container side of the veth
# Returns:
#   1 if a required variable is unset or empty, otherwise the status of the
#   last ip command.
#######################################
function setup_flannel_vxlan() {
  local required
  for required in eth vtep_ip vxlan_mac ip_cidr node_ip_cidr; do
    if [[ -z "${!required:-}" ]]; then
      printf 'setup_flannel_vxlan: %s must be set\n' "${required}" >&2
      return 1
    fi
  done

  # calculate ip address of bridge device and container veth device
  local mask ip_prefix gateway br_ip ctn_ip
  mask="${node_ip_cidr#*/}"
  ip_prefix="${node_ip_cidr%%/*}"   # address part without the prefix length
  ip_prefix="${ip_prefix%.*}."      # keep the first three octets plus a dot
  gateway="${ip_prefix}1"
  br_ip="${gateway}/${mask}"
  ctn_ip="${ip_prefix}2/${mask}"

  # create vxlan device
  ip link add dev vxlan2 type vxlan id 2 local "${vtep_ip}" dev "${eth}" dstport 4789
  ip link set dev vxlan2 address "${vxlan_mac}"
  ip link set dev vxlan2 up
  ip addr add "${ip_cidr}" dev vxlan2
  ip link set dev vxlan2 mtu 1450

  # bridge that the container veth is plugged into
  ip link add dev br0 type bridge
  ip link set dev br0 up

  # container namespace plus veth pair (vhost on the host, vctn in the netns)
  ip netns add ctn
  ip li add dev vhost mtu 1450 type veth peer name vctn mtu 1450
  ip li set dev vctn netns ctn
  ip link set vhost up
  ip link set vhost master br0
  ip netns exec ctn ip link set dev vctn up
  ip netns exec ctn ip addr add "${ctn_ip}" dev vctn
  ip netns exec ctn ip route add default via "${gateway}"

  # the bridge owns the gateway address of the node subnet
  ip addr add "${br_ip}" dev br0
}
# node1: inputs read by setup_flannel_vxlan (underlay NIC enp0s8, VTEP 10.245.1.3).
# Both sections create the same device names, so run only the one that
# matches the current machine and adjust the addresses as needed.
eth=enp0s8
vtep_ip=10.245.1.3
ip_cidr=10.250.1.0/16
vxlan_mac=02:42:0a:fa:01:00
node_ip_cidr=10.250.1.0/24
setup_flannel_vxlan
# node2: same steps with node2's VTEP address, vxlan MAC and node subnet.
eth=enp0s8
vtep_ip=10.245.1.4
ip_cidr=10.250.2.0/16
vxlan_mac=02:42:0a:fa:02:00
node_ip_cidr=10.250.2.0/24
setup_flannel_vxlan
在两台机器分别配置二层转发表和ARP表
#######################################
# Install the static forwarding (fdb) and neighbor (ARP) entries on vxlan2
# that are needed to reach a container on the other node.
# Globals (read):
#   dst_vxlan_mac - MAC address used for both the fdb and the neighbor entry
#   dst_vtep_ip   - destination address of the fdb entry
#   dst_ctn_ip    - IP address the neighbor entry is created for
# Returns:
#   1 if a required variable is unset or empty, otherwise the status of the
#   last command.
#######################################
function setup_fdb_arp() {
  local required
  for required in dst_vxlan_mac dst_vtep_ip dst_ctn_ip; do
    if [[ -z "${!required:-}" ]]; then
      printf 'setup_fdb_arp: %s must be set\n' "${required}" >&2
      return 1
    fi
  done

  bridge fdb add "${dst_vxlan_mac}" dst "${dst_vtep_ip}" self permanent dev vxlan2
  ip neigh add "${dst_ctn_ip}" lladdr "${dst_vxlan_mac}" dev vxlan2 nud permanent
}
# Get the MAC address of the vxlan2 device on each machine
cat /sys/class/net/vxlan2/address
# vxlan2_mac_node1=02:42:0a:fa:01:00
# vxlan2_mac_node2=02:42:0a:fa:02:00
# node-1: entries pointing at node2 (its VTEP address, vxlan2 MAC and container IP)
dst_vtep_ip=10.245.1.4
dst_vxlan_mac=02:42:0a:fa:02:00
dst_ctn_ip=10.250.2.2
setup_fdb_arp
# node-2: entries pointing at node1 (its VTEP address, vxlan2 MAC and container IP)
dst_vtep_ip=10.245.1.3
dst_vxlan_mac=02:42:0a:fa:01:00
dst_ctn_ip=10.250.1.2
setup_fdb_arp
验证vxlan网络,dump enp0s8网卡数据包
[root@kubernetes-node-1 vagrant]# ip netns exec ctn ping -c 3 10.250.2.2
PING 10.250.2.2 (10.250.2.2) 56(84) bytes of data.
64 bytes from 10.250.2.2: icmp_seq=1 ttl=62 time=10.7 ms
64 bytes from 10.250.2.2: icmp_seq=2 ttl=62 time=4.00 ms
64 bytes from 10.250.2.2: icmp_seq=3 ttl=62 time=29.7 ms
--- 10.250.2.2 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2170ms
rtt min/avg/max/mdev = 4.006/14.825/29.721/10.887 ms
[root@kubernetes-node-1 vagrant]# tcpdump -vv -nn -s 0 -e -i enp0s8 udp port 4789
tcpdump: listening on enp0s8, link-type EN10MB (Ethernet), capture size 262144 bytes
03:35:49.722825 08:00:27:57:65:f9 > 08:00:27:c3:83:d5, ethertype IPv4 (0x0800), length 148: (tos 0x0, ttl 64, id 44644, offset 0, flags [none], proto UDP (17), length 134)
10.245.1.3.47561 > 10.245.1.4.4789: [no cksum] VXLAN, flags [I] (0x08), vni 2
e2:1a:0e:91:0c:fc > c2:88:4e:3c:c7:5c, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 62686, offset 0, flags [DF], proto ICMP (1), length 84)
10.250.1.2 > 10.250.2.2: ICMP echo request, id 18584, seq 1, length 64
03:35:49.723857 08:00:27:c3:83:d5 > 08:00:27:57:65:f9, ethertype IPv4 (0x0800), length 148: (tos 0x0, ttl 64, id 16670, offset 0, flags [none], proto UDP (17), length 134)
10.245.1.4.44620 > 10.245.1.3.4789: [no cksum] VXLAN, flags [I] (0x08), vni 2
c2:88:4e:3c:c7:5c > e2:1a:0e:91:0c:fc, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 7781, offset 0, flags [none], proto ICMP (1), length 84)
10.250.2.2 > 10.250.1.2: ICMP echo reply, id 18584, seq 1, length 64
^C
6 packets captured
6 packets received by filter
0 packets dropped by kernel
如果host1上要访问host2上的容器,需要在host2上配置192.168.1.0或者192.168.1.1的neigh
# Permanent neighbor entries on vxlan2: 192.168.1.0 and 192.168.1.1 both map to MAC 02:42:0a:fa:01:00
ip neigh add dev vxlan2 192.168.1.0 lladdr 02:42:0a:fa:01:00 nud permanent
ip neigh add dev vxlan2 192.168.1.1 lladdr 02:42:0a:fa:01:00 nud permanent
clean up
# Remove the ctn network namespace, the vxlan2 device and the br0 bridge
ip netns delete ctn
ip link del vxlan2
ip link del br0
config etcd
# Store the flannel network config (vxlan backend, port 4789, VNI 3) under /coreos.com/network/config in etcd
etcdctl --endpoint=http://10.245.1.2:4379 set /coreos.com/network/config '{"Network":"10.246.0.0/16","SubnetLen":24,"Backend":{"Type":"vxlan","Port":4789,"VNI":3}}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment