$ unshare --user --map-root-user --net --mount
[root@incensed-gawain ~]# echo $$
2646
strace -f slirp4netns --configure --mtu=65520 2646 tap0
Note: your file descriptors are still available
The parent creates a socketpair, forks a child, and waits for the child to communicate back on the socketpair
socketpair(AF_UNIX, SOCK_STREAM, 0, [3, 4]) = 0
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f5bc798b810) = 2667
wait4(2667, strace: Process 2667 attached
<unfinished ...>
The child joins the user and network namespaces of PID 2646 and creates the tap interface there
[pid 2667] openat(AT_FDCWD, "/proc/2646/ns/user", O_RDONLY) = 5
[pid 2667] openat(AT_FDCWD, "/proc/2646/ns/net", O_RDONLY) = 6
[pid 2667] setns(5, CLONE_NEWUSER) = 0
[pid 2667] setns(6, CLONE_NEWNET) = 0
[pid 2667] close(5) = 0
[pid 2667] close(6) = 0
[pid 2667] openat(AT_FDCWD, "/dev/net/tun", O_RDWR) = 5
The tap fd in the child is 5
[pid 2667] ioctl(5, TUNSETIFF, 0x7ffd60075390) = 0
[pid 2667] socket(AF_INET, SOCK_DGRAM, IPPROTO_IP) = 6
[pid 2667] ioctl(6, SIOCSIFFLAGS, {ifr_name="tap0", ifr_flags=IFF_UP|IFF_RUNNING}) = 0
[pid 2667] ioctl(6, SIOCSIFMTU, {ifr_name="tap0", ifr_mtu=65520}) = 0
[pid 2667] ioctl(6, SIOCSIFADDR, {ifr_name="tap0", ifr_addr={sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("10.0.2.100")}}) = 0
[pid 2667] ioctl(6, SIOCSIFNETMASK, {ifr_name="tap0", ifr_netmask={sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("255.255.255.0")}}) = 0
[pid 2667] ioctl(6, SIOCADDRT, 0x7ffd60075390) = 0
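Use ancillary (control) data, an SCM_RIGHTS message, to send the fd 5
back to the parent process running on the host
https://linux.die.net/man/2/sendmsg
Note: this is not MSG_OOB (the sendmsg call below passes flags=0). For comparison, the man page describes MSG_OOB as:
Sends out-of-band data on sockets that support this notion (e.g., of type SOCK_STREAM); the underlying protocol must also support out-of-band data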
[pid 2667] sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=20, msg_flags=0}, 0) = 1
[pid 2667] write(2, "sent tapfd=5 for tap0\n", 22sent tapfd=5 for tap0
) = 22
[pid 2667] close(4) = 0
[pid 2667] exit_group(0) = ?
[pid 2667] +++ exited with 0 +++
The parent picks up the tap fd (it is also numbered 5 in the parent).
The parent reads from this fd to get packets from the container.
That is how network traffic makes it across the network ns even though tap interfaces cannot cross a network namespace boundary
<... wait4 resumed> [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 2667
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2667, si_uid=1000, si_status=0, si_utime=0, si_stime=0} ---
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=24, msg_flags=0}, 0) = 1
write(2, "received tapfd=5\n", 17received tapfd=5
) = 17
close(3) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x1), ...}) = 0
write(1, "Starting slirp\n", 15Starting slirp
) = 15
write(1, "* MTU: 65520\n", 25* MTU: 65520
) = 25
write(1, "* Network: 10.0.2.0\n", 28* Network: 10.0.2.0
) = 28
write(1, "* Netmask: 255.255.255.0"..., 33* Netmask: 255.255.255.0
) = 33
write(1, "* Gateway: 10.0.2.2\n", 28* Gateway: 10.0.2.2
) = 28
write(1, "* DNS: 10.0.2.3\n", 28* DNS: 10.0.2.3
) = 28
write(1, "* Recommended IP: 10.0.2.100\n", 30* Recommended IP: 10.0.2.100
) = 30
write(1, "WARNING: 127.0.0.1:* on the host"..., 127WARNING: 127.0.0.1:* on the host is accessible as 10.0.2.2 (set --disable-host-loopback to prohibit connecting to 127.0.0.1:*)
) = 127
rt_sigaction(SIGPIPE, {sa_handler=SIG_IGN, sa_mask=[PIPE], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f5bc73e2f30}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
poll([{fd=5, events=POLLIN|POLLHUP}], 1, 1000) = 1 ([{fd=5, revents=POLLIN}])
read(5, "33\0\0\0\26\372N1\230}\325\206\335`\0\0\0\0$\0\1\0\0\0\0\0\0\0\0\0\0"..., 65536) = 90
brk(NULL) = 0xe03000
brk(0xe2e000) = 0xe2e000
poll([{fd=5, events=POLLIN|POLLHUP}], 1, 1000) = 1 ([{fd=5, revents=POLLIN}])