-
-
Save amodm/a61e6d0c413e8cc9ac4c56a803150daf to your computer and use it in GitHub Desktop.
| /* See the corresponding blog post for details: | |
| * https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
| */ | |
| #pragma once | |
| #include <net/if_var.h> | |
| #pragma pack(4) | |
| struct ifbreq { | |
| char ifbr_ifsname[IFNAMSIZ]; /* member if name */ | |
| uint32_t ifbr_ifsflags; /* member if flags */ | |
| uint32_t ifbr_stpflags; /* member if STP flags */ | |
| uint32_t ifbr_path_cost; /* member if STP cost */ | |
| uint8_t ifbr_portno; /* member if port number */ | |
| uint8_t ifbr_priority; /* member if STP priority */ | |
| uint8_t ifbr_proto; /* member if STP protocol */ | |
| uint8_t ifbr_role; /* member if STP role */ | |
| uint8_t ifbr_state; /* member if STP state */ | |
| uint32_t ifbr_addrcnt; /* member if addr number */ | |
| uint32_t ifbr_addrmax; /* member if addr max */ | |
| uint32_t ifbr_addrexceeded; /* member if addr violations */ | |
| uint8_t pad[32]; | |
| }; | |
| struct ifbifconf { | |
| uint32_t ifbic_len; /* buffer size */ | |
| union { | |
| caddr_t ifbicu_buf; | |
| struct ifbreq *ifbicu_req; | |
| #define ifbic_buf ifbic_ifbicu.ifbicu_buf | |
| #define ifbic_req ifbic_ifbicu.ifbicu_req | |
| } ifbic_ifbicu; | |
| }; |
| /* See the corresponding blog post for details: | |
| * https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
| */ | |
| #pragma once | |
| #include <net/if_var.h> | |
| /* ----------------------------------------------------- | |
| * Fake ethernet related headers. | |
| * https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_fake_var.h.auto.html | |
| * ----------------------------------------------------- | |
| */ | |
/*
 * Command codes carried in ifdrv.ifd_cmd for SIOCSDRVSPEC ("set") requests
 * on a fake ethernet interface.
 */
enum {
    IF_FAKE_S_CMD_NONE              = 0,
    IF_FAKE_S_CMD_SET_PEER          = 1,
    IF_FAKE_S_CMD_SET_MEDIA         = 2,
    IF_FAKE_S_CMD_SET_DEQUEUE_STALL = 3,
};
/*
 * Command codes carried in ifdrv.ifd_cmd for SIOCGDRVSPEC ("get") requests
 * on a fake ethernet interface.
 */
enum {
    IF_FAKE_G_CMD_NONE     = 0,
    IF_FAKE_G_CMD_GET_PEER = 1,
};
/* Capacity of the iffm_list array in struct if_fake_media. */
#define IF_FAKE_MEDIA_LIST_MAX 27

/* Media description payload of a fake ethernet request. */
struct if_fake_media {
    int32_t  iffm_current;
    uint32_t iffm_count;
    uint32_t iffm_reserved[3];
    int32_t  iffm_list[IF_FAKE_MEDIA_LIST_MAX];
};
| struct if_fake_request { | |
| uint64_t iffr_reserved[4]; | |
| union { | |
| char iffru_buf[128]; /* stable size */ | |
| struct if_fake_media iffru_media; | |
| char iffru_peer_name[IFNAMSIZ]; /* if name, e.g. "en0" */ | |
| /* | |
| * control dequeue stall. 0: disable dequeue stall, else | |
| * enable dequeue stall. | |
| */ | |
| uint32_t iffru_dequeue_stall; | |
| } iffr_u; | |
| #define iffr_peer_name iffr_u.iffru_peer_name | |
| #define iffr_media iffr_u.iffru_media | |
| #define iffr_dequeue_stall iffr_u.iffru_dequeue_stall | |
| }; |
| // See the corresponding blog post for details: | |
| // https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
| import Foundation | |
| // xnu is a custom module that I created to expose the relevant C structs | |
| // that the kernel expects, as those structs are not part of the userspace | |
| // API. This module contains C-bridge headers if-fake.h and if-bridge.h | |
| // which are also shown in this gist. | |
| import xnu | |
/// A snapshot of one host network interface, plus static helpers that create,
/// destroy and configure interfaces through `ioctl` calls on a control socket.
struct NetworkInterface {
    /// Interface name, e.g. "en0" or "feth0".
    let name: String
    /// Link-layer address; all zeroes when none was reported.
    let mac: ether_addr_t
    /// Textual IPv4/IPv6 addresses reported for this interface.
    let ips: [String]
    /// Value read from `ifru_functional_type` after the `SIOCFIFTYPE` ioctl.
    let type: UInt32
    /// Interface flags as reported by `getifaddrs`.
    let flags: UInt32

    var isBridge: Bool {
        return type == UInt32(IFT_BRIDGE)
    }

    var isLoopback: Bool {
        return flags & UInt32(IFF_LOOPBACK) != 0
    }

    var isFakeEth: Bool {
        return name.starts(with: "feth") // TODO: figure out type?
    }

    var up: Bool {
        return flags & UInt32(IFF_UP) != 0
    }

    func changeStatus(up: Bool) throws {
        try Self.changeStatus(name: name, up: up)
    }

    /// - Returns: all network interfaces currently configured on this system,
    ///   one entry per interface name in the order `getifaddrs` first reports
    ///   them. The AF_LINK, AF_INET and AF_INET6 records of an interface are
    ///   merged into a single entry; records of other families are ignored.
    static var all: [NetworkInterface] {
        var ifap: UnsafeMutablePointer<ifaddrs>? = nil
        guard getifaddrs(&ifap) == 0 else {
            fatalError("getifaddrs() failed: \(String(cString: strerror(errno)))")
        }
        defer { freeifaddrs(ifap) }
        guard let head = ifap else { return [] }

        var interfaces = [NetworkInterface]()
        var indexByName = [String: Int]()
        do {
            try withControlSocket { ctl in
                for entry in sequence(first: head, next: { $0.pointee.ifa_next }) {
                    let ifa = entry.pointee
                    // ifa_addr may be NULL for some records; skip those instead of crashing.
                    guard let sa = ifa.ifa_addr else { continue }
                    let ifname = String(cString: ifa.ifa_name)
                    var mac: ether_addr_t? = nil
                    var ip: String? = nil
                    switch Int32(sa.pointee.sa_family) {
                    case AF_LINK:
                        mac = Self.linkLayerAddress(of: sa)
                    case AF_INET:
                        var addr = sa.withMemoryRebound(to: sockaddr_in.self, capacity: 1) { $0.pointee.sin_addr }
                        var text = [CChar](repeating: 0, count: Int(INET_ADDRSTRLEN))
                        inet_ntop(AF_INET, &addr, &text, socklen_t(INET_ADDRSTRLEN))
                        ip = String(cString: text)
                    case AF_INET6:
                        var addr = sa.withMemoryRebound(to: sockaddr_in6.self, capacity: 1) { $0.pointee.sin6_addr }
                        var text = [CChar](repeating: 0, count: Int(INET6_ADDRSTRLEN))
                        inet_ntop(AF_INET6, &addr, &text, socklen_t(INET6_ADDRSTRLEN))
                        ip = String(cString: text)
                    default:
                        continue
                    }

                    // Already seen this interface: fold the new record into its entry.
                    if let idx = indexByName[ifname] {
                        let known = interfaces[idx]
                        interfaces[idx] = NetworkInterface(
                            name: known.name,
                            mac: mac ?? known.mac,
                            ips: ip.map { known.ips + [$0] } ?? known.ips,
                            type: known.type,
                            flags: known.flags
                        )
                        continue
                    }

                    var ifr = ifreq()
                    memset(&ifr, 0, MemoryLayout<ifreq>.size)
                    ifname.copyTo(&ifr.ifr_name)
                    guard ioctl(ctl, IfIoctl.SIOCFIFTYPE, &ifr) == 0 else {
                        fatalError("\(ifname):ioctl(SIOCFIFTYPE): \(String(cString: strerror(errno)))")
                    }
                    indexByName[ifname] = interfaces.count
                    interfaces.append(NetworkInterface(
                        name: ifname,
                        mac: mac ?? ether_addr_t(),
                        ips: ip.map { [$0] } ?? [],
                        type: ifr.ifr_ifru.ifru_functional_type,
                        flags: ifa.ifa_flags
                    ))
                }
            }
        } catch {
            fatalError("\(error)")
        }
        return interfaces
    }

    /// Extracts the 6-byte link-layer address from an AF_LINK socket address.
    ///
    /// The address is read through the original pointer and bounds-checked
    /// against `sdl_len`: the name and address stored in `sdl_data` can extend
    /// past the fixed-size `sockaddr_dl` struct, so a by-value copy of the
    /// struct would truncate them.
    /// - Returns: the address, or all zeroes if the record carries no 6-byte address.
    private static func linkLayerAddress(of sa: UnsafeMutablePointer<sockaddr>) -> ether_addr_t {
        return sa.withMemoryRebound(to: sockaddr_dl.self, capacity: 1) { sdl -> ether_addr_t in
            var mac = ether_addr_t()
            let macLen = MemoryLayout<ether_addr_t>.size
            let dataOffset = MemoryLayout<sockaddr_dl>.offset(of: \sockaddr_dl.sdl_data) ?? 8
            let start = dataOffset + Int(sdl.pointee.sdl_nlen)
            guard Int(sdl.pointee.sdl_alen) == macLen, start + macLen <= Int(sdl.pointee.sdl_len) else {
                return mac
            }
            memcpy(&mac, UnsafeRawPointer(sdl).advanced(by: start), macLen)
            return mac
        }
    }

    /// Opens a datagram socket of the given address family, passes it to `body`
    /// and closes it afterwards, whether `body` returns or throws.
    private static func withControlSocket<T>(_ family: Int32 = AF_LOCAL, _ body: (Int32) throws -> T) throws -> T {
        let sock = socket(family, SOCK_DGRAM, 0)
        guard sock >= 0 else {
            throw RVMError.sycallError("control:socket()")
        }
        defer { close(sock) }
        return try body(sock)
    }

    /// Issues a bridge driver ioctl (`request`) carrying command `cmd` for
    /// `member` on `bridge`. The request struct stays alive for the whole call
    /// because the ioctl runs inside the pointer's scope.
    /// - Returns: the raw `ioctl` result; `errno` is left for the caller to inspect.
    private static func bridgeMemberIoctl(_ request: UInt, cmd: UInt, bridge: String, member: String, on ctl: Int32) -> Int32 {
        var req = ifbreq()
        memset(&req, 0, MemoryLayout<ifbreq>.size)
        member.copyTo(&req.ifbr_ifsname)
        var ifd = ifdrv()
        memset(&ifd, 0, MemoryLayout<ifdrv>.size)
        bridge.copyTo(&ifd.ifd_name)
        ifd.ifd_cmd = cmd
        ifd.ifd_len = MemoryLayout<ifbreq>.size
        return withUnsafeMutablePointer(to: &req) { reqPtr -> Int32 in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            return ioctl(ctl, request, &ifd)
        }
    }

    /// Creates a fake eth interface, and peers with `peer` (if provided).
    /// - Parameter peer: the peer to connect to
    /// - Returns: the name of the fake eth interface that was created.
    /// - Throws: if no free `feth0`...`feth127` name exists or an ioctl fails.
    ///   When peering fails, the freshly created interface is destroyed again.
    static func createFakeEth(peer: String? = nil) throws -> String {
        let existing = Set(all.filter { $0.isFakeEth }.map { $0.name })
        guard let name = (0..<128).lazy.map({ "feth\($0)" }).first(where: { !existing.contains($0) }) else {
            throw RVMError.illegalState("feth:create(): out of options")
        }

        var ifr = ifreq()
        memset(&ifr, 0, MemoryLayout<ifreq>.size)
        name.copyTo(&ifr.ifr_name)
        ifr.ifr_ifru.ifru_flags = Int16(IFF_UP | IFF_RUNNING)

        try withControlSocket { ctl in
            // create
            guard ioctl(ctl, IfIoctl.SIOCIFCREATE2, &ifr) == 0 else {
                throw RVMError.sycallError("feth:create()")
            }
            if let peer = peer {
                // from https://opensource.apple.com/source/network_cmds/network_cmds-606.40.2/ifconfig.tproj/iffake.c.auto.html
                var iffr = if_fake_request()
                memset(&iffr, 0, MemoryLayout<if_fake_request>.size)
                peer.copyTo(&iffr.iffr_u.iffru_peer_name)
                var ifd = ifdrv()
                memset(&ifd, 0, MemoryLayout<ifdrv>.size)
                name.copyTo(&ifd.ifd_name)
                ifd.ifd_cmd = UInt(IF_FAKE_S_CMD_SET_PEER)
                ifd.ifd_len = MemoryLayout<if_fake_request>.size
                // The pointer is only valid inside this closure, so the ioctl runs here.
                let rc = withUnsafeMutablePointer(to: &iffr) { reqPtr -> Int32 in
                    ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
                    return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
                }
                if rc != 0 {
                    // Build the error first so cleanup cannot disturb errno beforehand.
                    let failure = RVMError.sycallError("feth:ioctl(set-peer)")
                    try? deleteInterface(name)
                    throw failure
                }
            }
        }
        return name
    }

    /// Deletes the network interface with the given name.
    /// - Parameter name: the name of the network interface to delete.
    static func deleteInterface(_ name: String) throws {
        var ifr = ifreq()
        memset(&ifr, 0, MemoryLayout<ifreq>.size)
        name.copyTo(&ifr.ifr_name)
        try withControlSocket { ctl in
            guard ioctl(ctl, IfIoctl.SIOCIFDESTROY, &ifr) == 0 else {
                throw RVMError.sycallError("\(name):ioctl(SIOCIFDESTROY)")
            }
        }
    }

    /// Creates a pair of fake eth interfaces, and peers them together.
    /// - Returns: the names of the two fake eth interfaces that were created.
    static func createFakeEthPair() throws -> (String, String) {
        let feth1 = try createFakeEth()
        let feth2 = try createFakeEth(peer: feth1)
        try changeStatus(name: feth1, up: true)
        try changeStatus(name: feth2, up: true)
        return (feth1, feth2)
    }

    /// Change the status of the network interface with the given name.
    /// - Parameters:
    ///   - name: the name of the network interface
    ///   - up: whether to bring the interface up or down
    /// - Throws: an error if the operation fails
    static func changeStatus(name: String, up: Bool) throws {
        var ifr = ifreq()
        memset(&ifr, 0, MemoryLayout<ifreq>.size)
        name.copyTo(&ifr.ifr_name)
        try withControlSocket(AF_INET) { ctl in
            guard ioctl(ctl, IfIoctl.SIOCGIFFLAGS, &ifr) == 0 else {
                throw RVMError.sycallError("\(name):ioctl(SIOCGIFFLAGS)")
            }
            let oldFlag = Int32(ifr.ifr_ifru.ifru_flags) & 0xffff
            let mask = Int32(IFF_UP | IFF_RUNNING)
            let newFlag = up ? (oldFlag | mask) : (oldFlag & ~mask)
            // Nothing to write when the flags already match the requested state.
            guard newFlag != oldFlag else { return }
            ifr.ifr_ifru.ifru_flags = Int16(bitPattern: UInt16(newFlag & 0xffff))
            guard ioctl(ctl, IfIoctl.SIOCSIFFLAGS, &ifr) == 0 else {
                throw RVMError.sycallError("\(name):ioctl(SIOCSIFFLAGS)")
            }
        }
    }

    /// Adds `ifc` to the network bridge `bridge`.
    /// - Parameters:
    ///   - ifc: the network interface to add to the bridge.
    ///   - bridge: the network bridge.
    static func addInterfaceToBridge(_ ifc: String, to bridge: String) throws {
        try withControlSocket { ctl in
            // cmd 0 = BRDGADD: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
            guard bridgeMemberIoctl(IfIoctl.SIOCSDRVSPEC, cmd: 0, bridge: bridge, member: ifc, on: ctl) == 0 else {
                throw RVMError.sycallError("bridge(\(bridge)):add-if(\(ifc))")
            }
        }
    }

    /// Ensures that `member` is a member of the `bridge` network interface.
    /// - Returns: `true` if the member was added, `false` if it was already a member.
    static func ensureBridgeMembership(bridge: String, member: String) throws -> Bool {
        return try withControlSocket { ctl in
            // cmd 2 = BRDGGIFFLGS: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
            if bridgeMemberIoctl(IfIoctl.SIOCGDRVSPEC, cmd: 2, bridge: bridge, member: member, on: ctl) < 0 {
                // ENOENT is treated as "not a member yet"; anything else is a real failure.
                guard errno == ENOENT else {
                    throw RVMError.sycallError("bridge(\(bridge)):getifflags(\(member))")
                }
                try addInterfaceToBridge(member, to: bridge)
                return true
            }
            return false
        }
    }
}
/// Assembles an ioctl request code from its four fields.
/// - Parameters:
///   - dir: direction bits (e.g. `IOC_VOID`, `IOC_IN`, `IOC_OUT`, `IOC_INOUT`).
///   - g: group character; its ASCII value fills bits 8-15 (0 if not ASCII).
///   - n: command number, OR-ed into the low bits.
///   - l: parameter length, masked with `IOCPARM_MASK` and placed at bit 16.
func _IOC(_ dir: UInt32, _ g: Character, _ n: UInt, _ l: Int) -> UInt {
    let direction = UInt(dir)
    let length = (UInt(l) & UInt(IOCPARM_MASK)) << 16
    let group = UInt(g.asciiValue ?? 0) << 8
    return direction | length | group | n
}
/// Builds the request code for an ioctl that carries no parameter
/// (direction `IOC_VOID`, length 0).
func _IO(_ g: Character, _ n: UInt) -> UInt {
    let noPayload = 0
    return _IOC(IOC_VOID, g, n, noPayload)
}
/// Builds the request code for an ioctl with direction `IOC_IN` whose
/// parameter has the size of `T`.
func _IOW<T>(_ group: Character, _ number: UInt, _ payload: T.Type) -> UInt {
    let payloadSize = MemoryLayout<T>.size
    return _IOC(IOC_IN, group, number, payloadSize)
}
/// Builds the request code for an ioctl with direction `IOC_OUT` whose
/// parameter has the size of `T`.
func _IOR<T>(_ group: Character, _ number: UInt, _ payload: T.Type) -> UInt {
    let payloadSize = MemoryLayout<T>.size
    return _IOC(IOC_OUT, group, number, payloadSize)
}
/// Builds the request code for an ioctl with direction `IOC_INOUT` whose
/// parameter has the size of `T`.
func _IOWR<T>(_ group: Character, _ number: UInt, _ payload: T.Type) -> UInt {
    let payloadSize = MemoryLayout<T>.size
    return _IOC(IOC_INOUT, group, number, payloadSize)
}
/// Network-interface ioctl request codes (group "i"), computed with the
/// `_IOW` / `_IOWR` helpers. A caseless enum is used purely as a namespace.
enum IfIoctl {
    // Interface flags.
    static let SIOCSIFFLAGS  = _IOW("i", 16, ifreq.self)
    static let SIOCGIFFLAGS  = _IOWR("i", 17, ifreq.self)

    // Media query.
    static let SIOCGIFMEDIA  = _IOWR("i", 56, ifmediareq.self)

    // Interface creation and destruction.
    static let SIOCIFCREATE  = _IOWR("i", 120, ifreq.self)
    static let SIOCIFDESTROY = _IOW("i", 121, ifreq.self)
    static let SIOCIFCREATE2 = _IOWR("i", 122, ifreq.self)

    // Driver-specific set / get; both use command number 123 and differ in direction.
    static let SIOCSDRVSPEC  = _IOW("i", 123, ifdrv.self)
    static let SIOCGDRVSPEC  = _IOWR("i", 123, ifdrv.self)

    // Interface type query.
    static let SIOCFIFTYPE   = _IOWR("i", 159, ifreq.self)
}
| // See the corresponding blog post for details: | |
| // https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
| import Darwin | |
| import Foundation | |
| import Virtualization | |
// We poll via kqueues in this thread.
/// A thread that shuttles packets between host interfaces and VM sockets.
/// Ports are registered with `newBridgePort` before the thread starts;
/// `main()` then runs the kqueue event loop until the thread is cancelled.
final class NetworkSwitch: Thread {
    static var shared = NetworkSwitch()
    static var logger: VMLogFacility = {
        VMFileLogger.shared.newFacility("nwswitch")
    }()

    /// Ports served by this switch; only appended to before the thread starts.
    private var sockDevs: [VSockDev] = []

    /// Registers a new port attached to `hostBridge` for a VM NIC with MAC `vMac`.
    /// - Returns: a file-handle attachment wrapping the VM-facing end of the port's socket pair.
    /// - Throws: `RVMError.illegalState` if the switch thread is already executing,
    ///   or any error raised while creating the port.
    func newBridgePort(hostBridge: String, vMac: ether_addr_t) throws -> VZFileHandleNetworkDeviceAttachment {
        guard !isExecuting else {
            throw RVMError.illegalState("cannot add port after switch has started")
        }
        let vsockDev = try VSockDev(hostBridge: hostBridge, vMac: vMac)
        sockDevs.append(vsockDev)
        return VZFileHandleNetworkDeviceAttachment(fileHandle: FileHandle(fileDescriptor: vsockDev.remoteSocket))
    }

    /// Checks every bridge port and ensures that the bridge contains our interface.
    /// Failures are logged, not thrown.
    func ensureBridgeMembership() {
        for dev in sockDevs where dev.isBridge {
            do {
                if try NetworkInterface.ensureBridgeMembership(bridge: dev.hostInterface, member: dev.fethBridgeSide) {
                    NetworkSwitch.logger.info("readded \(dev.fethBridgeSide) to bridge \(dev.hostInterface)")
                }
            } catch {
                NetworkSwitch.logger.error("\(error)")
            }
        }
    }

    /// Allocates and default-initializes `capacity` `kevent` entries.
    /// The caller owns the returned memory.
    private static func kqChangeList(_ capacity: Int) -> UnsafeMutablePointer<kevent> {
        let ptr = UnsafeMutablePointer<kevent>.allocate(capacity: capacity)
        ptr.initialize(repeating: kevent(), count: capacity)
        return ptr
    }

    /// Thread entry point: runs the event loop until cancelled, then closes
    /// every port exactly once.
    override func main() {
        guard !sockDevs.isEmpty else { return }

        // Single cleanup point. Closing the devices a second time would
        // double-close their file descriptors and delete the feth interfaces twice.
        defer {
            for dev in sockDevs {
                dev.close()
            }
        }

        let kq = kqueue()
        if kq < 0 {
            fatalError("kqueue() failed: \(String(cString: strerror(errno)))")
        }
        defer { close(kq) }

        let kqs = KQSockets(sockDevs)
        while !isCancelled {
            if kqs.onEvent(kq) < 0 {
                // Interrupted or temporarily unavailable: just poll again.
                if errno == EINTR || errno == EAGAIN {
                    continue
                }
                NetworkSwitch.logger.error("onEvent() failed: \(String(cString: strerror(errno)))")
            }
        }
    }

    /// Cancels the thread and suspends until it has finished, checking every
    /// `pollTimeNanos` nanoseconds.
    func cancelAndJoin(_ pollTimeNanos: UInt64 = 100_000_000) async throws {
        cancel()
        while !isFinished {
            try await Task.sleep(nanoseconds: pollTimeNanos)
        }
    }
}
/// One switch port: a datagram socket pair towards the VM, plus a BPF device
/// (host -> VM) and an NDRV raw socket (VM -> host) bound to the host side.
private struct VSockDev {
    let hostInterface: String
    let vMac: ether_addr_t
    /// Our end of the socket pair.
    let vmSocket: Int32
    /// The end of the socket pair handed to the VM.
    let remoteSocket: Int32
    let bpfSocket: Int32
    let ndrvSocket: Int32
    let bpfBufferSize: Int
    /// Scratch buffer shared by `hostToVM` and `vmToHost`.
    let bpfReadBuffer: UnsafeMutableRawBufferPointer
    let bpfFilter: [bpf_insn]
    let fethBridgeSide: String
    let fethVmSide: String
    let isBridge: Bool

    /// Capture statistics of the BPF device, or zeroes if the ioctl fails.
    var bpfStats: bpf_stat {
        var stats = bpf_stat()
        return ioctl(bpfSocket, BpfIoctl.BIOCGSTATS, &stats) == 0 ? stats : bpf_stat(bs_recv: 0, bs_drop: 0)
    }

    /// Creates a port attached to `hostBridge` for a VM NIC with MAC `vMac`.
    ///
    /// If `hostBridge` is a bridge, a peered feth pair is created and the BPF /
    /// NDRV sockets are bound to its VM side; otherwise they are bound to
    /// `hostBridge` itself.
    /// - Throws: if creating the feth pair fails. Socket setup failures are fatal.
    init(hostBridge: String, vMac: ether_addr_t) throws {
        self.hostInterface = hostBridge
        self.isBridge = NetworkInterface.all.first(where: { $0.name == hostBridge })?.isBridge ?? false
        self.vMac = vMac
        if isBridge {
            let pair = try NetworkInterface.createFakeEthPair()
            self.fethBridgeSide = pair.0
            self.fethVmSide = pair.1
        } else {
            self.fethBridgeSide = hostBridge
            self.fethVmSide = hostBridge
        }

        var fds: [Int32] = [0, 0]
        guard socketpair(PF_LOCAL, SOCK_DGRAM, 0, &fds) == 0 else {
            fatalError("socketpair() failed: \(String(cString: strerror(errno)))")
        }
        self.vmSocket = fds[0]
        self.remoteSocket = fds[1]

        // set buffer size (best effort; SO_SNDBUF / SO_RCVBUF take a C int)
        var size: Int32 = 1024 * 1024 * 8
        for fd in fds {
            for option in [SO_SNDBUF, SO_RCVBUF] {
                setsockopt(fd, SOL_SOCKET, option, &size, socklen_t(MemoryLayout<Int32>.size))
            }
        }

        self.bpfBufferSize = Int(BPF_MAXBUFSIZE)
        self.bpfReadBuffer = UnsafeMutableRawBufferPointer.allocate(byteCount: bpfBufferSize, alignment: 16)

        let vmacTop2 = UInt32(vMac.octet.0) << 8 | UInt32(vMac.octet.1)
        let vmacBottom4 = UInt32(vMac.octet.2) << 24 | UInt32(vMac.octet.3) << 16 | UInt32(vMac.octet.4) << 8 | UInt32(vMac.octet.5)
        self.bpfFilter = [
            // [0] the following 4 statements do `ether dst host <vMac>`
            bpf_insn(code: CUnsignedShort(BPF_LD | BPF_W | BPF_ABS), jt: 0, jf: 0, k: 2), // ld dst[2..<6]
            bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 2, k: vmacBottom4), // if == vMac[2..<6] goto [2] else goto [4]
            bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst[0..<2]
            bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 3, jf: 4, k: vmacTop2), // if == vMac[0..<2] goto [7] (accept) else goto [8] (reject)
            // [4] the following 3 statements do `ether dst broadcast`;
            // the accumulator still holds dst[2..<6] from [0]
            bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 3, k: 0xffffffff), // if dst[2..<6] == ff:ff:ff:ff goto [5] else goto [8]
            // Load the FIRST two bytes here: re-loading offset 2 would re-test
            // bytes already known to be 0xffff and accept xx:xx:ff:ff:ff:ff.
            bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst[0..<2]
            bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 1, k: 0xffff), // if == 0xffff goto [7] (accept) else goto [8] (reject)
            // [7] return true (capture max packet size)
            bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: UInt32(self.bpfBufferSize)),
            // [8] return false
            bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: 0),
        ]
        self.ndrvSocket = Self.ndrvSocket(fethVmSide)
        self.bpfSocket = Self.bpfSocket(fethVmSide, self.bpfBufferSize, self.bpfFilter)
    }

    /// Route traffic between host and vm, depending upon the `event`.
    /// - Returns: `false` if the event's descriptor does not belong to this port.
    func routeTraffic(_ event: kevent64_s) -> Bool {
        let fd = Int32(event.ident)
        if fd == vmSocket {
            vmToHost(event)
        } else if fd == bpfSocket {
            hostToVM(event)
        } else {
            return false
        }
        return true
    }

    /// Route traffic from host to VM by reading from bpfSocket and writing to vmSocket.
    ///
    /// One `read` may return several records, each a `bpf_hdr` followed by the
    /// captured bytes; every record is forwarded as one datagram.
    func hostToVM(_ event: kevent64_s) {
        let buffer = bpfReadBuffer.baseAddress!
        let len = read(bpfSocket, buffer, bpfBufferSize)
        if len > 0 {
            let endPtr = buffer.advanced(by: len)
            var pktPtr = buffer.assumingMemoryBound(to: bpf_hdr.self)
            while UnsafeMutableRawPointer(pktPtr) < endPtr {
                // for each packet
                let hdr = pktPtr.pointee
                // A zero header length would never advance the cursor.
                guard hdr.bh_hdrlen > 0 else { break }
                let nextPktPtr = UnsafeMutableRawPointer(pktPtr).advanced(by: Int(hdr.bh_caplen) + Int(hdr.bh_hdrlen))
                if nextPktPtr > endPtr {
                    NetworkSwitch.logger.error("\(hostInterface)-h2g: nextPktPtr out of bounds: \(nextPktPtr) > \(endPtr). current pktPtr=\(pktPtr) hdr=\(hdr)", throttleKey: "h2g-next-oob")
                    // The record claims more bytes than were read; forwarding it
                    // would read past the received data, so drop the rest.
                    break
                }
                if hdr.bh_caplen > 0 {
                    let dataPtr = UnsafeMutableRawPointer(pktPtr).advanced(by: Int(hdr.bh_hdrlen))
                    let writeLen = write(vmSocket, dataPtr, Int(hdr.bh_caplen))
                    if writeLen < 0 {
                        NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-writ-fail")
                    } else if writeLen != Int(hdr.bh_caplen) {
                        NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: partial write", throttleKey: "h2g-writ-partial")
                    }
                }
                // Records are padded so the next header starts on a BPF_ALIGNMENT boundary.
                pktPtr = nextPktPtr.alignedUp(toMultipleOf: BPF_ALIGNMENT).assumingMemoryBound(to: bpf_hdr.self)
            }
        } else if len == 0 {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: EOF", throttleKey: "h2g-eof")
        } else if errno != EAGAIN && errno != EINTR {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: read() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-read-fail")
        }
    }

    /// Send traffic from VM to host by reading from vmSocket and writing to ndrv socket.
    /// - Parameters:
    ///   - event: the kqueue event; `event.data` caps how many bytes are consumed.
    ///   - onlyOne: when `true` (the default), stop after forwarding one datagram.
    func vmToHost(_ event: kevent64_s, onlyOne: Bool = true) {
        let availableLen = min(bpfReadBuffer.count, Int(event.data))
        let basePtr = bpfReadBuffer.baseAddress!
        var offset = 0
        while offset < availableLen {
            let n = read(vmSocket, basePtr, availableLen - offset)
            guard n > 0 else {
                if n == 0 {
                    NetworkSwitch.logger.error("\(hostInterface)-g2h: EOF", throttleKey: "g2h-eof")
                } else if errno != EAGAIN && errno != EINTR {
                    NetworkSwitch.logger.error("\(hostInterface)-g2h: read() failed: \(String(cString: strerror(errno))): e=\(event)", throttleKey: "g2h-read-fail")
                }
                break
            }
            let len = write(ndrvSocket, basePtr, n)
            if len != n {
                if len < 0 {
                    NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: \(String(cString: strerror(errno)))", throttleKey: "g2h-writ-fail")
                } else if errno != EAGAIN && errno != EINTR {
                    NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: partial write", throttleKey: "g2h-writ-partial")
                }
                break
            }
            offset += n
            if onlyOne {
                break
            }
        }
    }

    /// Opens the first available `/dev/bpfN` (N in 1..<256), configures it and
    /// binds it to `ifc` with `bpfFilter` installed. Any failure is fatal.
    /// - Returns: the file descriptor of the configured BPF device.
    static func bpfSocket(_ ifc: String, _ buffSize: Int, _ bpfFilter: [bpf_insn]) -> Int32 {
        // TODO: modify sysctl debug.bpf_maxbufsize and use that size
        for i in 1..<256 {
            let dev = "/dev/bpf\(i)"
            let fd = open(dev, O_RDONLY)
            guard fd >= 0 else { continue }

            // These ioctls take a C unsigned int, so use a 32-bit argument.
            // set buffer size
            var arg = CUnsignedInt(buffSize)
            guard ioctl(fd, BpfIoctl.BIOCSBLEN, &arg) == 0 else {
                fatalError("bpf \(dev) ioctl(BIOCSBLEN) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // set immediate mode to true
            arg = 1
            guard ioctl(fd, BpfIoctl.BIOCIMMEDIATE, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCIMMEDIATE) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // see only received packets, not generated locally
            arg = 0
            guard ioctl(fd, BpfIoctl.BIOCSSEESENT, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCSSEESENT) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // bind to interface
            var ifr = ifreq()
            memset(&ifr, 0, MemoryLayout<ifreq>.size)
            ifc.copyTo(&ifr.ifr_name)
            guard ioctl(fd, BpfIoctl.BIOCSETIF, &ifr) == 0 else {
                fatalError("bpf ioctl(BIOCSETIF) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            arg = 1
            guard ioctl(fd, BpfIoctl.BIOCSHDRCMPLT, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCSHDRCMPLT) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            arg = 1
            guard ioctl(fd, BpfIoctl.BIOCPROMISC, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCPROMISC) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // set filter; the instruction storage is borrowed from a local array
            // for the duration of the ioctl, so nothing is leaked
            var insns = bpfFilter
            let rc = insns.withUnsafeMutableBufferPointer { buf -> Int32 in
                var program = bpf_program(bf_len: UInt32(buf.count), bf_insns: buf.baseAddress)
                return ioctl(fd, BpfIoctl.BIOCSETFNR, &program)
            }
            guard rc == 0 else {
                fatalError("bpf ioctl(BIOCSETFNR) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            return fd
        }
        fatalError("bpf open() failed for \(ifc): \(String(cString: strerror(errno)))")
    }

    /// Creates a `PF_NDRV` raw socket and binds and connects it to `ifc`.
    /// Any failure is fatal.
    /// - Returns: the socket's file descriptor.
    static func ndrvSocket(_ ifc: String) -> Int32 {
        let fd = socket(PF_NDRV, SOCK_RAW, 0)
        guard fd >= 0 else {
            fatalError("ndrv socket() failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // bind to interface
        var nd = sockaddr_ndrv()
        nd.snd_len = UInt8(MemoryLayout<sockaddr_ndrv>.size)
        nd.snd_family = UInt8(AF_NDRV)
        ifc.copyTo(&nd.snd_name)
        let addrLen = socklen_t(MemoryLayout<sockaddr_ndrv>.size)
        withUnsafePointer(to: &nd) { ndPtr in
            ndPtr.withMemoryRebound(to: sockaddr.self, capacity: 1) { saPtr in
                if Darwin.bind(fd, saPtr, addrLen) != 0 {
                    fatalError("ndrv bind() failed for \(ifc): \(String(cString: strerror(errno)))")
                }
                if Darwin.connect(fd, saPtr, addrLen) != 0 {
                    fatalError("ndrv connect() failed for \(ifc): \(String(cString: strerror(errno)))")
                }
            }
        }
        return fd
    }

    /// Closes all four descriptors and, for bridge ports, deletes the feth pair
    /// (best effort: deletion errors are ignored).
    func close() {
        Darwin.close(vmSocket)
        Darwin.close(remoteSocket)
        Darwin.close(bpfSocket)
        Darwin.close(ndrvSocket)
        if isBridge {
            try? NetworkInterface.deleteInterface(self.fethBridgeSide)
            try? NetworkInterface.deleteInterface(self.fethVmSide)
        }
    }
}
/// kqueue bookkeeping for a set of ports: a change list that registers a read
/// filter for each port's `vmSocket` and `bpfSocket`, plus a buffer that
/// receives triggered events.
private struct KQSockets {
    /// Change list: two entries per port (vmSocket, bpfSocket).
    private let ptr: UnsafeMutablePointer<kevent64_s>
    /// Receives the events returned by `kevent64`.
    private let eventsPtr: UnsafeMutablePointer<kevent64_s>
    private let sockDevs: [VSockDev]

    /// Switches both sockets of every port to non-blocking mode and prepares
    /// the change list. A failing `fcntl` is fatal.
    init(_ sockDevs: [VSockDev]) {
        self.sockDevs = sockDevs
        let capacity = sockDevs.count * 2
        self.ptr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.ptr.initialize(repeating: kevent64_s(), count: capacity)
        self.eventsPtr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.eventsPtr.initialize(repeating: kevent64_s(), count: capacity)

        for (i, dev) in sockDevs.enumerated() {
            guard Foundation.fcntl(dev.vmSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) vmSocket: \(String(cString: strerror(errno)))")
            }
            guard Foundation.fcntl(dev.bpfSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) bpfSocket: \(String(cString: strerror(errno)))")
            }
            for (slot, fd) in [dev.vmSocket, dev.bpfSocket].enumerated() {
                self.ptr[2 * i + slot] = kevent64_s(
                    ident: UInt64(fd),
                    filter: Int16(EVFILT_READ),
                    flags: UInt16(EV_ADD | EV_ENABLE),
                    fflags: 0,
                    data: 0,
                    udata: 0,
                    ext: (0, 0)
                )
            }
        }
    }

    /// Waits up to one second for events on `kq` and dispatches each returned
    /// event to the port that owns its descriptor.
    /// - Returns: the raw `kevent64` result: the number of events delivered,
    ///   0 on timeout, or a negative value on failure (check `errno`).
    func onEvent(_ kq: Int32) -> Int {
        let timeoutMillis: Int = 1000
        var timeout = timespec(
            tv_sec: timeoutMillis / 1000,
            tv_nsec: (timeoutMillis % 1000) * 1_000_000
        )
        let len = sockDevs.count * 2
        let numEvents = Int(kevent64(kq, ptr, Int32(len), eventsPtr, Int32(len), 0, &timeout))
        guard numEvents > 0 else { return numEvents }

        // Only the first `numEvents` entries were filled in by this call; the
        // remaining slots still hold events from earlier calls.
        for i in 0..<numEvents {
            let evt = eventsPtr[i]
            if evt.flags & UInt16(EV_ERROR) != 0 {
                NetworkSwitch.logger.error("evt-error: \(String(cString: strerror(Int32(evt.data))))", throttleKey: "kq-evt-error")
                continue
            }
            guard evt.data > 0 else { continue }

            var routed = false
            for dev in sockDevs {
                if dev.routeTraffic(evt) {
                    routed = true
                    break
                }
            }
            if !routed {
                NetworkSwitch.logger.error("no route found for event: \(evt)", throttleKey: "kq-no-route")
            }
        }
        return numEvents
    }
}
/// Boundary, in bytes, to which the cursor is rounded up when stepping from
/// one record of a BPF read buffer to the next: the size of an `Int32`.
private let BPF_ALIGNMENT: Int = MemoryLayout<Int32>.size
/// BPF device ioctl request codes (group "B"), computed with the
/// `_IO` / `_IOR` / `_IOW` / `_IOWR` helpers. A caseless enum is used purely
/// as a namespace.
enum BpfIoctl {
    // Buffer length (in/out).
    static let BIOCSBLEN     = _IOWR("B", 102, CUnsignedInt.self)

    // Promiscuous mode; carries no parameter.
    static let BIOCPROMISC   = _IO("B", 105)

    // Bind the device to an interface.
    static let BIOCSETIF     = _IOW("B", 108, ifreq.self)

    // Read capture statistics.
    static let BIOCGSTATS    = _IOR("B", 111, bpf_stat.self)

    // Unsigned-int switches.
    static let BIOCIMMEDIATE = _IOW("B", 112, CUnsignedInt.self)
    static let BIOCSHDRCMPLT = _IOW("B", 117, CUnsignedInt.self)
    static let BIOCSSEESENT  = _IOW("B", 119, CUnsignedInt.self)

    // Install a filter program.
    static let BIOCSETFNR    = _IOW("B", 126, bpf_program.self)
}
@amodm pls lend me a hand when you're not busy..
Another problem with error "en0-h2g: write(vmSocket=3) len=60 failed: Destination address required", what might be the cause?
I made a launchd helper that runs your NetworkSwitch.swift and exposes a Unix domain socket at a file path such as /tmp/vm.socket. This socket file is then given to a virtual machine tool such as vfkit, which can use it as a virtio-net device. This way, I can isolate the networking parts that need sudo from the rootless virtual machine part.
So I created a unix socket by "unixSocket = Darwin.socket(PF_LOCAL, SOCK_DGRAM, 0)", bind it to /tmp/vm.socket, and give this socket to VSockDev.vmSocket. (I deleted VSockDev.remoteSocket as it's not needed now)
Then I got errors in log as below, seems vmToHost is ok, hostToVm is not, as I checked by tcpdump.
2024-12-12 23:22:32.965 virt[2706:284159] en0-g2h: read len=42
2024-12-12 23:22:32.966 virt[2706:284159] en0-h2g: write(vmSocket=3) len=60 failed: Destination address required
2024-12-12 23:22:34.392 virt[2706:284159] en0-g2h: read len=70
2024-12-12 23:22:34.908 virt[2706:284159] en0-g2h: read len=42
2024-12-12 23:22:34.908 virt[2706:284159] en0-h2g: write(vmSocket =3) len=60 failed: Destination address required
$ vfkit --cpus 2 --memory 1024 --bootloader linux,kernel=/tmp/virt/vmlinuz,initrd=/tmp/vmlinuz,cmdline="console=hvc0 root=/dev/vda" --device virtio-blk,path=/tmp/virt/vda.img --device virtio-blk,path=/tmp/virt/vdb.img --device virtio-net,unixSocketPath=/tmp/s.socket,mac=c2:6d:fd:60:10:2b --restful-uri tcp://localhost:5122 --device virtio-serial,stdio
Thanks again!
Instead of doing what you did, bind VSockDev.remoteSocket to the Unix socket, and leave everything else the same.
Awesome! 👏