-
-
Save amodm/a61e6d0c413e8cc9ac4c56a803150daf to your computer and use it in GitHub Desktop.
/* See the corresponding blog post for details: | |
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
*/ | |
#pragma once | |
#include <net/if_var.h> | |
#pragma pack(4) | |
struct ifbreq { | |
char ifbr_ifsname[IFNAMSIZ]; /* member if name */ | |
uint32_t ifbr_ifsflags; /* member if flags */ | |
uint32_t ifbr_stpflags; /* member if STP flags */ | |
uint32_t ifbr_path_cost; /* member if STP cost */ | |
uint8_t ifbr_portno; /* member if port number */ | |
uint8_t ifbr_priority; /* member if STP priority */ | |
uint8_t ifbr_proto; /* member if STP protocol */ | |
uint8_t ifbr_role; /* member if STP role */ | |
uint8_t ifbr_state; /* member if STP state */ | |
uint32_t ifbr_addrcnt; /* member if addr number */ | |
uint32_t ifbr_addrmax; /* member if addr max */ | |
uint32_t ifbr_addrexceeded; /* member if addr violations */ | |
uint8_t pad[32]; | |
}; | |
struct ifbifconf { | |
uint32_t ifbic_len; /* buffer size */ | |
union { | |
caddr_t ifbicu_buf; | |
struct ifbreq *ifbicu_req; | |
#define ifbic_buf ifbic_ifbicu.ifbicu_buf | |
#define ifbic_req ifbic_ifbicu.ifbicu_req | |
} ifbic_ifbicu; | |
}; |
/* See the corresponding blog post for details: | |
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
*/ | |
#pragma once | |
#include <net/if_var.h> | |
/* ----------------------------------------------------- | |
* Fake ethernet related headers. | |
* https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_fake_var.h.auto.html | |
* ----------------------------------------------------- | |
*/ | |
/*
 * Command codes carried in ifdrv.ifd_cmd for SIOCSDRVSPEC requests
 * addressed to a fake ethernet (feth) interface.
 */
enum {
    IF_FAKE_S_CMD_NONE              = 0,
    IF_FAKE_S_CMD_SET_PEER          = 1,
    IF_FAKE_S_CMD_SET_MEDIA         = 2,
    IF_FAKE_S_CMD_SET_DEQUEUE_STALL = 3,
};
/*
 * Command codes carried in ifdrv.ifd_cmd for SIOCGDRVSPEC requests
 * addressed to a fake ethernet (feth) interface.
 */
enum {
    IF_FAKE_G_CMD_NONE     = 0,
    IF_FAKE_G_CMD_GET_PEER = 1,
};
/* Capacity of the media list embedded in struct if_fake_media. */
#define IF_FAKE_MEDIA_LIST_MAX 27

/*
 * Media payload of a fake ethernet request: a current value, an entry
 * count, reserved words, and a fixed-capacity list.
 */
struct if_fake_media {
    int32_t  iffm_current;
    uint32_t iffm_count;
    uint32_t iffm_reserved[3];
    int32_t  iffm_list[IF_FAKE_MEDIA_LIST_MAX];
};
struct if_fake_request { | |
uint64_t iffr_reserved[4]; | |
union { | |
char iffru_buf[128]; /* stable size */ | |
struct if_fake_media iffru_media; | |
char iffru_peer_name[IFNAMSIZ]; /* if name, e.g. "en0" */ | |
/* | |
* control dequeue stall. 0: disable dequeue stall, else | |
* enable dequeue stall. | |
*/ | |
uint32_t iffru_dequeue_stall; | |
} iffr_u; | |
#define iffr_peer_name iffr_u.iffru_peer_name | |
#define iffr_media iffr_u.iffru_media | |
#define iffr_dequeue_stall iffr_u.iffru_dequeue_stall | |
}; |
// See the corresponding blog post for details: | |
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
import Foundation | |
// xnu is a custom module that I created to expose the relevant C structs | |
// that the kernel expects, as those structs are not part of the userspace | |
// API. This module contains C-bridge headers if-fake.h and if-bridge.h | |
// which are also shown in this gist. | |
import xnu | |
struct NetworkInterface { | |
let name: String | |
let mac: ether_addr_t | |
let ips: [String] | |
let type: UInt32 | |
let flags: UInt32 | |
/// `true` when the interface type equals `IFT_BRIDGE`.
var isBridge: Bool {
    type == UInt32(IFT_BRIDGE)
}

/// `true` when `IFF_LOOPBACK` is set in the interface flags.
var isLoopback: Bool {
    (flags & UInt32(IFF_LOOPBACK)) != 0
}

/// `true` when the interface name begins with "feth".
var isFakeEth: Bool {
    name.starts(with: "feth") // TODO: figure out type?
}

/// `true` when `IFF_UP` is set in the interface flags.
var up: Bool {
    (flags & UInt32(IFF_UP)) != 0
}

/// Brings this interface up or down by delegating to the static
/// `changeStatus(name:up:)` with this interface's name.
/// - Parameter up: whether to bring the interface up or down
/// - Throws: whatever the static `changeStatus(name:up:)` throws
func changeStatus(up: Bool) throws {
    try Self.changeStatus(name: self.name, up: up)
}
/// - Returns: all network interfaces currently configured on this system.
///
/// One element is produced per `getifaddrs()` entry whose address family is
/// `AF_LINK`, `AF_INET` or `AF_INET6`, so an interface that has several
/// addresses appears several times. `mac` is only populated on the `AF_LINK`
/// entry and `ips` only on the `AF_INET`/`AF_INET6` entries.
///
/// Failures of `getifaddrs()`, of the control socket, or of the type ioctl
/// terminate the process, because this accessor cannot throw.
static var all: [NetworkInterface] {
    var ifap: UnsafeMutablePointer<ifaddrs>? = nil
    guard getifaddrs(&ifap) == 0 else {
        fatalError("getifaddrs() failed: \(String(cString: strerror(errno)))")
    }
    defer { freeifaddrs(ifap) }

    var interfaces = [NetworkInterface]()
    try! withControlSocket { ctl -> Void in
        // Walk the linked list explicitly; the end is the nil `ifa_next`.
        var cursor = ifap
        while let node = cursor {
            cursor = node.pointee.ifa_next
            let ifa = node.pointee
            // `ifa_addr` may be NULL for an entry; there is nothing to classify then.
            guard let sa = ifa.ifa_addr else { continue }

            let ifname = String(cString: ifa.ifa_name)
            let flags = ifa.ifa_flags
            var ips = [String]()
            var mac = ether_addr_t()

            switch Int32(sa.pointee.sa_family) {
            case AF_LINK:
                if let lladdr = linkLayerAddress(of: sa) {
                    mac = lladdr
                }
            case AF_INET:
                var addr = sa.withMemoryRebound(to: sockaddr_in.self, capacity: 1) { $0.pointee.sin_addr }
                var text = [CChar](repeating: 0, count: Int(INET_ADDRSTRLEN))
                if inet_ntop(AF_INET, &addr, &text, socklen_t(INET_ADDRSTRLEN)) != nil {
                    ips.append(String(cString: text))
                }
            case AF_INET6:
                var addr = sa.withMemoryRebound(to: sockaddr_in6.self, capacity: 1) { $0.pointee.sin6_addr }
                var text = [CChar](repeating: 0, count: Int(INET6_ADDRSTRLEN))
                if inet_ntop(AF_INET6, &addr, &text, socklen_t(INET6_ADDRSTRLEN)) != nil {
                    ips.append(String(cString: text))
                }
            default:
                continue
            }

            // Ask the kernel for the interface type of this entry.
            var ifr = ifreq()
            memset(&ifr, 0, MemoryLayout<ifreq>.size)
            ifname.copyTo(&ifr.ifr_name)
            guard ioctl(ctl, IfIoctl.SIOCFIFTYPE, &ifr) == 0 else {
                fatalError("\(ifname):ioctl(SIOCFIFTYPE): \(String(cString: strerror(errno)))")
            }
            let type = ifr.ifr_ifru.ifru_functional_type
            interfaces.append(NetworkInterface(name: ifname, mac: mac, ips: ips, type: type, flags: flags))
        }
    }
    return interfaces
}

/// Reads the link-layer address that follows the interface name inside an
/// `AF_LINK` socket address.
///
/// The address is read from the original socket address memory rather than
/// from a by-value copy of `sockaddr_dl`: the declared struct only holds 12
/// bytes of `sdl_data`, so name + address does not fit in a copy once the
/// interface name is longer than 6 characters.
/// - Returns: the address, or `nil` when the entry does not carry a 6-byte
///   address within `sdl_len`.
private static func linkLayerAddress(of sa: UnsafeMutablePointer<sockaddr>) -> ether_addr_t? {
    let header = sa.withMemoryRebound(to: sockaddr_dl.self, capacity: 1) { $0.pointee }
    let dataOffset = MemoryLayout<sockaddr_dl>.offset(of: \sockaddr_dl.sdl_data) ?? 8
    let addrOffset = dataOffset + Int(header.sdl_nlen)
    let addrLen = MemoryLayout<ether_addr_t>.size
    guard Int(header.sdl_alen) == addrLen, addrOffset + addrLen <= Int(header.sdl_len) else {
        return nil
    }
    return UnsafeRawPointer(sa).load(fromByteOffset: addrOffset, as: ether_addr_t.self)
}
/// Runs `body` with a freshly opened datagram socket and closes the socket
/// afterwards, whether `body` returns or throws.
/// - Parameters:
///   - family: the address family of the socket to open. It is now passed
///     to `socket()`; previously `AF_LOCAL` was always used regardless of
///     the argument.
///   - body: receives the socket descriptor; its result is returned.
/// - Throws: `RVMError.sycallError` when the socket cannot be created, or
///   whatever `body` throws.
private static func withControlSocket<T>(_ family: Int32 = AF_LOCAL, _ body: (Int32) throws -> T) throws -> T {
    let sock = socket(family, SOCK_DGRAM, 0)
    guard sock >= 0 else {
        throw RVMError.sycallError("control:socket()")
    }
    defer { close(sock) }
    return try body(sock)
}
/// Creates a fake eth interface, and peers with `peer` (if provided).
///
/// The first name in `feth0`...`feth127` that is not currently in use is
/// chosen. When peering fails, the interface that was just created is
/// destroyed again (best effort) so a failed call does not leave it behind.
/// - Parameter peer: the peer to connect to
/// - Returns: the name of the fake eth interface that was created.
/// - Throws: `RVMError.sycallError` when creation or peering fails,
///   `RVMError.illegalState` when all 128 names are taken.
static func createFakeEth(peer: String? = nil) throws -> String {
    let existing = Set(all.filter { $0.isFakeEth }.map { $0.name })
    let candidate = (0..<128).lazy.map { "feth\($0)" }.first(where: { !existing.contains($0) })
    guard let name = candidate else {
        throw RVMError.illegalState("feth:create(): out of options")
    }

    var ifr = ifreq()
    memset(&ifr, 0, MemoryLayout.size(ofValue: ifr))
    name.copyTo(&ifr.ifr_name)
    ifr.ifr_ifru.ifru_flags = Int16(IFF_UP | IFF_RUNNING)

    try withControlSocket { ctl -> Void in
        // create
        guard ioctl(ctl, IfIoctl.SIOCIFCREATE2, &ifr) == 0 else {
            throw RVMError.sycallError("feth:create()")
        }
        guard let peer = peer else { return }

        // from https://opensource.apple.com/source/network_cmds/network_cmds-606.40.2/ifconfig.tproj/iffake.c.auto.html
        var iffr = if_fake_request()
        memset(&iffr, 0, MemoryLayout.size(ofValue: iffr))
        peer.copyTo(&iffr.iffr_u.iffru_peer_name)

        var ifd = ifdrv()
        memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
        name.copyTo(&ifd.ifd_name)
        ifd.ifd_cmd = UInt(IF_FAKE_S_CMD_SET_PEER)
        ifd.ifd_len = MemoryLayout.size(ofValue: iffr)

        // The pointer handed to the closure is only valid inside it, so the
        // ioctl that dereferences `ifd_data` is issued inside the closure.
        let rc = withUnsafeMutablePointer(to: &iffr) { requestPtr -> Int32 in
            ifd.ifd_data = UnsafeMutableRawPointer(requestPtr)
            return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
        }
        guard rc == 0 else {
            // Undo the creation so the half-configured interface is not leaked.
            _ = ioctl(ctl, IfIoctl.SIOCIFDESTROY, &ifr)
            throw RVMError.sycallError("feth:ioctl(set-peer)")
        }
    }
    return name
}
/// Destroys the network interface called `name` via `SIOCIFDESTROY`.
/// - Parameter name: the name of the network interface to delete.
/// - Throws: `RVMError.sycallError` when the control socket cannot be opened
///   or the ioctl does not return 0.
static func deleteInterface(_ name: String) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout.size(ofValue: request))
    name.copyTo(&request.ifr_name)

    try withControlSocket { ctl in
        if ioctl(ctl, IfIoctl.SIOCIFDESTROY, &request) != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCIFDESTROY)")
        }
    }
}
/// Creates a pair of fake eth interfaces, and peers them together.
///
/// Both interfaces are brought up before returning. If any step fails, the
/// interfaces created so far are deleted again (best effort) before the
/// error is rethrown, so a failed call does not leave stray interfaces.
/// - Returns: the names of the two fake eth interfaces that were created.
/// - Throws: the error of the first step that failed.
static func createFakeEthPair() throws -> (String, String) {
    var created = [String]()
    do {
        let feth1 = try createFakeEth()
        created.append(feth1)
        let feth2 = try createFakeEth(peer: feth1)
        created.append(feth2)
        try changeStatus(name: feth1, up: true)
        try changeStatus(name: feth2, up: true)
        return (feth1, feth2)
    } catch {
        // Roll back in reverse creation order.
        for name in created.reversed() {
            try? deleteInterface(name)
        }
        throw error
    }
}
/// Brings the named network interface up or down.
///
/// The current flags are fetched with `SIOCGIFFLAGS`; `IFF_UP | IFF_RUNNING`
/// is then set or cleared, and `SIOCSIFFLAGS` is only issued when the
/// resulting flag word differs from the current one.
/// - Parameters:
///   - name: the name of the network interface
///   - up: whether to bring the interface up or down
/// - Throws: an error if the operation fails
static func changeStatus(name: String, up: Bool) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout.size(ofValue: request))
    name.copyTo(&request.ifr_name)

    let statusBits = Int32(IFF_UP | IFF_RUNNING)

    try NetworkInterface.withControlSocket(AF_INET) { ctl in
        if ioctl(ctl, IfIoctl.SIOCGIFFLAGS, &request) != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCGIFFLAGS)")
        }

        // Work on the low 16 bits as an unsigned quantity.
        let current = Int32(request.ifr_ifru.ifru_flags) & 0xffff
        let wanted = up ? (current | statusBits) : (current & ~statusBits)
        guard wanted != current else { return }

        request.ifr_ifru.ifru_flags = Int16(truncatingIfNeeded: wanted)
        if ioctl(ctl, IfIoctl.SIOCSIFFLAGS, &request) < 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCSIFFLAGS)")
        }
    }
}
/// Adds `ifc` to the network bridge `bridge`.
/// - Parameters:
///   - ifc: the network interface to add to the bridge.
///   - bridge: the network bridge.
/// - Throws: `RVMError.sycallError` when the control socket cannot be opened
///   or the bridge ioctl fails.
static func addInterfaceToBridge(_ ifc: String, to bridge: String) throws {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout.size(ofValue: req))
    ifc.copyTo(&req.ifbr_ifsname)

    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 0 // BRDGADD: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout.size(ofValue: req)

    try withControlSocket { ctl -> Void in
        // The pointer to `req` is only valid inside the closure, so the ioctl
        // that dereferences `ifd_data` must run inside it as well.
        let rc = withUnsafeMutablePointer(to: &req) { reqPtr -> Int32 in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
        }
        guard rc == 0 else {
            throw RVMError.sycallError("bridge(\(bridge)):add-if(\(ifc))")
        }
    }
}
/// Ensures that `member` is a member of the `bridge` network interface.
///
/// The bridge is queried for the member's flags; when that query fails with
/// `ENOENT` the member is added through `addInterfaceToBridge(_:to:)`.
/// - Returns: `true` if the member was added, `false` if it was already a member.
/// - Throws: `RVMError.sycallError` when the query fails for any other
///   reason, or whatever `addInterfaceToBridge(_:to:)` throws.
static func ensureBridgeMembership(bridge: String, member: String) throws -> Bool {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout.size(ofValue: req))
    member.copyTo(&req.ifbr_ifsname)

    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 2 // BRDGGIFFLGS: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout.size(ofValue: req)

    return try withControlSocket { ctl -> Bool in
        // Issue the ioctl while the pointer to `req` is still valid, and grab
        // errno right away so later calls cannot overwrite it.
        let (rc, err) = withUnsafeMutablePointer(to: &req) { reqPtr -> (Int32, Int32) in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            let rc = ioctl(ctl, IfIoctl.SIOCGDRVSPEC, &ifd)
            return (rc, errno)
        }
        guard rc < 0 else { return false }
        guard err == ENOENT else {
            throw RVMError.sycallError("bridge(\(bridge)):getifflags(\(member))")
        }
        try addInterfaceToBridge(member, to: bridge)
        return true
    }
}
} | |
/// Builds an ioctl request number from its four components.
/// - Parameters:
///   - dir: direction bits, OR-ed in unchanged
///   - g: group character; its ASCII value is placed in bits 8-15
///     (0 when the character is not ASCII)
///   - n: command number, OR-ed into the low bits
///   - l: parameter length, masked with `IOCPARM_MASK` and placed from bit 16
/// - Returns: the combined request number
func _IOC(_ dir: UInt32, _ g: Character, _ n: UInt, _ l: Int) -> UInt {
    let direction = UInt(dir)
    let length = (UInt(l) & UInt(IOCPARM_MASK)) << 16
    let group = UInt(g.asciiValue ?? 0) << 8
    return direction | length | group | n
}

/// Request number for an ioctl that carries no parameter.
func _IO(_ g: Character, _ n: UInt) -> UInt {
    _IOC(IOC_VOID, g, n, 0)
}

/// Request number for an ioctl whose parameter of type `ctype` is copied in.
func _IOW<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let size = MemoryLayout<T>.size
    return _IOC(IOC_IN, char, nr, size)
}

/// Request number for an ioctl whose parameter of type `ctype` is copied out.
func _IOR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let size = MemoryLayout<T>.size
    return _IOC(IOC_OUT, char, nr, size)
}

/// Request number for an ioctl whose parameter of type `ctype` is copied in and out.
func _IOWR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let size = MemoryLayout<T>.size
    return _IOC(IOC_INOUT, char, nr, size)
}
/// Interface ioctl request numbers (group "i"), built with the `_IOW` /
/// `_IOWR` helpers from the size of the argument structure.
enum IfIoctl {
    // Interface flags.
    static let SIOCSIFFLAGS: UInt = _IOW("i", 16, ifreq.self)
    static let SIOCGIFFLAGS: UInt = _IOWR("i", 17, ifreq.self)

    // Media.
    static let SIOCGIFMEDIA: UInt = _IOWR("i", 56, ifmediareq.self)

    // Interface creation and destruction.
    static let SIOCIFCREATE: UInt = _IOWR("i", 120, ifreq.self)
    static let SIOCIFDESTROY: UInt = _IOW("i", 121, ifreq.self)
    static let SIOCIFCREATE2: UInt = _IOWR("i", 122, ifreq.self)

    // Driver-specific set/get; both use command number 123 and differ in direction.
    static let SIOCSDRVSPEC: UInt = _IOW("i", 123, ifdrv.self)
    static let SIOCGDRVSPEC: UInt = _IOWR("i", 123, ifdrv.self)

    // Interface type query.
    static let SIOCFIFTYPE: UInt = _IOWR("i", 159, ifreq.self)
}
// See the corresponding blog post for details: | |
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
import Darwin | |
import Foundation | |
import Virtualization | |
// we poll via kqueues in this thread
/// Thread that shuttles packets between host interfaces and VM sockets.
///
/// Ports are registered with `newBridgePort(hostBridge:vMac:)` before the
/// thread starts; `main()` then polls all of them through one kqueue until
/// the thread is cancelled.
final class NetworkSwitch: Thread {
    static var shared = NetworkSwitch()
    static var logger: VMLogFacility = {
        VMFileLogger.shared.newFacility("nwswitch")
    }()

    /// The ports served by this switch.
    private var sockDevs: [VSockDev] = []

    /// Creates a new port attached to `hostBridge` and returns the attachment
    /// that wraps the port's remote socket.
    /// - Parameters:
    ///   - hostBridge: name of the host interface the port attaches to
    ///   - vMac: MAC address of the virtual machine's network device
    /// - Throws: `RVMError.illegalState` once the thread is executing, or
    ///   whatever `VSockDev.init` throws.
    func newBridgePort(hostBridge: String, vMac: ether_addr_t) throws -> VZFileHandleNetworkDeviceAttachment {
        guard !isExecuting else {
            throw RVMError.illegalState("cannot add port after switch has started")
        }
        let vsockDev = try VSockDev(hostBridge: hostBridge, vMac: vMac)
        sockDevs.append(vsockDev)
        let handle = FileHandle(fileDescriptor: vsockDev.remoteSocket)
        return VZFileHandleNetworkDeviceAttachment(fileHandle: handle)
    }

    /// Checks every bridge port and ensures that the bridge contains our interface.
    /// Failures are logged and do not stop the remaining ports from being checked.
    func ensureBridgeMembership() {
        for dev in sockDevs where dev.isBridge {
            do {
                if try NetworkInterface.ensureBridgeMembership(bridge: dev.hostInterface, member: dev.fethBridgeSide) {
                    NetworkSwitch.logger.info("readded \(dev.fethBridgeSide) to bridge \(dev.hostInterface)")
                }
            } catch {
                NetworkSwitch.logger.error("\(error)")
            }
        }
    }

    /// Allocates `capacity` default-initialized `kevent` entries.
    /// The caller owns the returned memory.
    private static func kqChangeList(_ capacity: Int) -> UnsafeMutablePointer<kevent> {
        let ptr = UnsafeMutablePointer<kevent>.allocate(capacity: capacity)
        ptr.initialize(repeating: kevent(), count: capacity)
        return ptr
    }

    /// Thread body: polls all ports until the thread is cancelled, then
    /// closes them. Does nothing when no port was registered.
    override func main() {
        guard !sockDevs.isEmpty else { return }

        // Close every port exactly once, however the loop is left. The ports
        // used to be closed here AND by an explicit loop after polling, which
        // closed each descriptor twice (risking closing an unrelated,
        // recycled descriptor) and destroyed each feth interface twice.
        defer {
            for dev in sockDevs {
                dev.close()
            }
        }

        let kq = kqueue()
        if kq < 0 {
            fatalError("kqueue() failed: \(String(cString: strerror(errno)))")
        }
        defer { close(kq) }

        let kqs = KQSockets(sockDevs)
        while !isCancelled {
            if kqs.onEvent(kq) < 0 {
                // Interrupted or would-block polls are simply retried.
                if errno == EINTR || errno == EAGAIN {
                    continue
                }
                NetworkSwitch.logger.error("onEvent() failed: \(String(cString: strerror(errno)))")
            }
        }
    }

    /// Cancels the thread and waits until it has finished, re-checking every
    /// `pollTimeNanos` nanoseconds.
    /// - Throws: the error of `Task.sleep` (e.g. on task cancellation).
    func cancelAndJoin(_ pollTimeNanos: UInt64 = 100_000_000) async throws {
        cancel()
        while !isFinished {
            try await Task.sleep(nanoseconds: pollTimeNanos)
        }
    }
}
private struct VSockDev { | |
let hostInterface: String | |
let vMac: ether_addr_t | |
let vmSocket: Int32 | |
let remoteSocket: Int32 | |
let bpfSocket: Int32 | |
let ndrvSocket: Int32 | |
let bpfBufferSize: Int | |
let bpfReadBuffer: UnsafeMutableRawBufferPointer | |
let bpfFilter: [bpf_insn] | |
let fethBridgeSide: String | |
let fethVmSide: String | |
let isBridge: Bool | |
/// Current BPF statistics of `bpfSocket`, as reported by `BIOCGSTATS`.
/// All-zero statistics are returned when the ioctl fails.
var bpfStats: bpf_stat {
    var stats = bpf_stat()
    guard ioctl(bpfSocket, BpfIoctl.BIOCGSTATS, &stats) == 0 else {
        return bpf_stat(bs_recv: 0, bs_drop: 0)
    }
    return stats
}
/// Sets up one switch port for the VM device with MAC address `vMac`.
///
/// When `hostBridge` names a bridge interface, a peered pair of fake eth
/// interfaces is created and the VM side of the pair is used for packet
/// I/O; otherwise `hostBridge` itself is used for both sides. A datagram
/// socket pair is created for the VM, and an NDRV socket and a BPF device
/// are opened on the VM-side interface.
/// - Parameters:
///   - hostBridge: name of the host interface to attach to
///   - vMac: MAC address of the VM's network device; only frames addressed
///     to it or to the broadcast address pass the BPF filter
/// - Throws: errors from `NetworkInterface.createFakeEthPair()`, or
///   `RVMError.sycallError` when the socket pair cannot be created.
init(hostBridge: String, vMac: ether_addr_t) throws {
    let bridged = NetworkInterface.all.first(where: { $0.name == hostBridge })?.isBridge ?? false
    self.hostInterface = hostBridge
    self.isBridge = bridged
    self.vMac = vMac

    let (bridgeSide, vmSide) = try bridged ? NetworkInterface.createFakeEthPair() : (hostBridge, hostBridge)
    self.fethBridgeSide = bridgeSide
    self.fethVmSide = vmSide

    // socketpair() fills a two-element array of descriptors.
    var fds: [Int32] = [-1, -1]
    guard socketpair(PF_LOCAL, SOCK_DGRAM, 0, &fds) == 0 else {
        let reason = String(cString: strerror(errno))
        // Do not leave the freshly created feth pair behind.
        if bridged {
            try? NetworkInterface.deleteInterface(bridgeSide)
            try? NetworkInterface.deleteInterface(vmSide)
        }
        throw RVMError.sycallError("socketpair() failed: \(reason)")
    }
    self.vmSocket = fds[0]
    self.remoteSocket = fds[1]

    // set buffer size (best effort). SO_SNDBUF/SO_RCVBUF take a C `int`.
    var size: Int32 = 1024 * 1024 * 8
    let sizeLen = socklen_t(MemoryLayout<Int32>.size)
    for fd in fds {
        for option in [SO_SNDBUF, SO_RCVBUF] {
            _ = setsockopt(fd, SOL_SOCKET, option, &size, sizeLen)
        }
    }

    let bufferSize = Int(BPF_MAXBUFSIZE)
    self.bpfBufferSize = bufferSize
    self.bpfReadBuffer = UnsafeMutableRawBufferPointer.allocate(byteCount: bufferSize, alignment: 16)

    let vmacTop2 = UInt32(vMac.octet.0) << 8 | UInt32(vMac.octet.1)
    let vmacBottom4 = UInt32(vMac.octet.2) << 24 | UInt32(vMac.octet.3) << 16 | UInt32(vMac.octet.4) << 8 | UInt32(vMac.octet.5)
    let filter: [bpf_insn] = [
        // [0] the following 4 statements do `ether dst host <vMac>`
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_W | BPF_ABS), jt: 0, jf: 0, k: 2), // ld dst_host_ether[2..<6]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 2, k: vmacBottom4), // if == vMac[2..<6], proceed to next else skip-2
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst_host_ether[0..<2] (msb 2 bytes)
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 3, jf: 4, k: vmacTop2), // if == vMac[0..<2], skip-3 (true) else skip-4 (false)
        // [4] the following 3 statements do `ether dst broadcast`
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 3, k: 0xffffffff), // if dst[2..<6] == 0xffffffff, next else skip-3 (false)
        // Load the TOP two bytes (offset 0). Offset 2 would re-test bytes that
        // are already known to be 0xffff and accept any xx:xx:ff:ff:ff:ff.
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst_host_ether[0..<2]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 1, k: 0xffff), // if == 0xffff (broadcast), next (true) else skip-1 (false)
        // [7] return true (capture max packet size)
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: UInt32(bufferSize)),
        // [8] return false
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: 0), // ret false
    ]
    self.bpfFilter = filter

    self.ndrvSocket = Self.ndrvSocket(vmSide)
    self.bpfSocket = Self.bpfSocket(vmSide, bufferSize, filter)
}
/// Dispatches `event` to the direction that matches its descriptor.
/// - Parameter event: the kqueue event; `ident` is taken as the descriptor
/// - Returns: `true` when the descriptor is this port's `vmSocket` or
///   `bpfSocket` and the event was handled, `false` otherwise.
func routeTraffic(_ event: kevent64_s) -> Bool {
    switch Int32(event.ident) {
    case vmSocket:
        vmToHost(event)
        return true
    case bpfSocket:
        hostToVM(event)
        return true
    default:
        return false
    }
}
/// Route traffic from host to VM by reading from bpfSocket and writing to vmSocket.
///
/// One `read()` may return several captured packets, each preceded by a
/// `bpf_hdr`. Every packet with a non-zero capture length is written to
/// `vmSocket` as one datagram. Parsing stops at the first record whose
/// header is unusable or that does not fit inside the bytes actually read,
/// so data beyond the read is never forwarded.
/// - Parameter event: the kqueue event that triggered the call (unused)
func hostToVM(_ event: kevent64_s) {
    let buffer = bpfReadBuffer.baseAddress!
    let len = read(bpfSocket, buffer, bpfBufferSize)
    guard len > 0 else {
        if len == 0 {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: EOF", throttleKey: "h2g-eof")
        } else if errno != EAGAIN && errno != EINTR {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: read() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-read-fail")
        }
        return
    }

    let endPtr = buffer.advanced(by: len)
    let headerSize = MemoryLayout<bpf_hdr>.size
    var pktPtr = buffer
    // A record is only parsed when its complete header lies inside the read data.
    while pktPtr.advanced(by: headerSize) <= endPtr {
        // for each packet
        let hdr = pktPtr.load(as: bpf_hdr.self)
        let hdrLen = Int(hdr.bh_hdrlen)
        let capLen = Int(hdr.bh_caplen)
        guard hdrLen > 0 else {
            // A zero header length would never advance the cursor.
            NetworkSwitch.logger.error("\(hostInterface)-h2g: invalid bpf header: hdr=\(hdr)", throttleKey: "h2g-bad-hdr")
            break
        }
        let nextPktPtr = pktPtr.advanced(by: hdrLen + capLen)
        guard nextPktPtr <= endPtr else {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: nextPktPtr out of bounds: \(nextPktPtr) > \(endPtr). current pktPtr=\(pktPtr) hdr=\(hdr)", throttleKey: "h2g-next-oob")
            break
        }
        if capLen > 0 {
            let writeLen = write(vmSocket, pktPtr.advanced(by: hdrLen), capLen)
            if writeLen < 0 {
                NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-writ-fail")
            } else if writeLen != capLen {
                NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: partial write", throttleKey: "h2g-writ-partial")
            }
        }
        // Records start on BPF_ALIGNMENT boundaries.
        pktPtr = nextPktPtr.alignedUp(toMultipleOf: BPF_ALIGNMENT)
    }
}
/// Send traffic from VM to host by reading from vmSocket and writing to ndrv socket.
///
/// At most `event.data` bytes (capped at the buffer size) are consumed. Each
/// `read()` result is forwarded with a single `write()`; forwarding stops at
/// the first failed or short read or write.
/// - Parameters:
///   - event: the kqueue event; `data` is used as the number of readable bytes
///   - onlyOne: when `true` (the default) stop after forwarding one read
func vmToHost(_ event: kevent64_s, onlyOne: Bool = true) {
    let availableLen = min(bpfReadBuffer.count, Int(event.data))
    let basePtr = bpfReadBuffer.baseAddress!
    var offset = 0
    while offset < availableLen {
        let n = read(vmSocket, basePtr, availableLen - offset)
        guard n > 0 else {
            if n == 0 {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: EOF", throttleKey: "g2h-eof")
            } else if errno != EAGAIN && errno != EINTR {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: read() failed: \(String(cString: strerror(errno))): e=\(event)", throttleKey: "g2h-read-fail")
            }
            return
        }

        let len = write(ndrvSocket, basePtr, n)
        guard len == n else {
            if len < 0 {
                let writeErrno = errno
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: \(String(cString: strerror(writeErrno)))", throttleKey: "g2h-writ-fail")
            } else {
                // A short write does not set errno, so it is reported
                // unconditionally instead of consulting a stale errno value.
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: partial write", throttleKey: "g2h-writ-partial")
            }
            return
        }

        offset += n
        if onlyOne {
            return
        }
    }
}
/// Opens the first available `/dev/bpfN` device (N in 1..<256), configures
/// it and binds it to `ifc`.
///
/// Configuration applied, in order: buffer length, immediate mode, hide
/// locally sent packets, bind to interface, header-complete mode,
/// promiscuous mode, and finally the filter program. Any configuration
/// failure terminates the process, as does failing to open every device.
/// - Parameters:
///   - ifc: name of the interface to capture on
///   - buffSize: BPF buffer length to request
///   - bpfFilter: filter program to install
/// - Returns: the configured BPF file descriptor
static func bpfSocket(_ ifc: String, _ buffSize: Int, _ bpfFilter: [bpf_insn]) -> Int32 {
    // TODO: modify sysctl debug.bpf_maxbufsize and use that size
    for i in 1..<256 {
        let dev = "/dev/bpf\(i)"
        let fd = open(dev, O_RDONLY)
        // Device unavailable; try the next one.
        guard fd >= 0 else { continue }

        // These ioctls take a C `u_int`, so use a 32-bit argument.
        // set buffer size
        var arg = UInt32(buffSize)
        guard ioctl(fd, BpfIoctl.BIOCSBLEN, &arg) == 0 else {
            fatalError("bpf \(dev) ioctl(BIOCSBLEN) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // set immediate mode to true
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCIMMEDIATE, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCIMMEDIATE) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // see only received packets, not generated locally
        arg = 0
        guard ioctl(fd, BpfIoctl.BIOCSSEESENT, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCSSEESENT) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // bind to interface
        var ifr = ifreq()
        memset(&ifr, 0, MemoryLayout<ifreq>.size)
        ifc.copyTo(&ifr.ifr_name)
        guard ioctl(fd, BpfIoctl.BIOCSETIF, &ifr) == 0 else {
            fatalError("bpf ioctl(BIOCSETIF) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCSHDRCMPLT, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCSHDRCMPLT) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCPROMISC, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCPROMISC) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // set filter. The instructions are passed from a scoped copy of the
        // array instead of a manual allocation that was never released.
        // NOTE(review): assumes the kernel copies the program during the
        // ioctl and does not keep the pointer - confirm against bpf(4).
        var instructions = bpfFilter
        let rc = instructions.withUnsafeMutableBufferPointer { buf -> Int32 in
            var program = bpf_program(bf_len: UInt32(buf.count), bf_insns: buf.baseAddress)
            return ioctl(fd, BpfIoctl.BIOCSETFNR, &program)
        }
        guard rc == 0 else {
            fatalError("bpf ioctl(BIOCSETFNR) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        return fd
    }
    fatalError("bpf open() failed for \(ifc): \(String(cString: strerror(errno)))")
}
/// Opens a raw `PF_NDRV` socket and both binds and connects it to `ifc`.
/// Any failure terminates the process.
/// - Parameter ifc: name of the interface to attach to
/// - Returns: the socket descriptor
static func ndrvSocket(_ ifc: String) -> Int32 {
    let fd = socket(PF_NDRV, SOCK_RAW, 0)
    if fd < 0 {
        fatalError("ndrv socket() failed for \(ifc): \(String(cString: strerror(errno)))")
    }

    // bind to interface
    let addressSize = MemoryLayout<sockaddr_ndrv>.size
    var address = sockaddr_ndrv()
    address.snd_len = UInt8(addressSize)
    address.snd_family = UInt8(AF_NDRV)
    ifc.copyTo(&address.snd_name)

    withUnsafePointer(to: &address) { ndrvPtr in
        ndrvPtr.withMemoryRebound(to: sockaddr.self, capacity: 1) { genericPtr in
            guard Darwin.bind(fd, genericPtr, socklen_t(addressSize)) == 0 else {
                fatalError("ndrv bind() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            guard Darwin.connect(fd, genericPtr, socklen_t(addressSize)) == 0 else {
                fatalError("ndrv connect() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
        }
    }
    return fd
}
/// Closes the four descriptors of this port and, for bridge ports, deletes
/// both fake eth interfaces. Deletion errors are ignored.
func close() {
    for fd in [vmSocket, remoteSocket, bpfSocket, ndrvSocket] {
        Darwin.close(fd)
    }
    guard isBridge else { return }
    for name in [fethBridgeSide, fethVmSide] {
        try? NetworkInterface.deleteInterface(name)
    }
}
} | |
/// kqueue change list and event list for a fixed set of switch ports.
///
/// Two read filters are registered per port: one for its `vmSocket` and one
/// for its `bpfSocket`.
private struct KQSockets {
    /// Change list: two entries per port (vmSocket, bpfSocket).
    private let ptr: UnsafeMutablePointer<kevent64_s>
    /// Event list filled by `kevent64`, same capacity as the change list.
    private let eventsPtr: UnsafeMutablePointer<kevent64_s>
    private let sockDevs: [VSockDev]

    /// Switches both descriptors of every port to non-blocking mode and
    /// prepares the change list. A failing `fcntl` terminates the process.
    init(_ sockDevs: [VSockDev]) {
        self.sockDevs = sockDevs
        let capacity = sockDevs.count * 2
        self.ptr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.ptr.initialize(repeating: kevent64_s(), count: capacity)
        self.eventsPtr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.eventsPtr.initialize(repeating: kevent64_s(), count: capacity)

        for (i, dev) in sockDevs.enumerated() {
            guard Foundation.fcntl(dev.vmSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) vmSocket: \(String(cString: strerror(errno)))")
            }
            guard Foundation.fcntl(dev.bpfSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) bpfSocket: \(String(cString: strerror(errno)))")
            }
            ptr[2 * i] = Self.readEvent(for: dev.vmSocket)
            ptr[2 * i + 1] = Self.readEvent(for: dev.bpfSocket)
        }
    }

    /// Change-list entry that adds and enables a read filter on `fd`.
    private static func readEvent(for fd: Int32) -> kevent64_s {
        kevent64_s(
            ident: UInt64(fd),
            filter: Int16(EVFILT_READ),
            flags: UInt16(EV_ADD | EV_ENABLE),
            fflags: 0,
            data: 0,
            udata: 0,
            ext: (0, 0)
        )
    }

    /// Waits up to one second for events on `kq` and forwards traffic for
    /// every event that was returned.
    /// - Parameter kq: the kqueue descriptor
    /// - Returns: the result of `kevent64`: the number of events returned,
    ///   0 on timeout, or a negative value on failure (with `errno` set).
    func onEvent(_ kq: Int32) -> Int {
        let timeoutMillis: Int = 1000
        var timeout = timespec(tv_sec: timeoutMillis / 1000, tv_nsec: (timeoutMillis % 1000) * 1_000_000)
        let len = Int32(sockDevs.count * 2)
        let numEvents = Int(kevent64(kq, ptr, len, eventsPtr, len, 0, &timeout))
        guard numEvents > 0 else { return numEvents }

        // Only the first `numEvents` slots were filled by this call; the
        // remaining slots still hold events of earlier calls and must not be
        // processed again.
        eventLoop: for i in 0..<numEvents {
            let evt = eventsPtr[i]
            if evt.flags & UInt16(EV_ERROR) != 0 {
                NetworkSwitch.logger.error("evt-error: \(String(cString: strerror(Int32(evt.data))))", throttleKey: "kq-evt-error")
                continue
            }
            guard evt.data > 0 else { continue }

            let fd = Int32(evt.ident)
            for dev in sockDevs {
                if dev.vmSocket == fd {
                    dev.vmToHost(evt)
                    continue eventLoop
                }
                if dev.bpfSocket == fd {
                    dev.hostToVM(evt)
                    continue eventLoop
                }
            }
            NetworkSwitch.logger.error("no route found for event: \(evt)", throttleKey: "kq-no-route")
        }
        return numEvents
    }
}
/// Alignment, in bytes, used when stepping from one captured BPF record to
/// the next: the size of an `Int32`.
private let BPF_ALIGNMENT: Int = MemoryLayout<Int32>.size

/// BPF ioctl request numbers (group "B"), built with the `_IO*` helpers
/// from the size of each argument type.
enum BpfIoctl {
    // Buffer length and statistics.
    static let BIOCSBLEN: UInt = _IOWR("B", 102, CUnsignedInt.self)
    static let BIOCGSTATS: UInt = _IOR("B", 111, bpf_stat.self)

    // Capture mode switches.
    static let BIOCPROMISC: UInt = _IO("B", 105)
    static let BIOCIMMEDIATE: UInt = _IOW("B", 112, CUnsignedInt.self)
    static let BIOCSHDRCMPLT: UInt = _IOW("B", 117, CUnsignedInt.self)
    static let BIOCSSEESENT: UInt = _IOW("B", 119, CUnsignedInt.self)

    // Interface binding and filter program.
    static let BIOCSETIF: UInt = _IOW("B", 108, ifreq.self)
    static let BIOCSETFNR: UInt = _IOW("B", 126, bpf_program.self)
}
@amodm pls lend me a hand when you're not busy..
I have run into another problem, with the error "en0-h2g: write(vmSocket=3) len=60 failed: Destination address required". What might be the cause?
I made a launchd helper that runs your NetworkSwitch.swift to provide a Unix domain socket bound to a file such as /tmp/vm.socket. This socket file is then given to a virtual machine tool such as vfkit, which can use it as a virtio-net device. This way, I can isolate the network setup that needs sudo from the rootless virtual machine part.
So I created a unix socket by "unixSocket = Darwin.socket(PF_LOCAL, SOCK_DGRAM, 0)", bind it to /tmp/vm.socket, and give this socket to VSockDev.vmSocket. (I deleted VSockDev.remoteSocket as it's not needed now)
Then I got errors in log as below, seems vmToHost is ok, hostToVm is not, as I checked by tcpdump.
2024-12-12 23:22:32.965 virt[2706:284159] en0-g2h: read len=42
2024-12-12 23:22:32.966 virt[2706:284159] en0-h2g: write(vmSocket=3) len=60 failed: Destination address required
2024-12-12 23:22:34.392 virt[2706:284159] en0-g2h: read len=70
2024-12-12 23:22:34.908 virt[2706:284159] en0-g2h: read len=42
2024-12-12 23:22:34.908 virt[2706:284159] en0-h2g: write(vmSocket =3) len=60 failed: Destination address required
$ vfkit --cpus 2 --memory 1024 --bootloader linux,kernel=/tmp/virt/vmlinuz,initrd=/tmp/vmlinuz,cmdline=""console=hvc0 root=/dev/vda"" --device virtio-blk,path=/tmp/virt/vda.img --device virtio-blk,path=/tmp/virt/vdb.img --device virtio-net,unixSocketPath=/tmp/s.socket,mac=c2:6d:fd:60:10:2b --restful-uri tcp://localhost:5122 --device virtio-serial,stdio
Thanks again!
Instead of doing what you did, bind VSockDev.remoteSocket
to the Unix socket, and leave everything else the same.
Awesome! 👏