Skip to content

Instantly share code, notes, and snippets.

@amodm
Created July 3, 2024 15:18
Show Gist options
  • Save amodm/a61e6d0c413e8cc9ac4c56a803150daf to your computer and use it in GitHub Desktop.
Save amodm/a61e6d0c413e8cc9ac4c56a803150daf to your computer and use it in GitHub Desktop.
/* See the corresponding blog post for details:
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
*/
#pragma once
#include <net/if_var.h>
#pragma pack(4)
/*
 * Per-member bridge request record. The Swift code in this gist zero-fills one,
 * copies the member interface name into ifbr_ifsname, and hands it to the
 * bridge driver through ifdrv.ifd_data (SIOCSDRVSPEC / SIOCGDRVSPEC).
 * NOTE(review): field order and sizes presumably mirror the kernel's own
 * struct ifbreq (bsd/net/if_bridgevar.h) -- confirm before changing anything.
 */
struct ifbreq {
char ifbr_ifsname[IFNAMSIZ]; /* member if name */
uint32_t ifbr_ifsflags; /* member if flags */
uint32_t ifbr_stpflags; /* member if STP flags */
uint32_t ifbr_path_cost; /* member if STP cost */
uint8_t ifbr_portno; /* member if port number */
uint8_t ifbr_priority; /* member if STP priority */
uint8_t ifbr_proto; /* member if STP protocol */
uint8_t ifbr_role; /* member if STP role */
uint8_t ifbr_state; /* member if STP state */
uint32_t ifbr_addrcnt; /* member if addr number */
uint32_t ifbr_addrmax; /* member if addr max */
uint32_t ifbr_addrexceeded; /* member if addr violations */
uint8_t pad[32]; /* NOTE(review): presumably reserved space; never referenced by the code in this gist */
};
/*
 * Length-prefixed buffer descriptor: ifbic_len gives the buffer size and the
 * union lets the same pointer be viewed either as raw bytes (ifbic_buf) or as
 * struct ifbreq records (ifbic_req). Not referenced by the Swift code shown
 * in this gist.
 */
struct ifbifconf {
uint32_t ifbic_len; /* buffer size */
union {
caddr_t ifbicu_buf;
struct ifbreq *ifbicu_req;
/* shorthand accessors so callers can write x.ifbic_buf / x.ifbic_req */
#define ifbic_buf ifbic_ifbicu.ifbicu_buf
#define ifbic_req ifbic_ifbicu.ifbicu_req
} ifbic_ifbicu;
};
/* See the corresponding blog post for details:
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
*/
#pragma once
#include <net/if_var.h>
/* -----------------------------------------------------
* Fake ethernet related headers.
* https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_fake_var.h.auto.html
* -----------------------------------------------------
*/
/*
* SIOCSDRVSPEC
*/
/*
 * Command codes placed in ifdrv.ifd_cmd for a SIOCSDRVSPEC ("set") request on a
 * fake ethernet interface. The Swift code in this gist only uses
 * IF_FAKE_S_CMD_SET_PEER, with a struct if_fake_request as payload.
 */
enum {
IF_FAKE_S_CMD_NONE = 0,
IF_FAKE_S_CMD_SET_PEER = 1,
IF_FAKE_S_CMD_SET_MEDIA = 2,
IF_FAKE_S_CMD_SET_DEQUEUE_STALL = 3,
};
/*
* SIOCGDRVSPEC
*/
/*
 * Command codes for the matching "get" request (SIOCGDRVSPEC). Not used by the
 * Swift code shown in this gist.
 */
enum {
IF_FAKE_G_CMD_NONE = 0,
IF_FAKE_G_CMD_GET_PEER = 1,
};
/*
 * Media description member of the if_fake_request payload union.
 * With 27 list entries the struct is 4 + 4 + 3*4 + 27*4 = 128 bytes, i.e. the
 * same size as the 128-byte iffru_buf member it shares that union with.
 */
#define IF_FAKE_MEDIA_LIST_MAX 27
struct if_fake_media {
int32_t iffm_current;
uint32_t iffm_count;
uint32_t iffm_reserved[3];
int32_t iffm_list[IF_FAKE_MEDIA_LIST_MAX];
};
/*
 * Payload passed through ifdrv.ifd_data for fake ethernet driver requests.
 * Which union member is meaningful depends on the command code; the Swift code
 * in this gist zero-fills the struct and sets only iffru_peer_name for
 * IF_FAKE_S_CMD_SET_PEER.
 */
struct if_fake_request {
uint64_t iffr_reserved[4];
union {
char iffru_buf[128]; /* stable size */
struct if_fake_media iffru_media;
char iffru_peer_name[IFNAMSIZ]; /* if name, e.g. "en0" */
/*
* control dequeue stall. 0: disable dequeue stall, else
* enable dequeue stall.
*/
uint32_t iffru_dequeue_stall;
} iffr_u;
/* shorthand accessors for the union members */
#define iffr_peer_name iffr_u.iffru_peer_name
#define iffr_media iffr_u.iffru_media
#define iffr_dequeue_stall iffr_u.iffru_dequeue_stall
};
// See the corresponding blog post for details:
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
import Foundation
// xnu is a custom module that I created to expose the relevant C structs
// that the kernel expects, as those structs are not part of the userspace
// API. This module contains C-bridge headers if-fake.h and if-bridge.h
// which are also shown in this gist.
import xnu
struct NetworkInterface {
/// Interface name, e.g. `feth0`.
let name: String
/// Link-layer address captured from the `AF_LINK` record (all zero otherwise).
let mac: ether_addr_t
/// Textual IPv4/IPv6 addresses captured from the record this value was built from.
let ips: [String]
/// Interface type as reported by the kernel (compared against `IFT_*`).
let type: UInt32
/// `IFF_*` flag bits as reported by `getifaddrs()`.
let flags: UInt32

/// Whether the kernel reports this interface as a bridge.
var isBridge: Bool {
    type == UInt32(IFT_BRIDGE)
}

/// Whether the `IFF_LOOPBACK` flag is set.
var isLoopback: Bool {
    (flags & UInt32(IFF_LOOPBACK)) != 0
}

/// Whether this looks like a fake ethernet interface (decided by name prefix only).
var isFakeEth: Bool {
    name.starts(with: "feth") // TODO: figure out type?
}

/// Whether the `IFF_UP` flag is set.
var up: Bool {
    (flags & UInt32(IFF_UP)) != 0
}

/// Brings this interface up or down; forwards to the static variant using `name`.
func changeStatus(up: Bool) throws {
    try NetworkInterface.changeStatus(name: name, up: up)
}
/// - Returns: all network interfaces currently configured on this system.
///
/// One `NetworkInterface` is produced per `getifaddrs()` record whose address
/// family is `AF_LINK`, `AF_INET` or `AF_INET6`, so the same interface name can
/// appear more than once: the `AF_LINK` record carries `mac`, the others carry
/// a single entry in `ips`. Records without an address are skipped.
///
/// Failures of `getifaddrs()`, of the control socket, or of the type ioctl are
/// treated as fatal, because this property cannot throw.
static var all: [NetworkInterface] {
    var ifap: UnsafeMutablePointer<ifaddrs>? = nil
    guard getifaddrs(&ifap) == 0 else {
        fatalError("getifaddrs() failed: \(String(cString: strerror(errno)))")
    }
    defer { freeifaddrs(ifap) }

    let macLength = MemoryLayout<ether_addr_t>.size
    var interfaces = [NetworkInterface]()
    try! withControlSocket { ctl in
        var cursor = ifap
        while let node = cursor {
            let ifa = node.pointee
            cursor = ifa.ifa_next
            // getifaddrs() may hand back records with a NULL ifa_addr; dereferencing it would crash.
            guard let sa = ifa.ifa_addr else { continue }

            let ifname = String(cString: ifa.ifa_name)
            let flags = ifa.ifa_flags
            var ips = [String]()
            var mac = ether_addr_t()

            switch Int32(sa.pointee.sa_family) {
            case AF_LINK:
                let sdl = sa.withMemoryRebound(to: sockaddr_dl.self, capacity: 1) { $0.pointee }
                // The link-layer address follows the interface name inside sdl_data and may
                // extend past the declared 12-byte array, so read it from the original
                // record (bounded by sdl_len) rather than from the fixed-size copy above.
                if let dataOffset = MemoryLayout<sockaddr_dl>.offset(of: \sockaddr_dl.sdl_data),
                   Int(sdl.sdl_alen) == macLength,
                   dataOffset + Int(sdl.sdl_nlen) + macLength <= Int(sdl.sdl_len) {
                    let src = UnsafeRawPointer(sa).advanced(by: dataOffset + Int(sdl.sdl_nlen))
                    withUnsafeMutableBytes(of: &mac) { dst in
                        dst.copyMemory(from: UnsafeRawBufferPointer(start: src, count: macLength))
                    }
                }
            case AF_INET:
                var addr = sa.withMemoryRebound(to: sockaddr_in.self, capacity: 1) { $0.pointee }
                var ip = [CChar](repeating: 0, count: Int(INET_ADDRSTRLEN))
                inet_ntop(AF_INET, &addr.sin_addr, &ip, socklen_t(INET_ADDRSTRLEN))
                ips.append(String(cString: ip))
            case AF_INET6:
                var addr = sa.withMemoryRebound(to: sockaddr_in6.self, capacity: 1) { $0.pointee }
                var ip = [CChar](repeating: 0, count: Int(INET6_ADDRSTRLEN))
                inet_ntop(AF_INET6, &addr.sin6_addr, &ip, socklen_t(INET6_ADDRSTRLEN))
                ips.append(String(cString: ip))
            default:
                continue
            }

            // Ask the kernel for the interface type (used by `isBridge`).
            var ifr = ifreq()
            memset(&ifr, 0, MemoryLayout<ifreq>.size)
            ifname.copyTo(&ifr.ifr_name)
            guard ioctl(ctl, IfIoctl.SIOCFIFTYPE, &ifr) == 0 else {
                fatalError("\(ifname):ioctl(SIOCFIFTYPE): \(String(cString: strerror(errno)))")
            }
            let type = ifr.ifr_ifru.ifru_functional_type
            interfaces.append(NetworkInterface(name: ifname, mac: mac, ips: ips, type: type, flags: flags))
        }
    }
    return interfaces
}
/// Runs `body` with a short-lived datagram socket that serves as the handle for
/// interface ioctls, closing the socket afterwards.
/// - Parameters:
///   - family: address family of the socket to open (defaults to `AF_LOCAL`).
///   - body: receives the socket file descriptor.
/// - Returns: whatever `body` returns.
/// - Throws: `RVMError.sycallError` if the socket cannot be created; rethrows errors from `body`.
private static func withControlSocket<T>(_ family: Int32 = AF_LOCAL, _ body: (Int32) throws -> T) throws -> T {
    // Honour the requested family; it used to be ignored in favour of AF_LOCAL.
    let sock = socket(family, SOCK_DGRAM, 0)
    guard sock >= 0 else {
        throw RVMError.sycallError("control:socket()")
    }
    defer { close(sock) }
    return try body(sock)
}
/// Creates a fake eth interface, and peers with `peer` (if provided).
///
/// The first unused name in `feth0 ..< feth128` is picked. If peering fails the
/// freshly created interface is destroyed again (best effort) before the error
/// is thrown, so a failed call does not leave an interface behind.
/// - Parameter peer: the peer to connect to
/// - Returns: the name of the fake eth interface that was created.
/// - Throws: `RVMError.illegalState` when all 128 names are taken,
///   `RVMError.sycallError` when creation or peering fails.
static func createFakeEth(peer: String? = nil) throws -> String {
    let allFakeEths = Set(all.filter { $0.isFakeEth }.map { $0.name })
    guard let name = (0..<128).lazy.map({ "feth\($0)" }).first(where: { !allFakeEths.contains($0) }) else {
        throw RVMError.illegalState("feth:create(): out of options")
    }

    var ifr = ifreq()
    memset(&ifr, 0, MemoryLayout<ifreq>.size)
    name.copyTo(&ifr.ifr_name)
    ifr.ifr_ifru.ifru_flags = Int16(IFF_UP | IFF_RUNNING)

    try withControlSocket { ctl in
        // create
        guard ioctl(ctl, IfIoctl.SIOCIFCREATE2, &ifr) == 0 else {
            throw RVMError.sycallError("feth:create()")
        }
        guard let peer = peer else { return }

        // from https://opensource.apple.com/source/network_cmds/network_cmds-606.40.2/ifconfig.tproj/iffake.c.auto.html
        var iffr = if_fake_request()
        memset(&iffr, 0, MemoryLayout<if_fake_request>.size)
        peer.copyTo(&iffr.iffr_u.iffru_peer_name)
        var ifd = ifdrv()
        memset(&ifd, 0, MemoryLayout<ifdrv>.size)
        name.copyTo(&ifd.ifd_name)
        ifd.ifd_cmd = UInt(IF_FAKE_S_CMD_SET_PEER)
        ifd.ifd_len = MemoryLayout<if_fake_request>.size
        // The pointer to `iffr` is only valid inside this closure, so the ioctl
        // that consumes it has to be issued here as well.
        let rc = withUnsafeMutablePointer(to: &iffr) { iffrPtr -> Int32 in
            ifd.ifd_data = UnsafeMutableRawPointer(iffrPtr)
            return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
        }
        guard rc == 0 else {
            // Build the error before cleaning up so the cleanup ioctl cannot disturb errno first.
            let failure = RVMError.sycallError("feth:ioctl(set-peer)")
            _ = ioctl(ctl, IfIoctl.SIOCIFDESTROY, &ifr)
            throw failure
        }
    }
    return name
}
/// Deletes the network interface with the given name.
/// - Parameter name: the name of the network interface to delete.
/// - Throws: `RVMError.sycallError` if the control socket cannot be opened or
///   the `SIOCIFDESTROY` ioctl fails.
static func deleteInterface(_ name: String) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout<ifreq>.size)
    name.copyTo(&request.ifr_name)
    try withControlSocket { ctl in
        let rc = ioctl(ctl, IfIoctl.SIOCIFDESTROY, &request)
        if rc != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCIFDESTROY)")
        }
    }
}
/// Creates a pair of fake eth interfaces, and peers them together.
///
/// Both interfaces are brought up before returning. If any step after the first
/// creation fails, the interfaces created so far are deleted again (best
/// effort) and the original error is rethrown.
/// - Returns: the names of the two fake eth interfaces that were created.
static func createFakeEthPair() throws -> (String, String) {
    let feth1 = try createFakeEth()
    var created = [feth1]
    do {
        let feth2 = try createFakeEth(peer: feth1)
        created.append(feth2)
        try changeStatus(name: feth1, up: true)
        try changeStatus(name: feth2, up: true)
        return (feth1, feth2)
    } catch {
        // Do not leave half a pair behind.
        for name in created.reversed() {
            try? deleteInterface(name)
        }
        throw error
    }
}
/// Change the status of the network interface with the given name.
///
/// Reads the current flags, sets or clears `IFF_UP | IFF_RUNNING`, and writes
/// the flags back only when they actually changed.
/// - Parameters:
/// - name: the name of the network interface
/// - up: whether to bring the interface up or down
/// - Throws: an error if the operation fails
static func changeStatus(name: String, up: Bool) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout<ifreq>.size)
    name.copyTo(&request.ifr_name)
    try NetworkInterface.withControlSocket(AF_INET) { ctl in
        if ioctl(ctl, IfIoctl.SIOCGIFFLAGS, &request) != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCGIFFLAGS)")
        }
        // ifru_flags is a signed 16-bit field; widen and mask so the bit math is unsigned-safe.
        let current = Int32(request.ifr_ifru.ifru_flags) & 0xffff
        let statusBits = Int32(IFF_UP | IFF_RUNNING)
        let desired = up ? (current | statusBits) : (current & ~statusBits)
        guard desired != current else { return }
        request.ifr_ifru.ifru_flags = Int16(bitPattern: UInt16(desired & 0xffff))
        if ioctl(ctl, IfIoctl.SIOCSIFFLAGS, &request) < 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCSIFFLAGS)")
        }
    }
}
/// Adds `ifc` to the network bridge `bridge`.
/// - Parameters:
/// - ifc: the network interface to add to the bridge.
/// - bridge: the network bridge.
/// - Throws: `RVMError.sycallError` if the control socket cannot be opened or
///   the bridge driver rejects the request.
static func addInterfaceToBridge(_ ifc: String, to bridge: String) throws {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout<ifbreq>.size)
    ifc.copyTo(&req.ifbr_ifsname)
    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout<ifdrv>.size)
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 0 // BRDGADD: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout<ifbreq>.size
    try withControlSocket { ctl in
        // The pointer to `req` is only valid inside the closure, so the ioctl
        // that reads through it is issued there too.
        let rc = withUnsafeMutablePointer(to: &req) { reqPtr -> Int32 in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
        }
        guard rc == 0 else {
            throw RVMError.sycallError("bridge(\(bridge)):add-if(\(ifc))")
        }
    }
}
/// Ensures that `member` is a member of the `bridge` network interface.
///
/// Membership is probed by asking the bridge driver for the member's flags; an
/// `ENOENT` answer is taken to mean "not a member", in which case the interface
/// is added.
/// - Returns: `true` if the member was added, `false` if it was already a member.
/// - Throws: `RVMError.sycallError` if the probe fails for any other reason, or
///   whatever `addInterfaceToBridge` throws.
static func ensureBridgeMembership(bridge: String, member: String) throws -> Bool {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout<ifbreq>.size)
    member.copyTo(&req.ifbr_ifsname)
    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout<ifdrv>.size)
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 2 // BRDGGIFFLGS: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout<ifbreq>.size
    return try withControlSocket { ctl in
        // Issue the ioctl while the pointer to `req` is still valid, and grab
        // errno immediately so nothing in between can overwrite it.
        let (rc, err) = withUnsafeMutablePointer(to: &req) { reqPtr -> (Int32, Int32) in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            let rc = ioctl(ctl, IfIoctl.SIOCGDRVSPEC, &ifd)
            return (rc, errno)
        }
        guard rc < 0 else { return false }
        guard err == ENOENT else {
            throw RVMError.sycallError("bridge(\(bridge)):getifflags(\(member))")
        }
        try addInterfaceToBridge(member, to: bridge)
        return true
    }
}
}
/// Assembles an ioctl request code from its four fields.
/// - Parameters:
///   - dir: direction bits (`IOC_VOID`, `IOC_IN`, `IOC_OUT`, `IOC_INOUT`).
///   - g: group character; a non-ASCII character contributes 0.
///   - n: command number within the group.
///   - l: size of the argument in bytes (masked with `IOCPARM_MASK`).
/// - Returns: the encoded request value.
func _IOC(_ dir: UInt32, _ g: Character, _ n: UInt, _ l: Int) -> UInt {
    let directionBits = UInt(dir)
    let lengthBits = (UInt(l) & UInt(IOCPARM_MASK)) << 16
    let groupBits = UInt(g.asciiValue ?? 0) << 8
    return directionBits | lengthBits | groupBits | n
}
/// Encodes an ioctl request that carries no argument (`IOC_VOID`, length 0).
func _IO(_ g: Character, _ n: UInt) -> UInt {
    let noPayload = 0
    return _IOC(IOC_VOID, g, n, noPayload)
}
/// Encodes an ioctl request whose argument of type `T` is copied in (`IOC_IN`).
func _IOW<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let payloadSize = MemoryLayout<T>.size
    return _IOC(IOC_IN, char, nr, payloadSize)
}
/// Encodes an ioctl request whose argument of type `T` is copied out (`IOC_OUT`).
func _IOR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let payloadSize = MemoryLayout<T>.size
    return _IOC(IOC_OUT, char, nr, payloadSize)
}
/// Encodes an ioctl request whose argument of type `T` is copied in and out (`IOC_INOUT`).
func _IOWR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let payloadSize = MemoryLayout<T>.size
    return _IOC(IOC_INOUT, char, nr, payloadSize)
}
/// Interface ioctl request codes (group `"i"`), computed with the `_IO*`
/// helpers from the group character, command number and argument struct.
enum IfIoctl {
    // interface flags
    static let SIOCSIFFLAGS: UInt = _IOW("i", 16, ifreq.self)
    static let SIOCGIFFLAGS: UInt = _IOWR("i", 17, ifreq.self)
    // media
    static let SIOCGIFMEDIA: UInt = _IOWR("i", 56, ifmediareq.self)
    // interface creation / destruction
    static let SIOCIFCREATE: UInt = _IOWR("i", 120, ifreq.self)
    static let SIOCIFDESTROY: UInt = _IOW("i", 121, ifreq.self)
    static let SIOCIFCREATE2: UInt = _IOWR("i", 122, ifreq.self)
    // driver-specific set / get: same command number, different direction
    static let SIOCSDRVSPEC: UInt = _IOW("i", 123, ifdrv.self)
    static let SIOCGDRVSPEC: UInt = _IOWR("i", 123, ifdrv.self)
    // interface type query
    static let SIOCFIFTYPE: UInt = _IOWR("i", 159, ifreq.self)
}
// See the corresponding blog post for details:
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos
import Darwin
import Foundation
import Virtualization
// we poll via kqueues in this thread
/// Background thread that shuttles ethernet frames between VM network
/// attachments and host interfaces.
///
/// Ports are registered with `newBridgePort` before the thread is started;
/// `main()` then polls all ports through a kqueue until the thread is cancelled.
final class NetworkSwitch: Thread {
    static var shared = NetworkSwitch()
    static var logger: VMLogFacility = {
        VMFileLogger.shared.newFacility("nwswitch")
    }()
    private var sockDevs: [VSockDev] = []

    /// Creates a new port attached to `hostBridge` for a guest NIC with MAC `vMac`.
    /// - Returns: the attachment to hand to the VM's network device configuration.
    /// - Throws: `RVMError.illegalState` once the switch thread is executing, or
    ///   whatever `VSockDev.init` throws.
    func newBridgePort(hostBridge: String, vMac: ether_addr_t) throws -> VZFileHandleNetworkDeviceAttachment {
        guard !isExecuting else {
            throw RVMError.illegalState("cannot add port after switch has started")
        }
        let vsockDev = try VSockDev(hostBridge: hostBridge, vMac: vMac)
        sockDevs.append(vsockDev)
        return VZFileHandleNetworkDeviceAttachment(fileHandle: FileHandle(fileDescriptor: vsockDev.remoteSocket))
    }

    /// Checks every bridge port and ensures that the bridge contains our interface.
    /// Failures are logged, not propagated.
    func ensureBridgeMembership() {
        for dev in sockDevs where dev.isBridge {
            do {
                if try NetworkInterface.ensureBridgeMembership(bridge: dev.hostInterface, member: dev.fethBridgeSide) {
                    NetworkSwitch.logger.info("readded \(dev.fethBridgeSide) to bridge \(dev.hostInterface)")
                }
            } catch {
                NetworkSwitch.logger.error("\(error)")
            }
        }
    }

    /// Allocates and default-initialises `capacity` `kevent` records.
    /// The caller owns the returned memory. Not used by the code in this class.
    private static func kqChangeList(_ capacity: Int) -> UnsafeMutablePointer<kevent> {
        let ptr = UnsafeMutablePointer<kevent>.allocate(capacity: capacity)
        ptr.initialize(repeating: kevent(), count: capacity)
        return ptr
    }

    /// Thread body: polls all ports until cancelled, then closes them.
    override func main() {
        guard !sockDevs.isEmpty else { return }
        // Single cleanup path. Each port is closed exactly once, however the
        // loop below is left (closing twice could hit a recycled descriptor).
        defer {
            for dev in sockDevs {
                dev.close()
            }
        }
        let kq = kqueue()
        if kq < 0 {
            fatalError("kqueue() failed: \(String(cString: strerror(errno)))")
        }
        defer { Darwin.close(kq) }
        let kqs = KQSockets(sockDevs)
        while !isCancelled {
            if kqs.onEvent(kq) < 0 {
                if errno == EINTR || errno == EAGAIN {
                    continue
                }
                NetworkSwitch.logger.error("onEvent() failed: \(String(cString: strerror(errno)))")
            }
        }
    }

    /// Cancels the thread and waits until it has finished, polling `isFinished`
    /// every `pollTimeNanos` nanoseconds.
    func cancelAndJoin(_ pollTimeNanos: UInt64 = 100_000_000) async throws {
        cancel()
        while !isFinished {
            try await Task.sleep(nanoseconds: pollTimeNanos)
        }
    }
}
private struct VSockDev {
/// Name of the host interface this port was created for (the `hostBridge` init argument).
let hostInterface: String
/// MAC address of the guest NIC; the BPF filter matches frames destined to it.
let vMac: ether_addr_t
/// Switch-side end of the socket pair; frames are read from / written to it by the switch.
let vmSocket: Int32
/// Other end of the socket pair; handed to the VM's file-handle network attachment.
let remoteSocket: Int32
/// `/dev/bpf*` descriptor bound to `fethVmSide`; source of host-to-VM frames.
let bpfSocket: Int32
/// `PF_NDRV` raw socket bound to `fethVmSide`; sink for VM-to-host frames.
let ndrvSocket: Int32
/// Size in bytes of `bpfReadBuffer` (set from `BPF_MAXBUFSIZE`).
let bpfBufferSize: Int
/// Scratch buffer shared by both forwarding directions.
let bpfReadBuffer: UnsafeMutableRawBufferPointer
/// Filter program installed on `bpfSocket`.
let bpfFilter: [bpf_insn]
/// Interface that is (re)added to the host bridge; equals `hostInterface` when not bridging.
let fethBridgeSide: String
/// Interface the bpf and ndrv sockets are bound to; equals `hostInterface` when not bridging.
let fethVmSide: String
/// Whether `hostInterface` was found to be a bridge when this port was created.
let isBridge: Bool
/// Receive/drop counters for `bpfSocket`, or all-zero counters if the query fails.
var bpfStats: bpf_stat {
    var stats = bpf_stat()
    guard ioctl(bpfSocket, BpfIoctl.BIOCGSTATS, &stats) == 0 else {
        return bpf_stat(bs_recv: 0, bs_drop: 0)
    }
    return stats
}
/// Sets up one switch port.
///
/// When `hostBridge` is a bridge, a peered pair of fake ethernet interfaces is
/// created (one side for the bridge, one for the VM); otherwise the host
/// interface itself is used for both roles. A datagram socket pair carries
/// frames to and from the VM, and a bpf descriptor plus an ndrv socket are
/// bound to the VM-side interface.
/// - Parameters:
///   - hostBridge: name of the host interface (bridge or plain interface).
///   - vMac: MAC address of the guest NIC, used to build the capture filter.
/// - Throws: whatever `NetworkInterface.createFakeEthPair()` throws. Socket
///   setup failures are fatal.
init(hostBridge: String, vMac: ether_addr_t) throws {
    self.hostInterface = hostBridge
    self.isBridge = NetworkInterface.all.first(where: { $0.name == hostBridge })?.isBridge ?? false
    self.vMac = vMac
    (fethBridgeSide, fethVmSide) = isBridge ? try NetworkInterface.createFakeEthPair() : (hostBridge, hostBridge)

    var fds: [Int32] = [0, 0]
    guard socketpair(PF_LOCAL, SOCK_DGRAM, 0, &fds) == 0 else {
        fatalError("socketpair() failed: \(String(cString: strerror(errno)))")
    }
    vmSocket = fds[0]
    remoteSocket = fds[1]

    // set buffer size (best effort). SO_SNDBUF/SO_RCVBUF take a C int, so pass
    // exactly that instead of an 8-byte Swift Int.
    var sockBufSize = CInt(1024 * 1024 * 8)
    let sockBufLen = socklen_t(MemoryLayout<CInt>.size)
    for fd in fds {
        for option in [SO_SNDBUF, SO_RCVBUF] {
            _ = setsockopt(fd, SOL_SOCKET, option, &sockBufSize, sockBufLen)
        }
    }

    self.bpfBufferSize = Int(BPF_MAXBUFSIZE)
    self.bpfReadBuffer = UnsafeMutableRawBufferPointer.allocate(byteCount: bpfBufferSize, alignment: 16)

    let vmacTop2 = UInt32(vMac.octet.0) << 8 | UInt32(vMac.octet.1)
    let vmacBottom4 = UInt32(vMac.octet.2) << 24 | UInt32(vMac.octet.3) << 16 | UInt32(vMac.octet.4) << 8 | UInt32(vMac.octet.5)
    self.bpfFilter = [
        // [0] the following 4 statements do `ether dst host <vMac>`
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_W | BPF_ABS), jt: 0, jf: 0, k: 2), // ld dst_host_ether[2..<6]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 2, k: vmacBottom4), // if == vMac[2..<6], proceed to next else skip-2
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst_host_ether[0..<2] (msb 2 bytes)
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 3, jf: 4, k: vmacTop2), // if == vMac[0..<2], skip-3 (true) else skip-4 (false)
        // [4] the following 3 statements do `ether dst broadcast`
        // (the accumulator still holds dst_host_ether[2..<6] from [0] when we get here)
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 3, k: 0xffffffff), // if == 0xffffffff (broadcast), next else skip-3 (false)
        // Load the top two bytes (offset 0). Loading offset 2 here would re-test
        // bytes already known to be 0xffff and let xx:xx:ff:ff:ff:ff through.
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst_host_ether[0..<2]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 1, k: 0xffff), // if == 0xffff (broadcast), next (true) else skip-1 (false)
        // [7] return true (capture max packet size)
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: UInt32(self.bpfBufferSize)),
        // [8] return false
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: 0), // ret false
    ]
    self.ndrvSocket = Self.ndrvSocket(fethVmSide)
    self.bpfSocket = Self.bpfSocket(fethVmSide, self.bpfBufferSize, self.bpfFilter)
}
/// Route traffic between host and vm, depending upon the `event`
/// - Returns: `true` if the event's descriptor belongs to this port and was
///   dispatched, `false` otherwise.
func routeTraffic(_ event: kevent64_s) -> Bool {
    switch Int32(event.ident) {
    case vmSocket:
        vmToHost(event)
        return true
    case bpfSocket:
        hostToVM(event)
        return true
    default:
        return false
    }
}
/// Route traffic from host to VM by reading from bpfSocket and writing to vmSocket.
///
/// A single `read()` on the bpf descriptor can return several captured frames,
/// each preceded by a `bpf_hdr`; every frame is forwarded with its own
/// `write()`. Walking stops as soon as a record is malformed or would extend
/// past the bytes actually read.
/// - Parameter event: the kqueue event that triggered the call (not inspected).
func hostToVM(_ event: kevent64_s) {
    let buffer = bpfReadBuffer.baseAddress!
    let len = read(bpfSocket, buffer, bpfBufferSize)
    guard len > 0 else {
        if len == 0 {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: EOF", throttleKey: "h2g-eof")
        } else if errno != EAGAIN && errno != EINTR {
            NetworkSwitch.logger.error("\(hostInterface)-h2g: read() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-read-fail")
        }
        return
    }

    let endPtr = buffer.advanced(by: len)
    let headerSize = MemoryLayout<bpf_hdr>.size
    var pktPtr = buffer
    // Only look at a header when all of it lies inside the data we read.
    while pktPtr.advanced(by: headerSize) <= endPtr {
        // for each packet
        let hdr = pktPtr.assumingMemoryBound(to: bpf_hdr.self).pointee
        let hdrLen = Int(hdr.bh_hdrlen)
        let capLen = Int(hdr.bh_caplen)
        guard hdrLen > 0 else {
            // A zero-length header would never advance the cursor.
            NetworkSwitch.logger.error("\(hostInterface)-h2g: malformed bpf header: hdr=\(hdr)", throttleKey: "h2g-bad-hdr")
            break
        }
        let nextPktPtr = pktPtr.advanced(by: capLen + hdrLen)
        guard nextPktPtr <= endPtr else {
            // Never forward bytes that lie beyond what read() returned.
            NetworkSwitch.logger.error("\(hostInterface)-h2g: nextPktPtr out of bounds: \(nextPktPtr) > \(endPtr). current pktPtr=\(pktPtr) hdr=\(hdr)", throttleKey: "h2g-next-oob")
            break
        }
        if capLen > 0 {
            let writeLen = write(vmSocket, pktPtr.advanced(by: hdrLen), capLen)
            if writeLen < 0 {
                NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-writ-fail")
            } else if writeLen != capLen {
                NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: partial write", throttleKey: "h2g-writ-partial")
            }
        }
        // Records are padded so that the next header starts on a BPF_ALIGNMENT boundary.
        pktPtr = nextPktPtr.alignedUp(toMultipleOf: BPF_ALIGNMENT)
    }
}
/// Send traffic from VM to host by reading from vmSocket and writing to ndrv socket.
///
/// At most `event.data` bytes (capped at the scratch buffer size) are consumed.
/// Each `read()` result is forwarded with a single `write()`.
/// - Parameters:
///   - event: the kqueue event; `data` is taken as the number of readable bytes.
///   - onlyOne: when `true` (default) stop after forwarding one datagram.
func vmToHost(_ event: kevent64_s, onlyOne: Bool = true) {
    let availableLen = min(bpfReadBuffer.count, Int(event.data))
    let basePtr = bpfReadBuffer.baseAddress!
    var offset = 0
    while offset < availableLen {
        let n = read(vmSocket, basePtr, availableLen - offset)
        guard n > 0 else {
            if n == 0 {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: EOF", throttleKey: "g2h-eof")
            } else if errno != EAGAIN && errno != EINTR {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: read() failed: \(String(cString: strerror(errno))): e=\(event)", throttleKey: "g2h-read-fail")
            }
            return
        }
        let len = write(ndrvSocket, basePtr, n)
        guard len == n else {
            if len < 0 {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: \(String(cString: strerror(errno)))", throttleKey: "g2h-writ-fail")
            } else {
                // errno is not meaningful after a write() that succeeded, so a
                // short write is always reported.
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: partial write", throttleKey: "g2h-writ-partial")
            }
            return
        }
        offset += n
        if onlyOne {
            return
        }
    }
}
/// Opens the first available `/dev/bpfN` device (N in 1..<256), configures it
/// and binds it to `ifc`.
///
/// Configuration applied: buffer length `buffSize`, immediate mode, locally
/// sent packets hidden, header-complete mode, promiscuous mode, and the given
/// filter program. Any configuration failure is fatal, as is finding no device
/// that can be opened.
/// - Parameters:
///   - ifc: interface name to capture on.
///   - buffSize: capture buffer size in bytes.
///   - bpfFilter: filter program to install.
/// - Returns: the configured bpf file descriptor.
static func bpfSocket(_ ifc: String, _ buffSize: Int, _ bpfFilter: [bpf_insn]) -> Int32 {
    // TODO: modify sysctl debug.bpf_maxbufsize and use that size
    for unit in 1..<256 {
        let dev = "/dev/bpf\(unit)"
        let fd = open(dev, O_RDONLY)
        guard fd >= 0 else { continue }

        // These ioctls take a C unsigned int, so use exactly that width.
        // set buffer size
        var arg = CUnsignedInt(buffSize)
        guard ioctl(fd, BpfIoctl.BIOCSBLEN, &arg) == 0 else {
            fatalError("bpf \(dev) ioctl(BIOCSBLEN) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // set immediate mode to true
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCIMMEDIATE, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCIMMEDIATE) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // see only received packets, not generated locally
        arg = 0
        guard ioctl(fd, BpfIoctl.BIOCSSEESENT, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCSSEESENT) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // bind to interface
        var ifr = ifreq()
        memset(&ifr, 0, MemoryLayout<ifreq>.size)
        ifc.copyTo(&ifr.ifr_name)
        guard ioctl(fd, BpfIoctl.BIOCSETIF, &ifr) == 0 else {
            fatalError("bpf ioctl(BIOCSETIF) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCSHDRCMPLT, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCSHDRCMPLT) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        arg = 1
        guard ioctl(fd, BpfIoctl.BIOCPROMISC, &arg) == 0 else {
            fatalError("bpf ioctl(BIOCPROMISC) failed for \(ifc): \(String(cString: strerror(errno)))")
        }
        // set filter. The program is passed from a temporary copy of the array, so
        // nothing is heap-allocated and left behind. NOTE(review): this relies on the
        // kernel copying the instructions during the ioctl rather than keeping the pointer.
        var instructions = bpfFilter
        let (rc, err) = instructions.withUnsafeMutableBufferPointer { buf -> (Int32, Int32) in
            var program = bpf_program(bf_len: UInt32(buf.count), bf_insns: buf.baseAddress)
            let rc = ioctl(fd, BpfIoctl.BIOCSETFNR, &program)
            return (rc, errno)
        }
        guard rc == 0 else {
            fatalError("bpf ioctl(BIOCSETFNR) failed for \(ifc): \(String(cString: strerror(err)))")
        }
        return fd
    }
    fatalError("bpf open() failed for \(ifc): \(String(cString: strerror(errno)))")
}
/// Opens a `PF_NDRV` raw socket and binds and connects it to interface `ifc`.
/// Any failure is fatal.
/// - Returns: the socket file descriptor.
static func ndrvSocket(_ ifc: String) -> Int32 {
    let fd = socket(PF_NDRV, SOCK_RAW, 0)
    if fd < 0 {
        fatalError("ndrv socket() failed for \(ifc): \(String(cString: strerror(errno)))")
    }
    // bind to interface
    let addrLen = MemoryLayout<sockaddr_ndrv>.size
    var nd = sockaddr_ndrv()
    nd.snd_len = UInt8(addrLen)
    nd.snd_family = UInt8(AF_NDRV)
    ifc.copyTo(&nd.snd_name)
    withUnsafePointer(to: &nd) { ndrvAddr in
        ndrvAddr.withMemoryRebound(to: sockaddr.self, capacity: 1) { genericAddr in
            guard Darwin.bind(fd, genericAddr, socklen_t(addrLen)) == 0 else {
                fatalError("ndrv bind() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            guard Darwin.connect(fd, genericAddr, socklen_t(addrLen)) == 0 else {
                fatalError("ndrv connect() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
        }
    }
    return fd
}
/// Closes all four descriptors of this port and, for bridge ports, deletes the
/// two fake ethernet interfaces (deletion errors are ignored).
func close() {
    for fd in [vmSocket, remoteSocket, bpfSocket, ndrvSocket] {
        Darwin.close(fd)
    }
    guard isBridge else { return }
    for ifc in [self.fethBridgeSide, self.fethVmSide] {
        try? NetworkInterface.deleteInterface(ifc)
    }
}
}
/// kqueue bookkeeping for a fixed set of switch ports.
///
/// For every port two read filters are prepared, one for `vmSocket` and one for
/// `bpfSocket`. `onEvent` submits them and dispatches whatever became readable.
private struct KQSockets {
    /// Change list: two `EVFILT_READ` registrations per port.
    private let ptr: UnsafeMutablePointer<kevent64_s>
    /// Output list the kernel fills with triggered events.
    private let eventsPtr: UnsafeMutablePointer<kevent64_s>
    private let sockDevs: [VSockDev]

    /// Switches both descriptors of every port to non-blocking mode and builds
    /// the change list. A failing `fcntl()` is fatal.
    init(_ sockDevs: [VSockDev]) {
        self.sockDevs = sockDevs
        let capacity = sockDevs.count * 2
        self.ptr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.ptr.initialize(repeating: kevent64_s(), count: capacity)
        self.eventsPtr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.eventsPtr.initialize(repeating: kevent64_s(), count: capacity)

        func readFilter(_ fd: Int32) -> kevent64_s {
            kevent64_s(
                ident: UInt64(fd),
                filter: Int16(EVFILT_READ),
                flags: UInt16(EV_ADD | EV_ENABLE),
                fflags: 0,
                data: 0,
                udata: 0,
                ext: (0, 0)
            )
        }

        for (i, dev) in sockDevs.enumerated() {
            guard Foundation.fcntl(dev.vmSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) vmSocket: \(String(cString: strerror(errno)))")
            }
            guard Foundation.fcntl(dev.bpfSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) bpfSocket: \(String(cString: strerror(errno)))")
            }
            self.ptr.advanced(by: 2 * i).pointee = readFilter(dev.vmSocket)
            self.ptr.advanced(by: 2 * i + 1).pointee = readFilter(dev.bpfSocket)
        }
    }

    /// Waits up to one second for events on `kq` and forwards traffic for each
    /// descriptor that became readable.
    /// - Returns: the value returned by `kevent64()`: the number of events
    ///   delivered, 0 on timeout, or a negative value on error (see `errno`).
    func onEvent(_ kq: Int32) -> Int {
        let timeoutMillis: Int = 1000
        var timeout = timespec(tv_sec: timeoutMillis / 1000, tv_nsec: (timeoutMillis % 1000) * 1_000_000)
        let len = sockDevs.count * 2
        let numEvents = Int(kevent64(kq, ptr, Int32(len), eventsPtr, Int32(len), 0, &timeout))
        guard numEvents > 0 else { return numEvents }

        // Only the first `numEvents` slots were written by this call; the rest
        // still hold events from earlier calls and must not be dispatched again.
        for i in 0..<numEvents {
            let evt = eventsPtr.advanced(by: i).pointee
            if evt.flags & UInt16(EV_ERROR) != 0 {
                NetworkSwitch.logger.error("evt-error: \(String(cString: strerror(Int32(evt.data))))", throttleKey: "kq-evt-error")
                continue
            }
            guard evt.data > 0 else { continue }
            let fd = Int32(evt.ident)
            guard let dev = sockDevs.first(where: { $0.vmSocket == fd || $0.bpfSocket == fd }) else {
                NetworkSwitch.logger.error("no route found for event: \(evt)", throttleKey: "kq-no-route")
                continue
            }
            if dev.vmSocket == fd {
                dev.vmToHost(evt)
            } else {
                dev.hostToVM(evt)
            }
        }
        return numEvents
    }
}
// Byte boundary that hostToVM rounds up to when stepping from the end of one
// captured record to the next bpf_hdr in a read buffer.
private let BPF_ALIGNMENT = MemoryLayout<Int32>.size
/// bpf device ioctl request codes (group `"B"`), computed with the `_IO*`
/// helpers from the group character, command number and argument type.
enum BpfIoctl {
    // buffer length
    static let BIOCSBLEN: UInt = _IOWR("B", 102, CUnsignedInt.self)
    // promiscuous mode (no argument)
    static let BIOCPROMISC: UInt = _IO("B", 105)
    // bind to interface
    static let BIOCSETIF: UInt = _IOW("B", 108, ifreq.self)
    // statistics
    static let BIOCGSTATS: UInt = _IOR("B", 111, bpf_stat.self)
    // immediate mode
    static let BIOCIMMEDIATE: UInt = _IOW("B", 112, CUnsignedInt.self)
    // header-complete flag
    static let BIOCSHDRCMPLT: UInt = _IOW("B", 117, CUnsignedInt.self)
    // see-sent flag
    static let BIOCSSEESENT: UInt = _IOW("B", 119, CUnsignedInt.self)
    // install filter program
    static let BIOCSETFNR: UInt = _IOW("B", 126, bpf_program.self)
}
@amodm
Copy link
Author

amodm commented Dec 12, 2024

Instead of doing what you did, bind VSockDev.remoteSocket to a Unix socket, and leave everything else the same.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment