Created
July 3, 2024 15:18
-
-
Save amodm/a61e6d0c413e8cc9ac4c56a803150daf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* See the corresponding blog post for details: | |
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
*/ | |
#pragma once | |
#include <net/if_var.h> | |
#pragma pack(4) | |
struct ifbreq { | |
char ifbr_ifsname[IFNAMSIZ]; /* member if name */ | |
uint32_t ifbr_ifsflags; /* member if flags */ | |
uint32_t ifbr_stpflags; /* member if STP flags */ | |
uint32_t ifbr_path_cost; /* member if STP cost */ | |
uint8_t ifbr_portno; /* member if port number */ | |
uint8_t ifbr_priority; /* member if STP priority */ | |
uint8_t ifbr_proto; /* member if STP protocol */ | |
uint8_t ifbr_role; /* member if STP role */ | |
uint8_t ifbr_state; /* member if STP state */ | |
uint32_t ifbr_addrcnt; /* member if addr number */ | |
uint32_t ifbr_addrmax; /* member if addr max */ | |
uint32_t ifbr_addrexceeded; /* member if addr violations */ | |
uint8_t pad[32]; | |
}; | |
struct ifbifconf { | |
uint32_t ifbic_len; /* buffer size */ | |
union { | |
caddr_t ifbicu_buf; | |
struct ifbreq *ifbicu_req; | |
#define ifbic_buf ifbic_ifbicu.ifbicu_buf | |
#define ifbic_req ifbic_ifbicu.ifbicu_req | |
} ifbic_ifbicu; | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* See the corresponding blog post for details: | |
* https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
*/ | |
#pragma once | |
#include <net/if_var.h> | |
/* ----------------------------------------------------- | |
* Fake ethernet related headers. | |
* https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_fake_var.h.auto.html | |
* ----------------------------------------------------- | |
*/ | |
/*
 * Command codes for SIOCSDRVSPEC (set) requests on a fake ethernet interface.
 */
enum {
    IF_FAKE_S_CMD_NONE              = 0,
    IF_FAKE_S_CMD_SET_PEER          = 1, /* set the peer interface */
    IF_FAKE_S_CMD_SET_MEDIA         = 2, /* set the media list */
    IF_FAKE_S_CMD_SET_DEQUEUE_STALL = 3, /* enable/disable dequeue stall */
};
/*
 * Command codes for SIOCGDRVSPEC (get) requests on a fake ethernet interface.
 */
enum {
    IF_FAKE_G_CMD_NONE     = 0,
    IF_FAKE_G_CMD_GET_PEER = 1, /* query the peer interface */
};
/* Capacity of the media list carried in struct if_fake_media. */
#define IF_FAKE_MEDIA_LIST_MAX 27

/* Media description for a fake ethernet interface. */
struct if_fake_media {
    int32_t  iffm_current;                      /* current media entry */
    uint32_t iffm_count;                        /* number of entries in iffm_list */
    uint32_t iffm_reserved[3];                  /* reserved */
    int32_t  iffm_list[IF_FAKE_MEDIA_LIST_MAX]; /* media entries */
};
struct if_fake_request { | |
uint64_t iffr_reserved[4]; | |
union { | |
char iffru_buf[128]; /* stable size */ | |
struct if_fake_media iffru_media; | |
char iffru_peer_name[IFNAMSIZ]; /* if name, e.g. "en0" */ | |
/* | |
* control dequeue stall. 0: disable dequeue stall, else | |
* enable dequeue stall. | |
*/ | |
uint32_t iffru_dequeue_stall; | |
} iffr_u; | |
#define iffr_peer_name iffr_u.iffru_peer_name | |
#define iffr_media iffr_u.iffru_media | |
#define iffr_dequeue_stall iffr_u.iffru_dequeue_stall | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// See the corresponding blog post for details: | |
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
import Foundation | |
// xnu is a custom module that I created to expose the relevant C structs | |
// that the kernel expects, as those structs are not part of the userspace | |
// API. This module contains C-bridge headers if-fake.h and if-bridge.h | |
// which are also shown in this gist. | |
import xnu | |
struct NetworkInterface { | |
let name: String | |
let mac: ether_addr_t | |
let ips: [String] | |
let type: UInt32 | |
let flags: UInt32 | |
/// `true` when the interface type equals `IFT_BRIDGE`.
var isBridge: Bool {
    UInt(IFT_BRIDGE) == type
}

/// `true` when the `IFF_LOOPBACK` flag is set.
var isLoopback: Bool {
    (flags & UInt32(IFF_LOOPBACK)) != 0
}

/// `true` when the interface name begins with "feth".
var isFakeEth: Bool {
    name.starts(with: "feth") // TODO: figure out type?
}

/// `true` when the `IFF_UP` flag is set.
var up: Bool {
    (flags & UInt32(IFF_UP)) != 0
}

/// Brings this interface up or down by delegating to the static
/// `changeStatus(name:up:)` with this interface's name.
func changeStatus(up: Bool) throws {
    try NetworkInterface.changeStatus(name: name, up: up)
}
/// - Returns: all network interfaces currently configured on this system.
///
/// One entry is produced per `getifaddrs()` record whose address family is
/// `AF_LINK`, `AF_INET` or `AF_INET6`; an interface with several addresses
/// therefore appears several times (the link-level record carries the MAC,
/// each of the others carries one IP). Records without an address are skipped.
/// The process terminates via `fatalError` if `getifaddrs()` or the
/// interface-type ioctl fails.
static var all: [NetworkInterface] {
    var ifap: UnsafeMutablePointer<ifaddrs>? = nil
    guard getifaddrs(&ifap) == 0 else {
        fatalError("getifaddrs() failed: \(String(cString: strerror(errno)))")
    }
    defer { freeifaddrs(ifap) }
    var interfaces = [NetworkInterface]()
    try! withControlSocket { ctl in
        var cursor = ifap
        while let node = cursor {
            // Advance first so that `continue` below cannot skip it.
            cursor = node.pointee.ifa_next
            let ifa = node.pointee
            // ifa_addr is NULL for records that have no address; nothing to report.
            guard let sa = ifa.ifa_addr else { continue }
            let ifname = String(cString: ifa.ifa_name)
            let flags = ifa.ifa_flags
            var ips = [String]()
            var mac = ether_addr_t()
            switch Int32(sa.pointee.sa_family) {
            case AF_LINK:
                let sdl = sa.withMemoryRebound(to: sockaddr_dl.self, capacity: 1) { $0.pointee }
                // sockaddr_dl is variable-length: the declared sdl_data array is only a
                // minimum, so the address bytes are read from the original storage
                // (bounded by sdl_len) rather than from the fixed-size copy above.
                let dataOffset = MemoryLayout<sockaddr_dl>.offset(of: \sockaddr_dl.sdl_data) ?? 8
                let macOffset = dataOffset + Int(sdl.sdl_nlen)
                let macSize = MemoryLayout<ether_addr_t>.size
                if Int(sdl.sdl_alen) == macSize && macOffset + macSize <= Int(sdl.sdl_len) {
                    mac = UnsafeRawPointer(sa).advanced(by: macOffset).load(as: ether_addr_t.self)
                }
            case AF_INET:
                var addr = sa.withMemoryRebound(to: sockaddr_in.self, capacity: 1) { $0.pointee }
                var ip = [CChar](repeating: 0, count: Int(INET_ADDRSTRLEN))
                inet_ntop(AF_INET, &addr.sin_addr, &ip, socklen_t(INET_ADDRSTRLEN))
                ips.append(String(cString: ip))
            case AF_INET6:
                var addr = sa.withMemoryRebound(to: sockaddr_in6.self, capacity: 1) { $0.pointee }
                var ip = [CChar](repeating: 0, count: Int(INET6_ADDRSTRLEN))
                inet_ntop(AF_INET6, &addr.sin6_addr, &ip, socklen_t(INET6_ADDRSTRLEN))
                ips.append(String(cString: ip))
            default:
                continue
            }
            // Ask the kernel for the interface type of this interface.
            var ifr = ifreq()
            memset(&ifr, 0, MemoryLayout<ifreq>.size)
            ifname.copyTo(&ifr.ifr_name)
            guard ioctl(ctl, IfIoctl.SIOCFIFTYPE, &ifr) == 0 else {
                fatalError("\(ifname):ioctl(SIOCFIFTYPE): \(String(cString: strerror(errno)))")
            }
            let type = ifr.ifr_ifru.ifru_functional_type
            interfaces.append(NetworkInterface(name: ifname, mac: mac, ips: ips, type: type, flags: flags))
        }
    }
    return interfaces
}
/// Opens a datagram socket of the requested address family, hands it to `body`
/// (typically to issue interface ioctls) and closes it once `body` returns or throws.
/// - Parameters:
///   - family: address family of the control socket; defaults to `AF_LOCAL`.
///   - body: work to perform with the open socket descriptor.
/// - Returns: whatever `body` returns.
/// - Throws: `RVMError.sycallError` if the socket cannot be created, or any error from `body`.
private static func withControlSocket<T>(_ family: Int32 = AF_LOCAL, _ body: (Int32) throws -> T) throws -> T {
    // Honour the caller's address family instead of always using AF_LOCAL.
    let sock = socket(family, SOCK_DGRAM, 0)
    guard sock >= 0 else {
        throw RVMError.sycallError("control:socket()")
    }
    defer { close(sock) }
    return try body(sock)
}
/// Creates a fake eth interface, and peers with `peer` (if provided).
///
/// The first unused name among `feth0` ... `feth127` is chosen.
/// - Parameter peer: the peer to connect to
/// - Returns: the name of the fake eth interface that was created.
/// - Throws: `RVMError.sycallError` if creation or peering fails,
///   `RVMError.illegalState` if all 128 candidate names are taken.
static func createFakeEth(peer: String? = nil) throws -> String {
    let allFakeEths = Set(all.filter { $0.isFakeEth }.map { $0.name })
    for i in 0..<128 {
        let name = "feth\(i)"
        if allFakeEths.contains(name) {
            continue
        }
        var ifr = ifreq()
        memset(&ifr, 0, MemoryLayout.size(ofValue: ifr))
        name.copyTo(&ifr.ifr_name)
        ifr.ifr_ifru.ifru_flags = Int16(IFF_UP | IFF_RUNNING)
        try withControlSocket { ctl in
            // create
            guard ioctl(ctl, IfIoctl.SIOCIFCREATE2, &ifr) == 0 else {
                throw RVMError.sycallError("feth:create()")
            }
            if let peer = peer {
                // from https://opensource.apple.com/source/network_cmds/network_cmds-606.40.2/ifconfig.tproj/iffake.c.auto.html
                var iffr = if_fake_request()
                memset(&iffr, 0, MemoryLayout.size(ofValue: iffr))
                peer.copyTo(&iffr.iffr_u.iffru_peer_name)
                var ifd = ifdrv()
                memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
                name.copyTo(&ifd.ifd_name)
                ifd.ifd_cmd = UInt(IF_FAKE_S_CMD_SET_PEER)
                ifd.ifd_len = MemoryLayout.size(ofValue: iffr)
                // The pointer to `iffr` is only valid inside this closure, so the
                // ioctl that dereferences it must run here as well.
                let rc = withUnsafeMutablePointer(to: &iffr) { reqPtr -> Int32 in
                    ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
                    return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
                }
                guard rc == 0 else {
                    throw RVMError.sycallError("feth:ioctl(set-peer)")
                }
            }
        }
        return name
    }
    throw RVMError.illegalState("feth:create(): out of options")
}
/// Deletes the network interface with the given name.
/// - Parameter name: the name of the network interface to delete.
/// - Throws: `RVMError.sycallError` if the control socket cannot be opened or
///   the destroy ioctl fails.
static func deleteInterface(_ name: String) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout.size(ofValue: request))
    name.copyTo(&request.ifr_name)
    try withControlSocket { ctl in
        let rc = ioctl(ctl, IfIoctl.SIOCIFDESTROY, &request)
        if rc != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCIFDESTROY)")
        }
    }
}
/// Creates a pair of fake eth interfaces, and peers them together.
///
/// The second interface is created with the first as its peer; both are then brought up.
/// - Returns: the names of the two fake eth interfaces that were created.
static func createFakeEthPair() throws -> (String, String) {
    let first = try createFakeEth()
    let second = try createFakeEth(peer: first)
    for ifname in [first, second] {
        try changeStatus(name: ifname, up: true)
    }
    return (first, second)
}
/// Change the status of the network interface with the given name.
///
/// Reads the current flags, sets or clears `IFF_UP | IFF_RUNNING`, and writes
/// the flags back only when they actually changed.
/// - Parameters:
///   - name: the name of the network interface
///   - up: whether to bring the interface up or down
/// - Throws: an error if the operation fails
static func changeStatus(name: String, up: Bool) throws {
    var request = ifreq()
    memset(&request, 0, MemoryLayout.size(ofValue: request))
    name.copyTo(&request.ifr_name)
    try NetworkInterface.withControlSocket(AF_INET) { ctl in
        if ioctl(ctl, IfIoctl.SIOCGIFFLAGS, &request) != 0 {
            throw RVMError.sycallError("\(name):ioctl(SIOCGIFFLAGS)")
        }
        let statusBits = Int32(IFF_UP | IFF_RUNNING)
        // ifru_flags is a signed 16-bit field; mask to work on the raw 16 bits.
        let current = Int32(request.ifr_ifru.ifru_flags) & 0xffff
        let desired = up ? (current | statusBits) : (current & ~statusBits)
        if desired != current {
            request.ifr_ifru.ifru_flags = Int16(bitPattern: UInt16(desired & 0xffff))
            if ioctl(ctl, IfIoctl.SIOCSIFFLAGS, &request) < 0 {
                throw RVMError.sycallError("\(name):ioctl(SIOCSIFFLAGS)")
            }
        }
    }
}
/// Adds `ifc` to the network bridge `bridge`.
/// - Parameters:
///   - ifc: the network interface to add to the bridge.
///   - bridge: the network bridge.
/// - Throws: `RVMError.sycallError` if the control socket cannot be opened or
///   the bridge ioctl fails.
static func addInterfaceToBridge(_ ifc: String, to bridge: String) throws {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout.size(ofValue: req))
    ifc.copyTo(&req.ifbr_ifsname)
    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 0 // BRDGADD: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout.size(ofValue: req)
    try withControlSocket { ctl in
        // The pointer to `req` is only valid inside this closure, so the ioctl
        // that dereferences it must run here as well.
        let rc = withUnsafeMutablePointer(to: &req) { reqPtr -> Int32 in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            return ioctl(ctl, IfIoctl.SIOCSDRVSPEC, &ifd)
        }
        guard rc == 0 else {
            throw RVMError.sycallError("bridge(\(bridge)):add-if(\(ifc))")
        }
    }
}
/// Ensures that `member` is a member of the `bridge` network interface.
///
/// Queries the member's flags on the bridge; an `ENOENT` failure is treated as
/// "not a member" and the interface is added.
/// - Returns: `true` if the member was added, `false` if it was already a member.
/// - Throws: `RVMError.sycallError` if the query fails for any reason other
///   than `ENOENT`, or if adding the member fails.
static func ensureBridgeMembership(bridge: String, member: String) throws -> Bool {
    var req = ifbreq()
    memset(&req, 0, MemoryLayout.size(ofValue: req))
    member.copyTo(&req.ifbr_ifsname)
    var ifd = ifdrv()
    memset(&ifd, 0, MemoryLayout.size(ofValue: ifd))
    bridge.copyTo(&ifd.ifd_name)
    ifd.ifd_cmd = 2 // BRDGGIFFLGS: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/net/if_bridgevar.h.auto.html
    ifd.ifd_len = MemoryLayout.size(ofValue: req)
    return try withControlSocket { ctl in
        // The pointer to `req` is only valid inside this closure, so the ioctl runs
        // here; errno is captured immediately, before anything else can overwrite it.
        let (rc, err) = withUnsafeMutablePointer(to: &req) { reqPtr -> (Int32, Int32) in
            ifd.ifd_data = UnsafeMutableRawPointer(reqPtr)
            let rc = ioctl(ctl, IfIoctl.SIOCGDRVSPEC, &ifd)
            return (rc, errno)
        }
        if rc >= 0 {
            return false
        }
        guard err == ENOENT else {
            throw RVMError.sycallError("bridge(\(bridge)):getifflags(\(member))")
        }
        try addInterfaceToBridge(member, to: bridge)
        return true
    }
}
} | |
/// Assembles an ioctl request code from its parts.
/// - Parameters:
///   - dir: direction bits (e.g. `IOC_VOID`, `IOC_IN`, `IOC_OUT`, `IOC_INOUT`).
///   - g: group character; contributes its ASCII value (0 if not ASCII).
///   - n: command number within the group.
///   - l: parameter length in bytes, masked with `IOCPARM_MASK`.
func _IOC(_ dir: UInt32, _ g: Character, _ n: UInt, _ l: Int) -> UInt {
    let direction = UInt(dir)
    let length = (UInt(l) & UInt(IOCPARM_MASK)) << 16
    let group = UInt(g.asciiValue ?? 0) << 8
    return direction | length | group | n
}

/// Request code for a command that carries no parameter.
func _IO(_ g: Character, _ n: UInt) -> UInt {
    _IOC(IOC_VOID, g, n, 0)
}

/// Request code for a command whose parameter of type `T` is copied in.
func _IOW<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let paramSize = MemoryLayout<T>.size
    return _IOC(IOC_IN, char, nr, paramSize)
}

/// Request code for a command whose parameter of type `T` is copied out.
func _IOR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let paramSize = MemoryLayout<T>.size
    return _IOC(IOC_OUT, char, nr, paramSize)
}

/// Request code for a command whose parameter of type `T` is copied in and out.
func _IOWR<T>(_ char: Character, _ nr: UInt, _ ctype: T.Type) -> UInt {
    let paramSize = MemoryLayout<T>.size
    return _IOC(IOC_INOUT, char, nr, paramSize)
}
/// Interface ioctl request codes (group "i"), built with the `_IOW`/`_IOWR`
/// helpers from the command number and the parameter structure type.
enum IfIoctl {
    static let SIOCSIFFLAGS: UInt = _IOW("i", 16, ifreq.self)
    static let SIOCGIFFLAGS: UInt = _IOWR("i", 17, ifreq.self)
    static let SIOCGIFMEDIA: UInt = _IOWR("i", 56, ifmediareq.self)
    static let SIOCIFCREATE: UInt = _IOWR("i", 120, ifreq.self)
    static let SIOCIFDESTROY: UInt = _IOW("i", 121, ifreq.self)
    static let SIOCIFCREATE2: UInt = _IOWR("i", 122, ifreq.self)
    // Set and get share command number 123 and differ only in direction.
    static let SIOCSDRVSPEC: UInt = _IOW("i", 123, ifdrv.self)
    static let SIOCGDRVSPEC: UInt = _IOWR("i", 123, ifdrv.self)
    static let SIOCFIFTYPE: UInt = _IOWR("i", 159, ifreq.self)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// See the corresponding blog post for details: | |
// https://amodm.com/blog/2024/07/03/running-a-linux-router-on-macos | |
import Darwin | |
import Foundation | |
import Virtualization | |
// we poll via kqueues in this thread
/// Thread that shuttles ethernet frames between VM socket endpoints and host
/// interfaces. Ports are registered with `newBridgePort` before the thread is
/// started; `main()` then polls all ports until the thread is cancelled.
final class NetworkSwitch: Thread {
    static var shared = NetworkSwitch()
    static var logger: VMLogFacility = {
        VMFileLogger.shared.newFacility("nwswitch")
    }()

    /// Ports registered so far; fixed once the thread is executing.
    private var sockDevs: [VSockDev] = []

    /// Registers a new port attached to `hostBridge` for a VM NIC with MAC `vMac`.
    /// - Returns: a file-handle attachment wrapping the VM-facing end of the port's socket pair.
    /// - Throws: `RVMError.illegalState` if the switch thread is already executing,
    ///   or any error raised while creating the port.
    func newBridgePort(hostBridge: String, vMac: ether_addr_t) throws -> VZFileHandleNetworkDeviceAttachment {
        if isExecuting {
            throw RVMError.illegalState("cannot add port after switch has started")
        }
        let vsockDev = try VSockDev(hostBridge: hostBridge, vMac: vMac)
        sockDevs.append(vsockDev)
        return VZFileHandleNetworkDeviceAttachment(fileHandle: FileHandle(fileDescriptor: vsockDev.remoteSocket))
    }

    /// Checks every bridge port and ensures that the bridge contains our interface.
    /// Failures are logged, not propagated.
    func ensureBridgeMembership() {
        for dev in sockDevs where dev.isBridge {
            do {
                if try NetworkInterface.ensureBridgeMembership(bridge: dev.hostInterface, member: dev.fethBridgeSide) {
                    NetworkSwitch.logger.info("readded \(dev.fethBridgeSide) to bridge \(dev.hostInterface)")
                }
            } catch {
                NetworkSwitch.logger.error("\(error)")
            }
        }
    }

    /// Allocates and default-initializes an array of `capacity` kevent structures.
    /// The caller owns the returned memory.
    private static func kqChangeList(_ capacity: Int) -> UnsafeMutablePointer<kevent> {
        let ptr = UnsafeMutablePointer<kevent>.allocate(capacity: capacity)
        ptr.initialize(repeating: kevent(), count: capacity)
        return ptr
    }

    /// Thread body: polls all registered ports until cancelled, then closes them.
    /// Does nothing when no ports were registered.
    override func main() {
        guard !sockDevs.isEmpty else {
            return
        }
        // Single teardown point: each port must be closed exactly once, since closing
        // twice would re-close descriptor numbers that may already have been reused.
        defer {
            for dev in sockDevs {
                dev.close()
            }
        }
        let kq = kqueue()
        if kq < 0 {
            fatalError("kqueue() failed: \(String(cString: strerror(errno)))")
        }
        defer { close(kq) }
        let kqs = KQSockets(sockDevs)
        while !isCancelled {
            if kqs.onEvent(kq) < 0 {
                if errno == EINTR || errno == EAGAIN {
                    continue
                }
                NetworkSwitch.logger.error("onEvent() failed: \(String(cString: strerror(errno)))")
            }
        }
    }

    /// Cancels the thread and waits, polling every `pollTimeNanos` nanoseconds, until it has finished.
    func cancelAndJoin(_ pollTimeNanos: UInt64 = 100_000_000) async throws {
        cancel()
        while !isFinished {
            try await Task.sleep(nanoseconds: pollTimeNanos)
        }
    }
}
private struct VSockDev { | |
let hostInterface: String | |
let vMac: ether_addr_t | |
let vmSocket: Int32 | |
let remoteSocket: Int32 | |
let bpfSocket: Int32 | |
let ndrvSocket: Int32 | |
let bpfBufferSize: Int | |
let bpfReadBuffer: UnsafeMutableRawBufferPointer | |
let bpfFilter: [bpf_insn] | |
let fethBridgeSide: String | |
let fethVmSide: String | |
let isBridge: Bool | |
/// Statistics reported for `bpfSocket` via `BIOCGSTATS`; all-zero counters
/// are returned when the ioctl fails.
var bpfStats: bpf_stat {
    var stats = bpf_stat()
    guard ioctl(bpfSocket, BpfIoctl.BIOCGSTATS, &stats) == 0 else {
        return bpf_stat(bs_recv: 0, bs_drop: 0)
    }
    return stats
}
/// Creates a port that connects a VM NIC (identified by `vMac`) to `hostBridge`.
///
/// When `hostBridge` is a bridge interface a peered pair of fake ethernet interfaces
/// is created; otherwise the host interface itself is used for both sides. A local
/// datagram socket pair carries frames to/from the VM, a BPF descriptor captures
/// frames addressed to the VM (or broadcast), and an NDRV socket injects the VM's frames.
/// - Throws: any error raised while creating the fake ethernet pair.
init(hostBridge: String, vMac: ether_addr_t) throws {
    self.hostInterface = hostBridge
    self.isBridge = NetworkInterface.all.first(where: { $0.name == hostBridge })?.isBridge ?? false
    self.vMac = vMac
    (fethBridgeSide, fethVmSide) = isBridge ? try NetworkInterface.createFakeEthPair() : (hostBridge, hostBridge)

    // socketpair() writes two descriptors, so it needs mutable storage.
    var fds: [Int32] = [0, 0]
    guard socketpair(PF_LOCAL, SOCK_DGRAM, 0, &fds) == 0 else {
        fatalError("socketpair() failed: \(String(cString: strerror(errno)))")
    }
    (vmSocket, remoteSocket) = (fds[0], fds[1])

    // set buffer size (best effort). SO_SNDBUF/SO_RCVBUF take a C `int`.
    var size: Int32 = 1024 * 1024 * 8
    let sizeLen = socklen_t(MemoryLayout<Int32>.size)
    for fd in [vmSocket, remoteSocket] {
        for option in [SO_SNDBUF, SO_RCVBUF] {
            setsockopt(fd, SOL_SOCKET, option, &size, sizeLen)
        }
    }

    self.bpfBufferSize = Int(BPF_MAXBUFSIZE)
    self.bpfReadBuffer = UnsafeMutableRawBufferPointer.allocate(byteCount: bpfBufferSize, alignment: 16)

    let vmacTop2 = UInt32(vMac.octet.0) << 8 | UInt32(vMac.octet.1)
    let vmacBottom4 = UInt32(vMac.octet.2) << 24 | UInt32(vMac.octet.3) << 16 | UInt32(vMac.octet.4) << 8 | UInt32(vMac.octet.5)
    self.bpfFilter = [
        // [0] the following 4 statements do `ether dst host <vMac>`
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_W | BPF_ABS), jt: 0, jf: 0, k: 2), // ld dst[2..<6]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 2, k: vmacBottom4), // if == vMac[2..<6], next else goto [4]
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst[0..<2] (msb 2 bytes)
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 3, jf: 4, k: vmacTop2), // if == vMac[0..<2], goto [7] (true) else goto [8] (false)
        // [4] the following 3 statements do `ether dst broadcast`; the accumulator still holds dst[2..<6]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 3, k: 0xffffffff), // if == 0xffffffff, next else goto [8] (false)
        // Load the TOP two bytes (offset 0). Offset 2 would re-test bytes that were just
        // matched against 0xffffffff and accept any address ending in ff:ff:ff:ff.
        bpf_insn(code: CUnsignedShort(BPF_LD | BPF_H | BPF_ABS), jt: 0, jf: 0, k: 0), // ldh dst[0..<2]
        bpf_insn(code: CUnsignedShort(BPF_JMP | BPF_JEQ | BPF_K), jt: 0, jf: 1, k: 0xffff), // if == 0xffff, next (true) else goto [8] (false)
        // [7] return true (capture max packet size)
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: UInt32(self.bpfBufferSize)),
        // [8] return false
        bpf_insn(code: CUnsignedShort(BPF_RET | BPF_K), jt: 0, jf: 0, k: 0),
    ]
    self.ndrvSocket = Self.ndrvSocket(fethVmSide)
    self.bpfSocket = Self.bpfSocket(fethVmSide, self.bpfBufferSize, self.bpfFilter)
}
/// Route traffic between host and vm, depending upon the `event`
/// - Returns: `true` if the event's descriptor belongs to this port and was handled,
///   `false` otherwise.
func routeTraffic(_ event: kevent64_s) -> Bool {
    switch Int32(event.ident) {
    case vmSocket:
        vmToHost(event)
        return true
    case bpfSocket:
        hostToVM(event)
        return true
    default:
        return false
    }
}
/// Route traffic from host to VM by reading from bpfSocket and writing to vmSocket.
///
/// A single read may return several captured packets, each prefixed by a `bpf_hdr`;
/// every packet's payload is written to `vmSocket` as one datagram. Processing of the
/// buffer stops at the first record that is malformed or extends past the data read.
func hostToVM(_ event: kevent64_s) {
    let buffer = bpfReadBuffer.baseAddress!
    let len = read(bpfSocket, buffer, bpfBufferSize)
    if len > 0 {
        let endPtr = buffer.advanced(by: len)
        var pktPtr = buffer
        while pktPtr < endPtr {
            // for each packet
            let hdr = pktPtr.assumingMemoryBound(to: bpf_hdr.self).pointee
            let hdrLen = Int(hdr.bh_hdrlen)
            let capLen = Int(hdr.bh_caplen)
            let nextPktPtr = pktPtr.advanced(by: hdrLen + capLen)
            if nextPktPtr > endPtr {
                // The record claims more bytes than were read: never forward memory past the valid data.
                NetworkSwitch.logger.error("\(hostInterface)-h2g: nextPktPtr out of bounds: \(nextPktPtr) > \(endPtr). current pktPtr=\(pktPtr) hdr=\(hdr)", throttleKey: "h2g-next-oob")
                break
            }
            if hdrLen == 0 {
                // A zero-length header would never advance the cursor.
                break
            }
            if capLen > 0 {
                let writeLen = write(vmSocket, pktPtr.advanced(by: hdrLen), capLen)
                if writeLen < 0 {
                    NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-writ-fail")
                } else if writeLen != capLen {
                    NetworkSwitch.logger.error("\(hostInterface)-h2g: write() failed: partial write", throttleKey: "h2g-writ-partial")
                }
            }
            // Records start on BPF_ALIGNMENT boundaries.
            pktPtr = nextPktPtr.alignedUp(toMultipleOf: BPF_ALIGNMENT)
        }
    } else if len == 0 {
        NetworkSwitch.logger.error("\(hostInterface)-h2g: EOF", throttleKey: "h2g-eof")
    } else if errno != EAGAIN && errno != EINTR {
        NetworkSwitch.logger.error("\(hostInterface)-h2g: read() failed: \(String(cString: strerror(errno)))", throttleKey: "h2g-read-fail")
    }
}
/// Send traffic from VM to host by reading from vmSocket and writing to ndrv socket.
///
/// At most `min(buffer capacity, event.data)` bytes are consumed per call. Each read
/// lands at the start of the shared read buffer and is written out immediately.
/// - Parameters:
///   - event: the kqueue event; its `data` field bounds how much is read.
///   - onlyOne: when `true` (the default) stop after the first successful read/write pair.
func vmToHost(_ event: kevent64_s, onlyOne: Bool = true) {
    let budget = min(bpfReadBuffer.count, Int(event.data))
    let scratch = bpfReadBuffer.baseAddress!
    var consumed = 0
    while consumed < budget {
        let got = read(vmSocket, scratch, budget - consumed)
        guard got > 0 else {
            if got == 0 {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: EOF", throttleKey: "g2h-eof")
            } else if !(errno == EAGAIN || errno == EINTR) {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: read() failed: \(String(cString: strerror(errno))): e=\(event)", throttleKey: "g2h-read-fail")
            }
            break
        }
        let sent = write(ndrvSocket, scratch, got)
        guard sent == got else {
            if sent < 0 {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: \(String(cString: strerror(errno)))", throttleKey: "g2h-writ-fail")
            } else if !(errno == EAGAIN || errno == EINTR) {
                NetworkSwitch.logger.error("\(hostInterface)-g2h: write() failed: partial write", throttleKey: "g2h-writ-partial")
            }
            break
        }
        consumed += got
        if onlyOne {
            break
        }
    }
}
/// Opens the first available `/dev/bpfN` device (N in 1..<256), configures it and binds
/// it to `ifc`.
///
/// Configuration applied, in order: buffer length, immediate mode, hide locally sent
/// packets, bind to interface, header-complete mode, promiscuous mode, filter program.
/// Any configuration failure, or failure to open a device, terminates the process.
/// - Parameters:
///   - ifc: name of the interface to capture from.
///   - buffSize: requested BPF buffer length in bytes.
///   - bpfFilter: filter program to install.
/// - Returns: the configured BPF file descriptor.
static func bpfSocket(_ ifc: String, _ buffSize: Int, _ bpfFilter: [bpf_insn]) -> Int32 {
    // TODO: modify sysctl debug.bpf_maxbufsize and use that size
    for i in 1..<256 {
        let dev = "/dev/bpf\(i)"
        let fd = open(dev, O_RDONLY)
        if fd >= 0 {
            // The BPF ioctls below take an unsigned int argument.
            // set buffer size
            var arg = CUnsignedInt(buffSize)
            guard ioctl(fd, BpfIoctl.BIOCSBLEN, &arg) == 0 else {
                fatalError("bpf \(dev) ioctl(BIOCSBLEN) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // set immediate mode to true
            arg = 1
            guard ioctl(fd, BpfIoctl.BIOCIMMEDIATE, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCIMMEDIATE) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // see only received packets, not generated locally
            arg = 0
            guard ioctl(fd, BpfIoctl.BIOCSSEESENT, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCSSEESENT) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // bind to interface
            var ifr = ifreq()
            memset(&ifr, 0, MemoryLayout<ifreq>.size)
            ifc.copyTo(&ifr.ifr_name)
            guard ioctl(fd, BpfIoctl.BIOCSETIF, &ifr) == 0 else {
                fatalError("bpf ioctl(BIOCSETIF) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            arg = 1
            guard ioctl(fd, BpfIoctl.BIOCSHDRCMPLT, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCSHDRCMPLT) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            arg = 1
            guard ioctl(fd, BpfIoctl.BIOCPROMISC, &arg) == 0 else {
                fatalError("bpf ioctl(BIOCPROMISC) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            // set filter
            let insns = UnsafeMutablePointer<bpf_insn>.allocate(capacity: bpfFilter.count)
            // NOTE(review): assumes the kernel copies the program during the ioctl, so the
            // temporary instruction buffer can be released on return — confirm against bpf(4).
            defer { insns.deallocate() }
            insns.initialize(from: bpfFilter, count: bpfFilter.count)
            var filter = bpf_program()
            filter.bf_len = UInt32(bpfFilter.count)
            filter.bf_insns = insns
            guard ioctl(fd, BpfIoctl.BIOCSETFNR, &filter) == 0 else {
                fatalError("bpf ioctl(BIOCSETFNR) failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            return fd
        }
    }
    fatalError("bpf open() failed for \(ifc): \(String(cString: strerror(errno)))")
}
/// Creates a raw `PF_NDRV` socket and both binds and connects it to interface `ifc`.
/// Any failure terminates the process.
/// - Returns: the socket descriptor.
static func ndrvSocket(_ ifc: String) -> Int32 {
    let fd = socket(PF_NDRV, SOCK_RAW, 0)
    if fd < 0 {
        fatalError("ndrv socket() failed for \(ifc): \(String(cString: strerror(errno)))")
    }
    // bind to interface
    let addrSize = MemoryLayout<sockaddr_ndrv>.size
    var nd = sockaddr_ndrv()
    nd.snd_len = UInt8(addrSize)
    nd.snd_family = UInt8(AF_NDRV)
    ifc.copyTo(&nd.snd_name)
    withUnsafePointer(to: &nd) { ndrvAddr in
        ndrvAddr.withMemoryRebound(to: sockaddr.self, capacity: 1) { genericAddr in
            guard Darwin.bind(fd, genericAddr, socklen_t(addrSize)) == 0 else {
                fatalError("ndrv bind() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
            guard Darwin.connect(fd, genericAddr, socklen_t(addrSize)) == 0 else {
                fatalError("ndrv connect() failed for \(ifc): \(String(cString: strerror(errno)))")
            }
        }
    }
    return fd
}
/// Closes the port's four descriptors and, for bridge ports, deletes both fake
/// ethernet interfaces (deletion errors are ignored).
func close() {
    for fd in [vmSocket, remoteSocket, bpfSocket, ndrvSocket] {
        Darwin.close(fd)
    }
    guard isBridge else {
        return
    }
    for ifname in [self.fethBridgeSide, self.fethVmSide] {
        try? NetworkInterface.deleteInterface(ifname)
    }
}
} | |
/// kqueue registration and dispatch for a set of ports: two read filters per port
/// (its VM socket and its BPF descriptor).
private struct KQSockets {
    /// Change list: one `EVFILT_READ` registration per watched descriptor.
    private let ptr: UnsafeMutablePointer<kevent64_s>
    /// Output buffer that `kevent64` fills with triggered events.
    private let eventsPtr: UnsafeMutablePointer<kevent64_s>
    private let sockDevs: [VSockDev]

    /// Switches each port's VM socket and BPF descriptor to non-blocking mode and
    /// prepares the kqueue change list. A failing `fcntl` terminates the process.
    init(_ sockDevs: [VSockDev]) {
        self.sockDevs = sockDevs
        let capacity = sockDevs.count * 2
        self.ptr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.ptr.initialize(repeating: kevent64_s(), count: capacity)
        self.eventsPtr = UnsafeMutablePointer<kevent64_s>.allocate(capacity: capacity)
        self.eventsPtr.initialize(repeating: kevent64_s(), count: capacity)
        for (i, dev) in sockDevs.enumerated() {
            guard Foundation.fcntl(dev.vmSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) vmSocket: \(String(cString: strerror(errno)))")
            }
            guard Foundation.fcntl(dev.bpfSocket, F_SETFL, O_NONBLOCK) == 0 else {
                fatalError("fcntl() failed for \(dev.hostInterface) bpfSocket: \(String(cString: strerror(errno)))")
            }
            // Slot 2*i watches the VM socket, slot 2*i+1 the BPF descriptor.
            for (slot, fd) in [(2 * i, dev.vmSocket), (2 * i + 1, dev.bpfSocket)] {
                self.ptr.advanced(by: slot).pointee = kevent64_s(
                    ident: UInt64(fd),
                    filter: Int16(EVFILT_READ),
                    flags: UInt16(EV_ADD | EV_ENABLE),
                    fflags: 0,
                    data: 0,
                    udata: 0,
                    ext: (0, 0)
                )
            }
        }
    }

    /// Waits up to one second for events on `kq` and dispatches each triggered event to
    /// the port owning its descriptor.
    /// - Returns: the value returned by `kevent64`: the number of events, 0 on timeout,
    ///   or a negative value on failure.
    func onEvent(_ kq: Int32) -> Int {
        let timeoutMillis: Int = 1000
        let timeoutSecs = timeoutMillis / 1000
        let timeoutNanos = (timeoutMillis % 1000) * 1_000_000
        var timeout = timespec(tv_sec: timeoutSecs, tv_nsec: timeoutNanos)
        let len = sockDevs.count * 2
        let numEvents = Int(kevent64(kq, ptr, Int32(len), eventsPtr, Int32(len), 0, &timeout))
        guard numEvents > 0 else {
            return numEvents
        }
        // Only the first `numEvents` slots were filled by this call; the remaining
        // slots still hold events from earlier calls and must not be dispatched again.
        eventLoop: for i in 0..<numEvents {
            let evt = eventsPtr.advanced(by: i).pointee
            if evt.flags & UInt16(EV_ERROR) != 0 {
                NetworkSwitch.logger.error("evt-error: \(String(cString: strerror(Int32(evt.data))))", throttleKey: "kq-evt-error")
            } else if evt.data > 0 {
                let fd = Int32(evt.ident)
                for dev in sockDevs {
                    if dev.vmSocket == fd {
                        dev.vmToHost(evt)
                        continue eventLoop
                    }
                    if dev.bpfSocket == fd {
                        dev.hostToVM(evt)
                        continue eventLoop
                    }
                }
                NetworkSwitch.logger.error("no route found for event: \(evt)", throttleKey: "kq-no-route")
            }
        }
        return numEvents
    }
}
/// Alignment, in bytes, applied when stepping from one record to the next in a BPF read buffer.
private let BPF_ALIGNMENT: Int = MemoryLayout<Int32>.size

/// BPF ioctl request codes (group "B"), built with the `_IO`/`_IOW`/`_IOR`/`_IOWR`
/// helpers from the command number and the parameter type.
enum BpfIoctl {
    static let BIOCSBLEN: UInt = _IOWR("B", 102, CUnsignedInt.self)
    static let BIOCPROMISC: UInt = _IO("B", 105)
    static let BIOCSETIF: UInt = _IOW("B", 108, ifreq.self)
    static let BIOCGSTATS: UInt = _IOR("B", 111, bpf_stat.self)
    static let BIOCIMMEDIATE: UInt = _IOW("B", 112, CUnsignedInt.self)
    static let BIOCSHDRCMPLT: UInt = _IOW("B", 117, CUnsignedInt.self)
    static let BIOCSSEESENT: UInt = _IOW("B", 119, CUnsignedInt.self)
    static let BIOCSETFNR: UInt = _IOW("B", 126, bpf_program.self)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Instead of doing what you did, bind
VSockDev.remoteSocket
to unix socket, and let everything else be the same.