Last active
September 18, 2019 18:12
-
-
Save YutaroHayakawa/1b546672743ede427cc96e096e332762 to your computer and use it in GitHub Desktop.
Linux kernel L3 routing tracing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import click | |
import socket | |
import ipaddress | |
import dataclasses | |
from bcc import BPF | |
from ctypes import * | |
from jinja2 import Template | |
class V4Addrs(Structure):
    """IPv4 source/destination address pair as laid out in the BPF event.

    Mirrors the ``v4`` member of ``struct event_data`` in the BPF program:
    two 32-bit values copied verbatim from the IPv4 header.
    """

    _fields_ = [
        ("saddr", c_uint32),  # source address, raw bytes from the IP header
        ("daddr", c_uint32),  # destination address, raw bytes from the IP header
    ]
class V6Addrs(Structure):
    """IPv6 source/destination address pair as laid out in the BPF event.

    Mirrors the ``v6`` member of ``struct event_data`` in the BPF program:
    two 16-byte arrays (one ``struct in6_addr`` each).
    """

    _fields_ = [
        ("saddr", c_uint8 * 16),  # source address, 16 raw bytes
        ("daddr", c_uint8 * 16),  # destination address, 16 raw bytes
    ]
class IPAddrs(Union):
    """Overlay of the IPv4 and IPv6 address pairs.

    Mirrors the anonymous union inside ``struct event_data``; which member is
    valid is decided by the event's ``l3_protocol`` field.
    """

    _fields_ = [
        ("v4", V4Addrs),  # valid for IPv4 events
        ("v6", V6Addrs),  # valid for IPv6 events
    ]
class EventData(Structure):
    """Python view of ``struct event_data`` emitted by the BPF program.

    The field order and types must stay in sync with the C definition in
    ``PREAMBLE``, because perf-buffer samples are cast directly to this type.
    """

    # ctypes only honours the attribute spelled `_anonymous_`, it must be a
    # sequence of field names, and it has to be assigned before `_fields_`.
    # With it in place the union members are reachable both as
    # `event.addrs.v4` and directly as `event.v4`.
    _anonymous_ = ("addrs",)
    _fields_ = [
        ("event_id", c_uint8),      # index of the probed function in EVENTS
        ("l4_protocol", c_uint8),   # IP protocol number of the payload
        ("l3_protocol", c_uint16),  # skb->protocol, copied without byte swap
        ("addrs", IPAddrs),         # v4 or v6 address pair
        ("sport", c_uint16),        # L4 source port, network byte order
        ("dport", c_uint16),        # L4 destination port, network byte order
    ]
@dataclasses.dataclass(eq=True, frozen=True)
class Flow:
    """Hashable 5-tuple identifying one traced flow.

    Frozen with value equality so instances can be used as dictionary keys
    when grouping the kernel functions each flow passed through.
    """

    protocol: str  # L4 protocol name
    saddr: str     # source address in text form
    daddr: str     # destination address in text form
    sport: int     # source port, host byte order
    dport: int     # destination port, host byte order
# Protocol name <-> number tables. Both the keys and the values are strings:
# the number is kept exactly as written in the protocols database.
PROTO_TO_ID = {}
ID_TO_PROTO = {}

# Minimal table used when /etc/protocols cannot be read (e.g. in a stripped
# down container), so the common filters and the output decoding still work.
_FALLBACK_PROTOCOLS = (
    "icmp 1 ICMP",
    "tcp 6 TCP",
    "udp 17 UDP",
    "ipv6-icmp 58 IPv6-ICMP",
)


def _load_protocols(lines):
    """Fill PROTO_TO_ID / ID_TO_PROTO from protocols(5)-formatted lines.

    Everything after a ``#`` is treated as a comment; lines that do not carry
    both a name and a number are skipped. When several lines share a name or
    a number, the last one wins.
    """
    for line in lines:
        fields = line.split("#", 1)[0].split()
        if len(fields) < 2:
            continue
        name, number = fields[0], fields[1]
        PROTO_TO_ID[name] = number
        ID_TO_PROTO[number] = name


try:
    # The context manager closes the file as soon as parsing is done.
    with open("/etc/protocols") as _protocols_file:
        _load_protocols(_protocols_file)
except OSError:
    _load_protocols(_FALLBACK_PROTOCOLS)
# Jinja2 template for the shared part of the BPF program: includes, the
# compile-time filter constants, the event layout and trace_body(), which
# every generated kprobe calls. Filter placeholders are substituted before the
# text is handed to BCC. Address and port filters are compared directly with
# the raw header fields, so the rendered values must already have the same
# in-memory representation (network byte order) as those fields.
PREAMBLE = """
#include <linux/skbuff.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ptrace.h>

#define member_read(destination, source_struct, source_member) \
  do{ \
    bpf_probe_read( \
      destination, \
      sizeof(source_struct->source_member), \
      ((char*)source_struct) + offsetof(typeof(*source_struct), source_member) \
    ); \
  } while(0)

/* Filter constants; 0 (or an all-zero address) means "match anything". */
#define PROTOCOL {{protocol}}
#define V4_SADDR {{saddr4}}
#define V4_DADDR {{daddr4}}
#define V6_SADDR {{saddr6}}
#define V6_DADDR {{daddr6}}
#define SPORT {{sport}}
#define DPORT {{dport}}

struct event_data {
  uint8_t event_id;
  uint8_t l4_protocol;
  uint16_t l3_protocol;
  union {
    struct {
      uint32_t saddr;
      uint32_t daddr;
    } v4;
    struct {
      struct in6_addr saddr;
      struct in6_addr daddr;
    } v6;
  };
  uint16_t sport;
  uint16_t dport;
};

BPF_PERF_OUTPUT(events);

/* True when the V6_SADDR filter is all-zero or equals *src. */
static inline bool
v6_src_match(struct in6_addr *src)
{
  bool any = true;
  struct in6_addr saddr = {
    .in6_u.u6_addr8 = { V6_SADDR }
  };

  #pragma unroll
  for (int i = 0; i < 16; i++) {
    if (saddr.in6_u.u6_addr8[i] != 0) {
      any = false;
    }
  }

  if (any) {
    return true;
  }

  #pragma unroll
  for (int i = 0; i < 16; i++) {
    if (src->in6_u.u6_addr8[i] != saddr.in6_u.u6_addr8[i]) {
      return false;
    }
  }

  return true;
}

/* True when the V6_DADDR filter is all-zero or equals *dst. */
static inline bool
v6_dst_match(struct in6_addr *dst)
{
  bool any = true;
  struct in6_addr daddr = {
    .in6_u.u6_addr8 = { V6_DADDR }
  };

  #pragma unroll
  for (int i = 0; i < 16; i++) {
    if (daddr.in6_u.u6_addr8[i] != 0) {
      any = false;
    }
  }

  if (any) {
    return true;
  }

  #pragma unroll
  for (int i = 0; i < 16; i++) {
    if (dst->in6_u.u6_addr8[i] != daddr.in6_u.u6_addr8[i]) {
      return false;
    }
  }

  return true;
}

/*
 * Parse the L3/L4 headers of skb, apply the filters and, when everything
 * matches, submit *t to the perf buffer. t->event_id is set by the caller.
 */
static inline void
trace_body(struct pt_regs *ctx, struct sk_buff *skb, struct event_data *t)
{
  uint8_t *head;
  uint16_t ip_ofs;
  size_t hdr_len;
  struct iphdr iph;
  struct ipv6hdr ip6h;

  member_read(&head, skb, head);

  /* get ethernet type */
  member_read(&t->l3_protocol, skb, protocol);

  /* get offset of network header */
  member_read(&ip_ofs, skb, network_header);

  if (t->l3_protocol == 0x0008) {
    /* extract ipv4 header */
    bpf_probe_read(&iph, sizeof(iph), head + ip_ofs);

    if (iph.protocol != PROTOCOL && PROTOCOL != 0) return;
    if (iph.saddr != V4_SADDR && V4_SADDR != 0) return;
    if (iph.daddr != V4_DADDR && V4_DADDR != 0) return;

    t->l4_protocol = iph.protocol;
    t->v4.saddr = iph.saddr;
    t->v4.daddr = iph.daddr;

    hdr_len = iph.ihl * 4;
  } else if (t->l3_protocol == 0xdd86) {
    /* extract ipv6 header */
    bpf_probe_read(&ip6h, sizeof(ip6h), head + ip_ofs);

    /*
     * Skip the extension headers.
     * Due to the limitation of the BPF we only can handle
     * limited number of headers (we chose 1 in here for now).
     */
    uint8_t nexthdr = ip6h.nexthdr;
    uint8_t nexthdr_len = 0;
    size_t ext_len = 0;

    /*
     * NOTE(review): 41 is IPv6-in-IPv6 encapsulation rather than an
     * extension header; it is kept in this list to preserve the existing
     * matching behaviour. Confirm whether it should be handled here.
     */
    if (nexthdr == 0 || nexthdr == 41 ||
        nexthdr == 43 || nexthdr == 44) {
      bpf_probe_read(&nexthdr, 1, head + ip_ofs + sizeof(ip6h));
      bpf_probe_read(&nexthdr_len, 1, head + ip_ofs + sizeof(ip6h) + 1);
      /*
       * Hdr Ext Len is expressed in 8-octet units and does not include
       * the first 8 octets. The fragment header keeps a reserved zero
       * byte at this position, which yields its fixed size of 8.
       */
      ext_len = ((size_t)nexthdr_len + 1) * 8;
    }

    if (nexthdr != PROTOCOL && PROTOCOL != 0) return;
    if (!v6_src_match(&ip6h.saddr)) return;
    if (!v6_dst_match(&ip6h.daddr)) return;

    t->l4_protocol = nexthdr;
    t->v6.saddr = ip6h.saddr;
    t->v6.daddr = ip6h.daddr;

    hdr_len = sizeof(ip6h) + ext_len;
  } else {
    return;
  }

  /* The L4 header follows the L3 header, which itself starts at ip_ofs. */
  if (t->l4_protocol == 6) {
    /* TCP */
    struct tcphdr tcph;
    bpf_probe_read(&tcph, sizeof(tcph), head + ip_ofs + hdr_len);
    if (tcph.source != SPORT && SPORT != 0) return;
    if (tcph.dest != DPORT && DPORT != 0) return;
    t->sport = tcph.source;
    t->dport = tcph.dest;
  } else if (t->l4_protocol == 17) {
    /* UDP */
    struct udphdr udph;
    bpf_probe_read(&udph, sizeof(udph), head + ip_ofs + hdr_len);
    if (udph.source != SPORT && SPORT != 0) return;
    if (udph.dest != DPORT && DPORT != 0) return;
    t->sport = udph.source;
    t->dport = udph.dest;
  }

  events.perf_submit(ctx, t, sizeof(*t));
}
"""
# Kernel functions to attach kprobes to. "args" is the C parameter list of
# each function after the implicit pt_regs context; every entry must include
# a parameter named `skb`, because the generated probe passes it on to
# trace_body(). The position in this list is the event_id reported by the
# BPF program, so the order must not change independently of the decoder.
EVENTS = [
    {
        "name": "ip_local_deliver",
        "args": [
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "ip_rcv",
        "args": [
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "ip_local_out",
        "args": [
            "struct net *net",
            "struct sock *sk",
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "ip_output",
        "args": [
            "struct net *net",
            "struct sock *sk",
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "ip_forward",
        "args": [
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "tcp_v4_rcv",
        "args": [
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "udp_rcv",
        "args": [
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "vrf_l3_rcv",
        "args": [
            "struct net_device *vrf_dev",
            "struct sk_buff *skb",
            "u16 proto",
        ],
    },
    {
        "name": "vrf_l3_out",
        "args": [
            "struct net_device *vrf_dev",
            "struct sock *sk",
            "struct sk_buff *skb",
            "u16 proto",
        ],
    },
    {
        "name": "vrf_output",
        "args": [
            "struct net *net",
            "struct sock *sk",
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "vrf_xmit",
        "args": [
            "struct sk_buff *skb",
            "struct net_device *dev",
        ],
    },
    {
        "name": "vrf_local_xmit",
        "args": [
            "struct sk_buff *skb",
            "struct net_device *dev",
            "struct dst_entry *dst",
        ],
    },
    {
        "name": "ip_route_input_noref",
        "args": [
            "struct sk_buff *skb",
            "__be32 daddr",
            "__be32 saddr",
            "u8 tos",
            "struct net_device *dev",
        ],
    },
    {
        "name": "ipv6_rcv",
        "args": [
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "ip6_input",
        "args": [
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "ip6_output",
        "args": [
            "struct net *net",
            "struct sock *sk",
            "struct sk_buff *skb",
        ],
    },
    {
        "name": "ip6_local_out",
        "args": [
            "struct net *net",
            "struct sock *sk",
            "struct sk_buff *skb",
        ],
    },
]
# Jinja2 template that emits one kprobe handler per entry of `events`.
# Each handler tags the event with the entry's zero-based position
# (loop.index is one-based, hence the "- 1") and delegates to trace_body().
PROBE = """
{% for e in events %}
void kprobe__{{ e["name"] }}( {{ ",".join(["struct pt_regs *ctx"] + e["args"]) }} ) {
  struct event_data t = { {{ loop.index - 1 }} };
  trace_body(ctx, skb, &t);
}
{% endfor %}
"""
@click.command()
@click.option("--protocol", default="any")
@click.option("--saddr4", default="any")
@click.option("--daddr4", default="any")
@click.option("--saddr6", default="any")
@click.option("--daddr6", default="any")
@click.option("--sport", default="any")
@click.option("--dport", default="any")
def main(protocol, saddr4, daddr4, saddr6, daddr6, sport, dport):
    """Trace which kernel L3 functions each matching flow passes through.

    Every option defaults to "any" (no filtering on that field). The filters
    are compiled into the BPF program; tracing runs until Ctrl-C, after which
    one line per observed flow is printed together with the list of probed
    functions that saw it.
    """

    def inet_addr4(addr):
        """Return the IPv4 filter as the integer the BPF side compares with."""
        if addr == "any":
            return 0
        packed = ipaddress.IPv4Address(addr).packed
        # The header field holds the address in network byte order; reading
        # those bytes as a native integer on a little-endian host gives this
        # value. (The EtherType checks below assume little-endian as well.)
        return int.from_bytes(packed, byteorder="little")

    def inet_addr6(addr):
        """Return the IPv6 filter as a C initializer list of 16 bytes."""
        if addr == "any":
            return "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0"
        packed = ipaddress.IPv6Address(addr).packed
        # Each byte must be rendered in hexadecimal to go with the 0x prefix.
        return ",".join(f"0x{octet:02x}" for octet in packed)

    def inet_port(port):
        """Return the port filter in network byte order (0 means any)."""
        if port == "any":
            return 0
        # The BPF program compares against the raw TCP/UDP header fields,
        # which are big-endian, so pre-swap the constant here.
        return socket.htons(int(port))

    if protocol == "any":
        protocol_id = 0
    else:
        try:
            protocol_id = PROTO_TO_ID[protocol]
        except KeyError:
            raise click.BadParameter(
                f"unknown protocol {protocol!r}", param_hint="--protocol"
            ) from None

    conds = {
        "protocol": protocol_id,
        "saddr4": inet_addr4(saddr4),
        "daddr4": inet_addr4(daddr4),
        "saddr6": inet_addr6(saddr6),
        "daddr6": inet_addr6(daddr6),
        "sport": inet_port(sport),
        "dport": inet_port(dport),
    }

    preamble = Template(PREAMBLE).render(**conds)
    probes = Template(PROBE).render(events=EVENTS)
    text = preamble + probes
    print(text)

    b = BPF(text=text)
    events = b["events"]

    # Flow -> names of the probed functions that reported it, in first-seen
    # order and without duplicates.
    flows = {}

    def get_event(cpu, data, size):
        """Perf-buffer callback: decode one sample and record it in flows."""
        event = cast(data, POINTER(EventData)).contents
        event_name = EVENTS[event.event_id]["name"]

        # l3_protocol is the untranslated skb->protocol value, hence the
        # byte-swapped EtherType constants.
        if event.l3_protocol == 0x0008:
            saddr = ipaddress.IPv4Address(socket.ntohl(event.addrs.v4.saddr))
            daddr = ipaddress.IPv4Address(socket.ntohl(event.addrs.v4.daddr))
        elif event.l3_protocol == 0xdd86:
            saddr = ipaddress.IPv6Address(bytes(event.addrs.v6.saddr))
            daddr = ipaddress.IPv6Address(bytes(event.addrs.v6.daddr))
        else:
            print(f"Unsupported l3 protocol {event.l3_protocol}")
            return

        l4_id = str(event.l4_protocol)
        flow = Flow(
            # Fall back to the bare number for protocols missing from the
            # table instead of raising inside the callback.
            protocol=ID_TO_PROTO.get(l4_id, l4_id),
            saddr=str(saddr),
            daddr=str(daddr),
            sport=socket.ntohs(event.sport),
            dport=socket.ntohs(event.dport),
        )

        seen = flows.setdefault(flow, [])
        if event_name not in seen:
            seen.append(event_name)

    events.open_perf_buffer(get_event)
    print("Trace ready!")

    while True:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            # Leave the loop rather than exiting the process so that the
            # summary below is actually printed.
            break

    for flow, names in flows.items():
        print(f"{flow.protocol} {flow.saddr}:{flow.sport} -> {flow.daddr}:{flow.dport} {names}")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment