Skip to content

Instantly share code, notes, and snippets.

@YutaroHayakawa
Last active September 18, 2019 18:12
Show Gist options
  • Save YutaroHayakawa/1b546672743ede427cc96e096e332762 to your computer and use it in GitHub Desktop.
Save YutaroHayakawa/1b546672743ede427cc96e096e332762 to your computer and use it in GitHub Desktop.
Linux kernel L3 routing tracing
import re
import click
import socket
import ipaddress
import dataclasses
from bcc import BPF
from ctypes import *
from jinja2 import Template
class V4Addrs(Structure):
    """IPv4 source/destination address pair as laid out in the BPF event record."""

    # Two consecutive 32-bit words: source address first, destination second.
    _fields_ = [(field, c_uint32) for field in ("saddr", "daddr")]
class V6Addrs(Structure):
    """IPv6 source/destination address pair as laid out in the BPF event record."""

    # Each address is kept as its 16 raw bytes: source first, destination second.
    _fields_ = [(field, c_uint8 * 16) for field in ("saddr", "daddr")]
class IPAddrs(Union):
    """Overlay of the IPv4 and IPv6 address pairs sharing the same storage.

    Which member is meaningful depends on the L3 protocol recorded alongside it.
    """

    _fields_ = [
        ("v4", V4Addrs),
        ("v6", V6Addrs),
    ]
class EventData(Structure):
    """Python view of one record submitted by the BPF program (``struct event_data``).

    Fields:
        event_id:    index into ``EVENTS`` identifying the probed kernel function.
        l4_protocol: L4 protocol number taken from the IP header.
        l3_protocol: ``skb->protocol`` exactly as read by the probe.
        addrs:       IPv4 or IPv6 address pair (see ``IPAddrs``).
        sport/dport: L4 ports exactly as read from the TCP/UDP header.
    """

    # ctypes only honours the name ``_anonymous_`` (with the trailing underscore),
    # it must be a real sequence, and it has to be assigned before ``_fields_``.
    # With it in place ``event.v4`` / ``event.v6`` are reachable directly, while
    # the explicit ``event.addrs.v4`` spelling keeps working.
    _anonymous_ = ("addrs",)
    _fields_ = [
        ("event_id", c_uint8),
        ("l4_protocol", c_uint8),
        ("l3_protocol", c_uint16),
        ("addrs", IPAddrs),
        ("sport", c_uint16),
        ("dport", c_uint16),
    ]
@dataclasses.dataclass(frozen=True)
class Flow:
    """Immutable, hashable 5-tuple used as the key when grouping traced events."""

    protocol: str  # L4 protocol name
    saddr: str  # source address in text form
    daddr: str  # destination address in text form
    sport: int  # source port
    dport: int  # destination port
def parse_protocols(lines):
    """Build the protocol name <-> number tables from ``/etc/protocols``-style lines.

    Everything from the first ``#`` on a line is treated as a comment, and lines
    that do not carry at least a name and a number are skipped. Numbers are kept
    as strings, which is how the rest of this module looks them up.

    Args:
        lines: iterable of text lines.

    Returns:
        A ``(name_to_number, number_to_name)`` pair of dicts.
    """
    name_to_number = {}
    number_to_name = {}
    for line in lines:
        # Strip comments first so "#foo bar" is never mistaken for an entry.
        fields = line.split("#", 1)[0].split()
        if len(fields) < 2:
            continue
        name, number = fields[0], fields[1]
        name_to_number[name] = number
        number_to_name[number] = name
    return name_to_number, number_to_name


# Loaded once at import time; the context manager closes the file afterwards.
with open("/etc/protocols") as _protocols_file:
    PROTO_TO_ID, ID_TO_PROTO = parse_protocols(_protocols_file)
# Jinja2 template for the shared part of the BPF C program. It is rendered with
# the filter values ``protocol``, ``saddr4``, ``daddr4``, ``saddr6``, ``daddr6``,
# ``sport`` and ``dport``; a value of 0 (or an all-zero IPv6 address) disables
# the corresponding filter.
#
# NOTE: this is not a raw string, so Python consumes every backslash-newline
# pair inside ``member_read`` and the macro reaches the C compiler on a single
# line. Keep line comments out of the macro body for that reason.
PREAMBLE = """
#include <linux/skbuff.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ptrace.h>

#define member_read(destination, source_struct, source_member) \
  do{ \
    bpf_probe_read( \
      destination, \
      sizeof(source_struct->source_member), \
      ((char*)source_struct) + offsetof(typeof(*source_struct), source_member) \
    ); \
  } while(0)

/* Filter values filled in by the template; 0 means "match anything". */
#define PROTOCOL {{protocol}}
#define V4_SADDR {{saddr4}}
#define V4_DADDR {{daddr4}}
#define V6_SADDR {{saddr6}}
#define V6_DADDR {{daddr6}}
#define SPORT {{sport}}
#define DPORT {{dport}}

/* Record handed to user space; the Python side casts the raw bytes to EventData. */
struct event_data {
  uint8_t event_id;
  uint8_t l4_protocol;
  uint16_t l3_protocol;
  union {
    struct {
      uint32_t saddr;
      uint32_t daddr;
    } v4;
    struct {
      struct in6_addr saddr;
      struct in6_addr daddr;
    } v6;
  };
  uint16_t sport;
  uint16_t dport;
};

BPF_PERF_OUTPUT(events);

/* True when src equals the configured source filter, or the filter is all zero. */
static inline bool
v6_src_match(struct in6_addr *src)
{
  bool same = true;
  struct in6_addr saddr = {
    .in6_u.u6_addr8 = { V6_SADDR }
  };

#pragma unroll
  for (int i = 0; i < 16; i++) {
    if (src->in6_u.u6_addr8[i] != saddr.in6_u.u6_addr8[i]) {
      same = false;
    }
  }

  if (same) {
    return true;
  }

  /* Not equal: still a match if the filter is the all-zero "any" address. */
#pragma unroll
  for (int i = 0; i < 16; i++) {
    if (saddr.in6_u.u6_addr8[i] != 0) {
      return false;
    }
  }

  return true;
}

/* True when the destination filter is all zero, or dst equals the filter. */
static inline bool
v6_dst_match(struct in6_addr *dst)
{
  bool any = true;
  struct in6_addr daddr = {
    .in6_u.u6_addr8 = { V6_DADDR }
  };

#pragma unroll
  for (int i = 0; i < 16; i++) {
    if (daddr.in6_u.u6_addr8[i] != 0) {
      any = false;
    }
  }

  if (any) {
    return true;
  }

#pragma unroll
  for (int i = 0; i < 16; i++) {
    if (dst->in6_u.u6_addr8[i] != daddr.in6_u.u6_addr8[i]) {
      return false;
    }
  }

  return true;
}

/*
 * Parse the packet behind skb, apply the filters and, when everything
 * matches, submit the record *t through the perf buffer.
 */
static inline void
trace_body(struct pt_regs *ctx, struct sk_buff *skb, struct event_data *t)
{
  uint8_t *head;
  uint16_t ip_ofs;
  size_t hdr_len;
  struct iphdr iph;
  struct ipv6hdr ip6h;

  member_read(&head, skb, head);

  /* get ethernet type */
  member_read(&t->l3_protocol, skb, protocol);

  /* get offset of network header */
  member_read(&ip_ofs, skb, network_header);

  if (t->l3_protocol == 0x0008) {
    /* extract ipv4 header */
    bpf_probe_read(&iph, sizeof(iph), head + ip_ofs);

    if (iph.protocol != PROTOCOL && PROTOCOL != 0) return;
    if (iph.saddr != V4_SADDR && V4_SADDR != 0) return;
    if (iph.daddr != V4_DADDR && V4_DADDR != 0) return;

    t->l4_protocol = iph.protocol;
    t->v4.saddr = iph.saddr;
    t->v4.daddr = iph.daddr;
    hdr_len = iph.ihl * 4;
  } else if (t->l3_protocol == 0xdd86) {
    /* extract ipv6 header */
    bpf_probe_read(&ip6h, sizeof(ip6h), head + ip_ofs);

    /*
     * Skip the extension headers.
     * Due to the limitation of the BPF we only can handle
     * limited number of headers (we chose 1 in here for now).
     */
    uint8_t nexthdr = ip6h.nexthdr;
    size_t ext_len = 0;

    /* 0: hop-by-hop, 43: routing, 44: fragment, 60: destination options */
    if (nexthdr == 0 || nexthdr == 43 ||
        nexthdr == 44 || nexthdr == 60) {
      uint8_t ext_type = nexthdr;
      uint8_t ext_units = 0;

      bpf_probe_read(&nexthdr, 1, head + ip_ofs + sizeof(ip6h));
      bpf_probe_read(&ext_units, 1, head + ip_ofs + sizeof(ip6h) + 1);

      /*
       * The fragment header is always 8 bytes (its second byte is reserved).
       * The others store their length in 8-byte units, not counting the
       * first 8 bytes.
       */
      ext_len = (ext_type == 44) ? 8 : ((size_t)ext_units + 1) * 8;
    }

    if (nexthdr != PROTOCOL && PROTOCOL != 0) return;
    if (!v6_src_match(&ip6h.saddr)) return;
    if (!v6_dst_match(&ip6h.daddr)) return;

    t->l4_protocol = nexthdr;
    t->v6.saddr = ip6h.saddr;
    t->v6.daddr = ip6h.daddr;
    hdr_len = sizeof(ip6h) + ext_len;
  } else {
    return;
  }

  /*
   * The L4 header starts hdr_len bytes after the network header, so the
   * network header offset has to be part of the address. Header ports are
   * big-endian while SPORT/DPORT are plain host-order numbers, hence htons().
   */
  if (t->l4_protocol == 6) {
    /* TCP */
    struct tcphdr tcph;
    bpf_probe_read(&tcph, sizeof(tcph), head + ip_ofs + hdr_len);
    if (SPORT != 0 && tcph.source != htons(SPORT)) return;
    if (DPORT != 0 && tcph.dest != htons(DPORT)) return;
    t->sport = tcph.source;
    t->dport = tcph.dest;
  } else if (t->l4_protocol == 17) {
    /* UDP */
    struct udphdr udph;
    bpf_probe_read(&udph, sizeof(udph), head + ip_ofs + hdr_len);
    if (SPORT != 0 && udph.source != htons(SPORT)) return;
    if (DPORT != 0 && udph.dest != htons(DPORT)) return;
    t->sport = udph.source;
    t->dport = udph.dest;
  }

  events.perf_submit(ctx, t, sizeof(*t));
}
"""
# Kernel functions to attach kprobes to. Each entry gives the function name and
# its C parameter declarations in order; the position of an entry in this list
# is the ``event_id`` reported by the BPF program. Every entry declares a
# parameter named ``skb`` because the generated probe body passes it on.
EVENTS = [
    {"name": name, "args": signature.split(", ")}
    for name, signature in (
        ("ip_local_deliver", "struct sk_buff *skb"),
        ("ip_rcv", "struct sk_buff *skb"),
        ("ip_local_out", "struct net *net, struct sock *sk, struct sk_buff *skb"),
        ("ip_output", "struct net *net, struct sock *sk, struct sk_buff *skb"),
        ("ip_forward", "struct sk_buff *skb"),
        ("tcp_v4_rcv", "struct sk_buff *skb"),
        ("udp_rcv", "struct sk_buff *skb"),
        ("vrf_l3_rcv", "struct net_device *vrf_dev, struct sk_buff *skb, u16 proto"),
        (
            "vrf_l3_out",
            "struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb, u16 proto",
        ),
        ("vrf_output", "struct net *net, struct sock *sk, struct sk_buff *skb"),
        ("vrf_xmit", "struct sk_buff *skb, struct net_device *dev"),
        (
            "vrf_local_xmit",
            "struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst",
        ),
        (
            "ip_route_input_noref",
            "struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev",
        ),
        ("ipv6_rcv", "struct sk_buff *skb"),
        ("ip6_input", "struct sk_buff *skb"),
        ("ip6_output", "struct net *net, struct sock *sk, struct sk_buff *skb"),
        ("ip6_local_out", "struct net *net, struct sock *sk, struct sk_buff *skb"),
    )
]
# Jinja2 template that emits one kprobe handler per entry of ``events``. Each
# handler stores the entry's zero-based position as the record's first field
# (``event_id``) and forwards ``skb`` to ``trace_body``.
PROBE = "\n".join(
    [
        "",
        "{% for e in events %}",
        'void kprobe__{{ e["name"] }}( {{ ",".join(["struct pt_regs *ctx"] + e["args"]) }} ) {',
        "struct event_data t = { {{ loop.index - 1 }} };",
        "trace_body(ctx, skb, &t);",
        "}",
        "{% endfor %}",
        "",
    ]
)
@click.command()
@click.option("--protocol", default="any")
@click.option("--saddr4", default="any")
@click.option("--daddr4", default="any")
@click.option("--saddr6", default="any")
@click.option("--daddr6", default="any")
@click.option("--sport", default="any")
@click.option("--dport", default="any")
def main(protocol, saddr4, daddr4, saddr6, daddr6, sport, dport):
    """Trace which kernel L3 functions the packets of each flow pass through.

    Every option defaults to "any", which disables that filter. PROTOCOL is a
    name from /etc/protocols, the address options take textual IPv4/IPv6
    addresses, and the port options take decimal port numbers.
    """

    def inet_addr4(addr):
        """Return *addr* as the integer the probe sees when it loads the header field."""
        if addr == "any":
            return 0
        # The header stores the address big-endian and the probe compares it as
        # a native 32-bit load, so swap according to the host's byte order.
        return socket.htonl(int(ipaddress.IPv4Address(addr)))

    def inet_addr6(addr):
        """Return *addr* as 16 comma-separated C byte literals."""
        if addr == "any":
            return ",".join(["0"] * 16)
        # hex() yields a proper hexadecimal literal for each byte (e.g. 0xff).
        return ",".join(hex(octet) for octet in ipaddress.IPv6Address(addr).packed)

    if protocol == "any":
        protocol_id = 0
    else:
        try:
            protocol_id = PROTO_TO_ID[protocol]
        except KeyError:
            raise click.BadParameter(
                f"unknown protocol {protocol!r}", param_hint="--protocol"
            ) from None

    conds = {
        "protocol": protocol_id,
        "saddr4": inet_addr4(saddr4),
        "daddr4": inet_addr4(daddr4),
        "saddr6": inet_addr6(saddr6),
        "daddr6": inet_addr6(daddr6),
        # Ports are handed to the template as plain host-order numbers.
        "sport": 0 if sport == "any" else int(sport),
        "dport": 0 if dport == "any" else int(dport),
    }

    preamble = Template(PREAMBLE).render(**conds)
    probes = Template(PROBE).render(events=EVENTS)
    text = preamble + probes

    # Dump the generated program so it can be inspected.
    print(text)

    b = BPF(text=text)
    events = b["events"]

    # Flow -> names of the probed functions it was seen in, in first-seen order.
    flows = {}

    def get_event(cpu, data, size):
        """Perf-buffer callback: decode one record and file it under its flow."""
        event = cast(data, POINTER(EventData)).contents
        event_name = EVENTS[event.event_id]["name"]

        if event.l3_protocol == 0x0008:
            saddr = ipaddress.IPv4Address(socket.ntohl(event.addrs.v4.saddr))
            daddr = ipaddress.IPv4Address(socket.ntohl(event.addrs.v4.daddr))
        elif event.l3_protocol == 0xdd86:
            saddr = ipaddress.IPv6Address(bytes(event.addrs.v6.saddr))
            daddr = ipaddress.IPv6Address(bytes(event.addrs.v6.daddr))
        else:
            print(f"Unsupported l3 protocol {event.l3_protocol}")
            return

        # Fall back to the bare number when /etc/protocols has no name for it,
        # rather than raising inside the callback.
        l4_number = str(event.l4_protocol)
        flow = Flow(
            protocol=ID_TO_PROTO.get(l4_number, l4_number),
            saddr=str(saddr),
            daddr=str(daddr),
            sport=socket.ntohs(event.sport),
            dport=socket.ntohs(event.dport),
        )

        seen = flows.setdefault(flow, [])
        if event_name not in seen:
            seen.append(event_name)

    events.open_perf_buffer(get_event)

    print("Trace ready!")

    while True:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            # Ctrl-C ends the trace.
            break

        # Re-print everything collected so far after each poll.
        for f, e in flows.items():
            print(f"{f.protocol} {f.saddr}:{f.sport} -> {f.daddr}:{f.dport} {e}")
# Start the command-line interface only when this file is run as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment