Created
December 8, 2014 17:16
-
-
Save tuxology/68fbd813b6eb84fb9766 to your computer and use it in GitHub Desktop.
LTTng sched_switch eBPF filter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* addons/lttng-sched-filter.c | |
* | |
* A filtered version of sched_switch | |
* | |
* Copyright (C) 2014 Suchakra Sharma <[email protected]> | |
* | |
* This library is free software; you can redistribute it and/or | |
* modify it under the terms of the GNU Lesser General Public | |
* License as published by the Free Software Foundation; only | |
* version 2.1 of the License. | |
* | |
* This library is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
* Lesser General Public License for more details. | |
* | |
* You should have received a copy of the GNU Lesser General Public | |
* License along with this library; if not, write to the Free Software | |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
*/ | |
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/uaccess.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <trace/bpf_trace.h>
#include <asm/syscall.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <uapi/linux/time.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
//#include <linux/kallsyms.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>
#include <linux/version.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
#include <linux/sched/rt.h>
#endif
#include "../wrapper/tracepoint.h"
#include "../instrumentation/events/lttng-module/addons.h"
/*
 * Build-time selection of the filtering strategy used by the probe handler.
 * The handler tests these in the order NOFILT, SIMPLE, BPF, so the first
 * non-zero one in that order wins:
 *   NOFILT - forward every sched_switch event unfiltered
 *   SIMPLE - filter with a plain C comparison
 *   BPF    - filter by running the eBPF program
 */
#define BPF 1
#define SIMPLE 0
#define NOFILT 0

/* Instantiate the tracepoint that carries the filtered events. */
DEFINE_TRACE(sched_switch_filter);
/* Procfs stuff */ | |
#define MAX_LEN 16000000 | |
static struct proc_dir_entry *proc_entry; | |
static char *accum_time; | |
u64 len = 0; | |
static int ebpf_proc_show(struct seq_file *m, void *v) { | |
seq_printf(m, accum_time); | |
return 0; | |
} | |
static int ebpf_proc_open(struct inode *inode, struct file *file) { | |
return single_open(file, ebpf_proc_show, NULL); | |
} | |
static const struct file_operations ebpf_proc_fops = { | |
.owner = THIS_MODULE, | |
.open = ebpf_proc_open, | |
.read = seq_read, | |
.llseek = seq_lseek, | |
.release = single_release, | |
}; | |
/* Timing stuff */ | |
atomic_t count = ATOMIC_INIT(0); | |
/* Global definitions */ | |
struct bpf_prog *prog; | |
/* The actual eBPF prog instructions */ | |
static struct bpf_insn insn_prog[] = { | |
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0), /* r2 = bctx (which is therefore arg1, and thus, prev->comm) */ | |
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0), /* r3 = *(prev->comm) */ | |
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 8), /* r4 = comm */ | |
BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), /* r4 = which is "sshd" */ | |
BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_3, 3), | |
BPF_LD_IMM64(BPF_REG_0, 0), /* FALSE */ | |
BPF_EXIT_INSN(), | |
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 16), /* r3 = *(prev->state) */ | |
BPF_LD_IMM64(BPF_REG_4, 0), /* r4 = 0 */ | |
BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_4, 3), | |
BPF_LD_IMM64(BPF_REG_0, 0), /* FALSE */ | |
BPF_EXIT_INSN(), | |
BPF_LD_IMM64(BPF_REG_0, 1), /* TRUE */ | |
BPF_EXIT_INSN(), | |
}; | |
/*
 * Turn a 64-bit integer back into a pointer, going through unsigned long so
 * the integer is narrowed to pointer width before the pointer cast.
 */
static void *u64_to_ptr(__u64 val)
{
	unsigned long addr = (unsigned long) val;

	return (void *) addr;
}
static __u64 ptr_to_u64(void *ptr){ | |
return (__u64) (unsigned long) ptr; | |
} | |
void bpf_map_free_deferred(struct work_struct *work) | |
{ | |
struct bpf_map *map = container_of(work, struct bpf_map, work); | |
/* implementation dependent freeing */ | |
map->ops->map_free(map); | |
} | |
void bpf_map_put(struct bpf_map *map) | |
{ | |
if (atomic_dec_and_test(&map->refcnt)) { | |
INIT_WORK(&map->work, bpf_map_free_deferred); | |
schedule_work(&map->work); | |
} | |
} | |
static void free_used_maps(struct bpf_prog_aux *aux) | |
{ | |
int i; | |
for (i = 0; i < aux->used_map_cnt; i++) | |
bpf_map_put(aux->used_maps[i]); | |
kfree(aux->used_maps); | |
} | |
unsigned int run_bpf_filter(struct bpf_prog *prog1, struct bpf_context *ctx){ | |
rcu_read_lock(); | |
u64 ret = BPF_PROG_RUN(prog1, (void*) ctx); | |
rcu_read_unlock(); | |
return ret; | |
} | |
/* Inititlize and prepare the eBPF prog */ | |
unsigned int init_ebpf_prog(void) | |
{ | |
int ret = 0; | |
char bpf_log_buf[1024]; | |
unsigned int insn_count = sizeof(insn_prog) / sizeof(struct bpf_insn); | |
union bpf_attr attr = { | |
.prog_type = BPF_PROG_TYPE_UNSPEC, | |
.insns = ptr_to_u64((void*) insn_prog), | |
.insn_cnt = insn_count, | |
.license = ptr_to_u64((void *) "GPL"), | |
.log_buf = ptr_to_u64(bpf_log_buf), | |
.log_size = 1024, | |
.log_level = 1, | |
}; | |
prog = bpf_prog_alloc(bpf_prog_size(attr.insn_cnt), GFP_USER); | |
if (!prog) | |
return -ENOMEM; | |
prog->jited = false; | |
prog->orig_prog = NULL; | |
prog->len = attr.insn_cnt; | |
if (memcpy(prog->insnsi, u64_to_ptr(attr.insns), prog->len * sizeof(struct bpf_insn)) != 0) | |
atomic_set(&prog->aux->refcnt, 1); | |
prog->aux->is_gpl_compatible = true; | |
/* TODO eBPF verifier */ | |
// char *sym_name = "bpf_check"; | |
// unsigned long sym_addr = kallsyms_lookup_name(sym_name); | |
// int (*bpf_check)(struct bpf_prog*, union bpf_attr*) = | |
// (int (*)(struct bpf_prog*, union bpf_attr*) ) sym_addr; | |
// ret = bpf_check(prog, &attr); | |
/* ready for JIT */ | |
bpf_prog_select_runtime(prog); | |
printk("prog jited? : %d\n", prog->jited); | |
return 0; | |
} | |
unsigned int filter_dev_probe_handler(void* __data, struct rq *rq, struct task_struct *prev, struct task_struct *next) | |
{ | |
struct timespec begin, end, diff; | |
char comm[8] = {}; | |
strcpy(comm, "sshd"); | |
char pcomm[8] = {}; | |
strcpy(pcomm, prev->comm); | |
struct bpf_context bctx = {}; | |
bctx.arg1 = (u64) pcomm; | |
bctx.arg2 = (u64) comm; | |
bctx.arg3 = (u64) prev->state; | |
/* tick */ | |
getrawmonotonic(&begin); | |
#if (NOFILT) | |
trace_sched_switch_filter(prev, next); | |
#elif (SIMPLE) | |
if ((memcmp(prev->comm, comm, 4) == 0) && (prev->state == 0)) | |
{ | |
trace_sched_switch_filter(prev, next); | |
} | |
#elif (BPF) | |
unsigned int ret = 0; | |
ret = run_bpf_filter(prog, &bctx); | |
if (ret == 1){ | |
trace_sched_switch_filter(prev, next); | |
} | |
#endif | |
/* tock */ | |
getrawmonotonic(&end); | |
diff = timespec_sub(end, begin); | |
atomic_inc(&count); | |
sprintf(accum_time + strlen(accum_time), "%d\t%lu\n", atomic_read(&count), diff.tv_nsec); | |
return 0; | |
} | |
/*
 * Module init: prepares the eBPF program (BPF mode only), allocates the
 * timing log, creates /proc/eBPFsched and registers the sched_switch probe.
 *
 * Every step is checked, and a failure unwinds whatever was set up before
 * it, so the probe is never registered without a valid log buffer and
 * nothing is leaked when loading fails.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init sched_switch_filter_init(void)
{
	int ret = 0;

#if (SIMPLE)
	printk("SIMPLE RUN\n");
#elif (BPF)
	printk("BPF RUN\n");
	/* Prepare eBPF prog */
	ret = init_ebpf_prog();
	if (ret)
		return ret;
#endif

	/* Init procfs entry; vzalloc() returns zeroed memory */
	accum_time = vzalloc(MAX_LEN);
	if (!accum_time) {
		ret = -ENOMEM;
		goto err_prog;
	}

	proc_entry = proc_create("eBPFsched", 0, NULL, &ebpf_proc_fops);
	if (proc_entry == NULL) {
		printk(KERN_INFO "eBPFsched could not be created\n");
		ret = -ENOMEM;
		goto err_buf;
	}
	printk(KERN_INFO "eBPFsched created.\n");

	(void) wrapper_lttng_fixup_sig(THIS_MODULE);
	ret = lttng_wrapper_tracepoint_probe_register("sched_switch",
			filter_dev_probe_handler, NULL);
	if (ret)
		goto err_proc;

	printk("sched_switch_filter loaded\n");
	return 0;

err_proc:
	remove_proc_entry("eBPFsched", NULL);
err_buf:
	vfree(accum_time);
	accum_time = NULL;
err_prog:
#if (BPF)
	/* prog is only set when init_ebpf_prog() ran and succeeded */
	if (prog) {
		free_used_maps(prog->aux);
		bpf_prog_free(prog);
		prog = NULL;
	}
#endif
	return ret;
}
/*
 * Module exit: tears everything down in the reverse order of init.
 *
 * The probe is unregistered first, and tracepoint_synchronize_unregister()
 * waits for any handler still running, so that the handler can no longer
 * touch the log buffer or the eBPF program once they are freed.
 */
static void __exit sched_switch_filter_exit(void)
{
	int ret;

	ret = lttng_wrapper_tracepoint_probe_unregister("sched_switch",
			filter_dev_probe_handler, NULL);
	if (ret)
		printk(KERN_WARNING
		       "sched_switch_filter: probe unregister failed: %d\n",
		       ret);
	tracepoint_synchronize_unregister();

	/* Remove procfs entry */
	remove_proc_entry("eBPFsched", NULL);
	printk(KERN_INFO "eBPFsched removed\n");
	vfree(accum_time);
	accum_time = NULL;

#if (BPF)
	/* prog stays NULL when the eBPF program was never initialized */
	if (prog) {
		free_used_maps(prog->aux);
		printk("Freed maps\n");
		bpf_prog_free(prog);
		printk("Freed bpf prog\n");
		prog = NULL;
	}
#endif

	printk("sched_switch_filter unloaded\n");
}
module_init(sched_switch_filter_init); | |
module_exit(sched_switch_filter_exit); | |
MODULE_LICENSE("GPL and additional rights"); | |
MODULE_AUTHOR("Suchakra Sharma <[email protected]>"); | |
MODULE_DESCRIPTION("LTTng filtered sched_switch"); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment