Skip to content

Instantly share code, notes, and snippets.

@wader
Last active March 14, 2018 00:10
Show Gist options
  • Save wader/4c2d1dca671130b76f0ce912dfd9e148 to your computer and use it in GitHub Desktop.
Save wader/4c2d1dca671130b76f0ce912dfd9e148 to your computer and use it in GitHub Desktop.
disable syscalls with seccomp
// Usage: ./disable-syscalls socket fork vfork clone -- /usr/bin/curl http://test.com
package main
import (
"log"
"os"
"syscall"
"unsafe"
)
// syscallNames maps Linux syscall names to their syscall numbers, as listed
// in the x86 64-bit syscall table:
// https://github.com/torvalds/linux/blob/master/arch/x86/entry/syscalls/syscall_64.tbl
// The entries were produced from that table with the regexp substitution
// (\d+)\s+(\S+)\s+(\S+).* -> "$3":$1,
// Names given on the command line are looked up in this map.
var syscallNames = map[string]uint32{
"read": 0,
"write": 1,
"open": 2,
"close": 3,
"stat": 4,
"fstat": 5,
"lstat": 6,
"poll": 7,
"lseek": 8,
"mmap": 9,
"mprotect": 10,
"munmap": 11,
"brk": 12,
"rt_sigaction": 13,
"rt_sigprocmask": 14,
"rt_sigreturn": 15,
"ioctl": 16,
"pread64": 17,
"pwrite64": 18,
"readv": 19,
"writev": 20,
"access": 21,
"pipe": 22,
"select": 23,
"sched_yield": 24,
"mremap": 25,
"msync": 26,
"mincore": 27,
"madvise": 28,
"shmget": 29,
"shmat": 30,
"shmctl": 31,
"dup": 32,
"dup2": 33,
"pause": 34,
"nanosleep": 35,
"getitimer": 36,
"alarm": 37,
"setitimer": 38,
"getpid": 39,
"sendfile": 40,
"socket": 41,
"connect": 42,
"accept": 43,
"sendto": 44,
"recvfrom": 45,
"sendmsg": 46,
"recvmsg": 47,
"shutdown": 48,
"bind": 49,
"listen": 50,
"getsockname": 51,
"getpeername": 52,
"socketpair": 53,
"setsockopt": 54,
"getsockopt": 55,
"clone": 56,
"fork": 57,
"vfork": 58,
"execve": 59,
"exit": 60,
"wait4": 61,
"kill": 62,
"uname": 63,
"semget": 64,
"semop": 65,
"semctl": 66,
"shmdt": 67,
"msgget": 68,
"msgsnd": 69,
"msgrcv": 70,
"msgctl": 71,
"fcntl": 72,
"flock": 73,
"fsync": 74,
"fdatasync": 75,
"truncate": 76,
"ftruncate": 77,
"getdents": 78,
"getcwd": 79,
"chdir": 80,
"fchdir": 81,
"rename": 82,
"mkdir": 83,
"rmdir": 84,
"creat": 85,
"link": 86,
"unlink": 87,
"symlink": 88,
"readlink": 89,
"chmod": 90,
"fchmod": 91,
"chown": 92,
"fchown": 93,
"lchown": 94,
"umask": 95,
"gettimeofday": 96,
"getrlimit": 97,
"getrusage": 98,
"sysinfo": 99,
"times": 100,
"ptrace": 101,
"getuid": 102,
"syslog": 103,
"getgid": 104,
"setuid": 105,
"setgid": 106,
"geteuid": 107,
"getegid": 108,
"setpgid": 109,
"getppid": 110,
"getpgrp": 111,
"setsid": 112,
"setreuid": 113,
"setregid": 114,
"getgroups": 115,
"setgroups": 116,
"setresuid": 117,
"getresuid": 118,
"setresgid": 119,
"getresgid": 120,
"getpgid": 121,
"setfsuid": 122,
"setfsgid": 123,
"getsid": 124,
"capget": 125,
"capset": 126,
"rt_sigpending": 127,
"rt_sigtimedwait": 128,
"rt_sigqueueinfo": 129,
"rt_sigsuspend": 130,
"sigaltstack": 131,
"utime": 132,
"mknod": 133,
"uselib": 134,
"personality": 135,
"ustat": 136,
"statfs": 137,
"fstatfs": 138,
"sysfs": 139,
"getpriority": 140,
"setpriority": 141,
"sched_setparam": 142,
"sched_getparam": 143,
"sched_setscheduler": 144,
"sched_getscheduler": 145,
"sched_get_priority_max": 146,
"sched_get_priority_min": 147,
"sched_rr_get_interval": 148,
"mlock": 149,
"munlock": 150,
"mlockall": 151,
"munlockall": 152,
"vhangup": 153,
"modify_ldt": 154,
"pivot_root": 155,
"_sysctl": 156,
"prctl": 157,
"arch_prctl": 158,
"adjtimex": 159,
"setrlimit": 160,
"chroot": 161,
"sync": 162,
"acct": 163,
"settimeofday": 164,
"mount": 165,
"umount2": 166,
"swapon": 167,
"swapoff": 168,
"reboot": 169,
"sethostname": 170,
"setdomainname": 171,
"iopl": 172,
"ioperm": 173,
"create_module": 174,
"init_module": 175,
"delete_module": 176,
"get_kernel_syms": 177,
"query_module": 178,
"quotactl": 179,
"nfsservctl": 180,
"getpmsg": 181,
"putpmsg": 182,
"afs_syscall": 183,
"tuxcall": 184,
"security": 185,
"gettid": 186,
"readahead": 187,
"setxattr": 188,
"lsetxattr": 189,
"fsetxattr": 190,
"getxattr": 191,
"lgetxattr": 192,
"fgetxattr": 193,
"listxattr": 194,
"llistxattr": 195,
"flistxattr": 196,
"removexattr": 197,
"lremovexattr": 198,
"fremovexattr": 199,
"tkill": 200,
"time": 201,
"futex": 202,
"sched_setaffinity": 203,
"sched_getaffinity": 204,
"set_thread_area": 205,
"io_setup": 206,
"io_destroy": 207,
"io_getevents": 208,
"io_submit": 209,
"io_cancel": 210,
"get_thread_area": 211,
"lookup_dcookie": 212,
"epoll_create": 213,
"epoll_ctl_old": 214,
"epoll_wait_old": 215,
"remap_file_pages": 216,
"getdents64": 217,
"set_tid_address": 218,
"restart_syscall": 219,
"semtimedop": 220,
"fadvise64": 221,
"timer_create": 222,
"timer_settime": 223,
"timer_gettime": 224,
"timer_getoverrun": 225,
"timer_delete": 226,
"clock_settime": 227,
"clock_gettime": 228,
"clock_getres": 229,
"clock_nanosleep": 230,
"exit_group": 231,
"epoll_wait": 232,
"epoll_ctl": 233,
"tgkill": 234,
"utimes": 235,
"vserver": 236,
"mbind": 237,
"set_mempolicy": 238,
"get_mempolicy": 239,
"mq_open": 240,
"mq_unlink": 241,
"mq_timedsend": 242,
"mq_timedreceive": 243,
"mq_notify": 244,
"mq_getsetattr": 245,
"kexec_load": 246,
"waitid": 247,
"add_key": 248,
"request_key": 249,
"keyctl": 250,
"ioprio_set": 251,
"ioprio_get": 252,
"inotify_init": 253,
"inotify_add_watch": 254,
"inotify_rm_watch": 255,
"migrate_pages": 256,
"openat": 257,
"mkdirat": 258,
"mknodat": 259,
"fchownat": 260,
"futimesat": 261,
"newfstatat": 262,
"unlinkat": 263,
"renameat": 264,
"linkat": 265,
"symlinkat": 266,
"readlinkat": 267,
"fchmodat": 268,
"faccessat": 269,
"pselect6": 270,
"ppoll": 271,
"unshare": 272,
"set_robust_list": 273,
"get_robust_list": 274,
"splice": 275,
"tee": 276,
"sync_file_range": 277,
"vmsplice": 278,
"move_pages": 279,
"utimensat": 280,
"epoll_pwait": 281,
"signalfd": 282,
"timerfd_create": 283,
"eventfd": 284,
"fallocate": 285,
"timerfd_settime": 286,
"timerfd_gettime": 287,
"accept4": 288,
"signalfd4": 289,
"eventfd2": 290,
"epoll_create1": 291,
"dup3": 292,
"pipe2": 293,
"inotify_init1": 294,
"preadv": 295,
"pwritev": 296,
"rt_tgsigqueueinfo": 297,
"perf_event_open": 298,
"recvmmsg": 299,
"fanotify_init": 300,
"fanotify_mark": 301,
"prlimit64": 302,
"name_to_handle_at": 303,
"open_by_handle_at": 304,
"clock_adjtime": 305,
"syncfs": 306,
"sendmmsg": 307,
"setns": 308,
"getcpu": 309,
"process_vm_readv": 310,
"process_vm_writev": 311,
"kcmp": 312,
"finit_module": 313,
"sched_setattr": 314,
"sched_getattr": 315,
"renameat2": 316,
"seccomp": 317,
"getrandom": 318,
"memfd_create": 319,
"kexec_file_load": 320,
"bpf": 321,
"execveat": 322,
"userfaultfd": 323,
"membarrier": 324,
"mlock2": 325,
"copy_file_range": 326,
"preadv2": 327,
"pwritev2": 328,
"pkey_mprotect": 329,
"pkey_alloc": 330,
"pkey_free": 331,
"statx": 332,
// TODO: x32 compat. The entries below (numbers 512-547) are kept commented
// out: their names repeat keys already present above (e.g. "rt_sigaction"),
// and duplicate keys in a map literal do not compile, so supporting them
// needs a different representation than simply uncommenting the lines.
// "rt_sigaction": 512,
// "rt_sigreturn": 513,
// "ioctl": 514,
// "readv": 515,
// "writev": 516,
// "recvfrom": 517,
// "sendmsg": 518,
// "recvmsg": 519,
// "execve": 520,
// "ptrace": 521,
// "rt_sigpending": 522,
// "rt_sigtimedwait": 523,
// "rt_sigqueueinfo": 524,
// "sigaltstack": 525,
// "timer_create": 526,
// "mq_notify": 527,
// "kexec_load": 528,
// "waitid": 529,
// "set_robust_list": 530,
// "get_robust_list": 531,
// "vmsplice": 532,
// "move_pages": 533,
// "preadv": 534,
// "pwritev": 535,
// "rt_tgsigqueueinfo": 536,
// "recvmmsg": 537,
// "sendmmsg": 538,
// "process_vm_readv": 539,
// "process_vm_writev": 540,
// "setsockopt": 541,
// "getsockopt": 542,
// "io_setup": 543,
// "io_submit": 544,
// "execveat": 545,
// "preadv2": 546,
// "pwritev2": 547,
}
// seccompRetErrno installs a seccomp BPF filter that makes every syscall
// number in syscalls fail with errno retErrno and allows every other syscall.
//
// The syscall numbers are interpreted as native x86-64 numbers. Because the
// same number means a different syscall under another ABI, the filter kills
// the process when a syscall is made through a foreign architecture (e.g.
// i386 via int 0x80) or through the x32 ABI; otherwise the block list could
// be bypassed by switching ABI.
//
// no_new_privs is set first so that the filter can be installed without
// root. Only the low 16 bits of retErrno are used, which is the width of the
// data field of a seccomp return value.
//
// It returns 0 on success, or the errno reported by the failing prctl or
// seccomp syscall. syscall.EINVAL is returned, before any process state is
// changed, if the generated program would exceed the kernel's BPF
// instruction limit.
func seccompRetErrno(syscalls []uint32, retErrno uint32) syscall.Errno {
	// Mirrors struct sock_filter from
	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/filter.h
	type sockFilter struct {
		code uint16
		jt   uint8
		jf   uint8
		k    uint32
	}
	// Mirrors struct sock_fprog from the same header. The second field must
	// be a plain pointer to the first instruction: the kernel knows nothing
	// about Go slice headers.
	type sockFprog struct {
		len  uint16
		filt *sockFilter
	}

	const (
		// https://github.com/torvalds/linux/blob/master/include/uapi/linux/seccomp.h
		seccompSetModeFilter          = 0x1        // set bpf filter mode
		seccompFilterFlagTsync        = 0x1        // same filter for all threads
		retKillProcess         uint32 = 0x80000000 // kill the whole process
		retErrnoAction         uint32 = 0x00050000 // return errno value
		retAllow               uint32 = 0x7fff0000 // allow syscall
		retDataMask            uint32 = 0x0000ffff // data bits of a return value

		// https://github.com/torvalds/linux/blob/master/include/uapi/linux/audit.h
		auditArchX8664 uint32 = 0xc000003e
		// Bit set in the syscall number of every x32 ABI syscall.
		x32SyscallBit uint32 = 0x40000000

		// Offsets into struct seccomp_data (see below).
		seccompDataNrOffset   = 0
		seccompDataArchOffset = 4

		// Maximum number of instructions the kernel accepts in one program
		// (BPF_MAXINSNS).
		bpfMaxInsns = 4096

		// syscall number for seccomp on amd64
		// https://github.com/torvalds/linux/tree/master/arch/x86/entry/syscalls
		// for some reason not defined in syscall package
		sysSeccomp = 317
		// for some reason not defined for some archs in syscall package
		// https://github.com/torvalds/linux/blob/master/include/uapi/linux/prctl.h
		prSetNoNewPrivs = 38

		// Instructions emitted before the per-syscall checks.
		prologueLen = 6
	)

	/*
		BPF runs on this data:
		struct seccomp_data {
			int nr;
			__u32 arch;
			__u64 instruction_pointer;
			__u64 args[6];
		};
	*/

	// generate BPF program:
	//   load arch
	//   if arch != x86-64
	//     return KILL
	//   load syscall
	//   if syscall has the x32 bit set
	//     return KILL
	//   if s[0]
	//     return retErrno
	//   if s[1]
	//     return retErrno
	//   ...
	//   return ALLOW
	progLen := prologueLen + 2*len(syscalls) + 1
	if progLen > bpfMaxInsns {
		// Reject up front: the length would otherwise be truncated to 16
		// bits, and no_new_privs would already have been set irreversibly.
		return syscall.EINVAL
	}

	filt := make([]sockFilter, 0, progLen)
	filt = append(filt,
		sockFilter{syscall.BPF_LD + syscall.BPF_W + syscall.BPF_ABS, 0, 0, seccompDataArchOffset},
		sockFilter{syscall.BPF_JMP + syscall.BPF_JEQ + syscall.BPF_K, 1, 0, auditArchX8664}, // if eq skip the kill
		sockFilter{syscall.BPF_RET + syscall.BPF_K, 0, 0, retKillProcess},
		sockFilter{syscall.BPF_LD + syscall.BPF_W + syscall.BPF_ABS, 0, 0, seccompDataNrOffset},
		sockFilter{syscall.BPF_JMP + syscall.BPF_JGE + syscall.BPF_K, 0, 1, x32SyscallBit}, // if below skip the kill
		sockFilter{syscall.BPF_RET + syscall.BPF_K, 0, 0, retKillProcess},
	)

	// Keep the errno inside the data bits so it cannot alter the action.
	deny := retErrnoAction | (retErrno & retDataMask)
	for _, s := range syscalls {
		filt = append(filt,
			sockFilter{syscall.BPF_JMP + syscall.BPF_JEQ + syscall.BPF_K, 0, 1, s}, // if eq pc+0 else pc+1
			sockFilter{syscall.BPF_RET + syscall.BPF_K, 0, 0, deny},
		)
	}
	filt = append(filt,
		sockFilter{syscall.BPF_RET + syscall.BPF_K, 0, 0, retAllow},
	)

	// set no new privs so that we can install seccomp filter as non-root
	// https://github.com/torvalds/linux/blob/master/Documentation/userspace-api/no_new_privs.rst
	if _, _, prctlErrno := syscall.Syscall(
		uintptr(syscall.SYS_PRCTL),
		uintptr(prSetNoNewPrivs),
		uintptr(1),
		uintptr(0),
	); prctlErrno != 0 {
		return prctlErrno
	}

	// filt always holds at least the prologue and the final ALLOW, so
	// taking the address of its first element is safe.
	prog := sockFprog{
		len:  uint16(len(filt)),
		filt: &filt[0],
	}
	_, _, seccompErrno := syscall.Syscall(
		sysSeccomp,
		uintptr(seccompSetModeFilter),
		uintptr(seccompFilterFlagTsync),
		uintptr(unsafe.Pointer(&prog)),
	)
	return seccompErrno
}
// main parses a command line of the form
//
//	<syscall name>... -- <command> [args...]
//
// resolves each name before "--" through syscallNames, installs a seccomp
// filter that makes those syscalls fail with ENOSYS, and then replaces the
// current process with the command. Any failure is logged and exits.
func main() {
	log.SetFlags(0)

	args := os.Args[1:]
	var (
		command []string
		blocked []uint32
	)
	for idx := 0; idx < len(args); idx++ {
		name := args[idx]
		if name == "--" {
			// Everything after the separator is the command to exec.
			command = args[idx+1:]
			break
		}
		number, known := syscallNames[name]
		if !known {
			log.Fatalf("unknown syscall %s", name)
		}
		blocked = append(blocked, number)
	}

	if len(command) == 0 {
		log.Fatal("no command to run")
	}

	errno := seccompRetErrno(blocked, uint32(syscall.ENOSYS))
	if errno != 0 {
		log.Fatalf("seccomp call failed errno=%d", errno)
	}

	// Exec only returns on failure.
	execErr := syscall.Exec(command[0], command, os.Environ())
	log.Fatal(execErr)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment