Created
April 5, 2015 18:31
-
-
Save ilammy/f39b7366f9dd2f15479d to your computer and use it in GitHub Desktop.
Linux kernel system call table hooking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <asm/uaccess.h> | |
#include <linux/init.h> | |
#include <linux/kernel.h> | |
#include <linux/module.h> | |
#include <linux/version.h> | |
#include "locate_sct.h" | |
#include "ttgl.h" | |
MODULE_LICENSE("GPL"); | |
MODULE_AUTHOR("ilammy <[email protected]>"); | |
MODULE_DESCRIPTION("Locates and patches the system call table to hook execve() " | |
"and install additional customizable restrictions on the " | |
"processes that can be launched from userland."); | |
/*
 * Build-time sanity checks. They are deliberately compiled out by the outer
 * `#if 0`; the conditions inside use the macros that Kconfig and
 * <linux/version.h> actually define, so the block is meaningful if it is ever
 * re-enabled.
 */
#if 0
#if !(defined(CONFIG_X86_32) || defined(CONFIG_X86_64))
#  error "Only x86(_64) kernel architectures are supported"
#endif
/* LINUX_VERSION_CODE also encodes the sublevel, so accept the whole 3.2 series. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
#  error "Only Linux kernel 3.2.x is supported"
#endif
#endif
static | |
void hex_dump(unsigned char *bytes, size_t count) | |
{ | |
size_t i; | |
printk(KERN_INFO "Dumping %zu bytes at %p:", count, bytes); | |
for (i = 0; i < count; i++) | |
{ | |
if (i % 16 == 0) | |
{ | |
printk("\n "); | |
} | |
printk("%02X ", bytes[i]); | |
} | |
printk("\n"); | |
} | |
/*
 * Replacement for the execve() system call table entry: logs the requested
 * file name and then forwards the call, unchanged, to sys_execve().
 *
 * The file name is copied from user space only for logging. A failed or
 * truncated copy never prevents the real system call from running.
 *
 * NOTE(review): assumes sys_execve() is visible to this module with the
 * three-argument signature used below -- confirm for the target kernel.
 */
long hijacked_sys_execve(const char __user *filename,
                         const char __user *const __user *argv,
                         const char __user *const __user *envp)
{
    char buffer[256];
    long copied;

    /*
     * strncpy_from_user() returns the string length, the byte limit if no
     * NUL was found within it, or a negative error code on a fault. Leave
     * one byte of room so the result can always be terminated here.
     */
    copied = strncpy_from_user(buffer, filename, sizeof(buffer) - 1);
    if (copied < 0) {
        /* Nothing usable was copied; do not print uninitialised stack. */
        printk(KERN_INFO "hijacked_sys_execve: i see what you did there: "
                         "<unreadable filename>\n");
    } else {
        buffer[copied] = '\0';
        printk(KERN_INFO "hijacked_sys_execve: i see what you did there: %s\n",
               buffer);
    }

    return sys_execve(filename, argv, envp);
}
static | |
int print_sct_1(struct gl_region regions[], size_t region_count, void* arg) | |
{ | |
size_t i; | |
unsigned long* sys_call_table = regions[0].writeable; | |
for (i = 0; i < 256; i++) | |
if ((void*) sys_call_table[i] == (void*) sys_execve) | |
sys_call_table[i] = (unsigned long*) hijacked_sys_execve; | |
return 0; | |
} | |
static | |
int print_sct_2(struct gl_region regions[], size_t region_count, void* arg) | |
{ | |
size_t i; | |
unsigned long* sys_call_table = regions[0].writeable; | |
for (i = 0; i < 256; i++) | |
if ((void*) sys_call_table[i] == (void*) hijacked_sys_execve) | |
sys_call_table[i] = (unsigned long*) sys_execve; | |
return 0; | |
} | |
static | |
int __init afw_init(void) | |
{ | |
struct gl_region sys_call_table; | |
printk(KERN_INFO "init_module()\n"); | |
printk(KERN_INFO "located sys_call_table: %p\n" | |
"located ia32_sys_call_table: %p\n", | |
afw_locate_sys_call_table(), | |
afw_locate_ia32_sys_call_table()); | |
sys_call_table = (struct gl_region) { | |
.source = afw_locate_sys_call_table(), | |
.length = 256 * sizeof(unsigned long) | |
}; | |
afw_do_with_write_permissions(print_sct_1, &sys_call_table, 1, NULL); | |
return 0; | |
} | |
static | |
void __exit afw_exit(void) | |
{ | |
struct gl_region sys_call_table; | |
printk(KERN_INFO "exit_module()\n"); | |
sys_call_table = (struct gl_region) { | |
.source = afw_locate_sys_call_table(), | |
.length = 256 * sizeof(unsigned long) | |
}; | |
afw_do_with_write_permissions(print_sct_2, &sys_call_table, 1, NULL); | |
} | |
module_init(afw_init); | |
module_exit(afw_exit); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "locate_sct.h" | |
#include <asm/desc.h> | |
#include <asm/desc_defs.h> | |
#include <asm/irq_vectors.h> | |
#include <asm/msr.h> | |
#include <asm/msr-index.h> | |
/* | |
** We need to find the address of the sys_call_table. Since 2.6 kernels this | |
** symbol is no longer exported, so there is no easy way to find it. One way | |
** is to use "cat /boot/System.map-`uname -r` | grep sys_call_table". However, | |
** it may be absent or invalid, and just reading files is not fun. So we're | |
** going the brute-force way. Introspection! The system call handler should | |
** be somewhere in memory and it obviously uses sys_call_table, so if we
** analyze its code, we can get the address we want. | |
** | |
** Actually, on 64-bit x86 systems there are two tables: sys_call_table with | |
** 64-bit handlers and ia32_sys_call_table with 32-bit handlers for 32-bit | |
** emulation mode. We need to locate them both. | |
** | |
** Also, Linux has two mechanisms for handling system calls on x86 systems: | |
** the legacy one that uses "int $0x80" to get into ring 0, and more modern | |
** one which uses special "sysenter"/"syscall" instructions for this. Also | |
** there is vsyscall mechanism for fast user-level system calls, but it is | |
** not covered here. | |
** | |
** Legacy mechanism uses interrupt traps to obtain control. It is initialized | |
** by the function trap_init() which can be found in arch/x86/kernel/traps.c. | |
** The interrupt handler for 0x80 always contains a 32-bit handler. | |
** | |
** Modern handler uses model-specific registers (MSRs) to register itself as | |
** a handler for "sysenter"/"syscall" instructions. It is initialized by the | |
** function syscall_init() from arch/x86/kernel/cpu/common.c. Native handler | |
** system_call is loaded to the LSTAR MSR, and IA-32 emulated ones are loaded | |
** to IA32_SYSENTER_EIP and CSTAR MSRs by the function syscall32_cpu_init() | |
** from arch/x86/vdso/vdso32-setup.c. | |
** | |
** So, to sum it up, to get the 32-bit syscall handler we consult the 0x80 | |
** interrupt handler in the interrupt descriptor table. To get the 64-bit | |
** syscall handler we consult the LSTAR MSR. | |
*/ | |
static inline | |
u8* get_32bit_system_call_handler(void) | |
{ | |
struct desc_ptr interrupt_descriptor_table; | |
gate_desc* interrupt_gates; | |
store_idt(&interrupt_descriptor_table); | |
interrupt_gates = (gate_desc*) interrupt_descriptor_table.address; | |
return (u8*) gate_offset(interrupt_gates[IA32_SYSCALL_VECTOR]); | |
} | |
static inline | |
u8* get_64bit_system_call_handler(void) | |
{ | |
u64 system_call_entry; | |
rdmsrl(MSR_LSTAR, system_call_entry); | |
return (u8*) system_call_entry; | |
} | |
/* | |
** Previous functions return pointers to system call handlers. Native system | |
** call handler is named system_call, its 32-bit implementation is located in | |
** arch/x86/kernel/entry_32.S and 64-bit one is in arch/x86/kernel/entry_64.S. | |
** IA-32 emulation handlers are implemented in arch/x86/ia32/ia32entry.S, they | |
** are named ia32_syscall, ia32_sysenter_target, and ia32_cstar_target. | |
** | |
** We are interested in this: | |
** | |
** call *sys_call_table(,%rax,8) | |
** movq %rax,RAX-ARGOFFSET(%rsp) | |
** | |
** or this: | |
** | |
** call *sys_call_table(,%eax,4) | |
** movl %eax,PT_EAX(%esp) | |
** | |
** These snippets do the actual system call and store its return value. | |
** sys_call_table is a 32-bit displacement; on 64-bit kernels it is sign-extended to 64 bits.
** RAX, ARGOFFSET, and PT_EAX are all macros that govern calling conventions, | |
** they expand into small numbers that fit into one byte. | |
** | |
** According to the source, these instructions should be found within the | |
** first 256 bytes of the handlers. We should look for a call instruction | |
** followed by a mov instruction. Opcodes are the following: | |
** | |
** call disp32(,%eax,4) ==> FF 14 85 -- -- -- -- | |
** movl %eax,disp8(%esp) ==> 89 44 24 -- | |
** | |
** call disp32(,%rax,8) ==> FF 14 C5 -- -- -- -- | |
** movq %rax,disp8(%rsp) ==> 48 89 44 24 -- | |
*/ | |
static | |
unsigned long* find_sys_call_table_ref(u8* code) | |
{ | |
size_t i; | |
for (i = 0; i < 256; i++) | |
{ | |
#ifdef CONFIG_X86_64 | |
if (code[i + 0] == 0xFF && code[i + 1] == 0x14 && | |
code[i + 2] == 0xC5 && code[i + 7] == 0x48 && | |
code[i + 8] == 0x89 && code[i + 9] == 0x44 && | |
code[i +10] == 0x24) | |
{ | |
u32 offset = *((u32*) &code[i + 3]); | |
return (unsigned long*) (0xFFFFFFFF00000000 | offset); | |
} | |
#else | |
if (code[i + 0] == 0xFF && code[i + 1] == 0x14 && | |
code[i + 2] == 0x85 && code[i + 7] == 0x89 && | |
code[i + 8] == 0x44 && code[i + 9] == 0x24) | |
{ | |
u32 offset = *((u32*) &code[i + 3]); | |
return (unsigned long*) offset; | |
} | |
#endif | |
} | |
return NULL; | |
} | |
/* | |
** And now, when everything's in place... | |
*/ | |
unsigned long* afw_locate_sys_call_table(void) | |
{ | |
#ifdef CONFIG_X86_64 | |
return find_sys_call_table_ref(get_64bit_system_call_handler()); | |
#else | |
return find_sys_call_table_ref(get_32bit_system_call_handler()); | |
#endif | |
} | |
#ifdef CONFIG_IA32_EMULATION
/*
 * Locate the IA-32 emulation system call table by searching the code of the
 * handler registered for IA32_SYSCALL_VECTOR.
 * Returns whatever find_sys_call_table_ref() returns for that handler.
 */
unsigned long* afw_locate_ia32_sys_call_table(void)
{
    u8 *handler = get_32bit_system_call_handler();

    return find_sys_call_table_ref(handler);
}
#endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef AFW_LOCATE_SCT | |
#define AFW_LOCATE_SCT | |
/** | |
* Locate the native system call table (sys_call_table). | |
* | |
* @return | |
* Returns a pointer to the native system call table, | |
* or `NULL` in case of failure. | |
*/ | |
unsigned long* afw_locate_sys_call_table(void); | |
#ifdef CONFIG_IA32_EMULATION | |
/** | |
* Locate the system call table used for IA-32 emulation (ia32_sys_call_table). | |
* | |
* @return | |
* Returns a pointer to the IA-32 emulation system call table, | |
* or `NULL` in case of failure. | |
*/ | |
unsigned long* afw_locate_ia32_sys_call_table(void); | |
#endif | |
#endif // AFW_LOCATE_SCT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Kbuild part: the afw.ko module is linked from three objects.
obj-m += afw.o
afw-objs := afw_main.o locate_sct.o ttgl.o
ccflags-y := -std=gnu99 -O2

# Kernel build tree to compile against; override with `make KDIR=...`.
KDIR ?= /lib/modules/$(shell uname -r)/build

.PHONY: all clean

# $(MAKE) propagates flags and the jobserver to the sub-make; $(CURDIR) is
# set by make itself and, unlike $(PWD), does not depend on the environment.
all:
	$(MAKE) -C $(KDIR) M=$(CURDIR) modules

clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "ttgl.h"
#include <asm/page.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
/* | |
** Having troubles with read-only memory? Fuck this shit, we're in kernel mode! | |
** To obtain write permissions for a region of kernel memory we can just remap | |
** it somewhere else with necessary permissions. God bless MMU and paging. | |
*/ | |
#define base_of_page(x) ((void*)((unsigned long)(x) & PAGE_MASK)) | |
static | |
int enumerate_pages(void* region, struct page *pages[], size_t page_num) | |
{ | |
size_t i; | |
void* page_addr = base_of_page(region); | |
for (i = 0; i < page_num; i++) { | |
// explain | |
if (__module_address((unsigned long) page_addr)) { | |
pages[i] = vmalloc_to_page(page_addr); | |
} | |
else { | |
pages[i] = virt_to_page(page_addr); | |
WARN_ON(!PageReserved(pages[i])); | |
} | |
if (!pages[i]) | |
return -EFAULT; | |
page_addr += PAGE_SIZE; | |
} | |
return 0; | |
} | |
static | |
void* remap_with_write_permissions(void* region, size_t len) | |
{ | |
void* writeable_region; | |
size_t page_num = DIV_ROUND_UP(offset_in_page(region) + len, PAGE_SIZE); | |
struct page **pages = kmalloc(page_num * sizeof(*pages), GFP_KERNEL); | |
if (!pages) | |
goto err; | |
if (enumerate_pages(region, pages, page_num)) | |
goto err; | |
writeable_region = vmap(pages, page_num, VM_MAP, PAGE_KERNEL); | |
if (!writeable_region) | |
goto err; | |
kfree(pages); | |
return writeable_region + offset_in_page(region); | |
err: | |
kfree(pages); | |
return NULL; | |
} | |
/* | |
** One needs to write the unwritable only for monkey-patching the kernel code, | |
** so it is wise to forbid anybody else to mess with global memory when we're | |
** doing our evil stuff. That's why stop_machine() is used. | |
** | |
** Its interface is not identical to our callback, so we need a thunk to pass | |
** all the arguments we want. | |
*/ | |
struct stop_machine_work { | |
int (*fn)(struct gl_region[], size_t, void*); | |
struct gl_region *regions; | |
size_t region_count; | |
void* args; | |
}; | |
static | |
int stop_machine_thunk(void* arg) | |
{ | |
struct stop_machine_work *work = arg; | |
return work->fn(work->regions, work->region_count, work->args); | |
} | |
int afw_do_with_write_permissions(int (*fn)(struct gl_region[], size_t, void*), | |
struct gl_region regions[], | |
size_t region_count, | |
void* args) | |
{ | |
size_t i; | |
int result = 0; | |
struct stop_machine_work work; | |
if (!fn) | |
return -EINVAL; | |
if (!regions || region_count == 0) | |
return fn(NULL, 0, args); | |
for (i = 0; i < region_count; i++) { | |
regions[i].writeable = | |
remap_with_write_permissions(regions[i].source, | |
regions[i].length); | |
if (!regions[i].writeable) { | |
size_t j; | |
for (j = 0; j < i; j++) | |
vunmap(base_of_page(regions[j].writeable)); | |
return -ENOMEM; | |
} | |
} | |
work = (struct stop_machine_work) { | |
.fn = fn, .regions = regions, | |
.region_count = region_count, | |
.args = args | |
}; | |
/* Stop the machines, prepare to die! */ | |
result = stop_machine(stop_machine_thunk, &work, 0); // 0? | |
for (i = 0; i < region_count; i++) | |
vunmap(base_of_page(regions[i].writeable)); | |
return result; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef AFW_TTGL_H | |
#define AFW_TTGL_H | |
#include <stddef.h> | |
/**
 * Region descriptor for `afw_do_with_write_permissions()`.
 *
 * The caller fills in `source` and `length`. `writeable` is filled in by
 * `afw_do_with_write_permissions()` and is valid inside the `fn` callback
 * (and only there).
 */
struct gl_region {
    void *source;      //!< read-only region of memory
    void *writeable;   //!< writeable mapping of the `source` region
    size_t length;     //!< length of the region in bytes
};
/** | |
* Execute a function with write permissions for specified memory regions. | |
* | |
* If `regions == NULL` or `region_count == 0` then the call is equivalent to | |
* `fn(NULL, 0, args)`. | |
* | |
* `fn` is executed in atomic context, so please no locks inside it. | |
* | |
* @param fn | |
* The function to execute. Receives `regions` with updated `writeable` | |
* fields, `region_count`, and `args`. Must be non-NULL. | |
* | |
* @param regions | |
* Array of `gl_region` structures which describe the read-only regions | |
* that should be made writable for `fn`. | |
* | |
* @param region_count | |
* Size of `regions` (in elements). | |
* | |
* @param args | |
* Additional arguments to `fn`. | |
* | |
* @return | |
* On success returns the value `fn` returned. | |
* `-EINVAL` if `fn == NULL`. | |
* `-ENOMEM` if failed to obtain write permissions. | |
*/ | |
int afw_do_with_write_permissions(int (*fn)(struct gl_region[], size_t, void*), | |
struct gl_region regions[], | |
size_t region_count, | |
void* args); | |
#endif // AFW_TTGL_H |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I used the function find_sys_call_table_ref, but it does not find the sys_call_table address on Linux kernel 4.4.131. Why?