Created
August 10, 2017 12:26
-
-
Save bonzini/523c792d9365112e50a44fa2c81df7fa to your computer and use it in GitHub Desktop.
An LD_PRELOAD library that waits for a given binary to be exec-ed, and forces it under a gdbserver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Copyright (C) 2012-2017 by László Nagy | |
Copyright (C) 2017 Paolo Bonzini | |
This file is based on Bear. | |
It is free software: you can redistribute it and/or modify | |
it under the terms of the GNU General Public License as published by | |
the Free Software Foundation, either version 3 of the License, or | |
(at your option) any later version. | |
This file is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License for more details. | |
You should have received a copy of the GNU General Public License | |
along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
/* Sample use: | |
* $ gcc -g exec-hook.c -ldl -shared -o exec-hook.so -fPIC -O2 | |
* $ stat /bin/ls | grep Inode | |
* Device: fd01h/64769d Inode: 136420948 Links: 1 | |
* $ INTERCEPT_DEV_INODE=64769:136420948 \ | |
* INTERCEPT_SOCKET=localhost:12345 LD_PRELOAD=./exec-hook.so \ | |
* sh -c 'ls -l' | |
* Process /proc/self/fd/5 created; pid = 32055 | |
* Listening on port 12345 | |
* | |
* Now in another terminal: | |
* $ gdb | |
* (gdb) target remote localhost:12345 | |
* (gdb) c | |
* (gdb) quit | |
* | |
* Add ":NN" at the end of INTERCEPT_DEV_INODE's value to trap the | |
* (N+1)-th invocation of the program (that is, skip the first N). | |
* | |
* Linux only, I'm sorry! | |
*/ | |
#define _GNU_SOURCE | |
#include <sys/types.h> | |
#include <sys/eventfd.h> | |
#include <stdlib.h> | |
#include <stdarg.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <limits.h> | |
#include <unistd.h> | |
#include <syscall.h> | |
#include <sys/stat.h> | |
#include <fcntl.h> | |
#include <pthread.h> | |
#include <errno.h> | |
extern char **environ; | |
#define GDBSERVER_DEFAULT "/usr/bin/gdbserver" | |
#define ENV_INTERCEPT "INTERCEPT_DEV_INODE" | |
#define ENV_SOCKET "INTERCEPT_SOCKET" | |
#define ENV_GDBSERVER "INTERCEPT_GDBSERVER" | |
#define ENV_EVENTFD "INTERCEPT_EVENTFD" | |
/* This mutex only protects within a process, i.e. munge_exec against on_load.
 * Cross-process synchronization uses an eventfd.
 */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; | |
static char *env_names[] = { | |
ENV_INTERCEPT, | |
ENV_SOCKET, | |
ENV_GDBSERVER, | |
ENV_EVENTFD, | |
}; | |
#define ENV_SIZE (sizeof(env_names) / sizeof(env_names[0])) | |
static void on_load(void) __attribute__((constructor)); | |
/* Utility functions to deal with string arrays. The environment and the
 * process arguments are both represented as NULL-terminated string arrays. */
/* Count the entries of a NULL-terminated string array.
 * A NULL array pointer is treated as an empty array.
 */
static size_t strings_length(char **in)
{
    size_t count = 0;

    if (in) {
        while (in[count]) {
            count++;
        }
    }
    return count;
}
/* Free every string of a NULL-terminated array, then the array itself.
 * A NULL array is accepted.  errno is saved on entry and restored on exit,
 * so this can be called on an error path without losing the error code.
 */
static void strings_release(char **in)
{
    const int saved_errno = errno;

    if (in) {
        for (size_t i = 0; in[i]; i++) {
            free(in[i]);
        }
    }
    free(in);
    errno = saved_errno;
}
/* Build a freshly allocated, NULL-terminated array holding copies of `arg`
 * and of every following `const char *` variadic argument, up to (but not
 * including) the terminating NULL argument.
 *
 * `args` is passed by pointer so that the caller's va_list is advanced past
 * the consumed arguments.
 *
 * Returns the array (to be freed with strings_release), or NULL when `arg`
 * is NULL or when an allocation fails; on failure everything allocated so
 * far is released.
 */
static char **strings_build(const char *arg, va_list *args)
{
    char **result = NULL;
    size_t size = 0;

    for (const char *it = arg; it; it = va_arg(*args, const char *)) {
        /* Grow through a temporary: on failure `result` is still valid and
         * must be released rather than leaked. */
        char **grown = realloc(result, (size + 2) * sizeof *grown);
        if (!grown) {
            goto undo;
        }
        result = grown;
        /* Keep the array terminated at all times so that the undo path
         * never walks an uninitialised slot. */
        result[size] = NULL;

        char *copy = strdup(it);
        if (!copy) {
            goto undo;
        }
        result[size++] = copy;
        result[size] = NULL;
    }
    return result;

undo:
    strings_release(result);
    return NULL;
}
/* Append copies of every string in `in` to the array `*p_out`.
 *
 * `*p_out` may be NULL (a new array is created) and `in` may be NULL or
 * empty (the result is then still a valid, NULL-terminated array).
 *
 * Returns 0 on success.  On failure returns -1 and undoes the append: the
 * copies made by this call are freed; if `*p_out` was non-NULL it keeps its
 * original contents, otherwise it is left NULL.
 */
static int strings_append_all(char ***p_out, char ** in)
{
    char **out = *p_out;
    size_t size = strings_length(out);
    size_t in_size = strings_length(in);

    char **result = realloc(out, (size + in_size + 1) * sizeof *result);
    if (!result) {
        return -1;
    }
    *p_out = result;
    /* Terminate immediately: required when `in` is empty and when the very
     * first strdup below fails. */
    result[size] = NULL;

    for (size_t i = 0; i < in_size; i++) {
        char *copy = strdup(in[i]);
        if (!copy) {
            goto undo;
        }
        result[size + i] = copy;
        result[size + i + 1] = NULL;
    }
    return 0;

undo:;
    int save_errno = errno;
    /* Free only the copies appended by this call. */
    for (char **it = result + size; *it; ++it) {
        free(*it);
    }
    if (out) {
        /* Truncate back to the original size. */
        result[size] = NULL;
    } else {
        /* There was no array before: hand back none. */
        free(result);
        *p_out = NULL;
    }
    errno = save_errno;
    return -1;
}
/* Return a newly allocated deep copy of the string array `in`,
 * or NULL if strings_append_all reports a failure.
 */
static char **strings_copy(char **in)
{
    char **copy = NULL;

    return strings_append_all(&copy, in) < 0 ? NULL : copy;
}
/* Append the pointer `e` itself (no copy is made) to the array `*p_out`,
 * growing the array by one slot and keeping it NULL-terminated.
 * `*p_out` may be NULL.  Returns 0 on success; returns -1 and leaves
 * `*p_out` untouched when the reallocation fails.
 */
static int strings_append(char ***p_out, char *e)
{
    size_t count = strings_length(*p_out);
    char **grown = realloc(*p_out, (count + 2) * sizeof(char *));

    if (grown == NULL) {
        return -1;
    }
    grown[count] = e;
    grown[count + 1] = NULL;
    *p_out = grown;
    return 0;
}
/* Append a heap-allocated copy of the string `e` to the array `*p_out`.
 * Returns 0 on success, -1 on allocation failure.  On failure nothing is
 * leaked and `*p_out` is left as it was.
 */
static int strings_append_copy(char ***p_out, const char *e)
{
    char *s = strdup(e);
    if (!s) {
        return -1;
    }
    if (strings_append(p_out, s) < 0) {
        /* The array did not take ownership of the copy: free it here,
         * keeping the errno of the failed append. */
        int save_errno = errno;
        free(s);
        errno = save_errno;
        return -1;
    }
    return 0;
}
/* We update the environment to ensure that child processes will inherit
 * the desired behaviour.
 */
static char *initial_env[ENV_SIZE]; | |
static void capture_env(void) | |
{ | |
for (size_t it = 0; it < ENV_SIZE; ++it) { | |
char const *env_value = getenv(env_names[it]); | |
char *env_copy = (env_value) ? strdup(env_value) : NULL; | |
initial_env[it] = env_copy; | |
} | |
} | |
/* Set `key=value` in the heap-allocated environment array `envs`.
 *
 * If an entry for `key` exists it is freed and replaced, otherwise a new
 * entry is appended.  `envs` may be NULL, in which case a new array is
 * created.
 *
 * Returns the (possibly reallocated) array.  On allocation failure the
 * whole array is released and NULL is returned.
 */
static char **restore_environ_var(char *envs[], char *key, char *value)
{
    /* find the key if it's there */
    size_t key_length = strlen(key);
    char **it = envs;
    for (; it && *it; ++it) {
        /* strncmp stops at the entry's terminator, so entries shorter than
         * the key are never read past their end. */
        if (!strncmp(*it, key, key_length) && (*it)[key_length] == '=') {
            break;
        }
    }

    /* allocate an environment entry: key + '=' + value + NUL */
    size_t env_length = key_length + strlen(value) + 2;
    char *env = malloc(env_length);
    if (!env) {
        goto out;
    }
    snprintf(env, env_length, "%s=%s", key, value);

    /* replace or append the environment entry */
    if (it && *it) {
        free(*it);
        *it = env;
        return envs;
    }
    if (strings_append(&envs, env) < 0) {
        /* The array did not take ownership of the new entry. */
        int save_errno = errno;
        free(env);
        errno = save_errno;
        goto out;
    }
    return envs;

out:
    strings_release(envs);
    return NULL;
}
static char **restore_environment(char *const envp[]) | |
{ | |
char **result = strings_copy((char **)envp); | |
for (size_t it = 0; it < ENV_SIZE; ++it) { | |
if (initial_env[it]) { | |
result = restore_environ_var(result, env_names[it], initial_env[it]); | |
if (!result) { | |
break; | |
} | |
} | |
} | |
return result; | |
} | |
/* Invoke the Linux system calls. */ | |
/* Issue the raw execve system call with the hook variables restored in the
 * environment.  Only returns if the exec failed: the result is then always
 * -1, with errno as left by the system call (strings_release preserves it).
 */
static int call_execve(const char *path, char *const argv[],
                       char *const envp[])
{
    char **patched_env = restore_environment(envp);

    syscall(SYS_execve, path, argv, patched_env);

    /* Still here, so the exec did not happen. */
    strings_release(patched_env);
    return -1;
}
static int call_execveat(int dirfd, const char *path, char *const argv[], | |
char *const envp[], int flags) | |
{ | |
char **menvp = restore_environment(envp); | |
if (dirfd == AT_FDCWD && flags == 0) { | |
syscall(SYS_execve, path, argv, menvp); | |
} else { | |
syscall(SYS_execveat, dirfd, path, argv, menvp, flags); | |
} | |
strings_release(menvp); | |
return -1; | |
} | |
/* This is the main part of the hook. */ | |
/* gdbserver communication argument (INTERCEPT_SOCKET); NULL while the hook
 * is inactive. */
static const char *socket;
/* Path of the gdbserver binary to exec. */
static const char *gdbserver;
/* eventfd used for cross-process synchronization. */
static int evfd;
/* Device and inode number of the executable to trap.  Kept as 64-bit
 * unsigned values because inode numbers do not fit in an int on large
 * filesystems. */
static unsigned long long dev;
static unsigned long long ino;
/* Number of matching execs to let through before trapping. */
static int skip = 0;

/* Parse "DEV:INODE" or "DEV:INODE:SKIP" (all decimal) into dev, ino, skip.
 *
 * Returns 0 on success.  Returns -1, leaving the globals untouched, when
 * fewer than two fields can be parsed or when SKIP is negative.
 */
static int intercept_parse_envvar(const char *devino)
{
    unsigned long long parsed_dev = 0;
    unsigned long long parsed_ino = 0;
    int parsed_skip = 0;

    int fields = sscanf(devino, "%llu:%llu:%d",
                        &parsed_dev, &parsed_ino, &parsed_skip);
    if (fields < 2) {
        return -1;
    }
    /* A negative skip count could initialise the eventfd counter to 0, on
     * which every reader would block. */
    if (fields >= 3 && parsed_skip < 0) {
        return -1;
    }

    dev = parsed_dev;
    ino = parsed_ino;
    if (fields >= 3) {
        skip = parsed_skip;
    }
    return 0;
}
/* Ensure that we're the first to actually exec the target. */ | |
static int get_token(void) | |
{ | |
uint64_t val; | |
/* The value we read is the number of execs left until the trapping one, | |
* plus one. The "plus one" is because zero blocks reads to the eventfd. | |
* So 1 means the trapped exec already happened, 2 means it is the next one, | |
* etc. | |
*/ | |
if (read(evfd, &val, 8) < 8) { | |
return 0; | |
} | |
/* Entered critical section, no one else can read the eventfd until | |
* the next write. This critical section is cross-process, unlike | |
* the mutex! | |
*/ | |
if (val > 1) { | |
uint64_t new = val - 1; | |
write(evfd, &new, 8); | |
return val == 2; | |
} else { | |
uint64_t new = 1; | |
write(evfd, &new, 8); | |
return 0; | |
} | |
} | |
static int intercept_init_eventfd(void) | |
{ | |
evfd = eventfd(0, 0); | |
if (evfd == -1) { | |
return -1; | |
} | |
uint64_t value = skip + 2; | |
write(evfd, &value, 8); | |
/* Pass the eventfd file descriptor to children processes */ | |
char c[12]; | |
sprintf(c, "%d", evfd); | |
setenv(ENV_EVENTFD, c, 0); | |
return 0; | |
} | |
/* Reopen an O_PATH file descriptor into a read-only one, by opening its
 * /proc/self/fd entry.  Cannot do openat(fd, "", O_RDONLY) for an O_PATH
 * file descriptor?
 *
 * Returns the new descriptor, or -1 on failure.
 */
static int reopen_path(int fd)
{
    char name[30];
    int len = snprintf(name, sizeof name, "/proc/self/fd/%d", fd);

    /* Bounded formatting: refuse to open a truncated path. */
    if (len < 0 || (size_t)len >= sizeof name) {
        return -1;
    }
    return open(name, O_RDONLY);
}
static int munge_exec(int fd, char* const* argv, char *const* envp, int flags) | |
{ | |
struct stat st; | |
/* Check if we're active. */ | |
if (!socket) { | |
return 0; | |
} | |
/* Check if the file matches the desired executable */ | |
if (fstatat(fd, "", &st, flags | AT_EMPTY_PATH) == -1) { | |
return -1; | |
} | |
if (st.st_dev != dev || st.st_ino != ino) { | |
return 0; | |
} | |
/* Check if it's the right time to start the gdbserver */ | |
if (!get_token()) { | |
return 0; | |
} | |
int new_fd = reopen_path(fd); | |
if (new_fd == -1) { | |
return -1; | |
} | |
/* Build gdbserver command line */ | |
char **new_argv = NULL; | |
if (strings_append_copy(&new_argv, gdbserver) < 0) { | |
return - 1; | |
} | |
if (strings_append_copy(&new_argv, socket) < 0) { | |
return -1; | |
} | |
char name[30]; | |
sprintf(name, "/proc/self/fd/%d", new_fd); | |
if (strings_append_copy(&new_argv, name) < 0) { | |
return -1; | |
} | |
if (strings_append_all(&new_argv, (char **) argv + 1) < 0) { | |
return -1; | |
} | |
call_execve(gdbserver, new_argv, envp); | |
strings_release(new_argv); | |
return -1; | |
} | |
static int my_execveat(int fd, char *const argv[], char *const envp[], | |
int orig_dirfd, const char *orig_path, | |
int orig_flags) | |
{ | |
pthread_mutex_lock(&mutex); | |
int result = munge_exec(fd, argv, envp, orig_flags); | |
if (result) { | |
goto bad; | |
} | |
/* Pass call through. */ | |
call_execveat(fd, "", argv, envp, orig_flags | AT_EMPTY_PATH); | |
if (errno == ENOENT || errno == ENOSYS) { | |
/* When coming from execve, this actually becomes an | |
* execve system call, so that we can run on Linux < 3.19. | |
*/ | |
call_execveat(orig_dirfd, orig_path, argv, envp, orig_flags); | |
} | |
bad:; | |
int save_errno = errno; | |
pthread_mutex_unlock(&mutex); | |
errno = save_errno; | |
return -1; | |
} | |
/* These are the functions we are trying to hijack. */
/* Not yet in glibc, but prepare for the future */ | |
int execveat(int dirfd, const char *path, char *const argv[], char *const envp[], | |
int flags) | |
{ | |
int fd = openat(dirfd, path, O_PATH | O_CLOEXEC); | |
if (fd == -1) { | |
return -1; | |
} | |
my_execveat(fd, argv, envp, dirfd, path, flags); | |
return -1; | |
} | |
static int my_execve(const char *path, char *const argv[], char *const envp[]) | |
{ | |
int fd = open(path, O_PATH | O_CLOEXEC); | |
if (fd == -1) { | |
return -1; | |
} | |
my_execveat(fd, argv, envp, AT_FDCWD, path, 0); | |
return -1; | |
} | |
int fexecve(int fd, char *const argv[], char *const envp[]) | |
{ | |
char name[30]; | |
sprintf(name, "/proc/self/fd/%d", fd); | |
return my_execveat(fd, argv, envp, AT_FDCWD, name, 0); | |
} | |
/* Hijacked execve(): forwards to my_execve.  Only returns on failure. */
int execve(const char *path, char *const argv[], char *const envp[])
{
    int rc = my_execve(path, argv, envp);

    return rc;
}
int execv(const char *path, char *const argv[]) | |
{ | |
return my_execve(path, argv, environ); | |
} | |
int execl(const char *path, const char *arg, ...) | |
{ | |
va_list args; | |
va_start(args, arg); | |
char **argv = strings_build(arg, &args); | |
va_end(args); | |
my_execve(path, argv, environ); | |
strings_release(argv); | |
return -1; | |
} | |
/* Hijacked execle().  The effective prototype is
 *   int execle(const char *path, const char *arg, ..., char * const envp[]);
 * i.e. the environment pointer follows the NULL that terminates the
 * argument list.  Only returns on failure: -1 with errno set.
 */
int execle(const char *path, const char *arg, ...)
{
    va_list args;
    va_start(args, arg);
    char **argv = strings_build(arg, &args);

    /* With a non-NULL first argument, a NULL result means strings_build
     * gave up before reaching the terminating NULL; the next variadic
     * argument would then not be envp, so stop here. */
    if (arg && !argv) {
        va_end(args);
        return -1;
    }

    char *const *envp = va_arg(args, char *const *);
    va_end(args);

    my_execve(path, argv, envp);
    strings_release(argv);
    return -1;
}
/* These are the functions we are trying to hijack, for which we resolve | |
* the PATH ourselves. | |
*/ | |
/* Fallback for files the kernel rejects with ENOEXEC: run them through
 * /bin/sh.  The new argument vector is
 *   argv[0], "--", file, argv[1], argv[2], ...
 * When the caller supplied no argv[0], "/bin/sh" is used in its place.
 * Only returns on failure: -1 with errno set.
 */
static int do_shell(const char *file,
                    char *const argv[], char *const envp[])
{
    char **new_argv = NULL;
    int has_argv0 = (argv && argv[0]);

    if (strings_append_copy(&new_argv, has_argv0 ? argv[0] : "/bin/sh") < 0 ||
        strings_append_copy(&new_argv, "--") < 0 ||
        strings_append_copy(&new_argv, file) < 0) {
        goto out;
    }
    /* Only skip argv[0] when there is one to skip. */
    if (has_argv0 &&
        strings_append_all(&new_argv, (char **) argv + 1) < 0) {
        goto out;
    }

    my_execve("/bin/sh", new_argv, envp);

out:
    /* strings_release keeps errno intact and accepts a partial array. */
    strings_release(new_argv);
    return -1;
}
static int do_execvpe(const char *file, const char *search_path, | |
char *const argv[], char *const envp[]) | |
{ | |
int ret = ENOENT; | |
if (strchr(file, '/') != NULL) { | |
my_execve(file, argv, envp); | |
if (errno == ENOEXEC) { | |
return do_shell(file, argv, envp); | |
} | |
return -1; | |
} | |
if (!search_path) { | |
search_path = getenv("PATH"); | |
if (!search_path) { | |
search_path = "/bin:/usr/bin"; | |
} | |
} | |
int l = strnlen(file, NAME_MAX + 1); | |
if (l > NAME_MAX) { | |
errno = ENAMETOOLONG; | |
return -1; | |
} | |
while (*search_path) { | |
const char *p = search_path; | |
const char *q = strchr(p, ':'); | |
if (!q) { | |
q = p + strlen(p); | |
search_path = q; | |
} else { | |
search_path = q + 1; | |
} | |
int n = q - p; | |
char path[n + l + 2]; | |
memcpy(path, p, n); | |
path[n] = '/'; | |
strcpy(path+n+1, file); | |
my_execve(path, argv, envp); | |
if (errno == ENOEXEC) { | |
return do_shell(path, argv, envp); | |
} else if ((errno == EACCES || errno == ENOTDIR) && ret == ENOENT) { | |
ret = errno; | |
} else if (errno != ENOENT) { | |
break; | |
} | |
} | |
errno = ret; | |
return -1; | |
} | |
/* Hijacked execvpe(): search $PATH for `file`, exec with `envp`. */
int execvpe(const char *file, char *const argv[], char *const envp[])
{
    const char *use_default_path = NULL;

    return do_execvpe(file, use_default_path, argv, envp);
}
int execvp(const char *file, char *const argv[]) | |
{ | |
return do_execvpe(file, NULL, argv, environ); | |
} | |
int execvP(const char *file, const char *search_path, char *const argv[]) | |
{ | |
return do_execvpe(file, search_path, argv, environ); | |
} | |
int execlp(const char *file, const char *arg, ...) | |
{ | |
va_list args; | |
va_start(args, arg); | |
char **argv = strings_build(arg, &args); | |
va_end(args); | |
do_execvpe(file, NULL, argv, environ); | |
strings_release(argv); | |
return -1; | |
} | |
#if 0
/* Currently we cannot do anything about these functions.  The stubs below
 * are compiled out; if enabled they would make every spawn fail with
 * ENOSYS.
 */
int posix_spawn(pid_t *restrict pid, const char *restrict path,
                const posix_spawn_file_actions_t *file_actions,
                const posix_spawnattr_t *restrict attrp,
                char *const argv[restrict], char *const envp[restrict])
{
    (void)pid;
    (void)path;
    (void)file_actions;
    (void)attrp;
    (void)argv;
    (void)envp;
    errno = ENOSYS;
    return -1;
}

int posix_spawnp(pid_t *restrict pid, const char *restrict file,
                 const posix_spawn_file_actions_t *file_actions,
                 const posix_spawnattr_t *restrict attrp,
                 char *const argv[restrict], char *const envp[restrict])
{
    (void)pid;
    (void)file;
    (void)file_actions;
    (void)attrp;
    (void)argv;
    (void)envp;
    errno = ENOSYS;
    return -1;
}
#endif
/* The initialization method. */ | |
static void on_load(void) | |
{ | |
char *devino_var = getenv(ENV_INTERCEPT); | |
char *socket_var = getenv(ENV_SOCKET); | |
char *gdbserver_var = getenv(ENV_GDBSERVER); | |
char *evfd_var = getenv(ENV_EVENTFD); | |
pthread_mutex_lock(&mutex); | |
if (devino_var && intercept_parse_envvar(devino_var) != -1 && | |
socket_var && socket_var[0] && socket_var[0] != '-') { | |
if (evfd_var) { | |
evfd = atoi(evfd_var); | |
} else { | |
intercept_init_eventfd(); | |
} | |
if (evfd != -1) { | |
socket = socket_var; | |
gdbserver = gdbserver_var ? gdbserver_var : GDBSERVER_DEFAULT; | |
} | |
} | |
capture_env(); | |
pthread_mutex_unlock(&mutex); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment