Skip to content

Instantly share code, notes, and snippets.

@rrampage
Last active May 18, 2025 10:23
Show Gist options
  • Save rrampage/5046b60ca2d040bcffb49ee38e86041f to your computer and use it in GitHub Desktop.
Save rrampage/5046b60ca2d040bcffb49ee38e86041f to your computer and use it in GitHub Desktop.
Minimal Linux shell (C, Zig and ASM)
/*
Compile with one of the following:
zig cc -v -s -Os -target aarch64-linux-musl -nostdlib -flto -static shell.c -o csh
zig cc -v -s -Os -target aarch64-linux-gnu -nostdlib -flto -static shell.c -o csh
CLANG:
clang -v -s -Oz -ffreestanding -nostdlib -fno-stack-protector -Wl,--entry=_start -Wl,--gc-sections -Wl,-z,now -flto -static -o csh shell.c
musl-clang -v -s -Os -nostdlib -nostartfiles -fno-stack-protector -Wl,--entry=_start -Wl,--gc-sections -Wl,-z,now -flto -nostdinc -static -o csh shell.c
GCC (some things DO NOT WORK like `pwd`):
gcc -v -s -Oz -ffreestanding -nostdlib -nostartfiles -fno-stack-protector -Wl,--entry=_start -Wl,--gc-sections -Wl,-z,now -flto -nostdinc -static -o csh shell.c
gcc -v -c -Os -nostdlib -nostartfiles -fno-stack-protector -Wl,--entry=_start shell.c
ld -flto -O2 --entry _start -z stack-size=16777216 --gc-sections --eh-frame-hdr -s -znow -m elf_x86_64 -static -o csh shell.o
*/
/*
 * Hand-rolled replacements for the libc typedefs used below. The compile
 * commands at the top of the file build with -nostdlib, so no system header
 * supplies them.
 */
typedef unsigned int dev_t;
typedef unsigned long ino_t;
typedef unsigned int mode_t;
typedef signed int pid_t;
typedef unsigned int uid_t;
typedef unsigned int gid_t;
typedef unsigned long nlink_t;
typedef signed long off_t;
typedef signed long blksize_t;
typedef signed long blkcnt_t;
typedef signed long time_t;
typedef unsigned long long uint64_t;
/* NOTE(review): size_t is declared as unsigned long long here (not unsigned long). */
typedef unsigned long long size_t;
/*
 * Argument block handed to SYS_clone3 by main(), together with its size.
 * main() zero-initialises it and only sets exit_signal (to SIGCHLD); all
 * other fields are left at 0.
 */
struct clone_args {
uint64_t flags;
uint64_t pidfd;
uint64_t child_tid;
uint64_t parent_tid;
uint64_t exit_signal; /* set to SIGCHLD by main() */
uint64_t stack;
uint64_t stack_size;
uint64_t tls;
uint64_t set_tid;
uint64_t set_tid_size;
uint64_t cgroup;
};
/*
 * Seconds + nanoseconds pair, embedded three times in struct stat.
 * The two unnamed bit-fields are padding whose width is
 * 8 * (sizeof(time_t) - sizeof(long)); time_t is typedef'd as long in this
 * file, so both evaluate to width 0 here. (The first is additionally
 * multiplied by (1234 == 4321), which is always 0.)
 */
struct timespec {
time_t tv_sec;
int : 8 * (sizeof(time_t) - sizeof(long)) * (1234 == 4321);
long tv_nsec;
int : 8 * (sizeof(time_t) - sizeof(long)) * (1234 != 4321);
};
/*
 * One variable-length directory record as laid out in the buffer filled by
 * SYS_getdents64. traverse() walks the buffer by advancing d_reclen bytes per
 * record and inspects d_type and d_name.
 */
struct linux_dirent64 {
uint64_t d_ino; /* 64-bit inode number */
uint64_t d_off; /* Not an offset; see getdents() */
unsigned short d_reclen; /* Size of this dirent */
unsigned char d_type; /* File type */
char d_name[]; /* Filename (null-terminated) */
};
/* Constants that would normally come from libc/kernel headers. */
#define NULL 0
/* d_type values accepted by traverse() as candidate executables. */
#define DT_REG 8
#define DT_LNK 10
/* Stored in clone_args.exit_signal by main(). */
#define SIGCHLD 17
/* idtype and options passed to SYS_waitid by main(). */
#define P_ALL 0
#define WEXITED 4
/* Open flag combined with O_DIRECTORY in traverse(). */
#define O_RDONLY 00000000
/* Bit tested against st_mode in traverse() to decide a file is executable. */
#define S_IXUSR 0100
// Only supporting x64 and aarch64 currently
#if defined(__x86_64__)
#define O_DIRECTORY 0200000
/*
 * x86_64 layout of the buffer filled by SYS_newfstatat.
 *
 * The kernel copies its full 144-byte `struct stat` into the caller's
 * buffer, so this declaration must be at least that large. The trailing
 * reserved area is three *longs* (24 bytes); declaring it as three
 * `unsigned` made the struct 136 bytes and let the kernel write 8 bytes past
 * the end of a stack-allocated `struct stat`.
 *
 * st_dev / st_rdev are spelled as `unsigned long` (their kernel width) so
 * the layout no longer depends on alignment padding after a 32-bit dev_t.
 * Only st_mode is read by this file (see traverse()).
 */
struct stat {
    unsigned long st_dev;
    ino_t st_ino;
    nlink_t st_nlink;
    mode_t st_mode;
    uid_t st_uid;
    gid_t st_gid;
    unsigned int __pad;
    unsigned long st_rdev;
    off_t st_size;
    blksize_t st_blksize;
    blkcnt_t st_blocks;
    struct timespec st_atim;
    struct timespec st_mtim;
    struct timespec st_ctim;
    long __unused[3];
};
/* x86_64 system call numbers; passed as the first argument of the syscallN() macros. */
#define SYS_read 0
#define SYS_write 1
#define SYS_exit 60
#define SYS_openat 257
#define SYS_getdents64 217
#define SYS_newfstatat 262
#define SYS_clone3 435
#define SYS_waitid 247
#define SYS_execve 59
#define SYS_execveat 322
#define SYS_getcwd 79
#define SYS_chdir 80
/*
 * Raw x86_64 system call wrappers (GNU statement expressions).
 *
 * syscallK(num, a1..aK) places `num` in rax and the arguments in
 * rdi, rsi, rdx, r10, r8, r9 (in that order), executes `syscall`, and
 * evaluates to the value left in rax. rcx, r11, memory and flags are
 * declared clobbered. Errors are NOT translated: the raw (negative) kernel
 * return value is handed back to the caller.
 *
 * Every argument expression is first evaluated into an ordinary temporary
 * (_v0.._v6) and only then copied into the register-pinned variables. The
 * previous version initialised the pinned variables directly from the
 * argument expressions, so an argument containing a function call (for
 * example `syscall3(SYS_write, 1, cwd, str_len(cwd))`) could clobber
 * rax/rdi/rsi after they had already been loaded.
 */
#define syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
({ \
    long _ret; \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    const long _v6 = (long)(arg6); \
    register long _num __asm__ ("rax") = _v0; \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    register long _arg4 __asm__ ("r10") = _v4; \
    register long _arg5 __asm__ ("r8") = _v5; \
    register long _arg6 __asm__ ("r9") = _v6; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "r"(_arg6), "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall5(num, arg1, arg2, arg3, arg4, arg5) \
({ \
    long _ret; \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    register long _num __asm__ ("rax") = _v0; \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    register long _arg4 __asm__ ("r10") = _v4; \
    register long _arg5 __asm__ ("r8") = _v5; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall4(num, arg1, arg2, arg3, arg4) \
({ \
    long _ret; \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    register long _num __asm__ ("rax") = _v0; \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    register long _arg4 __asm__ ("r10") = _v4; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall3(num, arg1, arg2, arg3) \
({ \
    long _ret; \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    register long _num __asm__ ("rax") = _v0; \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall2(num, arg1, arg2) \
({ \
    long _ret; \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    register long _num __asm__ ("rax") = _v0; \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall1(num, arg1) \
({ \
    long _ret; \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    register long _num __asm__ ("rax") = _v0; \
    register long _arg1 __asm__ ("rdi") = _v1; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall0(num) \
({ \
    long _ret; \
    const long _v0 = (long)(num); \
    register long _num __asm__ ("rax") = _v0; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#elif defined(__aarch64__)
/* aarch64 value of the O_DIRECTORY open flag (differs from the x86_64 branch above). */
#define O_DIRECTORY 040000
/* aarch64 system call numbers; passed as the first argument of the syscallN() macros. */
#define SYS_read 63
#define SYS_write 64
#define SYS_openat 56
#define SYS_getdents64 61
#define SYS_newfstatat 79
#define SYS_clone3 435
#define SYS_waitid 95
#define SYS_execve 221
#define SYS_execveat 281
#define SYS_exit 93
#define SYS_getcwd 17
#define SYS_chdir 49
/*
 * aarch64 layout of the buffer filled by SYS_newfstatat (the kernel's
 * generic 128-byte `struct stat`).
 *
 * st_nlink and st_blksize are 32-bit in that layout. This file's nlink_t and
 * blksize_t typedefs are 64-bit, which shifted every member after st_mode to
 * the wrong offset; the affected members are therefore spelled with explicit
 * widths here. st_dev / st_rdev are likewise spelled as `unsigned long`
 * instead of relying on padding after a 32-bit dev_t.
 * Only st_mode is read by this file (see traverse()).
 */
struct stat {
    unsigned long st_dev;
    ino_t st_ino;
    mode_t st_mode;
    unsigned int st_nlink;
    uid_t st_uid;
    gid_t st_gid;
    unsigned long st_rdev;
    unsigned long __pad0;
    off_t st_size;
    int st_blksize;
    int __pad1;
    blkcnt_t st_blocks;
    struct timespec st_atim;
    struct timespec st_mtim;
    struct timespec st_ctim;
    unsigned __unused[2];
};
/*
 * Raw aarch64 system call wrappers (GNU statement expressions).
 *
 * syscallK(num, a1..aK) places `num` in x8 and the arguments in x0..x5,
 * executes `svc #0`, and evaluates to the value left in x0. Errors are NOT
 * translated: the raw (negative) kernel return value is handed back.
 *
 * Every argument expression is first evaluated into an ordinary temporary
 * (_v0.._v6) and only then copied into the register-pinned variables, so an
 * argument that contains a function call cannot clobber registers that were
 * already loaded for the system call.
 */
#define syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
({ \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    const long _v6 = (long)(arg6); \
    register long _num __asm__ ("x8") = _v0; \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    register long _arg4 __asm__ ("x3") = _v4; \
    register long _arg5 __asm__ ("x4") = _v5; \
    register long _arg6 __asm__ ("x5") = _v6; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r" (_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "r"(_arg6), "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall5(num, arg1, arg2, arg3, arg4, arg5) \
({ \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    register long _num __asm__ ("x8") = _v0; \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    register long _arg4 __asm__ ("x3") = _v4; \
    register long _arg5 __asm__ ("x4") = _v5; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r" (_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall4(num, arg1, arg2, arg3, arg4) \
({ \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    register long _num __asm__ ("x8") = _v0; \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    register long _arg4 __asm__ ("x3") = _v4; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall3(num, arg1, arg2, arg3) \
({ \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    register long _num __asm__ ("x8") = _v0; \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall2(num, arg1, arg2) \
({ \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    register long _num __asm__ ("x8") = _v0; \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall1(num, arg1) \
({ \
    const long _v0 = (long)(num); \
    const long _v1 = (long)(arg1); \
    register long _num __asm__ ("x8") = _v0; \
    register long _arg1 __asm__ ("x0") = _v1; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall0(num) \
({ \
    const long _v0 = (long)(num); \
    register long _num __asm__ ("x8") = _v0; \
    register long _arg1 __asm__ ("x0"); \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#else
#error "Unsupported architecture"
#endif
/* Payload union embedded in siginfo_t below. */
union sigval {
int sival_int;
void *sival_ptr;
};
/*
 * Buffer type passed to SYS_waitid by main(); main() never reads it back
 * ("not used" there).
 * NOTE(review): the member order here (si_signo, si_code, si_value, si_errno,
 * ...) does not look like the kernel's siginfo layout -- confirm before
 * reading any field out of it.
 */
typedef struct {
int si_signo;
int si_code;
union sigval si_value;
int si_errno;
int si_pid;
uid_t si_uid;
void *si_addr;
int si_status;
int si_band;
} siginfo_t;
#define BUF_SIZE 4096
#define MAX_TOKENS 64
#define BUILTIN_LEN 5
static const char PATH_PREFIX[] = "PATH=";
static const char HOME_PREFIX[] = "HOME=";
#define PWD "pwd"
#define CD "cd"
#define EXIT "exit"
#define ECHO "echo"
#define TYPE "type"
const char *BUILTINS[BUILTIN_LEN] = {ECHO, EXIT, TYPE, PWD, CD};
const int pl = sizeof(PATH_PREFIX);
char* PATH = 0;
char* HOME = 0;
char command[BUF_SIZE];
char pathbuf[BUF_SIZE];
char *pathargv[MAX_TOKENS] = {0};
int pc = 0;
#define SP(s) (s) " "
#define SPL(s) sizeof((s)) + 1
/*
 * Report whether s2 begins with the first (l1 - 1) characters of s1.
 *
 * l1 is expected to be sizeof(s1) for a string literal, i.e. its length plus
 * the terminating NUL, which is why one fewer character is compared.
 * Returns 1 on a match, 0 if s2 ends early or any character differs.
 */
int is_prefix(const char *s1, const char *s2, const int l1) {
    const char *want = s1;
    const char *have = s2;
    int remaining = l1 - 1;

    while (remaining-- > 0) {
        if (*have == '\0' || *want != *have) {
            return 0;
        }
        want++;
        have++;
    }
    return 1;
}
// From https://stackoverflow.com/a/34873763
/*
 * Compare two NUL-terminated strings byte-wise as unsigned chars.
 * Returns 0 when equal, 1 when s1 sorts after s2, -1 when it sorts before.
 */
int str_cmp(const char *s1, const char *s2) {
    const unsigned char *a = (const unsigned char *)s1;
    const unsigned char *b = (const unsigned char *)s2;

    /* Advance while both strings agree and s1 has not ended. */
    for (; *a != '\0' && *a == *b; a++, b++) {
    }
    if (*a > *b) {
        return 1;
    }
    if (*b > *a) {
        return -1;
    }
    return 0;
}
/* Return the number of bytes in str before its terminating NUL. */
long str_len(const char *str) {
    long count = 0;
    while (str[count] != '\0') {
        count++;
    }
    return count;
}
/*
 * Parse a decimal integer from str.
 *
 * Leading whitespace (space, \t, \n, \v, \f, \r) is skipped, one optional
 * '+' or '-' sign is accepted, and parsing stops at the first non-digit.
 * Returns 0 when no digits are present, and also when str is a null pointer
 * (the `exit` builtin passes its possibly-missing first argument here).
 * Overflow is not detected.
 */
long a_to_l(const char *str) {
    long res = 0;
    int sign = 1;

    if (!str) {
        return 0;
    }
    // Skip leading whitespace
    while (*str == ' ' || *str == '\t' || *str == '\n' ||
           *str == '\v' || *str == '\f' || *str == '\r') {
        str++;
    }
    if (*str == '-') {
        sign = -1;
        str++;
    } else if (*str == '+') {
        str++;
    }
    while (*str >= '0' && *str <= '9') {
        res = res * 10 + (*str - '0');
        str++;
    }
    return sign * res;
}
/*
 * Locate the first occurrence of c in the NUL-terminated string str.
 * Returns its zero-based index, or -1 when c does not occur. The terminating
 * NUL itself is never matched.
 */
long index_of(const char *str, char c) {
    for (long pos = 0; str[pos] != '\0'; pos++) {
        if (str[pos] == c) {
            return pos;
        }
    }
    return -1;
}
/*
 * Format value as a decimal string into str and return str.
 *
 * str must have room for the sign, every digit and the terminating NUL
 * (21 bytes is enough for a 64-bit long).
 *
 * The magnitude is computed in unsigned arithmetic so that the most negative
 * long is handled; negating it as a signed value would overflow.
 */
char *l_to_a(long value, char *str) {
    char *p = str;
    char *start = str;
    const int is_negative = value < 0;
    unsigned long magnitude = (unsigned long)value;

    if (is_negative) {
        magnitude = 0UL - magnitude;
    }
    // Convert digits in reverse order
    do {
        *p++ = (char)('0' + (magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);
    if (is_negative) {
        *p++ = '-';
    }
    *p = '\0';
    // Reverse the string in-place
    char *end = p - 1;
    while (start < end) {
        char tmp = *start;
        *start++ = *end;
        *end-- = tmp;
    }
    return str;
}
/*
* memcpy, memset, memmove and memcmp seem to be needed by both gcc and clang even for no include code
* Copied from nolibc
*/
/*
 * Copy len bytes from src to dst one byte at a time, lowest address first,
 * and return dst. Declared weak and placed in its own section.
 */
__attribute__((weak,unused,section(".text.nolibc_memcpy")))
void *memcpy(void *dst, const void *src, size_t len) {
size_t pos = 0;
while (pos < len) {
((char *)dst)[pos] = ((const char *)src)[pos];
pos++;
}
return dst;
}
/*
 * Store the byte value b into the first len bytes of dst and return dst.
 * Declared weak and placed in its own section.
 */
__attribute__((weak,unused,section(".text.nolibc_memset")))
void *memset(void *dst, int b, size_t len) {
char *p = dst;
while (len--) {
/* prevent gcc from recognizing memset() here */
/* (an empty volatile asm in the loop body; otherwise the loop could be turned back into a memset call) */
__asm__ volatile("");
*(p++) = b;
}
return dst;
}
/* might be ignored by the compiler without -ffreestanding, then found as
* missing.
*/
/*
 * Copy len bytes from src to dst, choosing the copy direction from the
 * relative order of the two pointers, and return dst.
 *
 * pos and dir are unsigned (size_t): dir = -1 and pos = -1 rely on unsigned
 * wrap-around, so `pos += dir` steps backwards from len-1, while the
 * dst < src case starts at (size_t)-1 and steps forwards from index 0.
 */
__attribute__((weak,unused,section(".text.nolibc_memmove")))
void *memmove(void *dst, const void *src, size_t len) {
size_t dir, pos;
/* Default: copy backwards, starting just past the last byte. */
pos = len;
dir = -1;
if (dst < src) {
/* Destination is below the source: copy forwards instead. */
pos = -1;
dir = 1;
}
while (len) {
pos += dir;
((char *)dst)[pos] = ((const char *)src)[pos];
len--;
}
return dst;
}
/*
 * Compare the first n bytes of s1 and s2 as unsigned chars.
 * Returns 0 when they are all equal (or n is 0); otherwise the difference
 * s1[i] - s2[i] at the first index where they differ.
 */
static __attribute__((unused))
int memcmp(const void *s1, const void *s2, size_t n) {
size_t ofs = 0;
int c1 = 0;
/* c1 is assigned inside the condition; the loop stops at the first non-zero difference. */
while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) {
ofs++;
}
return c1;
}
/*
 * Split the first n bytes of buffer on delim.
 *
 * Each token is copied into scratch as its own NUL-terminated string and a
 * pointer to it is stored in argv. Runs of delim are treated as a single
 * separator and leading delimiters are skipped. At most MAX_TOKENS tokens
 * are produced and at most BUF_SIZE bytes of scratch are used; input beyond
 * either limit is dropped. argv is not NUL-pointer terminated here.
 *
 * Returns the number of tokens stored in argv.
 */
int tokenize(const char *buffer, char *scratch, char *argv[], const long n, const char delim) {
    int out = 0;    /* next free byte in scratch */
    int count = 0;  /* tokens emitted so far */
    int pos = 0;    /* read cursor in buffer */

    while (pos < n) {
        /* Swallow the run of delimiters in front of the next token. */
        while (pos < n && buffer[pos] == delim) {
            pos++;
        }
        if (pos >= n) {
            break;
        }
        if (count >= MAX_TOKENS) {
            break;
        }
        argv[count] = scratch + out;
        count++;
        /* Copy the token body, always keeping one byte free for its NUL. */
        while (pos < n && buffer[pos] != delim && out < BUF_SIZE - 1) {
            scratch[out] = buffer[pos];
            out++;
            pos++;
        }
        scratch[out] = 0;
        out++;
        if (out >= BUF_SIZE) {
            break;
        }
    }
    return count;
}
#define GETDENTS_BUF_SIZE (1024 * 64)
// Return dirfd if found otherwise return -1
// If return_dirfd is 0, will return index of argv where fname is found
int traverse(int argc, char *argv[], char *fname, int return_dirfd ) {
int dirfd;
char buf[GETDENTS_BUF_SIZE];
long nread;
struct linux_dirent64 *d;
long fl = str_len(fname);
for (int i = 0; i < argc; i++) {
dirfd = syscall3(SYS_openat, 0, argv[i], O_RDONLY | O_DIRECTORY);
if (dirfd < 0) {
continue;
}
for (;;) {
nread = syscall3(SYS_getdents64, dirfd, buf, GETDENTS_BUF_SIZE);
if (nread == -1) {return -1;}
if (nread == 0) {break;}
for (long bpos = 0; bpos < nread;) {
d = (struct linux_dirent64 *)(buf + bpos);
bpos += d->d_reclen;
if (d->d_type != DT_REG && d->d_type != DT_LNK) { continue;}
// Compare with fname and then stat
long dl = str_len(d->d_name);
if (dl != fl) { continue;}
int cmp = 1;
for (int j = 0; j < fl; j++) {
if (d->d_name[j] != fname[j]) {
cmp = 0;
break;
}
}
if (cmp == 0) {continue;}
struct stat sb;
int status = syscall4(SYS_newfstatat, dirfd, d->d_name, &sb, 0);
if (status == 0 && sb.st_mode & S_IXUSR) {
// Return fd of the path directory which contains the executable
return return_dirfd ? dirfd : i;
}
// If file with matching name doesn't have correct permissions, look in another dir
break;
}
}
}
return -1;
}
int child_func(char* argv[]) {
if (index_of(argv[0], '/') != -1) {
return syscall3(SYS_execve, argv[0], argv, NULL);
}
int dirfd = traverse(pc, pathargv, argv[0], 1);
if (dirfd == -1) {return -1;}
return syscall5(SYS_execveat, dirfd, argv[0], argv, 0, 0);
}
int handle_builtins(char* argv[], int nread) {
char* word = argv[0];
if (str_cmp(ECHO, word) == 0) {
// TODO: echo rest of cmd
long offset = argv[1] - argv[0];
if (offset <= 0 || offset >= nread) {
syscall3(SYS_write, 1, "\n", 1);
return 1;
}
char* rest = command + offset;
syscall3(SYS_write, 1, rest, nread - offset);
syscall3(SYS_write, 1, "\n", 1);
return 1;
}
if (str_cmp(EXIT, word) == 0) {
long st = a_to_l(argv[1]);
syscall1(SYS_exit, st);
}
if (str_cmp(PWD, word) == 0) {
char cwd[1024];
long st = syscall2(SYS_getcwd, cwd, sizeof(cwd));
if (st <= 0) {
syscall1(SYS_exit, st);
}
syscall3(SYS_write, 1, cwd, str_len(cwd));
syscall3(SYS_write, 1, "\n", 1);
return 1;
}
if (str_cmp(CD, word) == 0) {
if (argv[1] == 0) {
// Just 'cd' means go to home directory
syscall1(SYS_chdir, HOME);
return 1;
}
if (argv[2]) {
syscall3(SYS_write, 1, "cd: too many arguments\n", 23);
return 1;
}
char* dir = argv[1];
if (dir[0] == '~') {
if (dir[1] == 0) {
syscall1(SYS_chdir, HOME);
}
else if (dir[1] == '/') {
char home_buf[1024] = {0};
int i = 0;
char* home = HOME;
while (*home) {
home_buf[i++] = *home;
home++;
}
dir++;
while (*dir) {
home_buf[i++] = *dir;
dir++;
}
syscall3(SYS_write, 1, home_buf, str_len(home_buf));
syscall1(SYS_chdir, home_buf);
} else {
syscall3(SYS_write, 1, argv[1], str_len(argv[1]));
syscall3(SYS_write, 1, ": No such file or directory\n", 28);
}
return 1;
}
syscall1(SYS_chdir, dir);
return 1;
}
if (str_cmp(TYPE, word) == 0) {
argv++;
while (*argv) {
int is_builtin = 0;
for (int i = 0; i < BUILTIN_LEN; ++i) {
if (str_cmp(BUILTINS[i], *argv) == 0) {
syscall3(SYS_write, 1, *argv, str_len(*argv));
syscall3(SYS_write, 1, " is a shell builtin\n", 20);
is_builtin = 1;
break;
}
}
if (!is_builtin) {
int idx = traverse(pc, pathargv, *argv, 0);
syscall3(SYS_write, 1, *argv, str_len(*argv));
if (idx == -1) {
syscall3(SYS_write, 1, ": not found\n", 12);
} else {
syscall3(SYS_write, 1, " is ", 4);
syscall3(SYS_write, 1, pathargv[idx], str_len(pathargv[idx]));
syscall3(SYS_write, 1, "/", 1);
syscall3(SYS_write, 1, *argv, str_len(*argv));
syscall3(SYS_write, 1, "\n", 1);
}
}
argv++;
}
return 1;
}
return 0;
}
/*
 * Shell entry point: locate HOME and PATH in envp, split PATH into
 * pathargv/pc, then loop forever reading one command line per iteration.
 * Builtins are run in-process; anything else is run in a child created with
 * SYS_clone3 and waited for with SYS_waitid.
 *
 * Returns -1 if PATH is not set. Otherwise it only leaves through SYS_exit
 * (end of input, read error, clone failure, or in a child whose exec failed).
 *
 * Fixes relative to the previous version:
 *  - SYS_waitid takes five arguments; the fifth (struct rusage *) is now
 *    passed explicitly as 0 instead of being whatever was left in the
 *    fifth argument register;
 *  - a failed clone3 returns -errno, so failure is detected with `< 0`;
 *  - cmdargv has one extra slot so it stays NULL-terminated for exec even
 *    when tokenize() emits MAX_TOKENS tokens;
 *  - only a trailing '\n' is stripped (input ending without a newline kept
 *    losing its last character), and one byte is reserved for the NUL;
 *  - whitespace-only input is skipped instead of being dispatched with a
 *    NULL argv[0];
 *  - a child whose exec failed exits with status 127 instead of 0.
 */
int main(int argc, char *argv[], char *envp[]) {
    (void)argc;
    (void)argv;
    while (*envp) {
        if (is_prefix(HOME_PREFIX, *envp, pl)) {
            HOME = *envp + pl - 1;
        }
        if (is_prefix(PATH_PREFIX, *envp, pl)) {
            PATH = *envp + pl - 1;
        }
        envp++;
    }
    if (PATH == 0) {
        return -1;
    }
    long sl = str_len(PATH);
    pc = tokenize(PATH, pathbuf, pathargv, sl, ':');

    for (;;) {
        syscall3(SYS_write, 1, "$ ", 2);
        /* Leave room for the terminating NUL written below. */
        int nread = syscall3(SYS_read, 0, command, BUF_SIZE - 1);
        if (nread <= 0) {
            /* 0 = end of input, negative = -errno from read. */
            syscall1(SYS_exit, nread < 0 ? 1 : 0);
        }
        int len = nread;
        if (command[len - 1] == '\n') {
            len--;
        }
        command[len] = 0;
        if (len == 0) {
            continue;
        }

        char buf[BUF_SIZE];
        char *cmdargv[MAX_TOKENS + 1] = {0};
        int ntok = tokenize(command, buf, cmdargv, len, ' ');
        if (ntok == 0) {
            continue;
        }
        /* len + 1 mirrors the old "bytes read including the newline" argument. */
        if (handle_builtins(cmdargv, len + 1)) {
            continue;
        }

        struct clone_args args = {0};
        args.exit_signal = SIGCHLD;
        int pid = syscall2(SYS_clone3, &args, sizeof(args));
        if (pid < 0) {
            syscall1(SYS_exit, 1);
        }
        if (pid == 0) {
            // Child process: child_func only returns if exec failed
            child_func(cmdargv);
            syscall1(SYS_exit, 127);
            break; /* not reached */
        }
        else {
            // Parent
            siginfo_t info; // not used
            syscall5(SYS_waitid, P_ALL, 0, &info, WEXITED, 0);
        }
    }
    return 0;
}
/*
 * C-level entry called from _start with the initial stack pointer.
 *
 * argc_ptr points at argc; the argv pointers follow one pointer-sized slot
 * later, and envp starts after argv's argc entries plus one more slot.
 * Runs main() and terminates the process with its return value.
 */
void pre_main(int *argc_ptr) {
    char **slots = (char **)(argc_ptr);
    const int count = *argc_ptr;
    char **args = slots + 1;
    char **env = args + count + 1;
    /* Keep the call to main() outside the syscall macro's argument list. */
    const int rc = main(count, args, env);
    syscall1(SYS_exit, rc);
}
/*
 * Process entry point (selected with -Wl,--entry=_start / --entry _start in
 * the compile commands at the top of the file).
 *
 * Declared naked, so the body is only the inline assembly below: it clears
 * the frame pointer, passes the incoming stack pointer as the first argument,
 * aligns the stack pointer down to 16 bytes and calls pre_main(). Nothing
 * follows the call; pre_main() ends by invoking SYS_exit.
 */
__attribute__((section(".text.startup"))) __attribute__((naked)) __attribute__((__optimize__("-fno-stack-protector"))) void _start(void) {
#if defined(__x86_64__)
asm volatile (
/* ebp = 0 */
"xorl %%ebp, %%ebp\n\t"
/* rdi (first argument) = original rsp, which pre_main reads argc/argv/envp from */
"movq %%rsp, %%rdi\n\t"
/* round rsp down to a multiple of 16 */
"andq $-16, %%rsp\n\t"
"callq %P0\n\t"
:
: [pre_main] "X"(&pre_main)
:"memory"
);
#elif defined(__aarch64__)
asm volatile (
/* fp = 0, lr = 0 */
"mov fp, #0\n\t"
"mov lr, #0\n\t"
/* x0 (first argument) = original sp */
"mov x0, sp\n\t"
/* round sp down to a multiple of 16 */
"and sp, x0, #-16\n\t"
"bl %0\n\t"
:
: [pre_main] "X"(&pre_main)
:"x11", "memory"
);
#else
#error "Unsupported architecture"
#endif
}
// A minimal shell with no allocations
const std = @import("std");
const builtin = @import("builtin");
const linux = std.os.linux;
const elf = std.elf;
const native_arch = builtin.cpu.arch;
// Input buffer: capacity of the command line read from stdin, and the
// maximum number of argv entries produced by tokenize_input().
const BUF_SIZE = 1024;
const MAX_TOKENS = 64;
// Zero-filled, NUL-sentinel-terminated buffer that main() reads commands into.
var input = [_:0]u8{0} ** BUF_SIZE;
// stdin,out,err
const STDIN = 0;
const STDOUT = 1;
const STDERR = 2;
const nl: []const u8 = "\n";
const prompt: []const u8 = "$ ";
// path
// Copy of the PATH value (filled by get_path); pl is the number of bytes copied.
// traversePath() later overwrites the ':' separators in this buffer with 0.
var path = [_:0]u8{0} ** 4096;
var pl: usize = 0;
const path_prefix: []const u8 = "PATH=";
// Clone stuff
// Argument block passed by pointer (with its @sizeOf) to the clone3 system
// call in main(). main() zeroes it and only sets exit_signal.
const CloneArgs = extern struct {
flags: u64,
pidfd: u64,
child_tid: u64,
parent_tid: u64,
exit_signal: u64,
stack: u64,
stack_size: u64,
tls: u64,
set_tid: u64,
set_tid_size: u64,
cgroup: u64,
};
// Exported process entry point. Naked: the body consists only of inline
// assembly. It clears the frame pointer, passes the incoming stack pointer as
// the first argument, aligns the stack down to 16 bytes and transfers control
// to posixCallMainAndExit (via `callq` on x86_64, a plain branch on aarch64).
// Any other architecture is a compile error.
pub export fn _start() callconv(.naked) noreturn {
// Only emitted when unwind tables are enabled or debug info is not stripped.
if (builtin.unwind_tables != .none or !builtin.strip_debug_info) asm volatile (switch (native_arch) {
.aarch64 => ".cfi_undefined lr",
.x86_64 => ".cfi_undefined %%rip",
else => @compileError("unsupported arch"),
});
asm volatile (switch (native_arch) {
.x86_64 =>
\\ xorl %%ebp, %%ebp
\\ movq %%rsp, %%rdi
\\ andq $-16, %%rsp
\\ callq %[posixCallMainAndExit:P]
,
.aarch64 =>
\\ mov fp, #0
\\ mov lr, #0
\\ mov x0, sp
\\ and sp, x0, #-16
\\ b %[posixCallMainAndExit]
,
else => @compileError("unsupported arch"),
}
:
: [_start] "X" (&_start),
[posixCallMainAndExit] "X" (&posixCallMainAndExit),
);
}
// Decodes the initial process stack handed over by _start and calls main().
//
// argc_argv_ptr[0] is argc; the argv pointers follow it, then one more slot,
// then the environment pointers up to a null entry, then one more slot, then
// the auxiliary vector. The results are published through std.os.argv,
// std.os.environ and linux.elf_aux_maybe before main() runs.
// NOTE(review): the commented-out sections (program-header scan, TLS setup,
// init_array calls) look like they were adapted from Zig's standard start-up
// code and then disabled -- confirm before re-enabling any of them.
fn posixCallMainAndExit(argc_argv_ptr: [*]usize) callconv(.c) noreturn {
const argc = argc_argv_ptr[0];
const argv = @as([*][*:0]u8, @ptrCast(argc_argv_ptr + 1));
// envp starts after argv's argc entries plus one extra slot.
const envp_optional: [*:null]?[*:0]u8 = @ptrCast(@alignCast(argv + argc + 1));
// Count environment entries up to the terminating null pointer.
var envp_count: usize = 0;
while (envp_optional[envp_count]) |_| : (envp_count += 1) {}
const envp = @as([*][*:0]u8, @ptrCast(envp_optional))[0..envp_count];
// The auxiliary vector starts one slot past the end of envp.
const auxv: [*]elf.Auxv = @ptrCast(@alignCast(envp.ptr + envp_count + 1));
// var at_hwcap: usize = 0;
// const phdrs = init: {
// var i: usize = 0;
// var at_phdr: usize = 0;
// var at_phnum: usize = 0;
// while (auxv[i].a_type != elf.AT_NULL) : (i += 1) {
// switch (auxv[i].a_type) {
// elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
// elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
// elf.AT_HWCAP => at_hwcap = auxv[i].a_un.a_val,
// else => continue,
// }
// }
// break :init @as([*]elf.Phdr, @ptrFromInt(at_phdr))[0..at_phnum];
// };
// main() calls linux.getauxval, hence the auxv pointer is stored here first.
linux.elf_aux_maybe = auxv;
//if (!builtin.single_threaded) {
// linux.tls.initStatic(phdrs);
//}
// const opt_init_array_start = @extern([*]*const fn () callconv(.c) void, .{
// .name = "__init_array_start",
// .linkage = .weak,
// });
// const opt_init_array_end = @extern([*]*const fn () callconv(.c) void, .{
// .name = "__init_array_end",
// .linkage = .weak,
// });
// if (opt_init_array_start) |init_array_start| {
// const init_array_end = opt_init_array_end.?;
// const slice = init_array_start[0 .. init_array_end - init_array_start];
// for (slice) |func| func();
// }
std.os.argv = argv[0..argc];
std.os.environ = envp;
main();
}
// Shell main loop: print the prompt, read one line, tokenise it in place,
// run it in a child created with clone3 and wait for that child.
// Never returns; leaves through linux.exit.
//
// Raw Linux system calls report failure as -errno encoded in the returned
// usize, so results are reinterpreted as isize and tested for `< 0` rather
// than being @intCast into an i32 (which is illegal for such values).
//
// Fixes relative to the previous version:
//  - AT_SYSINFO_EHDR may be absent (getauxval returns 0); the vDSO header is
//    only dereferenced when the address is non-zero;
//  - a failed read, clone3 or waitid is detected and ends the shell with
//    status 1 instead of being misinterpreted;
//  - a line that yields no tokens is skipped instead of being executed with
//    a null argv[0];
//  - a child whose exec failed exits with status 127 instead of 0.
pub fn main() noreturn {
    const envp = std.os.environ;
    // TODO vdso parsing to get faster time related syscalls via vDSO
    const vdso_addr = linux.getauxval(std.elf.AT_SYSINFO_EHDR);
    if (vdso_addr != 0) {
        const elf_header = @as(*std.elf.Ehdr, @ptrFromInt(vdso_addr));
        println(&elf_header.e_ident, 16);
    }
    get_path(envp);
    //println(&path, pl);
    while (true) {
        print(prompt.ptr, prompt.len);
        const nread = linux.read(STDIN, &input, BUF_SIZE);
        if (@as(isize, @bitCast(nread)) < 0) {
            println("READ_ERR", 8);
            linux.exit(1);
        }
        switch (nread) {
            0 => linux.exit(0),
            1 => continue,
            else => {},
        }
        var argv: [MAX_TOKENS:null]?[*:0]const u8 = undefined;
        if (tokenize_input(&input, &argv, nread) == 0) {
            // Nothing but whitespace on the line.
            continue;
        }
        var clone_args = std.mem.zeroes(CloneArgs);
        clone_args.exit_signal = linux.SIG.CHLD;
        // clone3 only available from Linux 5.1 onwards...
        const pid = linux.syscall2(.clone3, @intFromPtr(&clone_args), @sizeOf(CloneArgs));
        if (@as(isize, @bitCast(pid)) < 0) {
            println("CLONE_ERR", 9);
            linux.exit(1);
        }
        if (pid == 0) {
            // child process: invoke_exec only comes back when nothing was executed
            try invoke_exec(&argv, envp);
            break;
        } else {
            var sig_info = std.mem.zeroes(linux.siginfo_t);
            const wait_rc = std.os.linux.waitid(linux.P.ALL, 0, &sig_info, linux.W.EXITED);
            if (@as(isize, @bitCast(wait_rc)) < 0) {
                println("WAIT_ERR", 8);
                linux.exit(1);
            }
        }
    }
    // Only reached by a child process whose command could not be executed.
    linux.exit(127);
}
// Writes `count` bytes starting at `ptr` to STDOUT with a single write call.
// The result of the write is discarded, so short writes and errors go unnoticed.
fn print(ptr: [*]const u8, count: usize) void {
_ = linux.write(STDOUT, ptr, count);
}
// Writes `count` bytes starting at `ptr` to STDOUT, followed by a newline.
// Two separate writes are issued; both results are discarded.
fn println(ptr: [*]const u8, count: usize) void {
    _ = linux.write(STDOUT, ptr, count);
    _ = linux.write(STDOUT, nl.ptr, nl.len);
}
// Finds the first "PATH=..." entry in envp and copies its value into the
// global `path` buffer, recording the number of bytes copied in `pl`.
// A value longer than the buffer is truncated to path.len bytes. If no PATH
// entry exists, `path` and `pl` are left untouched.
//
// Each entry is measured up to its NUL terminator before the prefix test, so
// entries shorter than "PATH=" are no longer read past their end.
fn get_path(envp: [][*:0]u8) void {
    for (envp) |e| {
        const entry = std.mem.span(e);
        if (!std.mem.startsWith(u8, entry, path_prefix)) continue;
        const value = entry[path_prefix.len..];
        pl = @min(value.len, path.len);
        @memcpy(path[0..pl], value[0..pl]);
        break;
    }
}
// parse input

// True for the bytes that separate tokens: space, tab, newline and NUL.
fn isTokenDelim(c: u8) bool {
    return c == ' ' or c == '\t' or c == '\n' or c == 0;
}

// Splits the first buf_len bytes of buf into whitespace-separated tokens, in
// place: separators are overwritten with 0 and argv[k] is set to point at the
// start of the k-th token inside buf. argv[n] is set to null after the last
// token. Returns n, the number of tokens.
//
// At most argv.len tokens are produced; further input is ignored.
// buf must be writable at index buf_len (one byte past the data), because a
// final token that is not followed by a separator is terminated there.
//
// Fixes relative to the previous version: leading or repeated whitespace no
// longer prevents every following token from being recognised, a final token
// without a trailing separator is kept, and the number of tokens is bounded
// by the size of argv.
fn tokenize_input(buf: [*:0]u8, argv: [:null]?[*:0]const u8, buf_len: usize) usize {
    var i: usize = 0;
    var n: usize = 0;
    while (n < argv.len) {
        // Blank out the separators in front of the next token.
        while (i < buf_len and isTokenDelim(buf[i])) : (i += 1) {
            buf[i] = 0;
        }
        if (i >= buf_len) break;
        argv[n] = buf + i;
        n += 1;
        // Advance to the end of the token.
        while (i < buf_len and !isTokenDelim(buf[i])) : (i += 1) {}
        // Terminate it: either on the separator that ended it or at buf_len.
        buf[i] = 0;
        if (i < buf_len) i += 1;
    }
    argv[n] = null;
    return n;
}
// exec
// Replaces the current process with the program named by argv[0].
//
// A name containing '/' is executed directly with execve. Any other name is
// looked up in the PATH directories by traversePath() and executed with
// execveat relative to the directory that contains it. Both paths pass the
// caller's environment on (the execveat path used to pass none).
//
// Returns normally only when nothing was executed: argv[0] is null, or the
// command was not found in PATH ("NOT_FOUND" is printed). If an exec system
// call itself fails, the process exits with the call's (negative) result
// truncated to i32; the raw result is -errno encoded in a usize, which cannot
// be @intCast to i32.
fn invoke_exec(argv: [*:null]?[*:0]const u8, envp: [][*:0]u8) !void {
    const binary = argv[0] orelse return;
    // If command contains a '/', we assume it is an absolute or relative path
    const binSpan = std.mem.sliceTo(binary, 0);
    const cEnvp: [*:null]const ?[*:0]const u8 = @ptrCast(envp);
    if (std.mem.indexOfScalar(u8, binSpan, '/')) |_| {
        const rc = linux.execve(binary, argv, cEnvp);
        const status: i32 = @truncate(@as(isize, @bitCast(rc)));
        linux.exit(status);
    }
    // Open directories for each PATH directory and check for argv[0]
    const dirfd = traversePath(binary);
    if (dirfd == -1) {
        println("NOT_FOUND", 9);
    } else {
        const rc = linux.syscall5(
            .execveat,
            @as(usize, @bitCast(@as(isize, dirfd))),
            @intFromPtr(binary),
            @intFromPtr(argv),
            @intFromPtr(cEnvp),
            0,
        );
        const status: i32 = @truncate(@as(isize, @bitCast(rc)));
        linux.exit(status);
    }
}
// traverses path to find binary
//
// Walks the ':'-separated directories stored in the global `path` buffer
// (first `pl` bytes), in order, looking for a regular-file or symlink entry
// named `binary` whose owner-execute bit is set.
//
// Returns an open directory file descriptor for the first directory that
// contains such an entry (the caller uses it with execveat), or -1 if none
// does. Directories that do not contain a match are closed again.
//
// Side effect: the ':' separators in `path` are overwritten with 0 as they
// are passed, so each directory becomes a NUL-terminated string in place.
//
// Raw system calls return -errno encoded in a usize; results are therefore
// reinterpreted as isize and checked with `< 0` before being narrowed.
// Directories are opened relative to AT.FDCWD (previously fd 0, i.e. stdin).
fn traversePath(binary: [*:0]const u8) i32 {
    var i: usize = 0;
    var offset: usize = 0;
    const getdents_buf_len = 1024 * 64;
    var getdents_buf: [getdents_buf_len]u8 align(@alignOf(linux.dirent64)) = undefined;
    const bname = std.mem.sliceTo(binary, 0);
    // Explicitly comparing <= path_len here as otherwise we will skip the last directory in path
    while (i <= pl) : (i += 1) {
        if (path[i] != ':' and path[i] != 0) {
            continue;
        }
        path[i] = 0;
        if (path[offset] == 0) {
            // Empty PATH element.
            println("ZERO_AT_OFFSET", 14);
            offset = i + 1;
            continue;
        }
        const pathSlice: [*:0]const u8 = path[offset..i :0];
        const mode: linux.O = .{ .DIRECTORY = true };
        offset = i + 1;
        const open_rc: isize = @bitCast(linux.openat(linux.AT.FDCWD, pathSlice, mode, 0));
        if (open_rc < 0) {
            continue;
        }
        const dirfd: i32 = @intCast(open_rc);
        while (true) {
            const nread = linux.getdents64(dirfd, &getdents_buf, getdents_buf_len);
            // 0 = end of directory; anything larger than the buffer is an encoded error.
            if (nread == 0 or nread > getdents_buf_len) {
                break;
            }
            var bpos: usize = 0;
            while (bpos < nread) {
                const dp = @as(*align(1) linux.dirent64, @ptrCast(&getdents_buf[bpos]));
                bpos = bpos + dp.reclen;
                if (dp.type != linux.DT.REG and dp.type != linux.DT.LNK) {
                    continue;
                }
                const fname = std.mem.sliceTo(@as([*:0]u8, @ptrCast(&dp.name)), 0);
                if (!std.mem.eql(u8, fname, bname)) {
                    continue;
                }
                var stat = std.mem.zeroes(linux.Stat);
                const status = linux.fstatat(dirfd, binary, &stat, 0);
                if (status == 0 and (stat.mode & linux.S.IXUSR != 0)) {
                    return dirfd;
                }
            }
        }
        // No executable match in this directory: release its descriptor.
        _ = linux.close(dirfd);
    }
    return -1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment