-
-
Save rrampage/5046b60ca2d040bcffb49ee38e86041f to your computer and use it in GitHub Desktop.
Minimal Linux shell (C, Zig and ASM)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Compile with one of the following: | |
zig cc -v -s -Os -target aarch64-linux-musl -nostdlib -flto -static shell.c -o csh | |
zig cc -v -s -Os -target aarch64-linux-gnu -nostdlib -flto -static shell.c -o csh | |
CLANG: | |
clang -v -s -Oz -ffreestanding -nostdlib -fno-stack-protector -Wl,--entry=_start -Wl,--gc-sections -Wl,-z,now -flto -static -o csh shell.c | |
musl-clang -v -s -Os -nostdlib -nostartfiles -fno-stack-protector -Wl,--entry=_start -Wl,--gc-sections -Wl,-z,now -flto -nostdinc -static -o csh shell.c | |
GCC (some things DO NOT WORK like `pwd`): | |
gcc -v -s -Oz -ffreestanding -nostdlib -nostartfiles -fno-stack-protector -Wl,--entry=_start -Wl,--gc-sections -Wl,-z,now -flto -nostdinc -static -o csh shell.c | |
gcc -v -c -Os -nostdlib -nostartfiles -fno-stack-protector -Wl,--entry=_start shell.c | |
ld -flto -O2 --entry _start -z stack-size=16777216 --gc-sections --eh-frame-hdr -s -znow -m elf_x86_64 -static -o csh shell.o | |
*/ | |
typedef unsigned int dev_t; | |
typedef unsigned long ino_t; | |
typedef unsigned int mode_t; | |
typedef signed int pid_t; | |
typedef unsigned int uid_t; | |
typedef unsigned int gid_t; | |
typedef unsigned long nlink_t; | |
typedef signed long off_t; | |
typedef signed long blksize_t; | |
typedef signed long blkcnt_t; | |
typedef signed long time_t; | |
typedef unsigned long long uint64_t; | |
typedef unsigned long long size_t; | |
/*
 * Argument block handed to the clone3 system call.
 * main() zero-initialises it, sets only exit_signal (to SIGCHLD) and passes
 * its address and sizeof() to SYS_clone3; all members are 64-bit wide.
 */
struct clone_args { | |
uint64_t flags; | |
uint64_t pidfd; | |
uint64_t child_tid; | |
uint64_t parent_tid; | |
uint64_t exit_signal; | |
uint64_t stack; | |
uint64_t stack_size; | |
uint64_t tls; | |
uint64_t set_tid; | |
uint64_t set_tid_size; | |
uint64_t cgroup; | |
}; | |
/*
 * Seconds/nanoseconds pair embedded three times in struct stat.
 * The two unnamed bit-fields are padding whose width is
 * 8 * (sizeof(time_t) - sizeof(long)) multiplied by a 0/1 selector:
 * `1234 == 4321` is always 0, so the first one is always zero-width, and the
 * second one is zero-width whenever time_t and long have the same size
 * (time_t is typedef'd as `signed long` in this file).
 */
struct timespec { | |
time_t tv_sec; | |
int : 8 * (sizeof(time_t) - sizeof(long)) * (1234 == 4321); | |
long tv_nsec; | |
int : 8 * (sizeof(time_t) - sizeof(long)) * (1234 != 4321); | |
}; | |
/*
 * One variable-length directory record as read back from SYS_getdents64.
 * traverse() walks its buffer by advancing d_reclen bytes per record and
 * inspects d_type and d_name of each one.
 */
struct linux_dirent64 { | |
uint64_t d_ino; /* 64-bit inode number */ | |
uint64_t d_off; /* Not an offset; see getdents() */ | |
unsigned short d_reclen; /* Size of this dirent */ | |
unsigned char d_type; /* File type */ | |
char d_name[]; /* Filename (null-terminated) */ | |
}; | |
#define NULL 0 | |
#define DT_REG 8 | |
#define DT_LNK 10 | |
#define SIGCHLD 17 | |
#define P_ALL 0 | |
#define WEXITED 4 | |
#define O_RDONLY 00000000 | |
#define S_IXUSR 0100 | |
// Only supporting x64 and aarch64 currently | |
#if defined(__x86_64__) | |
#define O_DIRECTORY 0200000 | |
/*
 * x86_64 kernel layout of `struct stat`, as written by newfstatat(2).
 *
 * The kernel always stores 144 bytes.  The previous definition used the
 * file's 4-byte dev_t and `unsigned __unused[3]`, which made the object only
 * 136 bytes long, so every newfstatat() call wrote 8 bytes past the end of
 * the caller's variable.  Field widths are spelled out with primitive types
 * so the layout no longer depends on the typedefs at the top of the file.
 */
struct stat {
    unsigned long st_dev;      /* 64-bit in the kernel ABI */
    unsigned long st_ino;
    unsigned long st_nlink;
    unsigned int st_mode;      /* permission bits tested by traverse() */
    unsigned int st_uid;
    unsigned int st_gid;
    unsigned int __pad;
    unsigned long st_rdev;     /* 64-bit in the kernel ABI */
    long st_size;
    long st_blksize;
    long st_blocks;
    struct timespec st_atim;
    struct timespec st_mtim;
    struct timespec st_ctim;
    long __unused[3];          /* three longs, not three ints */
};
_Static_assert(sizeof(struct stat) == 144,
               "struct stat must match the 144-byte x86_64 kernel layout");
#define SYS_read 0 | |
#define SYS_write 1 | |
#define SYS_exit 60 | |
#define SYS_openat 257 | |
#define SYS_getdents64 217 | |
#define SYS_newfstatat 262 | |
#define SYS_clone3 435 | |
#define SYS_waitid 247 | |
#define SYS_execve 59 | |
#define SYS_execveat 322 | |
#define SYS_getcwd 79 | |
#define SYS_chdir 80 | |
/*
 * Raw x86_64 Linux system-call wrappers: syscallN(num, arg1..argN).
 *
 * The call number goes in rax and the arguments in rdi, rsi, rdx, r10, r8,
 * r9.  Each macro evaluates to the value left in rax by the `syscall`
 * instruction (a negative value on failure).  rcx and r11 are declared
 * clobbered, together with memory and flags.
 *
 * Every argument expression is first evaluated into an ordinary temporary
 * (_v1.._v6) and only then copied into its register variable.  Initialising
 * the register variables directly from the argument expressions is unsafe:
 * an expression containing a function call, e.g.
 * syscall3(SYS_write, 1, cwd, str_len(cwd)), may clobber registers that were
 * already loaded for earlier arguments.
 */
#define syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    const long _v6 = (long)(arg6); \
    long _ret; \
    register long _num __asm__ ("rax") = (num); \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    register long _arg4 __asm__ ("r10") = _v4; \
    register long _arg5 __asm__ ("r8") = _v5; \
    register long _arg6 __asm__ ("r9") = _v6; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "r"(_arg6), "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall5(num, arg1, arg2, arg3, arg4, arg5) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    long _ret; \
    register long _num __asm__ ("rax") = (num); \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    register long _arg4 __asm__ ("r10") = _v4; \
    register long _arg5 __asm__ ("r8") = _v5; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall4(num, arg1, arg2, arg3, arg4) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    long _ret; \
    register long _num __asm__ ("rax") = (num); \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    register long _arg4 __asm__ ("r10") = _v4; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall3(num, arg1, arg2, arg3) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    long _ret; \
    register long _num __asm__ ("rax") = (num); \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    register long _arg3 __asm__ ("rdx") = _v3; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall2(num, arg1, arg2) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    long _ret; \
    register long _num __asm__ ("rax") = (num); \
    register long _arg1 __asm__ ("rdi") = _v1; \
    register long _arg2 __asm__ ("rsi") = _v2; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), "r"(_arg2), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall1(num, arg1) \
({ \
    const long _v1 = (long)(arg1); \
    long _ret; \
    register long _num __asm__ ("rax") = (num); \
    register long _arg1 __asm__ ("rdi") = _v1; \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "r"(_arg1), \
          "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#define syscall0(num) \
({ \
    long _ret; \
    register long _num __asm__ ("rax") = (num); \
    \
    __asm__ volatile ( \
        "syscall\n" \
        : "=a"(_ret) \
        : "0"(_num) \
        : "rcx", "r11", "memory", "cc" \
    ); \
    _ret; \
})
#elif defined(__aarch64__) | |
#define O_DIRECTORY 040000 | |
#define SYS_read 63 | |
#define SYS_write 64 | |
#define SYS_openat 56 | |
#define SYS_getdents64 61 | |
#define SYS_newfstatat 79 | |
#define SYS_clone3 435 | |
#define SYS_waitid 95 | |
#define SYS_execve 221 | |
#define SYS_execveat 281 | |
#define SYS_exit 93 | |
#define SYS_getcwd 17 | |
#define SYS_chdir 49 | |
/*
 * aarch64 (asm-generic) kernel layout of `struct stat`, as written by
 * newfstatat(2); the kernel stores exactly 128 bytes.
 *
 * Field widths are spelled out with primitive types because the typedefs at
 * the top of this file are sized for x86_64: on aarch64 st_nlink and
 * st_blksize are 32-bit and st_dev/st_rdev are 64-bit.  With the old
 * typedef-based definition every member after st_mode sat at the wrong
 * offset.
 */
struct stat {
    unsigned long st_dev;
    unsigned long st_ino;
    unsigned int st_mode;      /* permission bits tested by traverse() */
    unsigned int st_nlink;     /* 32-bit on this ABI */
    unsigned int st_uid;
    unsigned int st_gid;
    unsigned long st_rdev;
    unsigned long __pad0;
    long st_size;
    int st_blksize;            /* 32-bit on this ABI */
    int __pad1;
    long st_blocks;
    struct timespec st_atim;
    struct timespec st_mtim;
    struct timespec st_ctim;
    unsigned __unused[2];
};
_Static_assert(sizeof(struct stat) == 128,
               "struct stat must match the 128-byte aarch64 kernel layout");
/*
 * Raw aarch64 Linux system-call wrappers: syscallN(num, arg1..argN).
 *
 * The call number goes in x8 and the arguments in x0..x5; `svc #0` enters
 * the kernel and the result comes back in x0, which is the value each macro
 * evaluates to (negative on failure).
 *
 * Every argument expression is first evaluated into an ordinary temporary
 * (_v1.._v6) and only then copied into its register variable.  Initialising
 * the register variables directly from the argument expressions is unsafe:
 * an expression containing a function call, e.g.
 * syscall3(SYS_write, 1, cwd, str_len(cwd)), may clobber registers that were
 * already loaded for earlier arguments.
 */
#define syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    const long _v6 = (long)(arg6); \
    register long _num __asm__ ("x8") = (num); \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    register long _arg4 __asm__ ("x3") = _v4; \
    register long _arg5 __asm__ ("x4") = _v5; \
    register long _arg6 __asm__ ("x5") = _v6; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "r"(_arg6), "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall5(num, arg1, arg2, arg3, arg4, arg5) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    const long _v5 = (long)(arg5); \
    register long _num __asm__ ("x8") = (num); \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    register long _arg4 __asm__ ("x3") = _v4; \
    register long _arg5 __asm__ ("x4") = _v5; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall4(num, arg1, arg2, arg3, arg4) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    const long _v4 = (long)(arg4); \
    register long _num __asm__ ("x8") = (num); \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    register long _arg4 __asm__ ("x3") = _v4; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall3(num, arg1, arg2, arg3) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    const long _v3 = (long)(arg3); \
    register long _num __asm__ ("x8") = (num); \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    register long _arg3 __asm__ ("x2") = _v3; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall2(num, arg1, arg2) \
({ \
    const long _v1 = (long)(arg1); \
    const long _v2 = (long)(arg2); \
    register long _num __asm__ ("x8") = (num); \
    register long _arg1 __asm__ ("x0") = _v1; \
    register long _arg2 __asm__ ("x1") = _v2; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), "r"(_arg2), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall1(num, arg1) \
({ \
    const long _v1 = (long)(arg1); \
    register long _num __asm__ ("x8") = (num); \
    register long _arg1 __asm__ ("x0") = _v1; \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_arg1), \
          "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#define syscall0(num) \
({ \
    register long _num __asm__ ("x8") = (num); \
    register long _arg1 __asm__ ("x0"); \
    \
    __asm__ volatile ( \
        "svc #0\n" \
        : "=r"(_arg1) \
        : "r"(_num) \
        : "memory", "cc" \
    ); \
    _arg1; \
})
#else | |
#error "Unsupported architecture" | |
#endif | |
union sigval { | |
int sival_int; | |
void *sival_ptr; | |
}; | |
typedef struct { | |
int si_signo; | |
int si_code; | |
union sigval si_value; | |
int si_errno; | |
int si_pid; | |
uid_t si_uid; | |
void *si_addr; | |
int si_status; | |
int si_band; | |
} siginfo_t; | |
#define BUF_SIZE 4096 | |
#define MAX_TOKENS 64 | |
#define BUILTIN_LEN 5 | |
static const char PATH_PREFIX[] = "PATH="; | |
static const char HOME_PREFIX[] = "HOME="; | |
#define PWD "pwd" | |
#define CD "cd" | |
#define EXIT "exit" | |
#define ECHO "echo" | |
#define TYPE "type" | |
const char *BUILTINS[BUILTIN_LEN] = {ECHO, EXIT, TYPE, PWD, CD}; | |
const int pl = sizeof(PATH_PREFIX); | |
char* PATH = 0; | |
char* HOME = 0; | |
char command[BUF_SIZE]; | |
char pathbuf[BUF_SIZE]; | |
char *pathargv[MAX_TOKENS] = {0}; | |
int pc = 0; | |
#define SP(s) (s) " " | |
#define SPL(s) sizeof((s)) + 1 | |
/*
 * Returns 1 when the first (l1 - 1) characters of s1 also start s2, else 0.
 * l1 is the size of s1 including its terminator (callers pass sizeof()),
 * so the terminator itself is not compared.  s2 ending early counts as a
 * mismatch.
 */
int is_prefix(const char *s1, const char *s2, const int l1) {
    const char *want = s1;
    const char *have = s2;
    int remaining = l1 - 1;

    while (remaining > 0) {
        if (*have == '\0' || *want != *have) {
            return 0;
        }
        want++;
        have++;
        remaining--;
    }
    return 1;
}
// From https://stackoverflow.com/a/34873763
/*
 * Three-way comparison of two NUL-terminated strings, byte by byte as
 * unsigned char.  Returns -1, 0 or 1.
 */
int str_cmp(const char *s1, const char *s2) {
    const unsigned char *left = (const unsigned char *)s1;
    const unsigned char *right = (const unsigned char *)s2;

    for (; *left != 0 && *left == *right; left++, right++) {
    }
    if (*left > *right) {
        return 1;
    }
    if (*right > *left) {
        return -1;
    }
    return 0;
}
/* Number of bytes in `str` before its NUL terminator. */
long str_len(const char *str) {
    long count = 0;
    while (str[count] != '\0') {
        count++;
    }
    return count;
}
/*
 * Parses an optionally signed decimal integer from the start of `str`.
 *
 * Leading whitespace (space, \t, \n, \v, \f, \r) is skipped, one '+' or '-'
 * is accepted, and parsing stops at the first non-digit.  Returns 0 when no
 * digits are present.  A null `str` also yields 0: the `exit` builtin passes
 * argv[1] straight through, and that is null when no status is given.
 */
long a_to_l(const char *str) {
    long res = 0;
    int sign = 1;

    if (!str) {
        return 0;
    }
    // Skip leading whitespace
    while (*str == ' ' || *str == '\t' || *str == '\n' ||
           *str == '\v' || *str == '\f' || *str == '\r') {
        str++;
    }
    if (*str == '-') {
        sign = -1;
        str++;
    } else if (*str == '+') {
        str++;
    }
    while (*str >= '0' && *str <= '9') {
        res = res * 10 + (*str - '0');
        str++;
    }
    return sign * res;
}
/*
 * Position of the first occurrence of `c` in `str`, or -1 when `str` does
 * not contain it.  The terminator itself is never matched.
 */
long index_of(const char *str, char c) {
    for (long pos = 0; str[pos] != '\0'; pos++) {
        if (str[pos] == c) {
            return pos;
        }
    }
    return -1;
}
/*
 * Writes the decimal representation of `value` into `str` (NUL-terminated)
 * and returns `str`.  The caller must supply a buffer large enough for the
 * sign, all digits and the terminator (21 bytes covers a 64-bit long).
 *
 * The magnitude is computed in unsigned arithmetic so the most negative
 * long converts correctly; negating it as a signed value would overflow.
 */
char *l_to_a(long value, char *str) {
    char *p = str;
    char *start = str;
    const int is_negative = value < 0;
    unsigned long magnitude = is_negative ? 0UL - (unsigned long)value
                                          : (unsigned long)value;

    // Emit digits least-significant first
    do {
        *p++ = (char)('0' + (magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);
    if (is_negative) {
        *p++ = '-';
    }
    *p = '\0';

    // Reverse the string in-place
    char *end = p - 1;
    while (start < end) {
        char tmp = *start;
        *start++ = *end;
        *end-- = tmp;
    }
    return str;
}
/* | |
* memcpy, memset, memmove and memcmp seem to be needed by both gcc and clang even for no include code | |
* Copied from nolibc | |
*/ | |
__attribute__((weak,unused,section(".text.nolibc_memcpy"))) | |
void *memcpy(void *dst, const void *src, size_t len) { | |
size_t pos = 0; | |
while (pos < len) { | |
((char *)dst)[pos] = ((const char *)src)[pos]; | |
pos++; | |
} | |
return dst; | |
} | |
__attribute__((weak,unused,section(".text.nolibc_memset"))) | |
void *memset(void *dst, int b, size_t len) { | |
char *p = dst; | |
while (len--) { | |
/* prevent gcc from recognizing memset() here */ | |
__asm__ volatile(""); | |
*(p++) = b; | |
} | |
return dst; | |
} | |
/* might be ignored by the compiler without -ffreestanding, then found as | |
* missing. | |
*/ | |
__attribute__((weak,unused,section(".text.nolibc_memmove"))) | |
void *memmove(void *dst, const void *src, size_t len) { | |
size_t dir, pos; | |
pos = len; | |
dir = -1; | |
if (dst < src) { | |
pos = -1; | |
dir = 1; | |
} | |
while (len) { | |
pos += dir; | |
((char *)dst)[pos] = ((const char *)src)[pos]; | |
len--; | |
} | |
return dst; | |
} | |
static __attribute__((unused)) | |
int memcmp(const void *s1, const void *s2, size_t n) { | |
size_t ofs = 0; | |
int c1 = 0; | |
while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) { | |
ofs++; | |
} | |
return c1; | |
} | |
int tokenize(const char *buffer, char *scratch, char *argv[], const long n, const char delim) { | |
int si = 0; // index in scratch | |
int argc = 0; // number of tokens | |
for (int i = 0; i < n;) { | |
// Skip leading spaces | |
while (i < n && buffer[i] == delim) {i++;} | |
if (i >= n) {break;} | |
if (argc >= MAX_TOKENS) {break;} | |
argv[argc++] = &scratch[si]; | |
// Copy characters until next space or end of buffer | |
for (; i < n && buffer[i] != delim; i++){ | |
if (si >= BUF_SIZE - 1) {break;} // leave space for null terminator | |
scratch[si++] = buffer[i]; | |
} | |
scratch[si++] = 0; // null-terminate the token | |
if (si >= BUF_SIZE) {break;} | |
} | |
return argc; | |
} | |
#define GETDENTS_BUF_SIZE (1024 * 64) | |
// Return dirfd if found otherwise return -1 | |
// If return_dirfd is 0, will return index of argv where fname is found | |
int traverse(int argc, char *argv[], char *fname, int return_dirfd ) { | |
int dirfd; | |
char buf[GETDENTS_BUF_SIZE]; | |
long nread; | |
struct linux_dirent64 *d; | |
long fl = str_len(fname); | |
for (int i = 0; i < argc; i++) { | |
dirfd = syscall3(SYS_openat, 0, argv[i], O_RDONLY | O_DIRECTORY); | |
if (dirfd < 0) { | |
continue; | |
} | |
for (;;) { | |
nread = syscall3(SYS_getdents64, dirfd, buf, GETDENTS_BUF_SIZE); | |
if (nread == -1) {return -1;} | |
if (nread == 0) {break;} | |
for (long bpos = 0; bpos < nread;) { | |
d = (struct linux_dirent64 *)(buf + bpos); | |
bpos += d->d_reclen; | |
if (d->d_type != DT_REG && d->d_type != DT_LNK) { continue;} | |
// Compare with fname and then stat | |
long dl = str_len(d->d_name); | |
if (dl != fl) { continue;} | |
int cmp = 1; | |
for (int j = 0; j < fl; j++) { | |
if (d->d_name[j] != fname[j]) { | |
cmp = 0; | |
break; | |
} | |
} | |
if (cmp == 0) {continue;} | |
struct stat sb; | |
int status = syscall4(SYS_newfstatat, dirfd, d->d_name, &sb, 0); | |
if (status == 0 && sb.st_mode & S_IXUSR) { | |
// Return fd of the path directory which contains the executable | |
return return_dirfd ? dirfd : i; | |
} | |
// If file with matching name doesn't have correct permissions, look in another dir | |
break; | |
} | |
} | |
} | |
return -1; | |
} | |
int child_func(char* argv[]) { | |
if (index_of(argv[0], '/') != -1) { | |
return syscall3(SYS_execve, argv[0], argv, NULL); | |
} | |
int dirfd = traverse(pc, pathargv, argv[0], 1); | |
if (dirfd == -1) {return -1;} | |
return syscall5(SYS_execveat, dirfd, argv[0], argv, 0, 0); | |
} | |
int handle_builtins(char* argv[], int nread) { | |
char* word = argv[0]; | |
if (str_cmp(ECHO, word) == 0) { | |
// TODO: echo rest of cmd | |
long offset = argv[1] - argv[0]; | |
if (offset <= 0 || offset >= nread) { | |
syscall3(SYS_write, 1, "\n", 1); | |
return 1; | |
} | |
char* rest = command + offset; | |
syscall3(SYS_write, 1, rest, nread - offset); | |
syscall3(SYS_write, 1, "\n", 1); | |
return 1; | |
} | |
if (str_cmp(EXIT, word) == 0) { | |
long st = a_to_l(argv[1]); | |
syscall1(SYS_exit, st); | |
} | |
if (str_cmp(PWD, word) == 0) { | |
char cwd[1024]; | |
long st = syscall2(SYS_getcwd, cwd, sizeof(cwd)); | |
if (st <= 0) { | |
syscall1(SYS_exit, st); | |
} | |
syscall3(SYS_write, 1, cwd, str_len(cwd)); | |
syscall3(SYS_write, 1, "\n", 1); | |
return 1; | |
} | |
if (str_cmp(CD, word) == 0) { | |
if (argv[1] == 0) { | |
// Just 'cd' means go to home directory | |
syscall1(SYS_chdir, HOME); | |
return 1; | |
} | |
if (argv[2]) { | |
syscall3(SYS_write, 1, "cd: too many arguments\n", 23); | |
return 1; | |
} | |
char* dir = argv[1]; | |
if (dir[0] == '~') { | |
if (dir[1] == 0) { | |
syscall1(SYS_chdir, HOME); | |
} | |
else if (dir[1] == '/') { | |
char home_buf[1024] = {0}; | |
int i = 0; | |
char* home = HOME; | |
while (*home) { | |
home_buf[i++] = *home; | |
home++; | |
} | |
dir++; | |
while (*dir) { | |
home_buf[i++] = *dir; | |
dir++; | |
} | |
syscall3(SYS_write, 1, home_buf, str_len(home_buf)); | |
syscall1(SYS_chdir, home_buf); | |
} else { | |
syscall3(SYS_write, 1, argv[1], str_len(argv[1])); | |
syscall3(SYS_write, 1, ": No such file or directory\n", 28); | |
} | |
return 1; | |
} | |
syscall1(SYS_chdir, dir); | |
return 1; | |
} | |
if (str_cmp(TYPE, word) == 0) { | |
argv++; | |
while (*argv) { | |
int is_builtin = 0; | |
for (int i = 0; i < BUILTIN_LEN; ++i) { | |
if (str_cmp(BUILTINS[i], *argv) == 0) { | |
syscall3(SYS_write, 1, *argv, str_len(*argv)); | |
syscall3(SYS_write, 1, " is a shell builtin\n", 20); | |
is_builtin = 1; | |
break; | |
} | |
} | |
if (!is_builtin) { | |
int idx = traverse(pc, pathargv, *argv, 0); | |
syscall3(SYS_write, 1, *argv, str_len(*argv)); | |
if (idx == -1) { | |
syscall3(SYS_write, 1, ": not found\n", 12); | |
} else { | |
syscall3(SYS_write, 1, " is ", 4); | |
syscall3(SYS_write, 1, pathargv[idx], str_len(pathargv[idx])); | |
syscall3(SYS_write, 1, "/", 1); | |
syscall3(SYS_write, 1, *argv, str_len(*argv)); | |
syscall3(SYS_write, 1, "\n", 1); | |
} | |
} | |
argv++; | |
} | |
return 1; | |
} | |
return 0; | |
} | |
int main(int argc, char *argv[], char *envp[]) { | |
while (*envp) { | |
if (is_prefix(HOME_PREFIX, *envp, pl)) { | |
HOME = *envp + pl - 1; | |
} | |
if (is_prefix(PATH_PREFIX, *envp, pl)) { | |
PATH = *envp + pl - 1; | |
} | |
envp++; | |
} | |
if (PATH == 0) { return -1;} | |
long sl = str_len(PATH); | |
pc = tokenize(PATH, pathbuf, pathargv, sl, ':'); | |
for (;;) { | |
syscall3(SYS_write, 1, "$ ", 2); | |
int nread = syscall3(SYS_read, 0, command, BUF_SIZE); | |
if (nread <= 0) { syscall1(SYS_exit, nread);} | |
if (nread == 1) {continue;} | |
command[nread - 1] = 0; | |
char buf[BUF_SIZE]; | |
char *cmdargv[MAX_TOKENS] = {0}; | |
tokenize(command, buf, cmdargv, nread - 1, ' '); | |
if (handle_builtins(cmdargv, nread)) { | |
continue; | |
} | |
struct clone_args args = {0}; | |
args.exit_signal = SIGCHLD; | |
int pid = syscall2(SYS_clone3, &args, sizeof(args)); | |
if (pid == -1) { | |
syscall1(SYS_exit, pid); | |
} | |
if (pid == 0) { | |
// Child process | |
child_func(cmdargv); | |
break; | |
} | |
else { | |
// Parent | |
siginfo_t info; // not used | |
syscall4(SYS_waitid, P_ALL, 0, &info, WEXITED); | |
} | |
} | |
return 0; | |
} | |
void pre_main(int *argc_ptr) { | |
int argc = argc_ptr[0]; | |
char** argv = (char**)(argc_ptr) + 1; | |
char** envp = argv + argc + 1; | |
//int sl = str_len(envp[0]); | |
//syscall3(SYS_write, 1, envp[0], sl); | |
int status = main(argc, argv, envp); | |
syscall1(SYS_exit, status); | |
} | |
__attribute__((section(".text.startup"))) __attribute__((naked)) __attribute__((__optimize__("-fno-stack-protector"))) void _start(void) { | |
#if defined(__x86_64__) | |
asm volatile ( | |
"xorl %%ebp, %%ebp\n\t" | |
"movq %%rsp, %%rdi\n\t" | |
"andq $-16, %%rsp\n\t" | |
"callq %P0\n\t" | |
: | |
: [pre_main] "X"(&pre_main) | |
:"memory" | |
); | |
#elif defined(__aarch64__) | |
asm volatile ( | |
"mov fp, #0\n\t" | |
"mov lr, #0\n\t" | |
"mov x0, sp\n\t" | |
"and sp, x0, #-16\n\t" | |
"bl %0\n\t" | |
: | |
: [pre_main] "X"(&pre_main) | |
:"x11", "memory" | |
); | |
#else | |
#error "Unsupported architecture" | |
#endif | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A minimal shell with no allocations | |
const std = @import("std"); | |
const builtin = @import("builtin"); | |
const linux = std.os.linux; | |
const elf = std.elf; | |
const native_arch = builtin.cpu.arch; | |
// stdin,out,err | |
const BUF_SIZE = 1024; | |
const MAX_TOKENS = 64; | |
var input = [_:0]u8{0} ** BUF_SIZE; | |
const STDIN = 0; | |
const STDOUT = 1; | |
const STDERR = 2; | |
const nl: []const u8 = "\n"; | |
const prompt: []const u8 = "$ "; | |
// path | |
var path = [_:0]u8{0} ** 4096; | |
var pl: usize = 0; | |
const path_prefix: []const u8 = "PATH="; | |
// Clone stuff | |
const CloneArgs = extern struct { | |
flags: u64, | |
pidfd: u64, | |
child_tid: u64, | |
parent_tid: u64, | |
exit_signal: u64, | |
stack: u64, | |
stack_size: u64, | |
tls: u64, | |
set_tid: u64, | |
set_tid_size: u64, | |
cgroup: u64, | |
}; | |
pub export fn _start() callconv(.naked) noreturn { | |
if (builtin.unwind_tables != .none or !builtin.strip_debug_info) asm volatile (switch (native_arch) { | |
.aarch64 => ".cfi_undefined lr", | |
.x86_64 => ".cfi_undefined %%rip", | |
else => @compileError("unsupported arch"), | |
}); | |
asm volatile (switch (native_arch) { | |
.x86_64 => | |
\\ xorl %%ebp, %%ebp | |
\\ movq %%rsp, %%rdi | |
\\ andq $-16, %%rsp | |
\\ callq %[posixCallMainAndExit:P] | |
, | |
.aarch64 => | |
\\ mov fp, #0 | |
\\ mov lr, #0 | |
\\ mov x0, sp | |
\\ and sp, x0, #-16 | |
\\ b %[posixCallMainAndExit] | |
, | |
else => @compileError("unsupported arch"), | |
} | |
: | |
: [_start] "X" (&_start), | |
[posixCallMainAndExit] "X" (&posixCallMainAndExit), | |
); | |
} | |
fn posixCallMainAndExit(argc_argv_ptr: [*]usize) callconv(.c) noreturn { | |
const argc = argc_argv_ptr[0]; | |
const argv = @as([*][*:0]u8, @ptrCast(argc_argv_ptr + 1)); | |
const envp_optional: [*:null]?[*:0]u8 = @ptrCast(@alignCast(argv + argc + 1)); | |
var envp_count: usize = 0; | |
while (envp_optional[envp_count]) |_| : (envp_count += 1) {} | |
const envp = @as([*][*:0]u8, @ptrCast(envp_optional))[0..envp_count]; | |
const auxv: [*]elf.Auxv = @ptrCast(@alignCast(envp.ptr + envp_count + 1)); | |
// var at_hwcap: usize = 0; | |
// const phdrs = init: { | |
// var i: usize = 0; | |
// var at_phdr: usize = 0; | |
// var at_phnum: usize = 0; | |
// while (auxv[i].a_type != elf.AT_NULL) : (i += 1) { | |
// switch (auxv[i].a_type) { | |
// elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val, | |
// elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val, | |
// elf.AT_HWCAP => at_hwcap = auxv[i].a_un.a_val, | |
// else => continue, | |
// } | |
// } | |
// break :init @as([*]elf.Phdr, @ptrFromInt(at_phdr))[0..at_phnum]; | |
// }; | |
linux.elf_aux_maybe = auxv; | |
//if (!builtin.single_threaded) { | |
// linux.tls.initStatic(phdrs); | |
//} | |
// const opt_init_array_start = @extern([*]*const fn () callconv(.c) void, .{ | |
// .name = "__init_array_start", | |
// .linkage = .weak, | |
// }); | |
// const opt_init_array_end = @extern([*]*const fn () callconv(.c) void, .{ | |
// .name = "__init_array_end", | |
// .linkage = .weak, | |
// }); | |
// if (opt_init_array_start) |init_array_start| { | |
// const init_array_end = opt_init_array_end.?; | |
// const slice = init_array_start[0 .. init_array_end - init_array_start]; | |
// for (slice) |func| func(); | |
// } | |
std.os.argv = argv[0..argc]; | |
std.os.environ = envp; | |
main(); | |
} | |
pub fn main() noreturn { | |
const envp = std.os.environ; | |
// TODO vdso parsing to get faster time related syscalls via vDSO | |
const vdso_addr = linux.getauxval(std.elf.AT_SYSINFO_EHDR); | |
const elf_header = @as(*std.elf.Ehdr, @ptrFromInt(vdso_addr)); | |
println(&elf_header.e_ident, 16); | |
get_path(envp); | |
//println(&path, pl); | |
while (true) { | |
print(prompt.ptr, prompt.len); | |
const nread = linux.read(STDIN, &input, BUF_SIZE); | |
switch (nread) { | |
0 => linux.exit(0), | |
1 => continue, | |
else => {}, | |
} | |
var argv: [MAX_TOKENS:null]?[*:0]const u8 = undefined; | |
// const argv: [*:null]?[*:0]u8 = [_:null]?[*:0]u8{null}; | |
// println(&input, nread); | |
_ = tokenize_input(&input, &argv, nread); | |
var clone_args = std.mem.zeroes(CloneArgs); | |
clone_args.exit_signal = linux.SIG.CHLD; | |
// clone3 only available from Linux 5.1 onwards... | |
const pid = linux.syscall2(.clone3, @intFromPtr(&clone_args), @sizeOf(CloneArgs)); | |
if (pid == 0) { | |
// child process | |
try invoke_exec(&argv, envp); | |
break; | |
} else { | |
var sig_info = std.mem.zeroes(linux.siginfo_t); | |
const status: i32 = @intCast(std.os.linux.waitid(linux.P.ALL, 0, &sig_info, linux.W.EXITED)); | |
if (status != 0) { | |
println("WAIT_ERR", 8); | |
linux.exit(status); | |
} | |
} | |
} | |
linux.exit(0); | |
} | |
fn print(ptr: [*]const u8, count: usize) void { | |
_ = linux.write(STDOUT, ptr, count); | |
} | |
fn println(ptr: [*]const u8, count: usize) void { | |
print(ptr, count); | |
print(nl.ptr, nl.len); | |
} | |
/// Copies the value of the first `PATH=` environment entry into the global
/// `path` buffer and records its length in `pl`. Values longer than the
/// buffer are truncated. Leaves `path`/`pl` untouched when no entry matches.
fn get_path(envp: [][*:0]u8) void {
    for (envp) |e| {
        // Compare one byte at a time: a terminator never equals a prefix
        // byte, so entries shorter than the prefix are not read past their end.
        var k: usize = 0;
        while (k < path_prefix.len and e[k] == path_prefix[k]) : (k += 1) {}
        if (k != path_prefix.len) continue;

        const epath = e[path_prefix.len..];
        // Bounds check first, then read the source byte
        while (pl < path.len and epath[pl] != 0) {
            path[pl] = epath[pl];
            pl += 1;
        }
        break;
    }
}
// parse input
/// Splits the first `buf_len` bytes of `buf` in place on space, tab and
/// newline: every separator is overwritten with 0 and `argv[k]` is pointed at
/// the start of the k-th token. Runs of separators (including leading ones)
/// produce no empty tokens. At most `argv.len` tokens are stored; the slot
/// after the last one is set to null. Returns the number of tokens stored.
///
/// NOTE(review): a final token that is not followed by a separator is
/// terminated by writing 0 to `buf[buf_len]`; this assumes the buffer holds at
/// least `buf_len + 1` bytes, which looks true for the sentinel-terminated
/// `input` array that `main` passes in — confirm for any other caller.
fn tokenize_input(buf: [*:0]u8, argv: [:null]?[*:0]const u8, buf_len: usize) usize {
    var n: usize = 0;
    var token_start: ?usize = null; // index where the current token began
    var i: usize = 0;
    while (i < buf_len) : (i += 1) {
        const c = buf[i];
        const is_separator = c == ' ' or c == '\t' or c == '\n' or c == 0;
        if (!is_separator) {
            if (token_start == null) token_start = i;
            continue;
        }
        buf[i] = 0;
        if (token_start) |start| {
            if (n < argv.len) {
                argv[n] = @ptrCast(&buf[start]);
                n += 1;
            }
            token_start = null;
        }
    }
    if (token_start) |start| {
        buf[buf_len] = 0;
        if (n < argv.len) {
            argv[n] = @ptrCast(&buf[start]);
            n += 1;
        }
    }
    argv[n] = null;
    return n;
}
// exec | |
fn invoke_exec(argv: [*:null]?[*:0]const u8, envp: [][*:0]u8) !void { | |
const binary = argv[0].?; | |
// If command contains a '/', we assume it is an absolute or relative path | |
const binSpan = std.mem.sliceTo(binary, 0); | |
const cEnvp: [*:null]const ?[*:0]const u8 = @ptrCast(envp); | |
if (std.mem.indexOfScalar(u8, binSpan, '/')) |_| { | |
// std.debug.print("ptr: {}\n", .{@intFromPtr(argv)}); | |
const status: i32 = @intCast(linux.execve(binary, argv, cEnvp)); | |
// std.debug.print("Exec: {}\n", .{status}); | |
linux.exit(status); | |
} | |
// Open directories for each PATH directory and check for argv[0] | |
const dirfd = traversePath(binary); | |
if (dirfd == -1) { | |
println("NOT_FOUND", 9); | |
} else { | |
// println("FOUND", 5); | |
// [:null] ?[*:0]const u8 | |
// [*:null]const ?[*:0]const u8 | |
// [*:null]const ?[*:0]const u8 | |
// std.debug.print("ptr: {}\n", .{@intFromPtr(argv)}); | |
const status: i32 = @intCast(linux.syscall5(.execveat, @as(usize, @bitCast(@as(isize, dirfd))), @intFromPtr(binary), @intFromPtr(argv), 0, 0)); | |
linux.exit(status); | |
} | |
} | |
// traverses path to find binary
/// Walks the ':'-separated directories stored in the global `path` buffer
/// (splitting it in place by overwriting each ':' with 0) and looks for a
/// regular file or symlink named `binary` with the owner-execute bit set.
/// Returns an open descriptor for the directory containing it, or -1 when no
/// directory does. Directories that cannot be opened are skipped, and every
/// directory descriptor that is not returned is closed.
fn traversePath(binary: [*:0]const u8) i32 {
    var i: usize = 0;
    var offset: usize = 0;
    const getdents_buf_len = 1024 * 64;
    var getdents_buf: [getdents_buf_len]u8 align(@alignOf(linux.dirent64)) = undefined;
    // Explicitly comparing <= path_len here as otherwise we will skip the last directory in path
    while (i <= pl) : (i += 1) {
        if (path[i] != ':' and path[i] != 0) {
            continue;
        }
        path[i] = 0;
        if (path[offset] == 0) {
            println("ZERO_AT_OFFSET", 14);
            offset = i + 1;
            continue;
        }
        const pathSlice: [*:0]const u8 = path[offset..i :0];
        const mode: linux.O = .{ .DIRECTORY = true };
        offset = i + 1;
        // AT.FDCWD makes relative PATH entries resolve against the current directory
        const open_rc = linux.openat(linux.AT.FDCWD, pathSlice, mode, 0);
        // Raw syscalls report failure as a negated errno in the returned usize
        const open_signed: isize = @bitCast(open_rc);
        if (open_signed < 0) {
            continue;
        }
        const dirfd: i32 = @intCast(open_signed);
        while (true) {
            const nread = linux.getdents64(dirfd, &getdents_buf, getdents_buf_len);
            // 0 means end of directory; anything above the buffer size is an error value
            if (nread == 0 or nread > getdents_buf_len) {
                break;
            }
            var bpos: usize = 0;
            while (bpos < nread) {
                const dp = @as(*align(1) linux.dirent64, @ptrCast(&getdents_buf[bpos]));
                bpos = bpos + dp.reclen;
                if (dp.type != linux.DT.REG and dp.type != linux.DT.LNK) {
                    continue;
                }
                const fname = std.mem.sliceTo(@as([*:0]u8, @ptrCast(&dp.name)), 0);
                const bname = std.mem.sliceTo(binary, 0);
                if (!std.mem.eql(u8, fname, bname)) {
                    continue;
                }
                var stat = std.mem.zeroes(linux.Stat);
                const status = linux.fstatat(dirfd, binary, &stat, 0);
                if (status == 0 and (stat.mode & linux.S.IXUSR != 0)) {
                    return dirfd;
                }
            }
        }
        // Not found here: release the descriptor before trying the next directory
        _ = linux.close(dirfd);
    }
    return -1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment