Last active
June 6, 2021 01:14
-
-
Save chris-se/e0fbc073fcbd9ac2d7ae to your computer and use it in GitHub Desktop.
A very simple initrd implementation that mounts / and /usr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Author: Christian Seiler <[email protected]>
 * License: GPLv3+
 *
 * IMPORTANT NOTE:
 * This was just a proof of concept, there's a better version available from
 * https://github.com/chris-se/tiny-initrd
 *
 * Usage: musl-gcc -O2 -Wall -Wextra -static -o init simple_initrd.c
 *        mkdir initramfs initramfs/{dev,proc,target}
 *        cp init initramfs/
 *        strip initramfs/init
 *        cd initramfs ; find . | cpio --quiet -R 0:0 -o -H newc | gzip > ../initrd.img ; cd ..
 *
 * (You don't HAVE to use musl-gcc, but if you want your binary to be small
 * then you really should use something like musl, dietlibc, uclibc or
 * similar. In principle this could also work with dynamic linking if you
 * add the dynamic linker to your initrd image + the libraries required,
 * but again, this would defeat the purpose of having a very small binary.)
 *
 * Specify root=/dev/... and x-mount.usr=/dev/... on kernel command line to have
 * this initrd mount / and /usr before the system is booted. x-mount.usr= is
 * optional.
 *
 * This relies on devtmpfs and kernel device names. root=UUID=... is not supported,
 * neither is root=/dev/disk/by-... or similar symlinks. Only native kernel device
 * names are supported.
 *
 * Everything required for mounting / and /usr needs to be compiled into the
 * kernel, no modules will be loaded.
 *
 * This is basically meant to replace the no-initrd situation, but also mount /usr.
 * It's not supposed to support fancy things. If you need that, use a proper initrd
 * and this code isn't for you.
 *
 * First draft, designed to be small in code size. Could probably be improved
 * quite a bit...
 *
 * compile-tested on amd64, but should be portable. Tell me if it isn't.
 */
#include <sys/types.h> | |
#include <sys/stat.h> | |
#include <sys/mount.h> | |
#include <sys/uio.h> | |
#include <sys/time.h> | |
#include <errno.h> | |
#include <string.h> | |
#include <unistd.h> | |
#include <fcntl.h> | |
#include <stdlib.h> | |
#include <time.h> | |
/*
 * Compile-time configuration. Every value can be overridden from the
 * compiler command line (-DNAME=value).
 */

/* File the kernel command line is read from. */
#ifndef CMDLINE_FILE
#define CMDLINE_FILE "/proc/cmdline"
#endif /* !defined(CMDLINE_FILE) */

/* Size of the buffer the kernel command line is read into (including
 * the terminating NUL byte). */
#ifndef CMDLINE_BUFSZ
#define CMDLINE_BUFSZ 4096
#endif /* !defined(CMDLINE_BUFSZ) */

/* File the list of filesystem types known to the kernel is read from. */
#ifndef FSTYPES_FILE
#define FSTYPES_FILE "/proc/filesystems"
#endif /* !defined(FSTYPES_FILE) */

/* Size of the buffer FSTYPES_FILE is read into (including the
 * terminating NUL byte). */
#ifndef FSTYPES_BUFSZ
#define FSTYPES_BUFSZ 16384
#endif /* !defined(FSTYPES_BUFSZ) */

/* Size of the buffers holding the root=, x-mount.usr= and init= values
 * (including the terminating NUL byte). */
#ifndef FS_BUFSZ
#define FS_BUFSZ 256
#endif /* !defined(FS_BUFSZ) */

/* Mount options passed when mounting devtmpfs on /dev. */
#ifndef DEVTMPFS_MOUNTOPTS
#define DEVTMPFS_MOUNTOPTS "size=10240k,mode=0755"
#endif /* !defined(DEVTMPFS_MOUNTOPTS) */

/* Number of seconds to wait for the / and /usr devices to appear. */
#ifndef FSDEV_TIMEOUT
#define FSDEV_TIMEOUT 180
#endif /* !defined(FSDEV_TIMEOUT) */

/* Maximum number of filesystem types remembered from FSTYPES_FILE. */
#ifndef N_MAX_SUPPORTED_FILESYSTEMS
#define N_MAX_SUPPORTED_FILESYSTEMS 256
#endif /* !defined(N_MAX_SUPPORTED_FILESYSTEMS) */

/* Maximum length of a filesystem type name (including the terminating
 * NUL byte); longer names are ignored with a warning. */
#ifndef N_MAX_FSTYPELEN
#define N_MAX_FSTYPELEN 32
#endif /* !defined(N_MAX_FSTYPELEN) */
static void panic(int e, const char *message) __attribute__((noreturn));

/*
 * Report a fatal error on stderr and terminate the process with exit
 * status 1. Never returns.
 *
 * e:       errno value describing the failure, or 0 if there is none.
 * message: human-readable description of what went wrong.
 *
 * Output is "<message>: <strerror(e)>\n" when e is non-zero and just
 * "<message>\n" otherwise, so errno-less messages do not end in a
 * dangling ": ".
 */
static void panic(int e, const char *message)
{
    struct iovec iov[4];
    int n = 0;

    /* Don't use stdio functions, because we link statically
     * and they bloat the binary. */
    iov[n].iov_base = (char *)message;
    iov[n].iov_len = strlen(message);
    n++;

    if (e != 0) {
        const char *error = strerror(e);

        iov[n].iov_base = (char *)": ";
        iov[n].iov_len = 2;
        n++;
        iov[n].iov_base = (char *)error;
        iov[n].iov_len = strlen(error);
        n++;
    }

    iov[n].iov_base = (char *)"\n";
    iov[n].iov_len = 1;
    n++;

    /* Best effort: if even stderr is unusable there is nothing left to do. */
    writev(2, iov, n);
    _exit(1);
}
/*
 * Write a warning made of three concatenated string pieces, followed by
 * a newline, to stderr using a single writev() call.
 *
 * p1, p2, p3: NUL-terminated pieces, written in this order.
 */
void warn3(const char *p1, const char *p2, const char *p3)
{
    const char *pieces[4];
    struct iovec vec[4];
    int k;

    pieces[0] = p1;
    pieces[1] = p2;
    pieces[2] = p3;
    pieces[3] = "\n";

    /* stdio is avoided on purpose: the binary is linked statically and
     * stdio would make it considerably larger. */
    for (k = 0; k < 4; k++) {
        vec[k].iov_base = (char *)pieces[k];
        vec[k].iov_len = strlen(pieces[k]);
    }
    writev(2, vec, 4);
}
static char rootfs[FS_BUFSZ]; | |
static char usrfs[FS_BUFSZ]; | |
static char init_binary[FS_BUFSZ]; | |
static char supported_filesystems[N_MAX_SUPPORTED_FILESYSTEMS][N_MAX_FSTYPELEN]; | |
static int n_supported_filesystems; | |
static int do_mount(const char *source, const char *target, | |
const char *filesystemtype, unsigned long mountflags, | |
const void *data) | |
{ | |
/* We need to loop through filesystem types as the kernel doesn't do | |
* that for us if we call mount(). libmount does something similar, | |
* but we don't want to link against it. */ | |
if (!filesystemtype && !(mountflags & (MS_MOVE | MS_REMOUNT | MS_BIND))) { | |
int i, rc = -1; | |
errno = EINVAL; | |
for (i = 0; i < n_supported_filesystems; i++) { | |
rc = mount(source, target, supported_filesystems[i], mountflags | MS_SILENT, data); | |
if (rc == 0) | |
return 0; | |
} | |
return rc; | |
} | |
return mount(source, target, filesystemtype, mountflags, data); | |
} | |
static void parse_cmdline() | |
{ | |
int fd; | |
ssize_t r; | |
char buf[CMDLINE_BUFSZ] = { 0 }; | |
char *saveptr; | |
char *token; | |
fd = open(CMDLINE_FILE, O_RDONLY); | |
if (fd < 0) | |
panic(errno, "Couldn't open " CMDLINE_FILE); | |
r = read(fd, buf, CMDLINE_BUFSZ - 1); | |
if (r < 0) | |
panic(errno, "Couldn't read " CMDLINE_FILE); | |
close(fd); | |
for (token = strtok_r(buf, " \t\r\n", &saveptr); token != NULL; token = strtok_r(NULL, " \t\r\n", &saveptr)) { | |
if (!strncmp(token, "root=", 5)) { | |
token += 5; | |
if (strlen(token) > FS_BUFSZ - 1) | |
panic(0, "root= too long"); | |
strncpy(rootfs, token, FS_BUFSZ); | |
} else if (!strncmp(token, "x-mount.usr=", 12)) { | |
token += 12; | |
if (strlen(token) > FS_BUFSZ - 1) | |
panic(0, "x-mount.usr= too long"); | |
strncpy(usrfs, token, FS_BUFSZ); | |
} else if (!strncmp(token, "init=", 5)) { | |
token += 5; | |
if (strlen(token) > FS_BUFSZ - 1) | |
panic(0, "init= too long"); | |
strncpy(init_binary, token, FS_BUFSZ); | |
} | |
} | |
} | |
static void parse_filesystems() | |
{ | |
int fd; | |
ssize_t r; | |
char buf[FSTYPES_BUFSZ] = { 0 }; | |
char *saveptr; | |
char *line; | |
fd = open(FSTYPES_FILE, O_RDONLY); | |
if (fd < 0) | |
panic(errno, "Couldn't open " FSTYPES_FILE); | |
r = read(fd, buf, FSTYPES_BUFSZ - 1); | |
if (r < 0) | |
panic(errno, "Couldn't read " FSTYPES_FILE); | |
close(fd); | |
for (line = strtok_r(buf, "\n", &saveptr); line != NULL; line = strtok_r(NULL, "\n", &saveptr)) { | |
if (!strncmp(line, "nodev", 5)) | |
continue; | |
while (line[0] == ' ' || line[0] == '\t') | |
++line; | |
if (strlen(line) > N_MAX_FSTYPELEN - 1) { | |
warn3("[initrd] filesystem type ", line, " is too long for us, ignoring."); | |
continue; | |
} | |
if (n_supported_filesystems == N_MAX_SUPPORTED_FILESYSTEMS) { | |
warn3("[initrd] kernel supports too many filesystem types, ignoring ", line, ", sorry."); | |
continue; | |
} | |
strncpy(supported_filesystems[n_supported_filesystems++], line, N_MAX_FSTYPELEN - 1); | |
} | |
} | |
/*
 * Try to replace the current process with the given init binary.
 *
 * orig_argc, orig_argv: the arguments this program was started with;
 *                       everything except orig_argv[0] is passed on.
 * binary:               path of the program to execute; it is also used
 *                       as argv[0] of the new program.
 *
 * Returns only if execv() failed, so the caller can try the next
 * candidate. Panics if there are too many arguments to pass on.
 */
static void try_exec(int orig_argc, char *const orig_argv[], const char *binary)
{
    char *argv[256];
    int i;

    /* One slot must remain free for the terminating NULL pointer. */
    if (orig_argc > (int)(sizeof argv / sizeof argv[0]) - 1)
        panic(0, "Too many arguments to init.");

    /* argv[0] has to name the binary that is actually executed, not the
     * (possibly empty) init= value: multi-call binaries and shells look
     * at argv[0] to decide what to do. */
    argv[0] = (char *)binary;
    for (i = 1; i < orig_argc; i++)
        argv[i] = orig_argv[i];
    argv[i] = NULL;

    execv(binary, argv);
}
static void wait_for_filesystems() | |
{ | |
/* We don't have udev running, but there is devtmpfs, so we just | |
* do a very simple and stupid polling loop to wait until the | |
* requested devices are present. This could be improved a bit, | |
* but for now it's good enough. */ | |
int have_devices = 0; | |
time_t start; | |
struct timeval tv; | |
int r; | |
r = gettimeofday(&tv, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't determine current time for timeout."); | |
start = tv.tv_sec; | |
have_devices = (access(rootfs, F_OK) == 0 && (strlen(usrfs) == 0 || access(usrfs, F_OK) == 0)); | |
while (!have_devices) { | |
r = gettimeofday(&tv, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't determine current time for timeout"); | |
if (tv.tv_sec - start > FSDEV_TIMEOUT) | |
panic(0, "Timeout while waiting for devices for / and /usr filesystems to appear " | |
"(did you specify the correct ones?)"); | |
/* Sleep for 50 milliseconds, then poll again. */ | |
struct timespec req = { 0, 50 * 1000 * 1000 }; | |
struct timespec rem; | |
(void)nanosleep(&req, &rem); | |
have_devices = (access(rootfs, F_OK) == 0 && (strlen(usrfs) == 0 || access(usrfs, F_OK) == 0)); | |
} | |
} | |
int main(int argc, char **argv) | |
{ | |
int r; | |
r = do_mount("proc", "/proc", "proc", MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't mount /proc filesystem"); | |
r = do_mount("udev", "/dev", "devtmpfs", 0, DEVTMPFS_MOUNTOPTS); | |
if (r < 0) | |
panic(errno, "Couldn't mount devtmpfs-/dev filesystem"); | |
parse_filesystems(); | |
parse_cmdline(); | |
if (!strlen(rootfs)) | |
panic(0, "No root filesystem (root=) specified"); | |
if (rootfs[0] != '/') | |
panic(0, "root filesystem (root=) must be a (non-symlink) kernel device path"); | |
if (strlen(usrfs) && usrfs[0] != '/') | |
panic(0, "/usr filesystem (x-mount.usr=) must be a (non-symlink) kernel device path"); | |
wait_for_filesystems(); | |
r = do_mount(rootfs, "/target", NULL, MS_RDONLY, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't mount / filesystem"); | |
if (strlen(usrfs)) { | |
r = do_mount(usrfs, "/target/usr", NULL, MS_RDONLY, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't mount /usr filesystem"); | |
} | |
if (access("/target/dev", F_OK) != 0) | |
panic(errno, "/dev doesn't exist on rootfs"); | |
if (access("/target/proc", F_OK) != 0) | |
panic(errno, "/proc doesn't exist on rootfs"); | |
r = do_mount("/dev", "/target/dev", NULL, MS_MOVE, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't move /dev from initrd to root filesystem"); | |
r = do_mount("/proc", "/target/proc", NULL, MS_MOVE, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't move /proc from initrd to root filesystem"); | |
r = chdir("/target"); | |
if (r < 0) | |
panic(errno, "Couldn't change directory into new root filesystem"); | |
r = do_mount("/target", "/", NULL, MS_MOVE, NULL); | |
if (r < 0) | |
panic(errno, "Couldn't move new root filesystem to /"); | |
r = chroot("."); | |
if (r < 0) | |
panic(errno, "Couldn't switch rootfs"); | |
if (strlen(init_binary)) { | |
try_exec(argc, argv, init_binary); | |
} else { | |
try_exec(argc, argv, "/sbin/init"); | |
try_exec(argc, argv, "/etc/init"); | |
try_exec(argc, argv, "/bin/init"); | |
try_exec(argc, argv, "/bin/sh"); | |
} | |
/* Message stolen from Linux's init/main.c */ | |
panic(0, "No working init found. Try passing init= option to kernel. " | |
"See Linux Documentation/init.txt for guidance."); | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment