Skip to content

Instantly share code, notes, and snippets.

@chris-se
Last active June 6, 2021 01:14
Show Gist options
  • Save chris-se/e0fbc073fcbd9ac2d7ae to your computer and use it in GitHub Desktop.
Save chris-se/e0fbc073fcbd9ac2d7ae to your computer and use it in GitHub Desktop.
A very simple initrd implementation that mounts / and /usr
/*
* Author: Christian Seiler <[email protected]>
* License: GPLv3+
*
* IMPORTANT NOTE:
* This was just a proof of concept, there's a better version available from
* https://github.com/chris-se/tiny-initrd
*
* Usage: musl-gcc -O2 -Wall -Wextra -static -o init simple_initrd.c
* mkdir initramfs initramfs/{dev,proc,target}
* cp init initramfs/
* strip initramfs/init
* cd initramfs ; find . | cpio --quiet -R 0:0 -o -H newc | gzip > ../initrd.img ; cd ..
*
* (You don't HAVE to use musl-gcc, but if you want your binary to be small
* then you really should use something like musl, dietlibc, uclibc or
* similar. In principle this could also work with dynamic linking if you
* add the dynamic linker to your initrd image + the libraries required,
* but again, this would defeat the purpose of having a very small binary.)
*
* Specify root=/dev/... and x-mount.usr=/dev/... on kernel command line to have
* this initrd mount / and /usr before the system is booted. x-mount.usr= is
* optional.
*
* This relies on devtmpfs and kernel device names. root=UUID=... is not supported,
* neither is root=/dev/disk/by-... or similar symlinks. Only native kernel device
* names are supported.
*
* Everything required for mounting / and /usr needs to be compiled into the
* kernel, no modules will be loaded.
*
* This is basically meant to replace the no-initrd situation, but also mount /usr.
* It's not supposed to support fancy things. If you need that, use a proper initrd
* and this code isn't for you.
*
* First draft, designed to be small in code size. Could probably be improved
* quite a bit...
*
* compile-tested on amd64, but should be portable. Tell me if it isn't.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/uio.h>
#include <sys/time.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>
/* Build-time configuration. Every #ifndef-guarded value below can be
 * overridden from the compiler command line (-DNAME=value). */

/* File that parse_cmdline() reads the kernel command line from. */
#ifndef CMDLINE_FILE
#define CMDLINE_FILE "/proc/cmdline"
#endif /* !defined(CMDLINE_FILE) */
/* Size of the buffer parse_cmdline() reads into; at most
 * CMDLINE_BUFSZ - 1 bytes of the command line are looked at. */
#ifndef CMDLINE_BUFSZ
#define CMDLINE_BUFSZ 4096
#endif /* !defined(CMDLINE_BUFSZ) */
/* File that parse_filesystems() reads the filesystem type list from. */
#ifndef FSTYPES_FILE
#define FSTYPES_FILE "/proc/filesystems"
#endif /* !defined(FSTYPES_FILE) */
/* Size of the buffer parse_filesystems() reads into; at most
 * FSTYPES_BUFSZ - 1 bytes of the list are looked at. */
#ifndef FSTYPES_BUFSZ
#define FSTYPES_BUFSZ 16384
#endif /* !defined(FSTYPES_BUFSZ) */
/* Size (including the terminating NUL) of the buffers that hold the
 * values of root=, x-mount.usr= and init=. */
#ifndef FS_BUFSZ
#define FS_BUFSZ 256
#endif /* !defined(FS_BUFSZ) */
/* Mount data string passed when main() mounts devtmpfs on /dev. */
#ifndef DEVTMPFS_MOUNTOPTS
#define DEVTMPFS_MOUNTOPTS "size=10240k,mode=0755"
#endif /* !defined(DEVTMPFS_MOUNTOPTS) */
/* How long wait_for_filesystems() polls before giving up; compared
 * against a difference of tv_sec values, i.e. in seconds. */
#ifndef FSDEV_TIMEOUT
#define FSDEV_TIMEOUT 180
#endif /* !defined(FSDEV_TIMEOUT) */
/* Capacity of the supported_filesystems table: number of entries, and
 * bytes per entry (type names longer than N_MAX_FSTYPELEN - 1 characters
 * are skipped by parse_filesystems()). */
#define N_MAX_SUPPORTED_FILESYSTEMS 256
#define N_MAX_FSTYPELEN 32
static void panic(int e, const char *message) __attribute__((noreturn));
/*
 * Report a fatal error on stderr (fd 2) and terminate with exit status 1.
 *
 * e        errno value describing the failure, or 0 if there is none.
 * message  human-readable description of what failed.
 *
 * Output is "<message>: <strerror(e)>\n" when e is non-zero and just
 * "<message>\n" when e is 0, so messages without an errno do not end in
 * a dangling ": ".
 *
 * Never returns.
 */
static void panic(int e, const char *message)
{
    struct iovec iov[4];
    int n = 0;

    /* Don't use stdio functions, because we link statically
     * and they bloat the binary. */
    iov[n].iov_base = (char *)message;
    iov[n].iov_len = strlen(message);
    n++;

    if (e != 0) {
        const char *error = strerror(e);

        iov[n].iov_base = (char *)": ";
        iov[n].iov_len = 2;
        n++;
        iov[n].iov_base = (char *)error;
        iov[n].iov_len = strlen(error);
        n++;
    }

    iov[n].iov_base = (char *)"\n";
    iov[n].iov_len = 1;
    n++;

    /* Best effort: there is nothing sensible left to do if even the
     * error report cannot be written. */
    writev(2, iov, n);
    _exit(1);
}
/*
 * Write a non-fatal diagnostic to stderr (fd 2): the three string
 * fragments back to back, followed by a newline, in a single writev().
 *
 * p1, p2, p3  NUL-terminated fragments; none may be NULL.
 */
void warn3(const char *p1, const char *p2, const char *p3)
{
    /* No stdio here on purpose: the binary is linked statically and
     * pulling in printf & friends would bloat it. */
    const char *pieces[4] = { p1, p2, p3, "\n" };
    struct iovec vec[4];
    size_t k;

    for (k = 0; k < 4; k++) {
        vec[k].iov_base = (char *)pieces[k];
        vec[k].iov_len = strlen(pieces[k]);
    }
    writev(2, vec, 4);
}
/* Values taken from the kernel command line by parse_cmdline(); each
 * stays an empty string if the corresponding option was not given. */
/* root=: device to mount as the root filesystem. */
static char rootfs[FS_BUFSZ];
/* x-mount.usr=: device to mount on /usr (optional). */
static char usrfs[FS_BUFSZ];
/* init=: program to exec once the new root is in place. */
static char init_binary[FS_BUFSZ];
/* Filesystem type names collected by parse_filesystems(); do_mount()
 * tries them one after another when no explicit type is given. */
static char supported_filesystems[N_MAX_SUPPORTED_FILESYSTEMS][N_MAX_FSTYPELEN];
/* Number of valid entries in supported_filesystems. */
static int n_supported_filesystems;
/*
 * mount(2) wrapper that probes filesystem types when none is given.
 *
 * Arguments are those of mount(2). If filesystemtype is non-NULL, or the
 * flags contain MS_MOVE, MS_REMOUNT or MS_BIND, the call is forwarded to
 * mount() unchanged. Otherwise every entry of supported_filesystems is
 * tried in order (with MS_SILENT added) until one succeeds.
 *
 * Returns 0 on success. On failure returns the result of the last
 * mount() attempt with errno as left by it; if the table of supported
 * filesystems is empty, returns -1 with errno set to EINVAL.
 */
static int do_mount(const char *source, const char *target,
                    const char *filesystemtype, unsigned long mountflags,
                    const void *data)
{
    int result = -1;
    int idx;

    /* Nothing to probe: the type is known or irrelevant for this call. */
    if (filesystemtype || (mountflags & (MS_MOVE | MS_REMOUNT | MS_BIND)))
        return mount(source, target, filesystemtype, mountflags, data);

    /* The kernel doesn't iterate over filesystem types for us, so do
     * what libmount would do, without linking against it. */
    errno = EINVAL;
    for (idx = 0; idx < n_supported_filesystems; idx++) {
        result = mount(source, target, supported_filesystems[idx],
                       mountflags | MS_SILENT, data);
        if (result == 0)
            break;
    }
    return result;
}
/*
 * Read the kernel command line from CMDLINE_FILE and store the values of
 * root=, x-mount.usr= and init= in rootfs, usrfs and init_binary.
 *
 * The command line is split on spaces, tabs, CR and LF; all other
 * options are ignored. If an option occurs more than once the last
 * occurrence wins. Panics if the file cannot be opened or read, or if a
 * value does not fit into FS_BUFSZ - 1 characters.
 */
static void parse_cmdline()
{
    static const char separators[] = " \t\r\n";
    const struct {
        const char *prefix;    /* option name including the '=' */
        char *dest;            /* FS_BUFSZ-sized buffer receiving the value */
        const char *too_long;  /* panic message for oversized values */
    } options[] = {
        { "root=", rootfs, "root= too long" },
        { "x-mount.usr=", usrfs, "x-mount.usr= too long" },
        { "init=", init_binary, "init= too long" },
    };
    char cmdline[CMDLINE_BUFSZ] = { 0 };
    char *state;
    char *word;
    int cmdline_fd;

    cmdline_fd = open(CMDLINE_FILE, O_RDONLY);
    if (cmdline_fd < 0)
        panic(errno, "Couldn't open " CMDLINE_FILE);
    /* The buffer is zero-filled and one byte is held back, so whatever
     * was read is NUL-terminated. */
    if (read(cmdline_fd, cmdline, CMDLINE_BUFSZ - 1) < 0)
        panic(errno, "Couldn't read " CMDLINE_FILE);
    close(cmdline_fd);

    word = strtok_r(cmdline, separators, &state);
    while (word != NULL) {
        size_t k;

        for (k = 0; k < sizeof options / sizeof options[0]; k++) {
            size_t prefix_len = strlen(options[k].prefix);
            const char *value;

            if (strncmp(word, options[k].prefix, prefix_len) != 0)
                continue;
            value = word + prefix_len;
            if (strlen(value) > FS_BUFSZ - 1)
                panic(0, options[k].too_long);
            /* Length was checked above, so this always NUL-terminates
             * (and zero-pads the rest of the buffer). */
            strncpy(options[k].dest, value, FS_BUFSZ);
            break;
        }
        word = strtok_r(NULL, separators, &state);
    }
}
/*
 * Fill supported_filesystems from FSTYPES_FILE.
 *
 * Every line that does not start with "nodev" contributes one entry: the
 * line with leading spaces and tabs removed. Names longer than
 * N_MAX_FSTYPELEN - 1 characters, and any names beyond the first
 * N_MAX_SUPPORTED_FILESYSTEMS, are skipped with a warning. Panics if the
 * file cannot be opened or read.
 */
static void parse_filesystems()
{
    char listing[FSTYPES_BUFSZ] = { 0 };
    char *state;
    char *entry;
    int listing_fd;

    listing_fd = open(FSTYPES_FILE, O_RDONLY);
    if (listing_fd < 0)
        panic(errno, "Couldn't open " FSTYPES_FILE);
    /* Zero-filled buffer with one byte held back keeps the data
     * NUL-terminated. */
    if (read(listing_fd, listing, FSTYPES_BUFSZ - 1) < 0)
        panic(errno, "Couldn't read " FSTYPES_FILE);
    close(listing_fd);

    entry = strtok_r(listing, "\n", &state);
    for (; entry != NULL; entry = strtok_r(NULL, "\n", &state)) {
        /* Only filesystems that are not flagged "nodev" are candidates. */
        if (strncmp(entry, "nodev", 5) == 0)
            continue;

        /* Skip the indentation in front of the type name. */
        entry += strspn(entry, " \t");

        if (strlen(entry) >= N_MAX_FSTYPELEN) {
            warn3("[initrd] filesystem type ", entry, " is too long for us, ignoring.");
            continue;
        }
        if (n_supported_filesystems == N_MAX_SUPPORTED_FILESYSTEMS) {
            warn3("[initrd] kernel supports too many filesystem types, ignoring ", entry, ", sorry.");
            continue;
        }

        /* The table has static storage (zero-initialised), so copying
         * at most N_MAX_FSTYPELEN - 1 bytes leaves the entry
         * NUL-terminated. */
        strncpy(supported_filesystems[n_supported_filesystems], entry, N_MAX_FSTYPELEN - 1);
        n_supported_filesystems++;
    }
}
/*
 * Try to replace the current process with the given init program.
 *
 * orig_argc, orig_argv  argument vector this program was started with;
 *                       elements 1..orig_argc-1 are passed on unchanged.
 * binary                path of the program to execute; it also becomes
 *                       argv[0] of the new program.
 *
 * Panics if there are more than 255 arguments. Returns only if execv()
 * failed, so the caller can move on to the next candidate.
 */
static void try_exec(int orig_argc, char *const orig_argv[], const char *binary)
{
    char *argv[256];
    int i;

    /* argv[] needs room for the arguments plus the terminating NULL. */
    if (orig_argc > 255)
        panic(0, "Too many arguments to init.");

    /* argv[0] must name the program actually being executed. Taking it
     * from the init= buffer would hand the fallback candidates an empty
     * argv[0] whenever init= was not specified. */
    argv[0] = (char *)binary;
    for (i = 1; i < orig_argc; i++)
        argv[i] = orig_argv[i];
    argv[i] = NULL;

    execv(binary, argv);
}
/*
 * Block until the device node for rootfs and, if usrfs is non-empty, the
 * one for usrfs exist.
 *
 * There is no udev here, only devtmpfs, so this simply polls with
 * access() every 50 milliseconds. Panics if the current time cannot be
 * determined or if more than FSDEV_TIMEOUT seconds pass without both
 * nodes showing up.
 *
 * NOTE(review): the timeout is measured with gettimeofday(), i.e.
 * wall-clock time; a clock step while waiting would skew it.
 */
static void wait_for_filesystems()
{
    const struct timespec poll_interval = { 0, 50 * 1000 * 1000 };
    struct timespec remaining;
    struct timeval now;
    time_t began;

    if (gettimeofday(&now, NULL) < 0)
        panic(errno, "Couldn't determine current time for timeout.");
    began = now.tv_sec;

    for (;;) {
        /* Done as soon as every requested device node is present. */
        if (access(rootfs, F_OK) == 0 &&
            (usrfs[0] == '\0' || access(usrfs, F_OK) == 0))
            return;

        if (gettimeofday(&now, NULL) < 0)
            panic(errno, "Couldn't determine current time for timeout");
        if (now.tv_sec - began > FSDEV_TIMEOUT)
            panic(0, "Timeout while waiting for devices for / and /usr filesystems to appear "
                     "(did you specify the correct ones?)");

        /* Sleep for 50 milliseconds, then look again. An interrupted
         * sleep is harmless, it only means we poll a bit earlier. */
        (void)nanosleep(&poll_interval, &remaining);
    }
}
/*
 * Entry point of the initrd's init.
 *
 * Sequence:
 *   1. mount /proc and a devtmpfs on /dev inside the initrd;
 *   2. read the list of filesystem types and the kernel command line;
 *   3. wait for the root (and optional /usr) device nodes to appear;
 *   4. mount them read-only on /target and /target/usr;
 *   5. move /dev and /proc into the new root, move the new root onto /
 *      and chroot into it;
 *   6. exec the init given with init=, or else the first of /sbin/init,
 *      /etc/init, /bin/init and /bin/sh that can be executed.
 *
 * Any failure along the way ends in panic(). argc/argv are handed on to
 * the init that gets executed (except for argv[0]).
 */
int main(int argc, char **argv)
{
    static const char *const fallback_inits[] = {
        "/sbin/init", "/etc/init", "/bin/init", "/bin/sh"
    };
    size_t k;

    /* Pseudo filesystems needed by the steps below. */
    if (do_mount("proc", "/proc", "proc", MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL) < 0)
        panic(errno, "Couldn't mount /proc filesystem");
    if (do_mount("udev", "/dev", "devtmpfs", 0, DEVTMPFS_MOUNTOPTS) < 0)
        panic(errno, "Couldn't mount devtmpfs-/dev filesystem");

    parse_filesystems();
    parse_cmdline();

    /* Only absolute device paths are accepted. */
    if (rootfs[0] == '\0')
        panic(0, "No root filesystem (root=) specified");
    if (rootfs[0] != '/')
        panic(0, "root filesystem (root=) must be a (non-symlink) kernel device path");
    if (usrfs[0] != '\0' && usrfs[0] != '/')
        panic(0, "/usr filesystem (x-mount.usr=) must be a (non-symlink) kernel device path");

    wait_for_filesystems();

    /* Filesystem type is NULL here, so do_mount() probes for it. */
    if (do_mount(rootfs, "/target", NULL, MS_RDONLY, NULL) < 0)
        panic(errno, "Couldn't mount / filesystem");
    if (usrfs[0] != '\0') {
        if (do_mount(usrfs, "/target/usr", NULL, MS_RDONLY, NULL) < 0)
            panic(errno, "Couldn't mount /usr filesystem");
    }

    /* The mount points we are about to move onto must already exist. */
    if (access("/target/dev", F_OK) != 0)
        panic(errno, "/dev doesn't exist on rootfs");
    if (access("/target/proc", F_OK) != 0)
        panic(errno, "/proc doesn't exist on rootfs");

    if (do_mount("/dev", "/target/dev", NULL, MS_MOVE, NULL) < 0)
        panic(errno, "Couldn't move /dev from initrd to root filesystem");
    if (do_mount("/proc", "/target/proc", NULL, MS_MOVE, NULL) < 0)
        panic(errno, "Couldn't move /proc from initrd to root filesystem");

    /* Switch root: enter the new root, move it over /, then chroot. */
    if (chdir("/target") < 0)
        panic(errno, "Couldn't change directory into new root filesystem");
    if (do_mount("/target", "/", NULL, MS_MOVE, NULL) < 0)
        panic(errno, "Couldn't move new root filesystem to /");
    if (chroot(".") < 0)
        panic(errno, "Couldn't switch rootfs");

    /* try_exec() only comes back if the exec failed. */
    if (init_binary[0] != '\0') {
        try_exec(argc, argv, init_binary);
    } else {
        for (k = 0; k < sizeof fallback_inits / sizeof fallback_inits[0]; k++)
            try_exec(argc, argv, fallback_inits[k]);
    }

    /* Message stolen from Linux's init/main.c */
    panic(0, "No working init found. Try passing init= option to kernel. "
             "See Linux Documentation/init.txt for guidance.");
    return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment