Skip to content

Instantly share code, notes, and snippets.

@marcan
Last active July 21, 2024 14:00
Show Gist options
  • Save marcan/a2eafd605d3d6ac76eb10a7c64f736c3 to your computer and use it in GitHub Desktop.
Save marcan/a2eafd605d3d6ac76eb10a7c64f736c3 to your computer and use it in GitHub Desktop.
Linux kernel initialization, translated to bash
#!/boot/bzImage
# Linux kernel userspace initialization code, translated to bash
# (Minus floppy disk handling, because seriously, it's 2017.)
# Not 100% accurate, but gives you a good idea of how kernel init works
# GPLv2, Copyright 2017 Hector Martin <[email protected]>
# Based on Linux 4.10-rc2.
# Note: pretend chroot is a builtin and affects the current process
# Note: kernel actually uses major/minor device numbers instead of device name
# strings in a few places, but I simplified it by using strings
# everywhere even though that is not completely accurate.
# Print a panic message and then hang forever.
# Arguments: every argument is joined into one message line via "$*".
# Outputs:   the message on stdout.
# Returns:   never (the loop below has no exit condition).
panic() {
  echo "$*"
  # Nothing more can be done after a panic; just nap in one-second steps.
  until false; do
    sleep 1
  done
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L363
# Mount a device on /root using one specific filesystem type.
# Globals:   rootflags (read) - extra arguments appended to the mount command
#            major, minor (read) - only used in the log line
# Arguments: $1 - device to mount
#            $2 - filesystem type to try
# Outputs:   a "VFS: Mounted root" line on stdout when the mount succeeds
# Returns:   mount's exit status if it fails; otherwise the status of the
#            final echo
do_mount_root() {
  local dev=$1 fstype=$2
  local rc
  # $rootflags stays unquoted on purpose: an empty value must add no argument
  # at all. The filesystem type IS quoted so that an empty or odd value can
  # never shift or split the remaining arguments.
  # shellcheck disable=SC2086
  mount -t "$fstype" "$dev" /root $rootflags
  rc=$?
  if [ "$rc" -ne 0 ]; then
    return "$rc"
  fi
  cd /root
  echo "VFS: Mounted root ($fstype filesystem) on device $major:$minor"
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L381
# Try to mount a block device as root, trying each candidate filesystem type.
# Globals:   rootfstype (read; set to $built_in_filesystem_types when empty)
#            built_in_filesystem_types (read), root_device_name (read)
# Arguments: $1 - device to mount
# Outputs:   diagnostics on stdout when mounting fails
# Returns:   0 as soon as one filesystem type mounts successfully; otherwise
#            ends in panic
mount_block_root() {
  local dev=$1
  local fs ret
  if [ -z "$rootfstype" ]; then
    rootfstype=$built_in_filesystem_types
  fi
  # Unquoted on purpose: the comma-separated list is turned into words.
  for fs in ${rootfstype//,/ }; do
    do_mount_root "$dev" "$fs"
    ret=$?
    case "$ret" in
      0)
        # Mounted: stop here instead of falling into the error branch.
        return 0
        ;;
      13 | 22) # EACCES or EINVAL
        # Wrong filesystem type for this device; try the next one.
        continue
        ;;
      *)
        echo "VFS: Cannot open root device \"$root_device_name\" or $dev: error $ret"
        echo "Please append a correct \"root=\" boot option; here are the available partitions:"
        printk_all_partitions
        panic "VFS: Unable to mount root fs on $dev"
        ;;
    esac
  done
  # Every candidate was rejected with EACCES/EINVAL.
  echo "List of all partitions:"
  printk_all_partitions
  echo "No filesystem could mount root, tried: ${rootfstype//,/ }"
  panic "VFS: Unable to mount root fs on $dev"
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L512
# Mount the real root filesystem named by $root.
# Globals:   root (read; rewritten to /dev/fd0 when the NFS mount fails)
# Outputs:   diagnostics on stdout
# Returns:   0 right after a successful NFS root mount; otherwise the status
#            of mount_block_root
mount_root() {
  if [ "$root" = "/dev/nfs" ]; then
    if mount_nfs_root; then
      return 0
    fi
    echo "VFS: Unable to mount root fs via NFS, trying floppy."
    root=/dev/fd0
  fi
  if [ "$root" = "/dev/fd0" ]; then
    # floppy switching nonsense
    : # explicit no-op: a `then` branch holding only a comment is a syntax error
  fi
  # This is really a mknod, as the kernel is working with the device number
  cp -a "$root" /dev/root || echo "Failed to create /dev/root: $?"
  mount_block_root /dev/root
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_rd.c#L185
# Copy an initrd image into /dev/ram, decompressing it with gzip when possible.
# Arguments: $1 - path of the image file
# Returns:   0 if either the gzip or the plain-copy attempt succeeds,
#            otherwise the status of the failed cat
rd_load_image() {
  local image=$1
  # Supports more compression algorithms in practice
  # The path is quoted so a name with spaces or glob characters is neither
  # split nor rejected as an ambiguous redirect. If gzip cannot decode the
  # file, it is copied verbatim instead.
  gzip -d <"$image" >/dev/ram || cat "$image" >/dev/ram
  # Bunch of nonsense special casing for floppies skipped
  # Everyone but S/390 gets a cute spinner here...
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_initrd.c#L119
# Load /initrd.image into the ramdisk and, unless the ramdisk itself is the
# requested root, run the old-style "change_root" initrd sequence.
# Globals:   root (read)
# Outputs:   progress messages on stdout
# Returns:   0 when the old-style initrd path was taken (root is then already
#            mounted via mount_root); 1 when the caller still has to mount root
initrd_load() {
  local ret
  mknod /dev/ram b 1 0
  if rd_load_image /initrd.image && [ "$root" != "/dev/ram0" ]; then
    ## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_initrd.c#L51
    # This is the deprecated "change_root" mechanism; see Documentation/initrd.txt for details.
    # In this mode, the initrd should contain /linuxrc and it is *not* responsible for mounting the rootfs.
    rm /initrd.image
    mknod /dev/root.old b 1 0
    # mount initrd on rootfs' /root
    mount_block_root /dev/root.old
    mkdir /old
    cd /old
    # try loading default modules from initrd
    load_default_modules
    # /linuxrc runs in a subshell so that its console redirection, cwd and
    # chroot do not leak into the rest of the boot sequence.
    (
      exec </dev/console >&0 2>&0
      cd /root
      mount --move . /
      chroot .
      setsid /linuxrc
    )
    # move initrd to rootfs' /old
    mount --move .. .
    # switch root and cwd back to / of rootfs
    chroot ..
    cd /
    mount_root
    printf '%s' "Trying to move old root to /initrd ... "
    mount --move /old /root/initrd
    ret=$?
    if [ "$ret" -eq 0 ]; then
      echo "okay"
    else
      if [ "$ret" -eq 2 ]; then # ENOENT
        echo "/initrd does not exist. Ignored."
      else
        echo "failed"
      fi
      # The initrd could not be kept around: detach it and release its memory.
      echo "Unmounting old root"
      umount -l /old
      printf '%s' "Trying to free ramdisk memory ... "
      if blockdev --flushbufs /dev/root.old; then
        echo "okay"
      else
        echo "failed"
      fi
    fi
    return 0
  else
    # Otherwise, if root=/dev/ram0, this is the "new" "pivot_root" initrd mechanism.
    # The initrd is just mounted like any other root FS and $init is called in it.
    # See Documentation/initrd.txt for what the initrd has to do in this case.
    # Note that this is obsolete too in the more recent initramfs case.
    rm /initrd.image
    return 1
  fi
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L549
# Mount the real root filesystem and make it the root of the current process.
# Globals:   rootdelay, root, root_wait (read); root_device_name (written)
# Outputs:   progress messages on stdout
# Returns:   the status of the final `chroot .`
prepare_namespace() {
  if [ -n "$rootdelay" ]; then
    echo "Waiting $rootdelay sec before mounting root device..."
    sleep "$rootdelay"
  fi
  wait_for_device_probe # wait for devices
  md_run_setup # md-raid autoconfig: https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_md.c#L303
  if [ -n "$root" ]; then
    root_device_name="$root"
    case "$root" in
      mtd* | ubi*)
        # These names are handed to mount_block_root as-is, skipping the
        # initrd and wait-for-device steps below.
        mount_block_root "$root"
        mount -t devtmpfs devtmpfs dev # only if CONFIG_DEVTMPFS_MOUNT
        mount --move . /
        chroot .
        return
        ;;
    esac
    root_device_name="${root##/dev/}"
  fi
  if ! initrd_load; then
    if [ -n "$root_wait" ]; then
      echo "Waiting for root device $root..."
      # "$root" is quoted so a path containing whitespace is tested as one
      # name instead of making `[` fail and ending the wait early.
      while ! driver_probe_done || [ ! -e "$root" ]; do
        sleep 1
      done
    fi
    mount_root
  fi
  mount -t devtmpfs devtmpfs dev # only if CONFIG_DEVTMPFS_MOUNT
  mount --move . /
  chroot .
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/initramfs.c#L608
# Fill the initial rootfs: create the default skeleton, unpack the built-in
# initramfs, then unpack the externally supplied image if there is one.
# Outputs: progress messages on stdout
# Returns: 0 when /dev/initrd is absent, otherwise the status of
#          load_default_modules
populate_rootfs() {
  ## https://github.com/torvalds/linux/blob/v4.10-rc2/scripts/gen_initramfs_list.sh#L50
  ## OR (if initramfs disabled): https://github.com/torvalds/linux/blob/v4.10-rc2/init/noinitramfs.c#L28
  # Skeleton of the default initramfs.
  cd /
  mkdir /dev
  mknod /dev/console c 5 1
  mkdir /root

  # Extra contents built into the kernel image (not a real device).
  cpio -i </dev/internal_initramfs

  # /dev/initrd is not a real device either; it stands for the initrd memory.
  # /initrd.image, by contrast, is a real file on rootfs.
  # Nothing more to do when no external image was supplied.
  [ -e /dev/initrd ] || return 0

  echo "Trying to unpack rootfs image as initramfs..."
  # The kernel's own cpio code also handles compression and concatenation.
  cpio -i </dev/initrd || {
    echo "rootfs image is not an initramfs; looks like an initrd"
    cp /dev/initrd /initrd.image
  }
  free_initrd # gets rid of /dev/initrd: https://github.com/torvalds/linux/blob/v4.10-rc2/init/initramfs.c#L527
  # Load default modules from the initramfs now, which gives them a chance
  # to load before device_initcalls.
  load_default_modules
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/main.c#L952
# Top-level boot sequence: populate rootfs, mount the real root when the
# initramfs carries no init program, then hand over to the first init that
# can be executed.
# Globals:   rdinit (read; defaulted to /init, cleared when that file is missing)
#            init (read)
# Outputs:   diagnostics on stdout
# Returns:   does not return on success (the shell is replaced by init);
#            ends in panic when no init can be executed
kernel_init() {
  # Without execfail a non-interactive bash exits as soon as an `exec` cannot
  # run its target, which would make every error message, fallback and the
  # final panic below unreachable.
  shopt -s execfail
  ## https://github.com/torvalds/linux/blob/v4.10-rc2/init/main.c#L999
  early_kernel_init
  # Note: at this point, as part of basic VFS init, a rootfs (special tmpfs) is mounted at /
  ## this is an initcall, called here: https://github.com/torvalds/linux/blob/v4.10-rc2/init/main.c#L873
  ## declared here: https://github.com/torvalds/linux/blob/v4.10-rc2/init/initramfs.c#L658
  populate_rootfs
  more_kernel_init
  # Open the /dev/console on the rootfs, this should never fail
  exec </dev/console >&0 2>&0 || echo "Warning: unable to open an initial console."
  # check if there is an early userspace init. If yes, let it do all the work
  if [ -z "$rdinit" ]; then
    rdinit=/init
  fi
  if [ ! -e "$rdinit" ]; then
    rdinit=
    # Mount root, the whole shebang.
    # Only done if there is *no* $rdinit (/init) in the initramfs!
    prepare_namespace
  fi
  # Ok, we have completed the initial bootup, and
  # we're essentially up and running. Get rid of the
  # initmem segments and start the user-mode stuff..
  #
  # rootfs is available now, try loading the public keys
  # and default modules
  integrity_load_keys
  load_default_modules
  late_kernel_init
  if [ -n "$rdinit" ]; then
    # If present in the initramfs, $rdinit (/init) is responsible
    # for *everything*, and this is the modern way of doing things.
    # To find out what $rdinit has to do in that case, read
    # Documentation/filesystems/ramfs-rootfs-initramfs.txt
    exec "$rdinit"
    echo "Failed to execute $rdinit (error $?)"
  fi
  if [ -n "$init" ]; then
    # This could be the real /sbin/init, or an initrd /sbin/init.
    exec "$init"
    # NOTE(review): the kernel may panic here rather than fall through to the
    # defaults below - confirm against init/main.c for the targeted version.
    echo "Requested init $init failed (error $?)"
  fi
  # Try the well-known locations in order; each `exec` only returns on failure.
  exec /sbin/init || exec /etc/init || exec /bin/init || exec /bin/sh
  panic "No working init found. Try passing init= option to kernel. See Linux Documentation/admin-guide/init.rst for guidance."
}
# Script entry point: start the boot sequence defined by kernel_init above.
kernel_init
@f3rdy
Copy link

f3rdy commented Jan 11, 2017

This is valuable teaching. Thanks for that!

@nonchip
Copy link

nonchip commented Jan 23, 2017

BTW, the chroot functionality you're assuming is actually a thing (called pivot_root), and it is used by early inits to mount the real root after running the initrd.

@marcan
Copy link
Author

marcan commented Feb 3, 2017

@nonchip not quite. pivot_root is a separate system call that affects the current mount namespace and all processes sharing it, while chroot only affects the current process. pivot_root is usually used in conjunction with chroot to ensure that the current working directory and root are correctly set. When I write chroot above I really do mean the good old chroot() system call. The problem is that it needs to affect the current process (the hypothetical shell, i.e. it needs to be built-in) while the traditional UNIX chroot command spawns a subprocess/subshell.

See https://github.com/torvalds/linux/blob/v4.10-rc2/fs/namespace.c#L3035 for more details on what exactly pivot_root does. It's very different from chroot (and it also only works on initrd/regular mounts, not on rootfs).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment