-
Star
(251)
You must be signed in to star a gist -
Fork
(29)
You must be signed in to fork a gist
-
-
Save marcan/a2eafd605d3d6ac76eb10a7c64f736c3 to your computer and use it in GitHub Desktop.
| #!/boot/bzImage | |
| # Linux kernel userspace initialization code, translated to bash | |
| # (Minus floppy disk handling, because seriously, it's 2017.) | |
| # Not 100% accurate, but gives you a good idea of how kernel init works | |
| # GPLv2, Copyright 2017 Hector Martin <[email protected]> | |
| # Based on Linux 4.10-rc2. | |
| # Note: pretend chroot is a builtin and affects the current process | |
| # Note: kernel actually uses major/minor device numbers instead of device name | |
| # strings in a few places, but I simplified it by using strings | |
| # everywhere even though that is not completely accurate. | |
# Print the panic message, then never return.
# Arguments: $* - the words of the message; they are printed joined together.
# Returns:   never (loops forever, sleeping one second per iteration).
panic() {
  local message="$*"
  echo "$message"
  # Nothing is allowed to run after a panic, so idle indefinitely.
  until false; do
    sleep 1
  done
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L363
# Mount one candidate root filesystem on /root and make it the working
# directory.
# Arguments: $1 - device to mount
#            $2 - filesystem type to try
# Globals:   rootflags (read)    - mount option string, may be empty
#            major, minor (read) - only interpolated into the log line
# Returns:   mount's exit status if the mount fails (nothing else is done);
#            otherwise the status of the final echo.
do_mount_root() {
  local device=$1
  local fstype=$2
  local -a mount_opts=()

  # rootflags is an option string, so hand it to mount through -o instead of
  # as a stray positional argument; omit -o entirely when it is empty.
  if [[ -n "${rootflags:-}" ]]; then
    mount_opts=(-o "$rootflags")
  fi

  mount -t "$fstype" "${mount_opts[@]}" "$device" /root || return $?
  cd /root
  echo "VFS: Mounted root ($fstype filesystem) on device ${major}:${minor}"
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L381
# Try to mount the root device with each candidate filesystem type in turn.
# Arguments: $1 - device to mount
# Globals:   rootfstype (read/written)       - comma-separated list of types;
#                                              set to $built_in_filesystem_types
#                                              when empty
#            built_in_filesystem_types (read)
#            root_device_name (read)         - only used in the error message
# Returns:   0 as soon as one filesystem type mounts successfully.
#            Never returns normally otherwise: every failure path ends in panic.
mount_block_root() {
  local device=$1
  local fs ret

  if [[ -z "$rootfstype" ]]; then
    rootfstype=$built_in_filesystem_types
  fi

  # Deliberately unquoted: the comma-separated list is split into words.
  for fs in ${rootfstype//,/ }; do
    do_mount_root "$device" "$fs"
    ret=$?
    case "$ret" in
      0)
        # Mounted: stop here instead of falling into the error arm below.
        return 0
        ;;
      13|22) # EACCES or EINVAL: move on to the next filesystem type
        ;;
      *)
        echo "VFS: Cannot open root device \"$root_device_name\" or $device: error $ret"
        echo "Please append a correct \"root=\" boot option; here are the available partitions:"
        printk_all_partitions
        panic "VFS: Unable to mount root fs on $device"
        ;;
    esac
  done

  # Every candidate type was rejected.
  echo "List of all partitions:"
  printk_all_partitions
  echo "No filesystem could mount root, tried: ${rootfstype//,/ }"
  panic "VFS: Unable to mount root fs on $device"
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L512
# Mount the root filesystem selected by $root.
# Globals: root (read/written) - root device; rewritten to /dev/fd0 when the
#                                NFS root mount fails
# Returns: immediately with mount_nfs_root's success status when root is
#          /dev/nfs and that mount works; otherwise the status of
#          mount_block_root /dev/root.
mount_root() {
  if [[ "$root" == "/dev/nfs" ]]; then
    mount_nfs_root && return
    echo "VFS: Unable to mount root fs via NFS, trying floppy."
    root=/dev/fd0
  fi
  if [[ "$root" == "/dev/fd0" ]]; then
    # floppy switching nonsense
    : # no-op: a then-branch holding only a comment is a syntax error in bash
  fi
  # This is really a mknod, as the kernel is working with the device number
  cp -a "$root" /dev/root || echo "Failed to create /dev/root: $?"
  mount_block_root /dev/root
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_rd.c#L185
# Copy an initrd image into /dev/ram, decompressing it when it is gzip data.
# Arguments: $1 - path of the image file
# Returns:   0 if either the gzip pass or the raw copy succeeded; otherwise
#            the exit status of the raw copy.
rd_load_image() {
  local image=$1
  # Supports more compression algorithms in practice
  # Quoted so that an image path containing whitespace stays a single word.
  gzip -d <"$image" >/dev/ram || cat "$image" >/dev/ram
  # Bunch of nonsense special casing for floppies skipped
  # Everyone but S/390 gets a cute spinner here...
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_initrd.c#L119
# Load /initrd.image into a ramdisk and, unless root=/dev/ram0, run the
# deprecated "change_root" sequence (/linuxrc) before mounting the real root.
# Globals: root (read) - requested root device
# Returns: 0 after the change_root sequence has run (the real root has been
#            handed to mount_root);
#          1 if the image could not be loaded or root is /dev/ram0, in which
#            case the caller still has to mount the root filesystem.
initrd_load() {
  local ret

  mknod /dev/ram b 1 0
  if rd_load_image /initrd.image && [[ "$root" != "/dev/ram0" ]]; then
    ## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_initrd.c#L51
    # This is the deprecated "change_root" mechanism; see Documentation/initrd.txt for details.
    # In this mode, the initrd should contain /linuxrc and it is *not* responsible for mounting the rootfs.
    rm /initrd.image
    mknod /dev/root.old b 1 0
    # mount initrd on rootfs' /root
    mount_block_root /dev/root.old
    mkdir /old
    cd /old
    # try loading default modules from initrd
    load_default_modules
    (
      # <> opens the console read-write; with a plain < the descriptor is
      # read-only and the duplicated stdout/stderr could not be written to.
      exec <>/dev/console >&0 2>&0
      cd /root
      mount --move . /
      chroot .
      setsid /linuxrc
    )
    # move initrd to rootfs' /old
    mount --move .. .
    # switch root and cwd back to / of rootfs
    chroot ..
    cd /
    mount_root
    echo -n "Trying to move old root to /initrd ... "
    mount --move /old /root/initrd
    ret=$?
    if [[ "$ret" -eq 0 ]]; then
      echo "okay"
    else
      if [[ "$ret" -eq 2 ]]; then # ENOENT
        echo "/initrd does not exist. Ignored."
      else
        echo "failed"
      fi
      echo "Unmounting old root"
      umount -l /old
      echo -n "Trying to free ramdisk memory ... "
      if blockdev --flushbufs /dev/root.old; then
        echo "okay"
      else
        echo "failed"
      fi
    fi
    return 0
  else
    # Otherwise, if root=/dev/ram0, this is the "new" "pivot_root" initrd mechanism.
    # The initrd is just mounted like any other root FS and $init is called in it.
    # See Documentation/initrd.txt for what the initrd has to do in this case.
    # Note that this is obsolete too in the more recent initramfs case.
    rm /initrd.image
    return 1
  fi
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts.c#L549
# Mount the real root filesystem and make it the root of the current process.
# Globals: rootdelay (read)           - seconds to sleep before starting, if set
#          root (read)                - requested root device
#          root_wait (read)           - if non-empty, wait for $root to appear
#          root_device_name (written) - $root; the leading /dev/ is stripped
#                                       unless root is an mtd*/ubi* name
# Returns: the exit status of the final chroot.
prepare_namespace() {
  if [[ -n "$rootdelay" ]]; then
    echo "Waiting $rootdelay sec before mounting root device..."
    sleep "$rootdelay"
  fi
  wait_for_device_probe # wait for devices
  md_run_setup # md-raid autoconfig: https://github.com/torvalds/linux/blob/v4.10-rc2/init/do_mounts_md.c#L303

  case "$root" in
    mtd*|ubi*)
      # These names are mounted directly, skipping the initrd and wait logic.
      root_device_name="$root"
      mount_block_root "$root"
      ;;
    *)
      if [[ -n "$root" ]]; then
        root_device_name="${root##/dev/}"
      fi
      if ! initrd_load; then
        if [[ -n "$root_wait" ]]; then
          echo "Waiting for root device $root..."
          # $root is quoted so that it is always tested as one path, even
          # when it is empty or contains whitespace.
          while ! driver_probe_done || [[ ! -e "$root" ]]; do
            sleep 1
          done
        fi
        mount_root
      fi
      ;;
  esac

  # Common tail for both paths above.
  mount -t devtmpfs devtmpfs dev # only if CONFIG_DEVTMPFS_MOUNT
  mount --move . /
  chroot .
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/initramfs.c#L608
# Fill the initial rootfs: create the default skeleton, unpack the built-in
# initramfs and, when /dev/initrd exists, try to unpack it too (keeping a copy
# as /initrd.image if cpio rejects it).
# Returns: 0 when there is no /dev/initrd; otherwise the status of
#          load_default_modules.
populate_rootfs() {
  ## https://github.com/torvalds/linux/blob/v4.10-rc2/scripts/gen_initramfs_list.sh#L50
  ## OR (if initramfs disabled): https://github.com/torvalds/linux/blob/v4.10-rc2/init/noinitramfs.c#L28
  # default initramfs
  cd /
  mkdir /dev
  mknod /dev/console c 5 1
  mkdir /root

  # additional kernel built-in initramfs contents (not a real device)
  cpio -i < /dev/internal_initramfs

  # note: /dev/initrd isn't a real device but represents the initrd memory
  # /initrd.image is a real file on rootfs
  [[ -e /dev/initrd ]] || return 0

  echo "Trying to unpack rootfs image as initramfs..."
  # actual kernel code for cpio can deal with compression & concatenation
  cpio -i < /dev/initrd || {
    echo "rootfs image is not an initramfs; looks like an initrd"
    cp /dev/initrd /initrd.image
  }
  free_initrd # gets rid of /dev/initrd: https://github.com/torvalds/linux/blob/v4.10-rc2/init/initramfs.c#L527
  # Try loading default modules from initramfs. This gives
  # us a chance to load before device_initcalls.
  load_default_modules
}
## https://github.com/torvalds/linux/blob/v4.10-rc2/init/main.c#L952
# Top-level init sequence: populate the rootfs, mount the real root unless an
# early-userspace init exists, then exec the first init program that works.
# Globals: rdinit (read/written) - early-userspace init; defaults to /init and
#                                  is cleared when that file does not exist
#          init (read)           - explicitly requested init program, optional
# Returns: never on success (the shell is replaced by exec); ends in panic
#          when no candidate could be executed.
kernel_init() {
  # Without execfail a non-interactive bash exits as soon as an exec fails,
  # so none of the error messages and fallbacks below would ever be reached.
  shopt -s execfail

  ## https://github.com/torvalds/linux/blob/v4.10-rc2/init/main.c#L999
  early_kernel_init
  # Note: at this point, as part of basic VFS init, a rootfs (special tmpfs) is mounted at /
  ## this is an initcall, called here: https://github.com/torvalds/linux/blob/v4.10-rc2/init/main.c#L873
  ## declared here: https://github.com/torvalds/linux/blob/v4.10-rc2/init/initramfs.c#L658
  populate_rootfs
  more_kernel_init
  # Open the /dev/console on the rootfs, this should never fail
  # (<> opens it read-write; a plain < would leave stdout/stderr unwritable)
  exec <>/dev/console >&0 2>&0 || echo "Warning: unable to open an initial console."
  # check if there is an early userspace init. If yes, let it do all the work
  if [[ -z "$rdinit" ]]; then
    rdinit=/init
  fi
  if [[ ! -e "$rdinit" ]]; then
    rdinit=
    # Mount root, the whole shebang.
    # Only done if there is *no* $rdinit (/init) in the initramfs!
    prepare_namespace
  fi
  # Ok, we have completed the initial bootup, and
  # we're essentially up and running. Get rid of the
  # initmem segments and start the user-mode stuff..
  #
  # rootfs is available now, try loading the public keys
  # and default modules
  integrity_load_keys
  load_default_modules
  late_kernel_init
  if [[ -n "$rdinit" ]]; then
    # If present in the initramfs, $rdinit (/init) is responsible
    # for *everything*, and this is the modern way of doing things.
    # To find out what $rdinit has to do in that case, read
    # Documentation/filesystems/ramfs-rootfs-initramfs.txt
    exec "$rdinit"
    echo "Failed to execute $rdinit (error $?)"
  fi
  if [[ -n "$init" ]]; then
    # This could be the real /sbin/init, or an initrd /sbin/init.
    exec "$init"
    echo "Requested init $init failed (error $?)"
  fi
  # Each exec only returns if it failed, so this tries the candidates in order.
  exec /sbin/init || exec /etc/init || exec /bin/init || exec /bin/sh
  panic "No working init found. Try passing init= option to kernel. See Linux Documentation/admin-guide/init.rst for guidance."
}
# Script entry point: invoke kernel_init, the function defined above.
| kernel_init |
btw the chroot functionality you're assuming is actually a thing (called pivot_root) and is used by early inits to mount the real root after running the initrd.
@nonchip not quite. pivot_root is a separate system call that affects the current mount namespace and all processes sharing it, while chroot only affects the current process. pivot_root is usually used in conjunction with chroot to ensure that the current working directory and root are correctly set. When I write chroot above I really do mean the good old chroot() system call. The problem is that it needs to affect the current process (the hypothetical shell, i.e. it needs to be built-in) while the traditional UNIX chroot command spawns a subprocess/subshell.
See https://github.com/torvalds/linux/blob/v4.10-rc2/fs/namespace.c#L3035 for more details on what exactly pivot_root does. It's very different from chroot (and it also only works on initrd/regular mounts, not on rootfs).
This is valuable teaching. Thanks for that!