Last active
July 22, 2024 19:56
-
-
Save andreas-wilm/3460a788d6548370a136e63b5b91281e to your computer and use it in GitHub Desktop.
Combine all NVMe devices on an AWS instance (e.g. i3) into a RAID 0 array and mount it at /data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/raid-config.html
# Build a RAID 0 array from every NVMe disk listed by lsblk, format it as
# ext4 and mount it at /data.
# NOTE(review): the awk filter matches every top-level line starting with
# "nvme", so an NVMe root volume would be included as well -- confirm the
# instance type before running.
nvmes=$(sudo lsblk | awk '/^nvme/ {printf "/dev/%s ", $1}')
# $nvmes is intentionally unquoted so that it word-splits into one argument per device.
sudo mdadm --create --verbose /dev/md0 --level=0 --name=my_raid --raid-devices=$(echo $nvmes | wc -w) $nvmes
# Crutch: give the array time to come up before formatting it. The space
# before '#' matters: "10#" would be handed to sleep as part of its argument.
sleep 10 # crutch
sudo mkfs.ext4 -L my_raid /dev/md0
# Persist the array definition and rebuild the initramfs for the running kernel.
sudo mdadm --detail --scan | sudo tee -a /etc/mdadm.conf
sudo dracut -H -f /boot/initramfs-$(uname -r).img $(uname -r)
# -p: do not fail when /data already exists.
sudo mkdir -p /data
sudo mount LABEL=my_raid /data
sudo chown ec2-user:ec2-user /data/
Python code to determine unused block devices (fixing the above issues):
import json
import subprocess
# Print the /dev path of every top-level block device that is neither mounted
# nor partitioned, according to `lsblk -J`.
# stderr is not merged into stdout: any warning text would corrupt the JSON.
res = subprocess.check_output(["lsblk", "-J"])
jd = json.loads(res.decode())
for bd in jd["blockdevices"]:
    # Depending on the lsblk version the entry carries either a "mountpoint"
    # string (or null) or a "mountpoints" list (possibly [null]); accept both.
    mounted = bool(bd.get("mountpoint")) or any(bd.get("mountpoints") or [])
    if not mounted and "children" not in bd:
        print("/dev/{}".format(bd["name"]))
Full Python version at rpd-aws-tools.git/batch-ami/raid0.py
I could not find the above resource for the full Python version, so I wrote one. Pasted below.
#!/usr/bin/python3
import json
import os
import shutil
import subprocess
import time
# Name given to the md array and label given to its ext4 filesystem; also the
# default LABEL= target used by mount().
RAID_LABEL = "local_raid"
def mount(name = "LABEL={}".format(RAID_LABEL), path = "/mnt/local"):
    """Create the mount directory if needed and mount *name* on *path*.

    Args:
        name: Device specification passed to ``mount`` (a device path or a
            ``LABEL=...`` selector). Defaults to the RAID filesystem label.
        path: Directory to mount on; created with mode 0o777 (subject to the
            process umask) when it does not exist yet.

    Raises:
        subprocess.CalledProcessError: If the ``mount`` command fails.
    """
    # exist_ok: a rerun, or a pre-existing mount directory, must not abort
    # the script with FileExistsError before mount is even attempted.
    os.makedirs(path, mode=0o777, exist_ok=True)
    subprocess.run(["mount", name, path], check=True)
def _is_unused(bd):
    """Return True if an lsblk JSON entry is neither mounted nor partitioned."""
    # Depending on the lsblk version the entry carries either a "mountpoint"
    # string (or null) or a "mountpoints" list (possibly [null]); accept both.
    mounted = bool(bd.get("mountpoint")) or any(bd.get("mountpoints") or [])
    return not mounted and "children" not in bd


def main():
    """Mount the unused block devices, combining several into a RAID 0 array.

    No unused device: do nothing. Exactly one: mount it directly. Two or
    more: create /dev/md0 as RAID 0 over all of them, format it as ext4
    labelled RAID_LABEL, write the array definition to /etc/mdadm.conf,
    rebuild the initramfs with dracut and mount the array.

    Raises:
        subprocess.CalledProcessError: If any external command fails.
        RuntimeError: If the array does not become active within the
            wait period.
    """
    # stderr is not merged into stdout: any warning text would corrupt the JSON.
    res = subprocess.check_output(["lsblk", "-J"])
    jd = json.loads(res.decode())
    devs = ["/dev/{}".format(bd["name"]) for bd in jd["blockdevices"] if _is_unused(bd)]
    if not devs:
        return
    if len(devs) == 1:
        # NOTE(review): the lone device is mounted as-is; no filesystem is
        # created here, so this presumably expects a pre-formatted device --
        # confirm.
        mount(name=devs[0])
        return

    if not shutil.which("mdadm"):
        subprocess.run(["yum", "install", "-y", "mdadm"], check=True)
    cmd = [
        "mdadm", "--create", "/dev/md0", "--level=0",
        "--name=%s" % RAID_LABEL, "--raid-devices=%d" % len(devs),
    ]
    cmd.extend(devs)
    subprocess.run(cmd, check=True)

    # Wait for the kernel to report the array as active, but give up after a
    # bounded time instead of spinning forever if it never comes up.
    deadline = time.monotonic() + 60
    while True:
        with open("/proc/mdstat", "r") as f:
            if "active raid0" in f.read():
                break
        if time.monotonic() >= deadline:
            raise RuntimeError("/dev/md0 did not become active within 60 seconds")
        time.sleep(1)

    subprocess.run(["mkfs.ext4", "-L", RAID_LABEL, "/dev/md0"], check=True)
    # Note: this overwrites any existing /etc/mdadm.conf with the current scan.
    with open("/etc/mdadm.conf", "w") as f:
        subprocess.run(["mdadm", "--detail", "--scan"], stdout=f, check=True)
    # Kernel release straight from the OS; avoids an unchecked `uname -r` subprocess.
    release = os.uname().release
    subprocess.run(["dracut", "-H", "-f", "/boot/initramfs-%s.img" % release, release], check=True)
    mount()
# Run only when executed as a script, so importing this module does not
# start reconfiguring disks.
if __name__ == "__main__":
    main()
Note that if using the above with AWS Batch, you will need to wrap the script in a MIME multi-part archive, which is the format AWS Batch requires for launch-template user data.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Slightly better approach to get all unused disks:
Output lsblk as JSON, select the unmounted disks (ignoring partitions), and print each block device name (without the /dev/ prefix)