Last active
June 9, 2023 03:50
-
-
Save kugland/32c46bae792c3720b6b14b4b124a3355 to your computer and use it in GitHub Desktop.
Sort input lines by extension
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Copyright (C) 2023 Andre Kugland
# This script is released under the MIT License.
"""
sort-by-ext: Sort input lines by extension.

Usage: sort-by-ext [-0]

This script is a command-line utility that reads lines of text from standard
input, sorts them by file extension, and writes the sorted lines to standard
output.

Options:
  -0, --null    use the null character as separator instead of newline

The purpose of this script is to reorder the files in the creation of a tar
archive, so that a better compression ratio can be achieved.

Example:
  find . -type f -print0 | sort-by-ext -0 | tar --null -czf archive.tgz -T -
"""
# Refuse to be imported: the rest of this file parses command-line arguments
# and reads standard input at module level, which must never happen as a side
# effect of an import.
if __name__ != "__main__":
    # ImportError is a subclass of Exception, so handlers written against the
    # previous bare Exception keep working.
    raise ImportError("This script is not meant to be imported.")

import sys
from os.path import basename, splitext
import argparse
def make_sort_key(line: bytes) -> bytes:
    """Return a key that can be used to sort the given line.

    The key is ``extension + b"/" + basename + b"/" + line``, so records are
    ordered by extension first, then by file name, and finally by the full
    line as a tie-breaker.

    Args:
        line: One input record (a path) as raw bytes, without its separator.

    Returns:
        The bytes sort key for ``line``.
    """
    base = basename(line)
    # splitext() yields b"" for names without an extension and for dotfiles
    # such as b".bashrc"; their key then starts with b"/", which compares
    # greater than the b"." that begins every real extension, so they sort
    # after all names that have an extension.
    ext = splitext(base)[1]
    # b"/" cannot occur inside ext or base (basename() drops everything up to
    # the last slash), so it is an unambiguous field delimiter in the key.
    return b"/".join((ext, base, line))
# Command-line interface. argparse's built-in help is disabled so that -h
# prints the module docstring instead of an auto-generated usage message.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("-0", "--null", action="store_true")
parser.add_argument("-h", "--help", action="store_true")
args = parser.parse_args()

if args.help:
    print(__doc__.strip())
    # Use sys.exit() rather than the exit() builtin: the latter is injected
    # by the site module and is not available under `python -S`.
    sys.exit(0)

# Records are delimited by NUL with -0/--null, otherwise by newline.
separator = b"\0" if args.null else b"\n"

# Make sure stdin and stdout are opened in binary mode. closefd=False keeps
# the underlying descriptors open when these wrappers are closed.
with (
    open(sys.stdin.fileno(), "rb", closefd=False) as stdin,
    open(sys.stdout.fileno(), "wb", closefd=False) as stdout,
):
    # The whole input is read at once because sorting needs every record.
    # split() always returns at least one element, so lines[-1] is safe even
    # for empty input.
    lines = stdin.read().split(separator)
    if lines[-1] == b"":  # Remove trailing empty line if present
        lines.pop()
    lines.sort(key=make_sort_key)
    # Every record, including the last, is written with a trailing separator.
    stdout.writelines(line + separator for line in lines)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment