Created
March 19, 2026 19:24
-
-
Save Techcable/c65d22c64209bd3c0235fd1ab03d1a9d to your computer and use it in GitHub Desktop.
Categorizes ASCII characters in a somewhat arbitrary fashion.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // SPDX-FileCopyrightText: Copyright 2026 Techcable <https://techcable.net> | |
| // SPDX-License-Identifier: MIT OR Apache-2.0 OR CC0-1.0 | |
| import java.util.List; | |
| import java.util.function.Predicate; | |
| import java.util.stream.IntStream; | |
/// Categorizes ASCII characters.
///
/// The categories are somewhat arbitrary, but could be useful.
///
/// Every ASCII character (codes 0 through 127) belongs to exactly one category.
/// Categories are tested in declaration order and the first match wins,
/// so the order of the constants is significant.
public enum AsciiCharCategory {
    // first match wins

    /// The letters `A` through `Z`.
    ASCII_UPPERCASE_LETTER(charRange('A', 'Z')),
    /// The letters `a` through `z`.
    ASCII_LOWERCASE_LETTER(charRange('a', 'z')),
    /// The digits `0` through `9`.
    ASCII_DIGIT(charRange('0', '9')),
    /// Line feed and carriage return.
    ///
    /// Declared before `ASCII_NONPRINTABLE`, whose filter would otherwise claim both.
    ASCII_NEWLINE('\n', '\r'),
    /// Forward slash and backslash.
    ASCII_PATHSEP('/', '\\'),
    /// Every character below space that was not matched earlier, plus DEL (127).
    ///
    /// NOTE(review): because this is tested before `ASCII_WHITESPACE`, tab,
    /// vertical tab, form feed and the separators 28-31 are reported as
    /// non-printable. Confirm that is intended before reordering.
    ASCII_NONPRINTABLE((c) -> c < ' ' || c == 127),
    /// Characters accepted by `Character.isWhitespace` that were not matched earlier.
    ///
    /// Given the categories above, only the space character reaches this filter
    /// with a positive result.
    ASCII_WHITESPACE(Character::isWhitespace),
    /// Any ASCII character that no earlier category claimed.
    ASCII_SYMBOL((unused) -> true /* fallback to symbol if nothing else */);

    /// Number of ASCII characters, and therefore the size of the lookup table.
    private static final int ASCII_COUNT = 128;

    /// Membership test for this category alone; it does NOT account for
    /// earlier categories taking priority.
    private final Predicate<Character> rawFilter;

    AsciiCharCategory(Predicate<Character> rawFilter) {
        this.rawFilter = rawFilter;
    }

    AsciiCharCategory(char... items) {
        this.rawFilter = (Character candidate) -> {
            for (char item : items) {
                if (item == candidate) {
                    return true;
                }
            }
            return false;
        };
    }

    /// Builds a filter accepting every character in the inclusive range `start..end`.
    ///
    /// @throws IllegalArgumentException if `start` is greater than `end`
    private static Predicate<Character> charRange(char start, char end) {
        if (start > end) {
            throw new IllegalArgumentException();
        }
        // Compare bounds directly instead of materializing a boxed list and scanning it.
        return (c) -> start <= c && c <= end;
    }

    /// All categories in declaration order, which is also their matching priority.
    /// The list is unmodifiable.
    public static final List<AsciiCharCategory> ALL_CATEGORIES = List.of(values());

    /// Precomputed category for each ASCII code, indexed by the character's value.
    private static final List<AsciiCharCategory> TABLE = IntStream.range(0, ASCII_COUNT)
            .mapToObj(AsciiCharCategory::classify)
            .toList();

    /// Finds the first category whose filter accepts the given character code.
    private static AsciiCharCategory classify(int code) {
        char c = (char) code;
        for (var category : ALL_CATEGORIES) {
            if (category.rawFilter.test(c)) {
                return category;
            }
        }
        // Cannot happen while ASCII_SYMBOL accepts everything; signals a broken category list.
        throw new IllegalStateException("Nothing matches `" + c + "`");
    }

    /// Returns the category of an ASCII character.
    ///
    /// @param c the character to categorize
    /// @return the first category, in declaration order, that matches `c`
    /// @throws IllegalArgumentException if `c` is not an ASCII character
    public static AsciiCharCategory of(char c) {
        assert TABLE.size() == ASCII_COUNT;
        if (!isAscii(c)) {
            throw new IllegalArgumentException("Not an ASCII char: `" + c + "`");
        }
        return TABLE.get(c);
    }

    /// Returns whether the character is in the ASCII range (0 through 127).
    ///
    /// @param c the character to test
    /// @return `true` if `c` is at most 127
    public static boolean isAscii(char c) {
        // char is unsigned, so only the upper bound needs checking.
        return c < ASCII_COUNT;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment