Skip to content

Instantly share code, notes, and snippets.

@Techcable
Created March 19, 2026 19:24
Show Gist options
  • Select an option

  • Save Techcable/c65d22c64209bd3c0235fd1ab03d1a9d to your computer and use it in GitHub Desktop.

Select an option

Save Techcable/c65d22c64209bd3c0235fd1ab03d1a9d to your computer and use it in GitHub Desktop.
Categorizes ASCII characters in a somewhat arbitrary fashion.
// SPDX-FileCopyrightText: Copyright 2026 Techcable <https://techcable.net>
// SPDX-License-Identifier: MIT OR Apache-2.0 OR CC0-1.0
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.IntStream;
/// Sorts every ASCII character (code points 0 through 127) into exactly one category.
///
/// The grouping is somewhat arbitrary, but it can still come in handy.
///
/// Lookup goes through [#of(char)], which is backed by a table computed once
/// when the enum is initialized.
public enum AsciiCharCategory {
    // Declaration order matters: a character is assigned to the FIRST constant
    // whose filter accepts it, so earlier constants shadow later ones.

    /// The letters `A` through `Z`.
    ASCII_UPPERCASE_LETTER(inRange('A', 'Z')),
    /// The letters `a` through `z`.
    ASCII_LOWERCASE_LETTER(inRange('a', 'z')),
    /// The digits `0` through `9`.
    ASCII_DIGIT(inRange('0', '9')),
    /// Line feed and carriage return.
    ASCII_NEWLINE(anyOf('\n', '\r')),
    /// Forward slash and backslash.
    ASCII_PATHSEP(anyOf('/', '\\')),
    /// Everything below the space character, plus DEL (127), unless claimed above.
    ///
    /// Because this constant precedes [#ASCII_WHITESPACE], a tab (for example)
    /// is reported as non-printable rather than as whitespace.
    ASCII_NONPRINTABLE((ch) -> ch == 127 || ch < ' '),
    /// Whatever [Character#isWhitespace(char)] accepts and no earlier constant claimed.
    ASCII_WHITESPACE(Character::isWhitespace),
    /// Catch-all: any character that no earlier constant claimed.
    ASCII_SYMBOL((ignored) -> true);

    /// Every category, in declaration order (which is also the matching priority).
    public static final List<AsciiCharCategory> ALL_CATEGORIES = List.of(values());

    /// Precomputed answer for each ASCII code point; index `i` holds the category of `(char) i`.
    private static final AsciiCharCategory[] TABLE = buildTable();

    /// Membership test for this category, *ignoring* priority.
    ///
    /// It may accept characters that actually belong to an earlier constant,
    /// so it is only consulted while building the lookup table.
    private final Predicate<Character> rawFilter;

    AsciiCharCategory(Predicate<Character> rawFilter) {
        this.rawFilter = rawFilter;
    }

    /// Returns the category of the given ASCII character.
    ///
    /// @param c the character to classify
    /// @return the first category (in declaration order) whose filter accepts `c`
    /// @throws IllegalArgumentException if `c` is not an ASCII character
    public static AsciiCharCategory of(char c) {
        assert TABLE.length == 128;
        if (isAscii(c)) {
            return TABLE[c];
        }
        throw new IllegalArgumentException("Not an ASCII char: `" + c + "`");
    }

    /// Tells whether the given character lies in the ASCII range (0 through 127).
    ///
    /// @param c the character to check
    /// @return `true` if `c` is at most 127
    public static boolean isAscii(char c) {
        // A char is unsigned, so only the upper bound needs checking.
        return c < 128;
    }

    /// Builds a filter accepting every character from `start` to `end`, both inclusive.
    ///
    /// @throws IllegalArgumentException if `start` is greater than `end`
    private static Predicate<Character> inRange(char start, char end) {
        if (end < start) {
            throw new IllegalArgumentException();
        }
        return (candidate) -> start <= candidate && candidate <= end;
    }

    /// Builds a filter accepting exactly the listed characters.
    private static Predicate<Character> anyOf(char... items) {
        String members = String.valueOf(items);
        return (candidate) -> members.indexOf(candidate.charValue()) >= 0;
    }

    /// Computes the category of each of the 128 ASCII code points.
    private static AsciiCharCategory[] buildTable() {
        AsciiCharCategory[] table = new AsciiCharCategory[128];
        for (int code = 0; code < table.length; code++) {
            table[code] = firstMatch((char) code);
        }
        return table;
    }

    /// Finds the highest-priority category whose raw filter accepts `c`.
    private static AsciiCharCategory firstMatch(char c) {
        for (AsciiCharCategory candidate : ALL_CATEGORIES) {
            if (candidate.rawFilter.test(c)) {
                return candidate;
            }
        }
        // Not expected to happen: the last constant accepts everything.
        throw new RuntimeException("Nothing matches `" + c + "`");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment