Skip to content

Instantly share code, notes, and snippets.

@jo-makar
Last active October 14, 2024 17:37
Show Gist options
  • Save jo-makar/53ddc21e1e281e44acb9d9dc4d1daca5 to your computer and use it in GitHub Desktop.
Save jo-makar/53ddc21e1e281e44acb9d9dc4d1daca5 to your computer and use it in GitHub Desktop.
Manipulate gron-based data stored as Map<String, Object>
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.TreeSet;
public class GronUtils {
/**
* Associate a value with the key interpreted as a gron expression.
* NB The keys in the map are expected to be flattened, eg `a.b[0]`, `a.c.d`, etc.
*
* Example:
* ```
* Map<String, Object> map = new HashMap<>(Map.of("a.b[0]", 1));
* putGron(map, "a.b[0]", 2);
* assertEquiv(map, Map.of("a.b[0]", 2));
* ```
*
* Objects/arrays that do not exist are created as needed (using gron expression lookahead).
* It is an error if an intermediate field of different type would be overwritten.
* Eg, `putGron(Map.of("a.b[0]", 1), "a.b.c", 1)` since `a.b` is already an array.
*
* For array indices it's expected that the array is already of sufficient length.
*
* The special array index -1 causes the value to be appended to the end of the array.
* It is an error if the key with the asssociated index does not reference an array.
*/
public static Result<Object> putGron(Map<String, Object> map, String key, Object value) {
// Algorithm outline:
//
// - If the key exists in the map, simply replace it
//
// - Otherwise verify the key matches the map structure
// - Ie verify an array entry (or object field or scalar) key refers to an array (object or scalar)
//
// - And verify array indices exist and handle the use of special -1 index
// - Parse the key as a gron expression (also verifies its validity)
// - Build up the key from the gron expression segments
// - For each intermediate key if referring to an array index
// - Verify the array index exists, if not return an error
// - Replace a -1 index with the highest existing array index plus one
// - NB determing the highest index involves searching through the set of keys
// - This is because keys are sorted lexicographically (eg {a[0],a[10],a[1],...,a[9]})
if (map.containsKey(key))
return Result.ok(map.put(key, value));
NavigableSet<String> mapKeys = new TreeSet<String>(map.keySet());
//
// Verify the gron expression matches the existing map structure
// (as indicated by specific mismatches of floor/ceiling-to-key suffixes)
//
String floor, ceiling;
if ((floor = mapKeys.floor(key)) != null) {
if (boundaryMismatch(key, floor))
return Result.err(new RuntimeException("Structure mismatch"));
} else if ((ceiling = mapKeys.ceiling(key)) != null) {
if (boundaryMismatch(key, ceiling))
return Result.err(new RuntimeException("Structure mismatch"));
}
//
// Verify array indices exist and handle the use of special -1 index
//
StringBuilder currentKey = new StringBuilder();
for (GronSegment segment : parseGron(key)) {
if (segment.isField()) {
if (currentKey.length() > 0)
currentKey.append('.');
currentKey.append(segment.field());
}
else /* if (segment.isIndex()) */ {
int index = segment.index();
if (index > -1) {
String testKey = String.format("%s[%d]", currentKey.toString(), index);
if (!testKeyFound(testKey, mapKeys.floor(testKey)) &&
!testKeyFound(testKey, mapKeys.ceiling(testKey)))
return Result.err(new RuntimeException("Array index not found"));
currentKey.append(String.format("[%d]", index));
}
else /* if (index == -1) */ {
//
// Determine the highest array index
// NB keys are sorted lexicographically (not numerically)
//
String pseudoKey = currentKey.toString() + "[";
TreeSet<Integer> indices = new TreeSet<>();
Iterator<String> tailSetIter = mapKeys.tailSet(pseudoKey, false).iterator();
while (tailSetIter.hasNext()) {
String tailSetKey = tailSetIter.next();
if (!tailSetKey.startsWith(pseudoKey))
break;
int idx;
try {
int start = pseudoKey.length();
int end = tailSetKey.indexOf(']', pseudoKey.length());
idx = Integer.parseUnsignedInt(tailSetKey.substring(start, end));
} catch (Throwable t) {
// Should never happen
return Result.err(t);
}
indices.add(idx);
}
int highestIndex = indices.isEmpty() ? -1 : indices.last();
currentKey.append(String.format("[%d]", highestIndex + 1));
}
}
}
map.put(currentKey.toString(), value);
return Result.ok(null);
}
// Given a key and its boundary (ie floor or ceiling) determine if there is a boundary mismatch.
// Floor (ceiling) refers to the previous (next) key in a sorted list of existing keys.
//
// | key | boundary | mismatch | notes |
// | ---- | -------- | -------- | --------------------------------- |
// | a.c | a.b | false | |
// | a[1] | a[0] | false | |
// | a[0] | a | true | a is a scalar, expected an array |
// | a | a[0] | true | a is an array, expected a scalar |
// | a[0] | a.b | true | a is an object, expected an array |
// | a.b | a[0] | true | a is an array, expected an object |
// | a.b | a | true | a is a scalar, expected an object |
// | a | a.b | true | a is an object, expected a scalar |
private static boolean boundaryMismatch(String key, String boundary) {
List<Integer> keyCps = key.codePoints().boxed().collect(Collectors.toList());
List<Integer> boundaryCps = boundary.codePoints().boxed().collect(Collectors.toList());
int prefixLength = longestCommonPrefixLength(keyCps, boundaryCps);
int keySuffixFirstCp = prefixLength >= keyCps.size() ? -1 : keyCps.get(prefixLength);
int boundarySuffixFirstCp = prefixLength >= boundaryCps.size() ? -1 : boundaryCps.get(prefixLength);
return keySuffixFirstCp == '.' || keySuffixFirstCp == '[' ||
boundarySuffixFirstCp == '.' || boundarySuffixFirstCp == '[';
}
// Given a key and its boundary (ie floor or ceiling) determine if the key exists
// (eg testKeyFound("a.b[1]", "a.b[1].c") returns true)
private static boolean testKeyFound(String testKey, String boundary) {
if (boundary == null)
return false;
int length = longestCommonPrefixLength(testKey, boundary);
return testKey.codePointCount(0, testKey.length()) == length;
}
// Return the longest common prefix length of two strings (as measured in Unicode code points)
private static int longestCommonPrefixLength(String s, String t) {
return longestCommonPrefixLength(
s.codePoints().boxed().collect(Collectors.toList()),
t.codePoints().boxed().collect(Collectors.toList())
);
}
private static int longestCommonPrefixLength(List<Integer> s, List<Integer> t) {
int maxLen = Math.min(s.size(), t.size());
int len;
for (len = 0; len < maxLen && s.get(len).equals(t.get(len)); len++) ;
return len;
}
// TODO Switch over to a proper lexer/parser as needed (for example if quoted field name support is needed)
private static final Pattern gronSegmentPattern = Pattern.compile("\\.|\\[[0-9]\\]|\\[[1-9][0-9]+\\]|\\[-1\\]|[^\\.\\[]+");
// Parse a gron expression into its components
// Eg parse "a.b[0]" into {"a","b",0} (as GronSegments)
private static GronSegment[] parseGron(String expr) {
if (expr == null || expr.isEmpty())
throw new RuntimeException("Missing gron");
List<String> segments = new ArrayList<>();
Matcher matcher = gronSegmentPattern.matcher(expr);
// Ensure that the expression is processed without intervening skipped portions.
// Eg matcher.results().map(MatchResult::group).collect() may result in skips.
int expectedStart = 0;
while (matcher.find()) {
if (matcher.start() != expectedStart)
throw new RuntimeException("Invalid gron, expression not parsed");
expectedStart = matcher.end();
segments.add(matcher.group());
}
if (!matcher.hitEnd())
throw new RuntimeException("Invalid gron, expression not fully parsed");
// Verify the gron expression is valid, specifically:
// - Starts with a field
// - Every dot is followed by a field
if (segments.get(0).matches("^(\\.|\\[.*)"))
throw new RuntimeException(String.format("Invalid gron: '%s'", expr));
for (int i = 1; i < segments.size(); i++) {
if (!segments.get(i).equals("."))
continue;
if (i == segments.size() - 1)
throw new RuntimeException("Invalid gron, ends with dot");
if (segments.get(i + 1).matches("^(\\.|\\[.*)"))
throw new RuntimeException("Invalid gron, dot not followed by field");
}
return segments.stream()
// Strip out the dots since no longer needed
.filter(s -> !s.equals("."))
.map(s -> new GronSegment(s))
.toArray(size -> new GronSegment[size]);
}
private static class GronSegment {
private final String field;
private final Integer index;
public GronSegment(String value) {
if (value.startsWith("[")) {
field = null;
index = Integer.parseInt(value.substring(1, value.length() - 1));
} else {
field = value;
index = null;
}
}
public boolean isField() { return field != null; }
public boolean isIndex() { return index != null; }
public String field() {
if (field == null)
throw new RuntimeException("Non-field stored");
return field;
}
public Integer index() {
if (index == null)
throw new RuntimeException("Non-index stored");
return index;
}
}
import java.util.Optional;
public interface Result<T> {
boolean isOk();
boolean isErr();
/** Return the wrapped value or throw the exception */
T get();
Optional<Throwable> getError();
static <T> Result<T> ok(T value) { return new Ok<>(value); }
static <T> Result<T> err(Throwable err) { return new Err<>(err); }
final class Ok<T> implements Result<T> {
private final T value;
private Ok(T value) { this.value = value; }
public boolean isOk() { return true; }
public boolean isErr() { return false; }
public T get() { return value; }
public Optional<Throwable> getError() { return Optional.empty(); }
}
final class Err<T> implements Result<T> {
private final Throwable err;
private Err(Throwable err) { this.err = err; }
public boolean isOk() { return false; }
public boolean isErr() { return true; }
public T get() { throw err; }
public Optional<Throwable> getError() { return Optional.of(err); }
}
}
QuadConsumer<String, Pair<String, Object>, String, Object> testEquals = (inputJson, putGronArgs, expectedJson, expectedRetval) -> {
// Verifies expectedJson and expectedRetval match actual values
// ...
};
TriConsumer<String, Pair<String, Object>, String> testThrows = (inputJson, putGronArgs, expectedError) -> {
// Verifies the exception thrown message contains the expectedError
// ...
};
// Documentation examples
testEquals.accept("{\"a.b[0]\":1}", Pair.of("a.b[0]", 2), "{\"a.b[0]\":2}", 1L);
// Invalid gron expressions
testThrows.accept("{}", Pair.of(null, 1), "Missing gron");
testThrows.accept("{}", Pair.of("", 1), "Missing gron");
testThrows.accept("{}", Pair.of("a[]", 1), "Invalid gron");
testThrows.accept("{}", Pair.of("a.", 1), "Invalid gron");
testThrows.accept("{}", Pair.of("[0]", 1), "Invalid gron");
testThrows.accept("{}", Pair.of(".a", 1), "Invalid gron");
testThrows.accept("{}", Pair.of("a..b", 1), "Invalid gron");
testThrows.accept("{}", Pair.of("a.[0]", 1), "Invalid gron");
testThrows.accept("{}", Pair.of("a[0].", 1), "Invalid gron");
// Array gron segment handling tests
testThrows.accept("{\"a\":1}", Pair.of("a[0]", 1), "Structure mismatch");
testThrows.accept("{\"a[0]\":1}", Pair.of("a", 1), "Structure mismatch");
testThrows.accept("{\"a.b\":1}", Pair.of("a[0]", 1), "Structure mismatch");
testThrows.accept("{\"a[0]\":1}", Pair.of("a.b", 1), "Structure mismatch");
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1]", 2), "{\"a[0]\":1,\"a[1]\":2}", null);
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1][-1]", 2), "{\"a[0]\":1,\"a[1][0]\":2}", null);
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1][-1][-1]", 2), "{\"a[0]\":1,\"a[1][0][0]\":2}", null);
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1].b", 2), "{\"a[0]\":1,\"a[1].b\":2}", null);
testEquals.accept("{\"a.b\":1}", Pair.of("a.c[-1]", 2), "{\"a.b\":1,\"a.c[0]\":2}", null);
testThrows.accept("{\"a.b\":1}", Pair.of("a.b[-1]", 2), "Structure mismatch");
testThrows.accept("{\"a.b.c\":1}", Pair.of("a.b[-1]", 2), "Structure mismatch");
testThrows.accept("{\"a[0]\":1}", Pair.of("a[1]", 1), "Array index not found");
testEquals.accept("{\"a[0]\":1}", Pair.of("a[0]", 2), "{\"a[0]\":2}", 1L);
testEquals.accept("{\"a[0][0]\":1}", Pair.of("a[0][0]", 2), "{\"a[0][0]\":2}", 1L);
testEquals.accept("{\"a[0].b\":1}", Pair.of("a[0].b", 2), "{\"a[0].b\":2}]}", 1L);
// Object gron segment handling tests
testThrows.accept("{\"a\":1}", Pair.of("a.b", 1), "Structure mismatch");
testThrows.accept("{\"a.b\":1}", Pair.of("a", 1), "Structure mismatch");
// Structure mismatch between arrays and objects already covered above
testEquals.accept("{\"a\":1}", Pair.of("a", 2), "{\"a\":2}", 1L);
testEquals.accept("{\"a.b\":1}}", Pair.of("a.b", 2), "{\"a.b\":2}}", 1L);
testEquals.accept("{\"a.a\":1}", Pair.of("a.b.c", 2), "{\"a.a\":1,\"a.b.c\":2}", null);
testEquals.accept("{\"a.a\":1}", Pair.of("a.b.c.d", 2), "{\"a.a\":1,\"a.b.c.d\":2}", null);
testEquals.accept("{\"a[0].a\":1}", Pair.of("a[0].b.c", 2), "{\"a[0].a\":1,\"a[0].b.c\":2}", null);
// Verify unicode works as expected (via arbitrary tests above)
testEquals.accept("{\"πŸ˜€.πŸ˜›[0]\":1}", Pair.of("πŸ˜€.πŸ˜›[0]", 2), "{\"πŸ˜€.πŸ˜›[0]\":2}", 1L);
testThrows.accept("{\"πŸ˜€\":1}", Pair.of("πŸ˜€[0]", 1), "Structure mismatch");
testThrows.accept("{\"πŸ˜€.πŸ˜›\":1}", Pair.of("πŸ˜€[0]", 1), "Structure mismatch");
testEquals.accept("{\"πŸ˜€[0]\":1}", Pair.of("πŸ˜€[-1]", 2), "{\"πŸ˜€[0]\":1,\"πŸ˜€[1]\":2}", null);
testEquals.accept("{\"πŸ˜€.πŸ˜›\":1}", Pair.of("πŸ˜€.😎[-1]", 2), "{\"πŸ˜€.πŸ˜›\":1,\"πŸ˜€.😎[0]\":2}", null);
testThrows.accept("{\"πŸ˜€[0]\":1}", Pair.of("πŸ˜€[1]", 1), "Array index not found");
testEquals.accept("{\"πŸ˜€.πŸ˜€\":1}", Pair.of("πŸ˜€.πŸ˜›.😎", 2), "{\"πŸ˜€.πŸ˜€\":1,\"πŸ˜€.πŸ˜›.😎\":2}", null);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment