Last active
October 14, 2024 17:37
-
-
Save jo-makar/53ddc21e1e281e44acb9d9dc4d1daca5 to your computer and use it in GitHub Desktop.
Manipulate gron-based data stored as Map<String, Object>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.NavigableSet; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import java.util.stream.Collectors; | |
import java.util.TreeSet; | |
public class GronUtils { | |
/** | |
* Associate a value with the key interpreted as a gron expression. | |
* NB The keys in the map are expected to be flattened, eg `a.b[0]`, `a.c.d`, etc. | |
* | |
* Example: | |
* ``` | |
* Map<String, Object> map = new HashMap<>(Map.of("a.b[0]", 1)); | |
* putGron(map, "a.b[0]", 2); | |
* assertEquiv(map, Map.of("a.b[0]", 2)); | |
* ``` | |
* | |
* Objects/arrays that do not exist are created as needed (using gron expression lookahead). | |
* It is an error if an intermediate field of different type would be overwritten. | |
* Eg, `putGron(Map.of("a.b[0]", 1), "a.b.c", 1)` since `a.b` is already an array. | |
* | |
* For array indices it's expected that the array is already of sufficient length. | |
* | |
* The special array index -1 causes the value to be appended to the end of the array. | |
* It is an error if the key with the asssociated index does not reference an array. | |
*/ | |
public static Result<Object> putGron(Map<String, Object> map, String key, Object value) { | |
// Algorithm outline: | |
// | |
// - If the key exists in the map, simply replace it | |
// | |
// - Otherwise verify the key matches the map structure | |
// - Ie verify an array entry (or object field or scalar) key refers to an array (object or scalar) | |
// | |
// - And verify array indices exist and handle the use of special -1 index | |
// - Parse the key as a gron expression (also verifies its validity) | |
// - Build up the key from the gron expression segments | |
// - For each intermediate key if referring to an array index | |
// - Verify the array index exists, if not return an error | |
// - Replace a -1 index with the highest existing array index plus one | |
// - NB determing the highest index involves searching through the set of keys | |
// - This is because keys are sorted lexicographically (eg {a[0],a[10],a[1],...,a[9]}) | |
if (map.containsKey(key)) | |
return Result.ok(map.put(key, value)); | |
NavigableSet<String> mapKeys = new TreeSet<String>(map.keySet()); | |
// | |
// Verify the gron expression matches the existing map structure | |
// (as indicated by specific mismatches of floor/ceiling-to-key suffixes) | |
// | |
String floor, ceiling; | |
if ((floor = mapKeys.floor(key)) != null) { | |
if (boundaryMismatch(key, floor)) | |
return Result.err(new RuntimeException("Structure mismatch")); | |
} else if ((ceiling = mapKeys.ceiling(key)) != null) { | |
if (boundaryMismatch(key, ceiling)) | |
return Result.err(new RuntimeException("Structure mismatch")); | |
} | |
// | |
// Verify array indices exist and handle the use of special -1 index | |
// | |
StringBuilder currentKey = new StringBuilder(); | |
for (GronSegment segment : parseGron(key)) { | |
if (segment.isField()) { | |
if (currentKey.length() > 0) | |
currentKey.append('.'); | |
currentKey.append(segment.field()); | |
} | |
else /* if (segment.isIndex()) */ { | |
int index = segment.index(); | |
if (index > -1) { | |
String testKey = String.format("%s[%d]", currentKey.toString(), index); | |
if (!testKeyFound(testKey, mapKeys.floor(testKey)) && | |
!testKeyFound(testKey, mapKeys.ceiling(testKey))) | |
return Result.err(new RuntimeException("Array index not found")); | |
currentKey.append(String.format("[%d]", index)); | |
} | |
else /* if (index == -1) */ { | |
// | |
// Determine the highest array index | |
// NB keys are sorted lexicographically (not numerically) | |
// | |
String pseudoKey = currentKey.toString() + "["; | |
TreeSet<Integer> indices = new TreeSet<>(); | |
Iterator<String> tailSetIter = mapKeys.tailSet(pseudoKey, false).iterator(); | |
while (tailSetIter.hasNext()) { | |
String tailSetKey = tailSetIter.next(); | |
if (!tailSetKey.startsWith(pseudoKey)) | |
break; | |
int idx; | |
try { | |
int start = pseudoKey.length(); | |
int end = tailSetKey.indexOf(']', pseudoKey.length()); | |
idx = Integer.parseUnsignedInt(tailSetKey.substring(start, end)); | |
} catch (Throwable t) { | |
// Should never happen | |
return Result.err(t); | |
} | |
indices.add(idx); | |
} | |
int highestIndex = indices.isEmpty() ? -1 : indices.last(); | |
currentKey.append(String.format("[%d]", highestIndex + 1)); | |
} | |
} | |
} | |
map.put(currentKey.toString(), value); | |
return Result.ok(null); | |
} | |
// Given a key and its boundary (ie floor or ceiling) determine if there is a boundary mismatch. | |
// Floor (ceiling) refers to the previous (next) key in a sorted list of existing keys. | |
// | |
// | key | boundary | mismatch | notes | | |
// | ---- | -------- | -------- | --------------------------------- | | |
// | a.c | a.b | false | | | |
// | a[1] | a[0] | false | | | |
// | a[0] | a | true | a is a scalar, expected an array | | |
// | a | a[0] | true | a is an array, expected a scalar | | |
// | a[0] | a.b | true | a is an object, expected an array | | |
// | a.b | a[0] | true | a is an array, expected an object | | |
// | a.b | a | true | a is a scalar, expected an object | | |
// | a | a.b | true | a is an object, expected a scalar | | |
private static boolean boundaryMismatch(String key, String boundary) { | |
List<Integer> keyCps = key.codePoints().boxed().collect(Collectors.toList()); | |
List<Integer> boundaryCps = boundary.codePoints().boxed().collect(Collectors.toList()); | |
int prefixLength = longestCommonPrefixLength(keyCps, boundaryCps); | |
int keySuffixFirstCp = prefixLength >= keyCps.size() ? -1 : keyCps.get(prefixLength); | |
int boundarySuffixFirstCp = prefixLength >= boundaryCps.size() ? -1 : boundaryCps.get(prefixLength); | |
return keySuffixFirstCp == '.' || keySuffixFirstCp == '[' || | |
boundarySuffixFirstCp == '.' || boundarySuffixFirstCp == '['; | |
} | |
// Given a key and its boundary (ie floor or ceiling) determine if the key exists | |
// (eg testKeyFound("a.b[1]", "a.b[1].c") returns true) | |
private static boolean testKeyFound(String testKey, String boundary) { | |
if (boundary == null) | |
return false; | |
int length = longestCommonPrefixLength(testKey, boundary); | |
return testKey.codePointCount(0, testKey.length()) == length; | |
} | |
// Return the longest common prefix length of two strings (as measured in Unicode code points) | |
private static int longestCommonPrefixLength(String s, String t) { | |
return longestCommonPrefixLength( | |
s.codePoints().boxed().collect(Collectors.toList()), | |
t.codePoints().boxed().collect(Collectors.toList()) | |
); | |
} | |
private static int longestCommonPrefixLength(List<Integer> s, List<Integer> t) { | |
int maxLen = Math.min(s.size(), t.size()); | |
int len; | |
for (len = 0; len < maxLen && s.get(len).equals(t.get(len)); len++) ; | |
return len; | |
} | |
// TODO Switch over to a proper lexer/parser as needed (for example if quoted field name support is needed) | |
private static final Pattern gronSegmentPattern = Pattern.compile("\\.|\\[[0-9]\\]|\\[[1-9][0-9]+\\]|\\[-1\\]|[^\\.\\[]+"); | |
// Parse a gron expression into its components | |
// Eg parse "a.b[0]" into {"a","b",0} (as GronSegments) | |
private static GronSegment[] parseGron(String expr) { | |
if (expr == null || expr.isEmpty()) | |
throw new RuntimeException("Missing gron"); | |
List<String> segments = new ArrayList<>(); | |
Matcher matcher = gronSegmentPattern.matcher(expr); | |
// Ensure that the expression is processed without intervening skipped portions. | |
// Eg matcher.results().map(MatchResult::group).collect() may result in skips. | |
int expectedStart = 0; | |
while (matcher.find()) { | |
if (matcher.start() != expectedStart) | |
throw new RuntimeException("Invalid gron, expression not parsed"); | |
expectedStart = matcher.end(); | |
segments.add(matcher.group()); | |
} | |
if (!matcher.hitEnd()) | |
throw new RuntimeException("Invalid gron, expression not fully parsed"); | |
// Verify the gron expression is valid, specifically: | |
// - Starts with a field | |
// - Every dot is followed by a field | |
if (segments.get(0).matches("^(\\.|\\[.*)")) | |
throw new RuntimeException(String.format("Invalid gron: '%s'", expr)); | |
for (int i = 1; i < segments.size(); i++) { | |
if (!segments.get(i).equals(".")) | |
continue; | |
if (i == segments.size() - 1) | |
throw new RuntimeException("Invalid gron, ends with dot"); | |
if (segments.get(i + 1).matches("^(\\.|\\[.*)")) | |
throw new RuntimeException("Invalid gron, dot not followed by field"); | |
} | |
return segments.stream() | |
// Strip out the dots since no longer needed | |
.filter(s -> !s.equals(".")) | |
.map(s -> new GronSegment(s)) | |
.toArray(size -> new GronSegment[size]); | |
} | |
private static class GronSegment { | |
private final String field; | |
private final Integer index; | |
public GronSegment(String value) { | |
if (value.startsWith("[")) { | |
field = null; | |
index = Integer.parseInt(value.substring(1, value.length() - 1)); | |
} else { | |
field = value; | |
index = null; | |
} | |
} | |
public boolean isField() { return field != null; } | |
public boolean isIndex() { return index != null; } | |
public String field() { | |
if (field == null) | |
throw new RuntimeException("Non-field stored"); | |
return field; | |
} | |
public Integer index() { | |
if (index == null) | |
throw new RuntimeException("Non-index stored"); | |
return index; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Optional; | |
public interface Result<T> { | |
boolean isOk(); | |
boolean isErr(); | |
/** Return the wrapped value or throw the exception */ | |
T get(); | |
Optional<Throwable> getError(); | |
static <T> Result<T> ok(T value) { return new Ok<>(value); } | |
static <T> Result<T> err(Throwable err) { return new Err<>(err); } | |
final class Ok<T> implements Result<T> { | |
private final T value; | |
private Ok(T value) { this.value = value; } | |
public boolean isOk() { return true; } | |
public boolean isErr() { return false; } | |
public T get() { return value; } | |
public Optional<Throwable> getError() { return Optional.empty(); } | |
} | |
final class Err<T> implements Result<T> { | |
private final Throwable err; | |
private Err(Throwable err) { this.err = err; } | |
public boolean isOk() { return false; } | |
public boolean isErr() { return true; } | |
public T get() { throw err; } | |
public Optional<Throwable> getError() { return Optional.of(err); } | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
QuadConsumer<String, Pair<String, Object>, String, Object> testEquals = (inputJson, putGronArgs, expectedJson, expectedRetval) -> { | |
// Verifies expectedJson and expectedRetval match actual values | |
// ... | |
}; | |
TriConsumer<String, Pair<String, Object>, String> testThrows = (inputJson, putGronArgs, expectedError) -> { | |
// Verifies the exception thrown message contains the expectedError | |
// ... | |
}; | |
// Documentation examples | |
testEquals.accept("{\"a.b[0]\":1}", Pair.of("a.b[0]", 2), "{\"a.b[0]\":2}", 1L); | |
// Invalid gron expressions | |
testThrows.accept("{}", Pair.of(null, 1), "Missing gron"); | |
testThrows.accept("{}", Pair.of("", 1), "Missing gron"); | |
testThrows.accept("{}", Pair.of("a[]", 1), "Invalid gron"); | |
testThrows.accept("{}", Pair.of("a.", 1), "Invalid gron"); | |
testThrows.accept("{}", Pair.of("[0]", 1), "Invalid gron"); | |
testThrows.accept("{}", Pair.of(".a", 1), "Invalid gron"); | |
testThrows.accept("{}", Pair.of("a..b", 1), "Invalid gron"); | |
testThrows.accept("{}", Pair.of("a.[0]", 1), "Invalid gron"); | |
testThrows.accept("{}", Pair.of("a[0].", 1), "Invalid gron"); | |
// Array gron segment handling tests | |
testThrows.accept("{\"a\":1}", Pair.of("a[0]", 1), "Structure mismatch"); | |
testThrows.accept("{\"a[0]\":1}", Pair.of("a", 1), "Structure mismatch"); | |
testThrows.accept("{\"a.b\":1}", Pair.of("a[0]", 1), "Structure mismatch"); | |
testThrows.accept("{\"a[0]\":1}", Pair.of("a.b", 1), "Structure mismatch"); | |
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1]", 2), "{\"a[0]\":1,\"a[1]\":2}", null); | |
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1][-1]", 2), "{\"a[0]\":1,\"a[1][0]\":2}", null); | |
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1][-1][-1]", 2), "{\"a[0]\":1,\"a[1][0][0]\":2}", null); | |
testEquals.accept("{\"a[0]\":1}", Pair.of("a[-1].b", 2), "{\"a[0]\":1,\"a[1].b\":2}", null); | |
testEquals.accept("{\"a.b\":1}", Pair.of("a.c[-1]", 2), "{\"a.b\":1,\"a.c[0]\":2}", null); | |
testThrows.accept("{\"a.b\":1}", Pair.of("a.b[-1]", 2), "Structure mismatch"); | |
testThrows.accept("{\"a.b.c\":1}", Pair.of("a.b[-1]", 2), "Structure mismatch"); | |
testThrows.accept("{\"a[0]\":1}", Pair.of("a[1]", 1), "Array index not found"); | |
testEquals.accept("{\"a[0]\":1}", Pair.of("a[0]", 2), "{\"a[0]\":2}", 1L); | |
testEquals.accept("{\"a[0][0]\":1}", Pair.of("a[0][0]", 2), "{\"a[0][0]\":2}", 1L); | |
testEquals.accept("{\"a[0].b\":1}", Pair.of("a[0].b", 2), "{\"a[0].b\":2}]}", 1L); | |
// Object gron segment handling tests | |
testThrows.accept("{\"a\":1}", Pair.of("a.b", 1), "Structure mismatch"); | |
testThrows.accept("{\"a.b\":1}", Pair.of("a", 1), "Structure mismatch"); | |
// Structure mismatch between arrays and objects already covered above | |
testEquals.accept("{\"a\":1}", Pair.of("a", 2), "{\"a\":2}", 1L); | |
testEquals.accept("{\"a.b\":1}}", Pair.of("a.b", 2), "{\"a.b\":2}}", 1L); | |
testEquals.accept("{\"a.a\":1}", Pair.of("a.b.c", 2), "{\"a.a\":1,\"a.b.c\":2}", null); | |
testEquals.accept("{\"a.a\":1}", Pair.of("a.b.c.d", 2), "{\"a.a\":1,\"a.b.c.d\":2}", null); | |
testEquals.accept("{\"a[0].a\":1}", Pair.of("a[0].b.c", 2), "{\"a[0].a\":1,\"a[0].b.c\":2}", null); | |
// Verify unicode works as expected (via arbitrary tests above) | |
testEquals.accept("{\"π.π[0]\":1}", Pair.of("π.π[0]", 2), "{\"π.π[0]\":2}", 1L); | |
testThrows.accept("{\"π\":1}", Pair.of("π[0]", 1), "Structure mismatch"); | |
testThrows.accept("{\"π.π\":1}", Pair.of("π[0]", 1), "Structure mismatch"); | |
testEquals.accept("{\"π[0]\":1}", Pair.of("π[-1]", 2), "{\"π[0]\":1,\"π[1]\":2}", null); | |
testEquals.accept("{\"π.π\":1}", Pair.of("π.π[-1]", 2), "{\"π.π\":1,\"π.π[0]\":2}", null); | |
testThrows.accept("{\"π[0]\":1}", Pair.of("π[1]", 1), "Array index not found"); | |
testEquals.accept("{\"π.π\":1}", Pair.of("π.π.π", 2), "{\"π.π\":1,\"π.π.π\":2}", null); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment