Created
July 14, 2023 23:32
-
-
Save dnault/1520b0f4612c73e7b080f63f9ab4f900 to your computer and use it in GitHub Desktop.
Quick way to remove top-level fields from a byte array containing a JSON object.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.fasterxml.jackson.core.JsonFactory; | |
import com.fasterxml.jackson.core.JsonParser; | |
import com.fasterxml.jackson.core.JsonProcessingException; | |
import com.fasterxml.jackson.core.JsonToken; | |
import com.fasterxml.jackson.databind.json.JsonMapper; | |
import java.io.IOException; | |
import java.util.Arrays; | |
import java.util.Collections; | |
import java.util.LinkedHashMap; | |
import java.util.Map; | |
import java.util.Set; | |
import static java.nio.charset.StandardCharsets.UTF_8; | |
public class JsonFieldEraser { | |
private static final JsonFactory jsonFactory = new JsonMapper().getFactory(); | |
private JsonFieldEraser() { | |
throw new AssertionError("not instantiable"); | |
} | |
/** | |
* Example usage. | |
*/ | |
public static void main(String[] args) throws JsonProcessingException { | |
Map<String, Object> map = new LinkedHashMap<>(); | |
map.put("meta1", 1); | |
map.put("meta2", Collections.emptyMap()); | |
map.put("meta3", Collections.emptyList()); | |
map.put("data1", Collections.singletonMap("color", "red")); | |
map.put("meta4", true); | |
map.put("data2", Collections.singletonList("xyzzy")); | |
map.put("meta5", "foo"); | |
String json = new JsonMapper().writeValueAsString(map); | |
System.out.println(json); | |
byte[] bytes = json.getBytes(UTF_8); | |
erase(bytes, Set.of("meta1", "meta2", "meta3", "meta4", "meta5")); | |
System.out.println(new String(bytes, UTF_8)); | |
} | |
/** | |
* Replaces the given fields with whitespace. Returns the set of erased fields. | |
*/ | |
public static Set<String> erase(byte[] jsonObject, Set<String> topLevelFieldsToErase) { | |
Map<String, ByteRange> filedNameToRange = new LinkedHashMap<>(); | |
try (JsonParser parser = jsonFactory.createParser(jsonObject)) { | |
if (parser.nextToken() != JsonToken.START_OBJECT) { | |
throw new IllegalArgumentException("Byte array does not contain JSON object"); | |
} | |
int depth = 1; | |
JsonToken token; | |
while ((token = parser.nextToken()) != null) { | |
if (depth == 1 && token == JsonToken.FIELD_NAME) { | |
String fieldName = parser.currentName(); | |
if (topLevelFieldsToErase.contains(fieldName)) { | |
int start = (int) parser.currentTokenLocation().getByteOffset(); | |
skipToEndOfFieldValue(parser); | |
int pastEnd = (int) parser.currentLocation().getByteOffset(); | |
filedNameToRange.put(fieldName, new ByteRange(jsonObject, start, pastEnd)); | |
if (filedNameToRange.size() == topLevelFieldsToErase.size()) { | |
// found all the fields; don't need to keep parsing | |
break; | |
} | |
} | |
} else if (token == JsonToken.START_OBJECT) { | |
depth++; | |
} else if (token == JsonToken.END_OBJECT) { | |
depth--; | |
if (depth == 0 && parser.nextToken() != null) { | |
// multiple JSON roots, or trailing garbage | |
throw new IllegalArgumentException("Byte array does has multiple document roots, or trailing garbage"); | |
} | |
} | |
} | |
for (ByteRange range : filedNameToRange.values()) { | |
range.swallowOneComma(); | |
range.fill((byte) ' '); | |
} | |
return filedNameToRange.keySet(); | |
} catch (IOException e) { | |
throw new IllegalArgumentException("Byte array does not contain valid JSON", e); | |
} | |
} | |
private static void skipToEndOfFieldValue(JsonParser parser) throws IOException { | |
JsonToken valueToken = parser.nextToken(); | |
if (valueToken.isStructStart()) { | |
finishStruct(parser); | |
} else { | |
parser.finishToken(); | |
} | |
} | |
private static void finishStruct(JsonParser parser) throws IOException { | |
int depth = 1; | |
JsonToken token; | |
while ((token = parser.nextToken()) != null) { | |
if (token == JsonToken.START_OBJECT || token == JsonToken.START_ARRAY) { | |
depth++; | |
} else if (token == JsonToken.END_OBJECT || token == JsonToken.END_ARRAY) { | |
depth--; | |
if (depth == 0) { | |
return; | |
} | |
} | |
} | |
throw new IllegalArgumentException("Unexpected end of JSON"); | |
} | |
private static class ByteRange { | |
private final byte[] bytes; | |
private int startOffset; | |
private int pastEndOffset; | |
private ByteRange(byte[] bytes, int startOffset, int pastEndOffset) { | |
this.bytes = bytes; | |
this.startOffset = startOffset; | |
this.pastEndOffset = pastEndOffset; | |
} | |
@Override | |
public String toString() { | |
return "[" + startOffset + "," + pastEndOffset + ") = |" + new String(bytes, startOffset, pastEndOffset - startOffset, UTF_8) + "|"; | |
} | |
void fill(byte fillByte) { | |
Arrays.fill(bytes, startOffset, pastEndOffset, fillByte); | |
} | |
private void swallowOneComma() { | |
swallowWhitespace(); | |
if (bytes[pastEndOffset] == ',') { | |
pastEndOffset++; | |
} else if (bytes[startOffset - 1] == ',') { | |
startOffset--; | |
} | |
} | |
private void swallowWhitespace() { | |
while (isJsonWhitespace(bytes[startOffset - 1])) { | |
startOffset--; | |
} | |
while (isJsonWhitespace(bytes[pastEndOffset])) { | |
pastEndOffset++; | |
} | |
} | |
private static boolean isJsonWhitespace(byte b) { | |
switch (b) { | |
case 0x20: // Space | |
case 0x09: // Horizontal tab | |
case 0x0A: // LF | |
case 0x0D: // CR | |
return true; | |
default: | |
return false; | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment