Created
May 13, 2024 08:43
-
-
Save wagyourtail/3ab70855ad4810db749e6647943fcbc5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package xyz.wagyourtail.jvmdg.j9.intl; | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.StringReader; | |
import java.math.BigInteger; | |
import java.util.*; | |
/**
 * Small JSON5 reader plus string escaping helpers.
 *
 * <p>Parsed values are represented as {@code Map<String, Object>} (insertion ordered),
 * {@code List<Object>}, {@code String}, {@code Number} ({@code Integer}, {@code Long},
 * {@code BigInteger} or {@code Double}), {@code Boolean} or {@code null}.
 *
 * <p>All syntax errors are reported as {@link IllegalArgumentException}; failures of the
 * underlying reader are reported as {@link IOException}.
 */
public class Json5Slurper {

    /**
     * Parses the first JSON5 value contained in {@code str}.
     *
     * @param str JSON5 text
     * @return the parsed value, see the class description for the possible types
     * @throws IOException if reading from the in-memory reader fails
     * @throws IllegalArgumentException if the text is not a valid value
     */
    public static Object parseString(String str) throws IOException {
        return parse(stringReader(str));
    }

    /**
     * Reads one JSON5 value from {@code br}, skipping leading whitespace and comments.
     * Nothing after the value is consumed.
     *
     * @param br source of characters; its mark/reset support is used for one and two
     *           character look-ahead
     * @return Map&lt;String, Object&gt; | List&lt;Object&gt; | String | Number | Boolean | null
     * @throws IOException if the reader fails
     * @throws IllegalArgumentException on end of input or an unexpected character
     */
    public static Object parse(BufferedReader br) throws IOException {
        skipInsignificant(br);
        int c = br.read();
        switch (c) {
            case -1:
                throw new IllegalArgumentException("Unexpected EOF");
            case '{':
                return parseObject(br);
            case '[':
                return parseArray(br);
            case '"':
            case '\'':
                return translateEscapes(parseString(br, (char) c));
            case 't':
                expect(br, "rue");
                return Boolean.TRUE;
            case 'f':
                expect(br, "alse");
                return Boolean.FALSE;
            case 'n':
                expect(br, "ull");
                return null;
            case '+':
            case '-':
            case '.':
            case 'N':
            case 'I':
                return parseNumber(br, (char) c);
            default:
                if (isAsciiDigit(c)) {
                    return parseNumber(br, (char) c);
                }
                throw new IllegalArgumentException("Unexpected character: " + (char) c);
        }
    }

    /**
     * Parses the members of an object; the opening brace has already been consumed.
     * Keys may be quoted strings or bare identifiers, and a trailing comma is accepted.
     */
    private static Map<String, Object> parseObject(BufferedReader br) throws IOException {
        Map<String, Object> map = new LinkedHashMap<>();
        while (true) {
            skipInsignificant(br);
            int c = br.read();
            if (c == '}') {
                return map;
            }
            if (c == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }

            String key;
            if (c == '"' || c == '\'') {
                key = translateEscapes(parseString(br, (char) c));
            } else if (isIdentifierStart(c)) {
                key = parseIdentifier(br, (char) c);
            } else {
                throw new IllegalArgumentException("Unexpected character: " + (char) c);
            }

            // A key must always be followed by ':' and a value; "{key}" is an error.
            skipInsignificant(br);
            c = br.read();
            if (c == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }
            if (c != ':') {
                throw new IllegalArgumentException("Expected colon");
            }
            map.put(key, parse(br));

            skipInsignificant(br);
            c = br.read();
            if (c == '}') {
                return map;
            }
            if (c == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }
            if (c != ',') {
                throw new IllegalArgumentException("Expected comma");
            }
        }
    }

    /**
     * Parses the elements of an array; the opening bracket has already been consumed.
     * A trailing comma is accepted.
     */
    private static List<Object> parseArray(BufferedReader br) throws IOException {
        List<Object> list = new ArrayList<>();
        while (true) {
            skipInsignificant(br);
            int c = peek(br);
            if (c == ']') {
                br.read();
                return list;
            }
            if (c == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }
            list.add(parse(br));

            skipInsignificant(br);
            c = br.read();
            if (c == ']') {
                return list;
            }
            if (c == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }
            if (c != ',') {
                throw new IllegalArgumentException("Expected comma");
            }
        }
    }

    /**
     * Reads the body of a quoted string up to the unescaped closing {@code start} quote.
     * Escape sequences are copied verbatim (backslash included) so that
     * {@link #translateEscapes(String)} is the single place that decodes them.
     *
     * @param start the quote character that opened the string
     * @return the raw, still escaped, string body
     */
    private static String parseString(BufferedReader br, char start) throws IOException {
        StringBuilder raw = new StringBuilder();
        int c;
        while ((c = br.read()) != -1) {
            if (c == start) {
                return raw.toString();
            }
            if (c == '\n' || c == '\r') {
                throw new IllegalArgumentException("Unexpected newline");
            }
            raw.append((char) c);
            if (c == '\\') {
                int escaped = br.read();
                if (escaped == -1) {
                    break;
                }
                raw.append((char) escaped);
                // Keep an escaped CR LF pair together so it is one line continuation.
                if (escaped == '\r' && peek(br) == '\n') {
                    raw.append((char) br.read());
                }
            }
        }
        throw new IllegalArgumentException("Unexpected EOF");
    }

    /**
     * Reads a bare object key. The identifier ends at the first character that cannot be
     * part of an identifier (for example {@code ':'} or whitespace); that character is
     * left unread. {@code \\uXXXX} escapes inside the identifier are decoded.
     *
     * @param start the already consumed first character of the identifier
     */
    private static String parseIdentifier(BufferedReader br, char start) throws IOException {
        StringBuilder name = new StringBuilder();
        int c = start;
        do {
            name.append(c == '\\' ? readUnicodeEscape(br) : (char) c);
            c = isIdentifierPart(peek(br)) ? br.read() : -1;
        } while (c != -1);
        return name.toString();
    }

    /**
     * Parses a numeric literal: optional sign, then {@code NaN}, {@code Infinity}, a
     * hexadecimal integer ({@code 0x...}) or a decimal number with optional fraction and
     * exponent. The character following the literal is left unread, and end of input
     * terminates the literal normally.
     *
     * @param start the already consumed first character of the literal
     * @return {@code Double} for NaN, infinities and anything with a fraction or exponent;
     *         otherwise the smallest of {@code Integer}, {@code Long}, {@code BigInteger}
     *         that can hold the value
     */
    private static Number parseNumber(BufferedReader br, char start) throws IOException {
        int c = start;
        boolean negative = false;
        if (c == '+' || c == '-') {
            negative = c == '-';
            c = br.read();
        }
        if (c == 'N') {
            expect(br, "aN");
            return Double.NaN;
        }
        if (c == 'I') {
            expect(br, "nfinity");
            return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
        }
        if (c == -1) {
            throw new IllegalArgumentException("Unexpected EOF");
        }

        StringBuilder text = new StringBuilder();
        if (c == '0') {
            int marker = peek(br);
            if (marker == 'x' || marker == 'X') {
                br.read();
                while (hexValue(peek(br)) >= 0) {
                    text.append((char) br.read());
                }
                if (text.length() == 0) {
                    throw new IllegalArgumentException("Invalid number: 0" + (char) marker);
                }
                BigInteger hex = new BigInteger(text.toString(), 16);
                return narrow(negative ? hex.negate() : hex);
            }
        }

        boolean floating = false;
        int mantissaDigits;
        if (isAsciiDigit(c)) {
            text.append((char) c);
            mantissaDigits = 1 + readDigits(br, text);
            if (peek(br) == '.') {
                text.append((char) br.read());
                floating = true;
                mantissaDigits += readDigits(br, text);
            }
        } else if (c == '.') {
            text.append('.');
            floating = true;
            mantissaDigits = readDigits(br, text);
        } else {
            throw new IllegalArgumentException("Unexpected character: " + (char) c);
        }
        if (mantissaDigits == 0) {
            throw new IllegalArgumentException("Invalid number: " + text);
        }

        int exponent = peek(br);
        if (exponent == 'e' || exponent == 'E') {
            text.append((char) br.read());
            floating = true;
            int sign = peek(br);
            if (sign == '+' || sign == '-') {
                text.append((char) br.read());
            }
            if (readDigits(br, text) == 0) {
                throw new IllegalArgumentException("Invalid number: " + text);
            }
        }

        if (floating) {
            double parsed = Double.parseDouble(text.toString());
            return negative ? -parsed : parsed;
        }
        BigInteger integer = new BigInteger(text.toString());
        return narrow(negative ? integer.negate() : integer);
    }

    /** Returns {@code value} as Integer or Long when it fits, otherwise unchanged. */
    private static Number narrow(BigInteger value) {
        if (value.bitLength() < Integer.SIZE) {
            return Integer.valueOf(value.intValue());
        }
        if (value.bitLength() < Long.SIZE) {
            return Long.valueOf(value.longValue());
        }
        return value;
    }

    /** Appends the run of ASCII digits at the reader position to {@code out}; returns its length. */
    private static int readDigits(BufferedReader br, StringBuilder out) throws IOException {
        int count = 0;
        while (isAsciiDigit(peek(br))) {
            out.append((char) br.read());
            count++;
        }
        return count;
    }

    /** Consumes exactly the characters of {@code rest} or throws. */
    private static void expect(BufferedReader br, String rest) throws IOException {
        for (int i = 0; i < rest.length(); i++) {
            int c = br.read();
            if (c == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }
            if (c != rest.charAt(i)) {
                throw new IllegalArgumentException("Unexpected character: " + (char) c);
            }
        }
    }

    /** Consumes {@code uXXXX} (the backslash is already consumed) and returns the decoded char. */
    private static char readUnicodeEscape(BufferedReader br) throws IOException {
        expect(br, "u");
        int value = 0;
        for (int i = 0; i < 4; i++) {
            int c = br.read();
            int digit = hexValue(c);
            if (digit < 0) {
                if (c == -1) {
                    throw new IllegalArgumentException("Unexpected EOF");
                }
                throw new IllegalArgumentException("Unexpected character: " + (char) c);
            }
            value = (value << 4) | digit;
        }
        return (char) value;
    }

    /** Returns the next character (or -1 at end of input) without consuming it. */
    private static int peek(BufferedReader br) throws IOException {
        br.mark(1);
        int c = br.read();
        br.reset();
        return c;
    }

    private static boolean isAsciiDigit(int c) {
        return c >= '0' && c <= '9';
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if {@code c} is not one. */
    private static int hexValue(int c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    private static boolean isIdentifierStart(int c) {
        if (c < 0) {
            return false;
        }
        return c == '$' || c == '_' || c == '\\'
                || Character.isLetter(c)
                || Character.getType(c) == Character.LETTER_NUMBER;
    }

    private static boolean isIdentifierPart(int c) {
        if (c < 0) {
            return false;
        }
        // 0x200C / 0x200D: zero width non-joiner / joiner.
        if (isIdentifierStart(c) || c == 0x200C || c == 0x200D) {
            return true;
        }
        switch (Character.getType(c)) {
            case Character.DECIMAL_DIGIT_NUMBER:
            case Character.NON_SPACING_MARK:
            case Character.COMBINING_SPACING_MARK:
            case Character.CONNECTOR_PUNCTUATION:
                return true;
            default:
                return false;
        }
    }

    private static boolean isWhitespace(int c) {
        if (c < 0) {
            return false;
        }
        // 0xA0: no-break space, 0xFEFF: byte order mark; neither is Java whitespace.
        return Character.isWhitespace(c) || c == 0xA0 || c == 0xFEFF
                || Character.getType(c) == Character.SPACE_SEPARATOR;
    }

    /** Skips any mix of whitespace and comments. */
    private static void skipInsignificant(BufferedReader br) throws IOException {
        while (skipWhitespace(br) || skipComment(br)) {
            // keep going until neither makes progress
        }
    }

    /**
     * Skips whitespace at the reader position.
     *
     * @return {@code true} if at least one character was skipped
     */
    private static boolean skipWhitespace(BufferedReader br) throws IOException {
        boolean skipped = false;
        while (isWhitespace(peek(br))) {
            br.read();
            skipped = true;
        }
        return skipped;
    }

    /**
     * Skips one line or block comment if the reader is positioned at one; otherwise the
     * reader position is unchanged.
     *
     * @return {@code true} if a comment was skipped
     * @throws IllegalArgumentException if a block comment is not terminated
     */
    private static boolean skipComment(BufferedReader br) throws IOException {
        br.mark(2);
        if (br.read() != '/') {
            br.reset();
            return false;
        }
        int kind = br.read();
        if (kind == '/') {
            int c;
            while ((c = br.read()) != -1) {
                if (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029) {
                    break;
                }
            }
            return true;
        }
        if (kind == '*') {
            // Track the previous character so that runs of '*' such as "/***/" terminate.
            int previous = 0;
            int c;
            while ((c = br.read()) != -1) {
                if (previous == '*' && c == '/') {
                    return true;
                }
                previous = c;
            }
            throw new IllegalArgumentException("Unexpected EOF");
        }
        br.reset();
        return false;
    }

    /**
     * Decodes backslash escape sequences in {@code string}.
     *
     * <p>Supported: {@code \b \f \n \r \s \t \v}, {@code \' \" \\}, octal escapes of up
     * to three digits ({@code \0} .. {@code \377}), {@code \xHH}, {@code \}{@code uHHHH}, and
     * a backslash followed by a line terminator, which is removed (line continuation).
     *
     * @param string text possibly containing escape sequences
     * @return the decoded text; the same instance if it contains no backslash
     * @throws IllegalArgumentException on an unknown or truncated escape sequence
     */
    public static String translateEscapes(String string) {
        if (string.indexOf('\\') < 0) {
            return string;
        }
        final int length = string.length();
        StringBuilder out = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = string.charAt(i++);
            if (c != '\\') {
                out.append(c);
                continue;
            }
            if (i >= length) {
                throw new IllegalArgumentException("Invalid escape sequence: \\EOS");
            }
            char d = string.charAt(i++);
            switch (d) {
                case 'b':
                    out.append('\b');
                    break;
                case 'f':
                    out.append('\f');
                    break;
                case 'n':
                    out.append('\n');
                    break;
                case 'r':
                    out.append('\r');
                    break;
                case 's':
                    out.append(' ');
                    break;
                case 't':
                    out.append('\t');
                    break;
                case 'v':
                    out.append((char) 0x0B);
                    break;
                case '\'':
                case '"':
                case '\\':
                    out.append(d);
                    break;
                case '\r':
                    // CR LF counts as a single line terminator.
                    if (i < length && string.charAt(i) == '\n') {
                        i++;
                    }
                    break;
                case '\n':
                case 0x2028:
                case 0x2029:
                    // Line continuation: contributes nothing to the value.
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    // Up to three octal digits, but the value must stay below 0x100.
                    int limit = Math.min(i + (d <= '3' ? 2 : 1), length);
                    int code = d - '0';
                    while (i < limit) {
                        char o = string.charAt(i);
                        if (o < '0' || o > '7') {
                            break;
                        }
                        code = (code << 3) | (o - '0');
                        i++;
                    }
                    out.append((char) code);
                    break;
                }
                case 'x':
                    out.append((char) parseHex(string, i, 2));
                    i += 2;
                    break;
                case 'u':
                    out.append((char) parseHex(string, i, 4));
                    i += 4;
                    break;
                default:
                    throw new IllegalArgumentException(String.format("Invalid escape sequence: \\%c \\\\u%04X", d, (int) d));
            }
        }
        return out.toString();
    }

    /** Decodes exactly {@code count} hexadecimal digits of {@code s} starting at {@code from}. */
    private static int parseHex(String s, int from, int count) {
        if (from + count > s.length()) {
            throw new IllegalArgumentException("Invalid unicode escape: " + s.substring(from)
                    + ", expected " + count + " characters, found EOS");
        }
        int value = 0;
        for (int k = from; k < from + count; k++) {
            int digit = hexValue(s.charAt(k));
            if (digit < 0) {
                throw new IllegalArgumentException("Invalid unicode escape: " + s.substring(from, from + count));
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /**
     * Escapes {@code string} so that it can be placed inside a quoted literal and read
     * back with {@link #translateEscapes(String)}.
     *
     * @param string text to escape
     * @param escapeQuotes the quote character ({@code '} or {@code "}) that must be
     *                     backslash escaped; the other quote is emitted unchanged
     * @return the escaped text, without surrounding quotes
     */
    public static String escape(String string, char escapeQuotes) {
        final StringBuilder result = new StringBuilder(string.length());
        for (int i = 0; i < string.length(); i++) {
            final char c = string.charAt(i);
            switch (c) {
                case '\0': {
                    // "\0" directly followed by a digit would be read back as a longer
                    // escape, so use the unambiguous two digit hex form in that case.
                    boolean digitFollows = i + 1 < string.length() && isAsciiDigit(string.charAt(i + 1));
                    result.append(digitFollows ? "\\x00" : "\\0");
                    break;
                }
                case '\t':
                    result.append("\\t");
                    break;
                case '\b':
                    result.append("\\b");
                    break;
                case '\n':
                    result.append("\\n");
                    break;
                case '\r':
                    result.append("\\r");
                    break;
                case '\f':
                    result.append("\\f");
                    break;
                case '\'':
                case '"':
                    if (escapeQuotes == c) {
                        result.append('\\');
                    }
                    result.append(c);
                    break;
                case '\\':
                    result.append("\\\\");
                    break;
                default:
                    final int type = Character.getType(c);
                    if (type != Character.UNASSIGNED && type != Character.CONTROL && type != Character.SURROGATE) {
                        result.append(c);
                    } else if (c < 0x10) {
                        result.append("\\x0").append(Character.forDigit(c, 16));
                    } else {
                        final String hex = Integer.toHexString(c);
                        if (c < 0x100) {
                            result.append("\\x").append(hex);
                        } else if (c < 0x1000) {
                            result.append("\\u0").append(hex);
                        } else {
                            result.append("\\u").append(hex);
                        }
                    }
                    break;
            }
        }
        return result.toString();
    }

    private static BufferedReader stringReader(String str) {
        return new BufferedReader(new StringReader(str));
    }

    /** Small manual smoke test: parses two sample documents and prints the results. */
    public static void main(String[] args) throws IOException {
        Object a = parseString("// this\n" +
                "{\n" +
                " // is\n" +
                " // really\n" +
                " \"legal\"\n" +
                " // totoally\n" +
                " : \n" +
                " // but\n" +
                " \"should\"\n" +
                " // it\n" +
                "}\n" +
                "/* be... */");
        System.out.println(a);
        Object b = parseString("[1,2,-Infinity]");
        System.out.println(b);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment