Skip to content

Instantly share code, notes, and snippets.

@wagyourtail
Created May 13, 2024 08:43
Show Gist options
  • Save wagyourtail/3ab70855ad4810db749e6647943fcbc5 to your computer and use it in GitHub Desktop.
Save wagyourtail/3ab70855ad4810db749e6647943fcbc5 to your computer and use it in GitHub Desktop.
package xyz.wagyourtail.jvmdg.j9.intl;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.math.BigInteger;
import java.util.*;
/**
 * A small, dependency-free reader for JSON5 documents.
 *
 * <p>Values are mapped to plain Java types: objects become {@code Map<String, Object>}
 * (insertion ordered), arrays become {@code List<Object>}, strings become {@code String},
 * numbers become {@code Integer}, {@code Long}, {@code BigInteger} or {@code Double},
 * booleans become {@code Boolean} and {@code null} becomes {@code null}.
 *
 * <p>Only the first value of the input is read; anything that follows it (for example a
 * trailing comment) is left unread and is not validated.
 *
 * <p>All syntax errors are reported as {@link IllegalArgumentException}.
 */
public class Json5Slurper {

    // Code points that JSON5 treats specially but that have no Java character escape.
    private static final int NO_BREAK_SPACE = 0x00A0;
    private static final int ZERO_WIDTH_NON_JOINER = 0x200C;
    private static final int ZERO_WIDTH_JOINER = 0x200D;
    private static final char LINE_SEPARATOR = 0x2028;
    private static final char PARAGRAPH_SEPARATOR = 0x2029;
    private static final int BYTE_ORDER_MARK = 0xFEFF;

    /**
     * Parses the first JSON5 value contained in {@code str}.
     *
     * @param str the JSON5 text
     * @return Map&lt;String, Object&gt; | List&lt;Object&gt; | String | Number | Boolean | null
     * @throws IOException if the underlying reader fails
     * @throws IllegalArgumentException if the text is not valid JSON5
     */
    public static Object parseString(String str) throws IOException {
        try (BufferedReader reader = stringReader(str)) {
            return parse(reader);
        }
    }

    /**
     * Parses the next JSON5 value from {@code br}, skipping leading whitespace and comments.
     * The reader is left positioned directly after the value.
     *
     * @param br the source; must support {@code mark}/{@code reset} (every BufferedReader does)
     * @return Map&lt;String, Object&gt; | List&lt;Object&gt; | String | Number | Boolean | null
     * @throws IOException if the reader fails
     * @throws IllegalArgumentException on a syntax error or premature end of input
     */
    public static Object parse(BufferedReader br) throws IOException {
        skipInsignificant(br);
        int c = br.read();
        if (c == -1) {
            throw new IllegalArgumentException("Unexpected EOF");
        }
        switch (c) {
            case '{':
                return parseObject(br);
            case '[':
                return parseArray(br);
            case '"':
            case '\'':
                return translateEscapes(parseString(br, (char) c));
            case '-':
            case '+':
            case '.':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case 'N':
            case 'I':
                return parseNumber(br, (char) c);
            case 'n':
                expectLiteral(br, "null");
                return null;
            case 't':
                expectLiteral(br, "true");
                return Boolean.TRUE;
            case 'f':
                expectLiteral(br, "false");
                return Boolean.FALSE;
            default:
                throw new IllegalArgumentException("Unexpected character: " + (char) c);
        }
    }

    /**
     * Parses the members of an object; the opening brace has already been consumed.
     * A trailing comma before the closing brace is allowed. Duplicate keys keep the last value.
     */
    private static Map<String, Object> parseObject(BufferedReader br) throws IOException {
        Map<String, Object> map = new LinkedHashMap<>();
        while (true) {
            skipInsignificant(br);
            char c = readChar(br);
            if (c == '}') {
                // Only legal here, where no key is pending: "{}" or after a trailing comma.
                return map;
            }

            String key;
            if (c == '"' || c == '\'') {
                key = translateEscapes(parseString(br, c));
            } else if (isIdentifierStart(c)) {
                key = parseIdentifier(br, c);
            } else {
                throw new IllegalArgumentException("Unexpected character: " + c);
            }

            skipInsignificant(br);
            if (readChar(br) != ':') {
                throw new IllegalArgumentException("Expected colon");
            }

            map.put(key, parse(br));

            skipInsignificant(br);
            c = readChar(br);
            if (c == '}') {
                return map;
            }
            if (c != ',') {
                throw new IllegalArgumentException("Expected comma");
            }
        }
    }

    /**
     * Parses the elements of an array; the opening bracket has already been consumed.
     * A trailing comma before the closing bracket is allowed.
     */
    private static List<Object> parseArray(BufferedReader br) throws IOException {
        List<Object> list = new ArrayList<>();
        while (true) {
            skipInsignificant(br);
            int next = peek(br);
            if (next == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }
            if (next == ']') {
                br.read();
                return list;
            }

            list.add(parse(br));

            skipInsignificant(br);
            char c = readChar(br);
            if (c == ']') {
                return list;
            }
            if (c != ',') {
                throw new IllegalArgumentException("Expected comma");
            }
        }
    }

    /**
     * Reads the raw body of a quoted string; the opening quote has already been consumed.
     * Escape sequences are copied through verbatim (backslash included) so that
     * {@link #translateEscapes(String)} can decode them exactly once afterwards.
     *
     * @param start the quote character that opened the string and therefore closes it
     */
    private static String parseString(BufferedReader br, char start) throws IOException {
        StringBuilder raw = new StringBuilder();
        boolean escaped = false;
        int c;
        while ((c = br.read()) != -1) {
            if (escaped) {
                // Whatever follows a backslash belongs to the escape, even a quote or newline.
                raw.append((char) c);
                escaped = false;
            } else if (c == '\\') {
                raw.append('\\');
                escaped = true;
            } else if (c == start) {
                return raw.toString();
            } else if (c == '\n' || c == '\r') {
                throw new IllegalArgumentException("Unexpected newline");
            } else {
                raw.append((char) c);
            }
        }
        throw new IllegalArgumentException("Unexpected EOF");
    }

    /**
     * Reads an unquoted object key whose first character ({@code start}) was already consumed.
     * Reading stops in front of the first character that cannot be part of an identifier,
     * so {@code {a:1}} and {@code {a : 1}} both yield the key {@code a}.
     */
    private static String parseIdentifier(BufferedReader br, char start) throws IOException {
        StringBuilder name = new StringBuilder();
        appendIdentifierChar(br, name, start);
        int c;
        while ((c = peek(br)) != -1 && isIdentifierPart(c)) {
            br.read();
            appendIdentifierChar(br, name, (char) c);
        }
        return name.toString();
    }

    /** Appends {@code c}, decoding a four-hex-digit unicode escape when {@code c} is a backslash. */
    private static void appendIdentifierChar(BufferedReader br, StringBuilder out, char c) throws IOException {
        if (c != '\\') {
            out.append(c);
            return;
        }
        char marker = readChar(br);
        if (marker != 'u') {
            throw new IllegalArgumentException("Unexpected character: " + marker);
        }
        int code = 0;
        for (int i = 0; i < 4; i++) {
            char h = readChar(br);
            int digit = hexValue(h);
            if (digit < 0) {
                throw new IllegalArgumentException("Unexpected character: " + h);
            }
            code = (code << 4) | digit;
        }
        out.append((char) code);
    }

    /** Returns whether {@code c} may begin an unquoted key (letter, underscore, dollar, or an escape). */
    private static boolean isIdentifierStart(int c) {
        return Character.isLetter(c) || c == '_' || c == '$' || c == '\\';
    }

    /** Returns whether {@code c} may continue an unquoted key. */
    private static boolean isIdentifierPart(int c) {
        if (isIdentifierStart(c) || Character.isDigit(c)
                || c == ZERO_WIDTH_NON_JOINER || c == ZERO_WIDTH_JOINER) {
            return true;
        }
        int type = Character.getType(c);
        return type == Character.NON_SPACING_MARK
                || type == Character.COMBINING_SPACING_MARK
                || type == Character.CONNECTOR_PUNCTUATION;
    }

    /**
     * Parses a numeric literal whose first character ({@code start}) was already consumed.
     * Supports signs, {@code NaN}, {@code Infinity}, hexadecimal integers, leading or trailing
     * decimal points and exponents. A literal may end at the end of input.
     *
     * @return a {@code Double} for non-integral literals, otherwise the narrowest of
     *         {@code Integer}, {@code Long} and {@code BigInteger} that holds the value
     */
    private static Number parseNumber(BufferedReader br, char start) throws IOException {
        if (start == '-') {
            return negate(parseNumber(br, readChar(br)));
        }
        if (start == '+') {
            return parseNumber(br, readChar(br));
        }
        if (start == 'N') {
            expectLiteral(br, "NaN");
            // Double rather than Float so the result can be negated like every other number.
            return Double.NaN;
        }
        if (start == 'I') {
            expectLiteral(br, "Infinity");
            return Double.POSITIVE_INFINITY;
        }
        if (start != '.' && !isAsciiDigit(start)) {
            throw new IllegalArgumentException("Unexpected character: " + start);
        }
        if (start == '0') {
            int marker = peek(br);
            if (marker == 'x' || marker == 'X') {
                br.read();
                return parseHexDigits(br);
            }
        }

        StringBuilder text = new StringBuilder().append(start);
        boolean floating = start == '.';
        int c;
        while ((c = peek(br)) != -1) {
            char last = text.charAt(text.length() - 1);
            boolean exponentMark = c == 'e' || c == 'E';
            // A sign is part of the literal only directly after the exponent marker.
            boolean exponentSign = (c == '+' || c == '-') && (last == 'e' || last == 'E');
            if (!isAsciiDigit(c) && c != '.' && !exponentMark && !exponentSign) {
                break;
            }
            floating |= c == '.' || exponentMark;
            text.append((char) br.read());
        }

        String literal = text.toString();
        if (floating) {
            // Malformed input such as "1.2.3" surfaces as NumberFormatException,
            // which is an IllegalArgumentException.
            return Double.parseDouble(literal);
        }
        return narrowestInteger(literal, 10);
    }

    /** Reads the digits of a hexadecimal literal; the {@code 0x} prefix has already been consumed. */
    private static Number parseHexDigits(BufferedReader br) throws IOException {
        StringBuilder digits = new StringBuilder();
        int c;
        while ((c = peek(br)) != -1 && hexValue(c) >= 0) {
            digits.append((char) br.read());
        }
        if (digits.length() == 0) {
            if (c == -1) {
                throw new IllegalArgumentException("Unexpected EOF");
            }
            throw new IllegalArgumentException("Unexpected character: " + (char) c);
        }
        return narrowestInteger(digits.toString(), 16);
    }

    /** Converts unsigned digits to an Integer, Long or BigInteger, whichever is the first to fit. */
    private static Number narrowestInteger(String digits, int radix) {
        BigInteger value = new BigInteger(digits, radix);
        if (value.bitLength() < Integer.SIZE) {
            return value.intValue();
        }
        if (value.bitLength() < Long.SIZE) {
            return value.longValue();
        }
        return value;
    }

    /** Negates {@code n} while keeping its runtime type. */
    private static Number negate(Number n) {
        if (n instanceof Double) {
            return -n.doubleValue();
        }
        if (n instanceof Long) {
            return -n.longValue();
        }
        if (n instanceof BigInteger) {
            return ((BigInteger) n).negate();
        }
        return -n.intValue();
    }

    /**
     * Consumes the remainder of a keyword whose first character was already read.
     *
     * @param literal the complete keyword, e.g. {@code "null"}
     */
    private static void expectLiteral(BufferedReader br, String literal) throws IOException {
        for (int i = 1; i < literal.length(); i++) {
            char n = readChar(br);
            if (n != literal.charAt(i)) {
                throw new IllegalArgumentException("Unexpected character: " + n);
            }
        }
    }

    /** Reads one character, failing with "Unexpected EOF" when the input is exhausted. */
    private static char readChar(BufferedReader br) throws IOException {
        int c = br.read();
        if (c == -1) {
            throw new IllegalArgumentException("Unexpected EOF");
        }
        return (char) c;
    }

    /** Returns the next character without consuming it, or -1 at end of input. */
    private static int peek(BufferedReader br) throws IOException {
        br.mark(1);
        int c = br.read();
        br.reset();
        return c;
    }

    private static boolean isAsciiDigit(int c) {
        return c >= '0' && c <= '9';
    }

    /** Returns the value of an ASCII hex digit, or -1 when {@code c} is not one. */
    private static int hexValue(int c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    private static boolean isLineTerminator(int c) {
        return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARAGRAPH_SEPARATOR;
    }

    /** Skips any mix of whitespace and comments in front of the next token. */
    private static void skipInsignificant(BufferedReader br) throws IOException {
        boolean progressed;
        do {
            progressed = skipWhitespace(br);
            progressed |= skipComment(br);
        } while (progressed);
    }

    /**
     * Skips a run of whitespace.
     *
     * @return {@code true} if at least one character was skipped; end of input is not an
     *         error here, the caller notices it on its next read
     */
    private static boolean skipWhitespace(BufferedReader br) throws IOException {
        boolean skipped = false;
        int c;
        while ((c = peek(br)) != -1
                && (Character.isWhitespace(c) || c == NO_BREAK_SPACE || c == BYTE_ORDER_MARK)) {
            br.read();
            skipped = true;
        }
        return skipped;
    }

    /**
     * Skips one line comment or one block comment if the input starts with one.
     *
     * @return {@code true} if a comment was consumed; otherwise the reader is left untouched
     * @throws IllegalArgumentException if a block comment is not terminated
     */
    private static boolean skipComment(BufferedReader br) throws IOException {
        br.mark(2);
        if (br.read() == '/') {
            int kind = br.read();
            if (kind == '/') {
                int c;
                do {
                    c = br.read();
                } while (c != -1 && !isLineTerminator(c));
                return true;
            }
            if (kind == '*') {
                // Remember the previous character so a run of stars directly in front of
                // the closing slash still terminates the comment.
                int previous = 0;
                int c;
                while ((c = br.read()) != -1) {
                    if (previous == '*' && c == '/') {
                        return true;
                    }
                    previous = c;
                }
                throw new IllegalArgumentException("Unexpected EOF");
            }
        }
        br.reset();
        return false;
    }

    /**
     * Decodes the backslash escapes of a raw string body.
     *
     * <p>Recognised escapes: {@code b f n r s t v}, quotes, backslash, slash, octal (up to three
     * digits, maximum 377), two-digit hex ({@code x}), four-digit unicode ({@code u}) and a
     * backslash directly in front of a line terminator, which is a line continuation and
     * produces no output.
     *
     * @param string text that may contain escape sequences
     * @return the text with every escape sequence replaced by the character it denotes
     * @throws IllegalArgumentException if an escape is unknown, malformed or cut off
     */
    public static String translateEscapes(String string) {
        if (string.isEmpty()) {
            return string;
        }
        char[] chars = string.toCharArray();
        int length = chars.length;
        int from = 0;
        // Decoded output is written back into the same array; it never overtakes the read position.
        int to = 0;
        while (from < length) {
            char ch = chars[from++];
            if (ch == '\\') {
                if (from >= length) {
                    throw new IllegalArgumentException("Invalid escape sequence: \\EOS");
                }
                ch = chars[from++];
                switch (ch) {
                    case 'b':
                        ch = '\b';
                        break;
                    case 'f':
                        ch = '\f';
                        break;
                    case 'n':
                        ch = '\n';
                        break;
                    case 'r':
                        ch = '\r';
                        break;
                    case 's':
                        ch = ' ';
                        break;
                    case 't':
                        ch = '\t';
                        break;
                    case 'v':
                        ch = (char) 0x0B;
                        break;
                    case '\'':
                    case '"':
                    case '\\':
                    case '/':
                        // The escaped character stands for itself.
                        break;
                    case '\r':
                        // Line continuation; swallow the LF of a CRLF pair as well.
                        if (from < length && chars[from] == '\n') {
                            from++;
                        }
                        continue;
                    case '\n':
                    case LINE_SEPARATOR:
                    case PARAGRAPH_SEPARATOR:
                        continue;
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7': {
                        // Up to three octal digits when the first is 0-3 (keeps the value
                        // within 0-255), otherwise up to two.
                        int limit = Math.min(from + (ch <= '3' ? 2 : 1), length);
                        int code = ch - '0';
                        while (from < limit && chars[from] >= '0' && chars[from] <= '7') {
                            code = (code << 3) | (chars[from++] - '0');
                        }
                        ch = (char) code;
                        break;
                    }
                    case 'x':
                        ch = decodeHex(chars, from, 2);
                        from += 2;
                        break;
                    case 'u':
                        ch = decodeHex(chars, from, 4);
                        from += 4;
                        break;
                    default:
                        throw new IllegalArgumentException(String.format("Invalid escape sequence: \\%c \\\\u%04X", ch, (int) ch));
                }
            }
            chars[to++] = ch;
        }
        return new String(chars, 0, to);
    }

    /** Decodes exactly {@code digits} hex digits starting at {@code offset}. */
    private static char decodeHex(char[] chars, int offset, int digits) {
        if (offset + digits > chars.length) {
            String found = new String(chars, offset, chars.length - offset);
            throw new IllegalArgumentException("Invalid unicode escape: " + found + ", expected " + digits + " characters, found EOS");
        }
        int code = 0;
        for (int i = offset; i < offset + digits; i++) {
            int digit = hexValue(chars[i]);
            if (digit < 0) {
                throw new IllegalArgumentException("Invalid unicode escape: " + new String(chars, offset, digits));
            }
            code = (code << 4) | digit;
        }
        return (char) code;
    }

    /**
     * Escapes {@code string} so it can be placed between quotes in JSON5 text and read back
     * with {@link #translateEscapes(String)}.
     *
     * @param string the text to escape
     * @param escapeQuotes the quote character that will surround the text; only this quote
     *        kind is escaped, the other one is emitted as is
     * @return the escaped text, without surrounding quotes
     */
    public static String escape(String string, char escapeQuotes) {
        final StringBuilder result = new StringBuilder(string.length());
        final int length = string.length();
        for (int i = 0; i < length; i++) {
            final char c = string.charAt(i);
            switch (c) {
                case '\0': {
                    // A short NUL escape followed by a digit would read back as a longer
                    // octal escape, so fall back to the fixed-width hex form in that case.
                    boolean digitFollows = i + 1 < length && isAsciiDigit(string.charAt(i + 1));
                    result.append(digitFollows ? "\\x00" : "\\0");
                    break;
                }
                case '\t':
                    result.append("\\t");
                    break;
                case '\b':
                    result.append("\\b");
                    break;
                case '\n':
                    result.append("\\n");
                    break;
                case '\r':
                    result.append("\\r");
                    break;
                case '\f':
                    result.append("\\f");
                    break;
                case '\'':
                case '"':
                    if (escapeQuotes == c) {
                        result.append('\\');
                    }
                    result.append(c);
                    break;
                case '\\':
                    result.append("\\\\");
                    break;
                default: {
                    final int type = Character.getType(c);
                    if (type != Character.UNASSIGNED && type != Character.CONTROL && type != Character.SURROGATE) {
                        result.append(c);
                    } else if (c < 0x10) {
                        result.append("\\x0").append(Character.forDigit(c, 16));
                    } else {
                        final String hex = Integer.toHexString(c);
                        if (c < 0x100) {
                            result.append("\\x").append(hex);
                        } else if (c < 0x1000) {
                            result.append("\\u0").append(hex);
                        } else {
                            result.append("\\u").append(hex);
                        }
                    }
                    break;
                }
            }
        }
        return result.toString();
    }

    private static BufferedReader stringReader(String str) {
        return new BufferedReader(new StringReader(str));
    }

    /** Small smoke test: prints the result of parsing two sample documents. */
    public static void main(String[] args) throws IOException {
        Object a = parseString("// this\n" +
                "{\n" +
                " // is\n" +
                " // really\n" +
                " \"legal\"\n" +
                " // totoally\n" +
                " : \n" +
                " // but\n" +
                " \"should\"\n" +
                " // it\n" +
                "}\n" +
                "/* be... */");
        System.out.println(a);
        Object b = parseString("[1,2,-Infinity]");
        System.out.println(b);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment