Created
April 14, 2016 21:21
-
-
Save ppanyukov/0ae50ca3e84e9caddb80596972aca7c1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.tttv.util | |
import scala.collection.mutable | |
/**
 * NoBullshitJsonParser is a minimal JSON scanner for strings built around a
 * single while loop.
 *
 * It flattens a JSON object into a map of fully qualified field names to raw
 * values. It is deliberately non-compliant with strict JSON:
 *
 *  - arrays are not supported and raise an exception;
 *  - values are never decoded: escape sequences and literals are kept exactly
 *    as written in the input;
 *  - plain values (numbers, true, false, null) are not validated;
 *  - truncated input is not reported, the fields read so far are returned.
 */
object NoBullshitJsonParser {

  // Scanner states. Upper-case initials keep them usable as stable
  // identifier patterns in `match`.
  private final val StateBegin = 0     // before the top-level '{' or '['
  private final val StateObject = 1    // inside an object, between fields
  private final val StateArray = 2     // inside an array (unsupported)
  private final val StateFieldName = 3 // inside a quoted field name
  private final val StateFieldSep = 4  // between the field name and ':'
  private final val StateFieldSep2 = 5 // between ':' and the field value
  private final val StateValStr = 6    // inside a quoted string value
  private final val StateValPlain = 7  // inside a plain value (bool, number, null)

  /** Builds the exception raised when an unexpected character is met. */
  private def unexpected(expectation: String, c: Char, position: Int): Exception =
    new Exception(s"$expectation Current char is '$c'. Current position is '$position'")

  /** Joins a field path stored innermost-first into `outer.inner` form. */
  private def qualifiedName(path: List[String]): String =
    path.reverse.mkString(".")

  /**
   * Parses a JSON string and returns a flat map of field -> value pairs.
   *
   * Field names in the map are fully qualified from the outermost object
   * inwards, e.g. `field.subfield`.
   *
   * Values are exactly as they appear in the JSON text, less the surrounding
   * quotes for strings. Nothing is decoded: escape sequences stay as written
   * and numbers, booleans and null are returned as their literal text.
   *
   * @param s the JSON text; an empty string yields an empty map
   * @return a mutable map from qualified field name to raw value; looking up
   *         a missing key returns the empty string
   * @throws Exception if an unexpected character is met or an array is found
   */
  def parse(s: String): scala.collection.mutable.Map[String, String] = {
    val values: scala.collection.mutable.Map[String, String] =
      scala.collection.mutable.HashMap.empty[String, String].withDefaultValue("")

    // Start of the field name or value currently being scanned.
    var tokenStart = 0
    // Index of the character being examined.
    var pos = 0

    // States to return to; the head is the most recently pushed one.
    var stateStack: List[Int] = Nil
    var state = StateBegin

    // Names of the fields currently open, innermost first.
    var fieldPath: List[String] = Nil

    while (pos < s.length) {
      val c = s.charAt(pos)
      state match {
        case StateBegin =>
          if (!Character.isWhitespace(c)) {
            if (c == '{') {
              stateStack = state :: stateStack
              state = StateObject
            } else if (c == '[') {
              stateStack = state :: stateStack
              state = StateArray
            } else {
              throw unexpected(
                "Expect whitespace, object start { or array start [ at the beginnig of json.",
                c, pos)
            }
          }

        case StateObject =>
          if (!(Character.isWhitespace(c) || c == ',')) {
            if (c == '"') {
              // The state pushed here is the one restored once the field's
              // value (string, plain or nested object) is complete.
              stateStack = state :: stateStack
              state = StateFieldName
              tokenStart = pos + 1 // drop the opening quote
            } else if (c == '}') {
              state = stateStack.head
              stateStack = stateStack.tail
              // A nested object is the value of the field at the head of the
              // path; closing it completes that field. The top-level object
              // has no owning field, so the path is already empty there.
              if (fieldPath.nonEmpty) fieldPath = fieldPath.tail
            } else {
              throw unexpected(
                "Expect whitespace, object end } or field name start \" within the start of object.",
                c, pos)
            }
          }

        case StateFieldName =>
          if (c == '\\') {
            // Skip the escaped character so an escaped quote does not end the name.
            pos += 1
          } else if (c == '"') {
            fieldPath = s.substring(tokenStart, pos) :: fieldPath
            state = StateFieldSep
          }

        case StateFieldSep =>
          if (!Character.isWhitespace(c)) {
            if (c == ':') {
              state = StateFieldSep2
            } else {
              throw unexpected("Expect ':' or whitespace between field name and value.", c, pos)
            }
          }

        case StateFieldSep2 =>
          if (!Character.isWhitespace(c)) {
            if (c == '{') {
              // No push: the return state was pushed when the field name started.
              state = StateObject
            } else if (c == '[') {
              state = StateArray
            } else if (c == '"') {
              state = StateValStr
              tokenStart = pos + 1 // drop the opening quote
            } else if (c == 't' || c == 'f' || c == 'n' || c == '-' || Character.isDigit(c)) {
              tokenStart = pos // plain values keep their first character
              state = StateValPlain
            } else {
              throw unexpected(
                "Expect whitespace, object start {, array start [, string val start, or plain value start after ':'.",
                c, pos)
            }
          }

        case StateValStr =>
          if (c == '\\') {
            // Skip the escaped character so an escaped quote does not end the value.
            pos += 1
          } else if (c == '"') {
            values(qualifiedName(fieldPath)) = s.substring(tokenStart, pos)
            fieldPath = fieldPath.tail
            // back to the enclosing object
            state = stateStack.head
            stateStack = stateStack.tail
          }

        case StateValPlain =>
          if (Character.isWhitespace(c) || c == ',' || c == '}') {
            values(qualifiedName(fieldPath)) = s.substring(tokenStart, pos)
            fieldPath = fieldPath.tail
            // back to the enclosing object
            state = stateStack.head
            stateStack = stateStack.tail
            // '}' also closes the enclosing object: step back so the same
            // character is examined again in the object state.
            if (c == '}') pos -= 1
          }

        case StateArray =>
          throw new Exception("Arrays not supported yet.")
      }
      pos += 1
    }

    values
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment