Last active
March 19, 2019 21:49
-
-
Save pshirshov/eeaf6a74c78b50ba913f547ebebe0f35 to your computer and use it in GitHub Desktop.
dirty json flattenizer / draft
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.github.pshirshov.izumi.idealingua.runtime.flatjson | |
import com.github.pshirshov.izumi.functional.IzEither._ | |
import io.circe._ | |
import io.circe.literal._ | |
import scala.annotation.switch | |
import scala.collection.mutable.ArrayBuffer | |
import scala.util.control.NonFatal | |
/** One step of a path that leads from the root of a JSON document to a value. */
sealed trait PathElement

object PathElement {

  /** Step into an array element located at position `idx`. */
  final case class Index(idx: Long) extends PathElement

  /** Step into an object field called `name`. */
  final case class ObjectName(name: String) extends PathElement

}
/** Reasons why a list of flattened `path:type -> value` pairs could not be turned back into JSON. */
sealed trait UnpackFailure

object UnpackFailure {

  /** The path key does not have the expected shape (e.g. nothing follows the type separator). */
  final case class BadPathFormat(path: String) extends UnpackFailure

  /** The type tag `tpe` found at the end of `path` is not one of the supported tags. */
  final case class UnexpectedType(tpe: String, path: String) extends UnpackFailure

  /** `path` ends right after an escape character, so the escape sequence is incomplete. */
  final case class UnterminatedEscapeSequence(path: String) extends UnpackFailure

  /** A bracketed path chunk could not be read as a numeric index; `t` is the underlying error. */
  final case class PathIndexParsingFailed(path: String, t: Throwable) extends UnpackFailure

  /** The string `value` could not be converted to the scalar type named by `tpe`; `t` is the underlying error. */
  final case class ScalarParsingFailed(tpe: String, value: String, t: Throwable) extends UnpackFailure

  /** The entries in `structure` cannot be combined into a single JSON node (e.g. array indices mixed with field names). */
  final case class StructuralFailure(structure: Seq[((Seq[PathElement], String), String)]) extends UnpackFailure

}
/** Small manual demo: flattens a sample document, inflates it back and prints both results. */
object Test {
  def main(args: Array[String]): Unit = {
    val sample: Json = json"""{"a": [1,2, {"x.x": "y", "y": 123}], "b": {"c": "d"}}"""

    val flat = new JsonFlattener().flatten(sample)
    val restored = new JsonFlattener().inflate(flat)

    // Flattened pairs first, then the round-tripped result (an Either).
    println(flat)
    println(restored)
  }
}
/**
  * Converts a circe [[Json]] document into a flat sequence of `key -> value` string pairs and back.
  *
  * Each key has the form `segment.segment...:type`:
  *  - an object field contributes its name, passed through `IzEscape` so that the control
  *    characters `.`, `[` and `]` inside names cannot be confused with path syntax;
  *  - an array element contributes `[index]`;
  *  - `type` is one of `null`, `bool`, `long`, `float`, `str` and tells how to read the value back.
  *
  * For example `{"b": {"c": "d"}}` is flattened to `"b.c:str" -> "d"`.
  *
  * Known limitations of the format:
  *  - empty arrays and empty objects produce no pairs, so they do not survive a round trip;
  *  - a value at the root whose path is empty and a root-level field with an empty name
  *    presumably produce the same key (depends on `IzEscape` leaving `""` unchanged — TODO confirm).
  */
class JsonFlattener {
  import PathElement._
  import com.github.pshirshov.izumi.fundamentals.platform.strings.IzEscape

  /** A parsed entry: ((remaining path, type tag), raw string value). */
  private type Entry = ((Seq[PathElement], String), String)

  private val tpes = Set("null", "bool", "long", "float", "str")
  private val controlChars = Set('.', '[', ']')
  private val escapeChar = '\\'
  private val escape = new IzEscape(controlChars, escapeChar)

  /**
    * Flattens `node` into `path:type -> value` pairs.
    *
    * @param node the document to flatten
    * @return one pair per scalar value found in `node`
    */
  def flatten(node: Json): Seq[(String, String)] = {
    flatten(node, Seq.empty)
  }

  /** Recursive worker for [[flatten(node:io\.circe\.Json)*]]; `prefix` is the path walked so far. */
  private def flatten(node: Json, prefix: Seq[PathElement]): Seq[(String, String)] = {
    node.fold(
      Seq(makePath(prefix, "null") -> "null"),
      b => Seq(makePath(prefix, "bool") -> b.toString),
      n => {
        // Integral numbers are tagged "long" (possibly wider than 64 bits), everything else "float".
        n.toBigInt
          .map(bi => Seq(makePath(prefix, "long") -> bi.toString))
          .getOrElse {
            Seq(makePath(prefix, "float") -> n.toBigDecimal.map(_.toString()).getOrElse(n.toDouble.toString))
          }
      },
      s => Seq(makePath(prefix, "str") -> s),
      a => a.zipWithIndex.flatMap {
        case (element, idx) =>
          flatten(element, prefix :+ PathElement.Index(idx))
      },
      o => o.toIterable.flatMap {
        case (name, value) =>
          flatten(value, prefix :+ PathElement.ObjectName(name))
      }.toSeq
    )
  }

  /** Renders a path and a type tag as a flat key: dot-joined segments, then `:` and the tag. */
  private def makePath(p: Seq[PathElement], tpe: String): String = {
    val segments = p.map {
      case ObjectName(name) =>
        escape.escape(name)
      case Index(idx) =>
        s"[$idx]"
    }
    s"${segments.mkString(".")}:$tpe"
  }

  /**
    * Rebuilds a JSON document from flattened pairs.
    *
    * @param pairs `path:type -> value` pairs, in the format produced by `flatten`
    * @return the reconstructed document, or the list of failures encountered while parsing
    *         the keys or assembling the structure
    */
  def inflate(pairs: Seq[(String, String)]): Either[List[UnpackFailure], Json] = {
    val maybePaths = pairs.map {
      case (k, v) =>
        parsePath(k).map(parsed => parsed -> v)
    }.biAggregate

    for {
      parsed <- maybePaths
      out <- inflateParsed(parsed)
    } yield {
      out
    }
  }

  /**
    * Splits a flat key into its path elements and type tag.
    *
    * The type tag is whatever follows the last `:`; a key without `:` or with nothing after it
    * is rejected with `BadPathFormat`, an unknown tag with `UnexpectedType`.
    */
  private def parsePath(path: String): Either[List[UnpackFailure], (Seq[PathElement], String)] = {
    val sep = path.lastIndexOf(':')
    if (sep < 0) {
      // No type separator at all: report it instead of failing with an empty error list.
      Left(List(UnpackFailure.BadPathFormat(path)))
    } else {
      val p = path.substring(0, sep)
      val rtpe = path.substring(sep + 1)
      if (rtpe.isEmpty) {
        Left(List(UnpackFailure.BadPathFormat(path)))
      } else if (!tpes.contains(rtpe)) {
        Left(List(UnpackFailure.UnexpectedType(rtpe, path)))
      } else {
        splitPath(path, p).map(elements => (elements, rtpe))
      }
    }
  }

  /**
    * Splits `p` (a key without its type suffix) on unescaped dots.
    *
    * @param path the full original key, used only for error reporting
    * @param p    the path part of the key
    */
  private def splitPath(path: String, p: String): Either[List[UnpackFailure], Seq[PathElement]] = {
    val buf = new ArrayBuffer[PathElement]()
    var failure: Option[List[UnpackFailure]] = None
    var inEscape = false
    var start = 0
    var idx = 0

    // Stops at the first chunk that fails to parse.
    while (failure.isEmpty && idx < p.length) {
      val c = p.charAt(idx)
      if (inEscape) {
        // The character right after the escape char is taken literally.
        inEscape = false
      } else if (c == escapeChar) {
        inEscape = true
      } else if (c == '.') {
        failure = addChunk(p, buf, start, idx).left.toOption
        start = idx + 1
      }
      idx += 1
    }

    failure match {
      case Some(errors) =>
        Left(errors)
      case None if inEscape =>
        Left(List(UnpackFailure.UnterminatedEscapeSequence(path)))
      case None if p.nonEmpty =>
        // The last chunk is added even when empty, so a trailing empty field name is
        // handled the same way as an empty name in the middle of the path.
        addChunk(p, buf, start, p.length).map(_ => buf.toVector)
      case None =>
        // Empty path: the value sits at the root.
        Right(buf.toVector)
    }
  }

  /**
    * Parses `p.substring(start, idx)` and appends the resulting element to `buf`.
    *
    * A chunk wrapped in unescaped square brackets is read as an array index, anything else is
    * unescaped and treated as a field name.
    */
  private def addChunk(p: String, buf: ArrayBuffer[PathElement], start: Int, idx: Int): Either[List[UnpackFailure], Unit] = {
    val chunk = p.substring(start, idx)
    if (chunk.startsWith("[") && chunk.endsWith("]")) {
      try {
        buf.append(Index(chunk.substring(1, chunk.length - 1).toLong))
        Right(())
      } catch {
        case NonFatal(t) =>
          Left(List(UnpackFailure.PathIndexParsingFailed(p, t)))
      }
    } else {
      buf.append(ObjectName(escape.unescape(chunk)))
      Right(())
    }
  }

  /**
    * Builds the JSON node described by `pairs`, whose paths are relative to that node.
    *
    *  - Entries with an empty path are scalars: one entry yields the scalar itself, several
    *    entries (duplicate keys) yield an array of them.
    *  - If every first path element is an index, the node is an array ordered by index.
    *  - If every first path element is a field name, the node is an object; fields keep the
    *    order of their first appearance in `pairs`. No entries at all yield an empty object.
    *  - Any mixture (scalar together with children, or indices together with names) is a
    *    `StructuralFailure` rather than a silent loss of data.
    */
  private def inflateParsed(pairs: Seq[Entry]): Either[List[UnpackFailure], Json] = {
    val (terminal, nested) = pairs.partition(_._1._1.isEmpty)

    if (terminal.nonEmpty && nested.nonEmpty) {
      Left(List(UnpackFailure.StructuralFailure(pairs)))
    } else if (terminal.nonEmpty) {
      terminal match {
        case Seq(((_, tpe), value)) =>
          parse(tpe, value)
        case many =>
          for {
            elements <- many.map { case ((_, tpe), value) => parse(tpe, value) }.biAggregate
          } yield {
            Json.fromValues(elements)
          }
      }
    } else {
      // Every path in `nested` is non-empty here, so `head` is safe.
      val byHead = nested.groupBy(_._1._1.head)
      val heads = nested.map(_._1._1.head).distinct
      val indexed = heads.collect { case h @ Index(i) => i -> byHead(h) }
      val named = heads.collect { case h @ ObjectName(n) => n -> byHead(h) }

      if (indexed.nonEmpty && named.isEmpty) {
        for {
          elements <- indexed.sortBy(_._1).map(_._2).map(inflateParsedNext).biAggregate
        } yield {
          Json.fromValues(elements)
        }
      } else if (indexed.isEmpty) {
        for {
          elements <- named.map {
            case (name, children) =>
              // `name` was already unescaped when the path was parsed; unescaping it again
              // would corrupt names that contain the escape character.
              inflateParsedNext(children).map(field => name -> field)
          }.biAggregate
        } yield {
          Json.fromFields(elements)
        }
      } else {
        Left(List(UnpackFailure.StructuralFailure(pairs)))
      }
    }
  }

  /** Removes the first path element of an entry. */
  @inline private[this] def drop(v: Entry): Entry = {
    v match {
      case ((path, tpe), value) =>
        ((path.drop(1), tpe), value)
    }
  }

  /** Inflates the children of a node: strips the shared first path element and recurses. */
  @inline private[this] def inflateParsedNext(pairs: Seq[Entry]): Either[List[UnpackFailure], Json] = {
    inflateParsed(pairs.map(drop))
  }

  /**
    * Converts a raw string `value` into the JSON scalar named by `tpe`.
    *
    * Any non-fatal error raised by the conversion (including an unknown `tpe`) is reported as
    * `ScalarParsingFailed`.
    */
  private def parse(tpe: String, value: String): Either[List[UnpackFailure], Json] = {
    try {
      Right {
        (tpe: @switch) match {
          case "null" => Json.Null
          case "bool" => Json.fromBoolean(value.toBoolean)
          case "long" =>
            // `flatten` writes arbitrary-size integers under this tag, so go through BigInt
            // and only narrow to Long when the value fits.
            val bi = BigInt(value)
            if (bi.isValidLong) Json.fromLong(bi.toLong) else Json.fromBigInt(bi)
          case "float" => Json.fromBigDecimal(BigDecimal.apply(value))
          case "str" => Json.fromString(value)
        }
      }
    } catch {
      case NonFatal(t) =>
        Left(List(UnpackFailure.ScalarParsingFailed(tpe, value, t)))
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment