Created
January 30, 2017 21:58
-
-
Save EdgeCaseBerg/fe01af79bdfd3fef7836066d5c7940ed to your computer and use it in GitHub Desktop.
Example of how to decode HTML-encoded characters in JSON string values in Scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
libraryDependencies += "com.typesafe.play" %% "play-json" % "2.5.9" | |
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.4" | |
scalaVersion := "2.11.7" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import play.api.libs.json._ | |
import play.api.data.validation.ValidationError | |
import scala.io.Source | |
import org.apache.commons.lang3.StringEscapeUtils | |
import java.nio.file.{Paths, Files} | |
import java.nio.charset.StandardCharsets | |
/** Command-line utility that decodes HTML entities inside the string values
  * of JSON files and writes the result next to each input file.
  */
object example {

  /** Recursively rebuilds a JSON tree with HTML entities in its string values decoded.
    *
    * Objects and arrays are traversed depth-first. Only string ''values'' are
    * passed through `StringEscapeUtils.unescapeHtml4`; object keys are left
    * untouched, and every other kind of value is returned as-is.
    *
    * @param jsValue the JSON value to clean
    * @return a JSON value of the same structure with string values unescaped
    */
  def cleanJsValue(jsValue: JsValue): JsValue =
    jsValue match {
      case JsObject(fields) =>
        val updatedFields = fields.map { case (k, v) =>
          k -> cleanJsValue(v)
        }
        JsObject(updatedFields)
      case JsArray(list) => JsArray(list.map(cleanJsValue))
      case JsString(s)   => JsString(StringEscapeUtils.unescapeHtml4(s))
      case j             => j
    }

  /** Cleans every file named on the command line.
    *
    * For each argument `fileName`, the file is parsed as JSON, cleaned with
    * [[cleanJsValue]], pretty-printed, and written as UTF-8 to
    * `fileName + ".cleaned.json"`.
    *
    * @param args paths of the JSON files to clean
    */
  def main(args: Array[String]): Unit = {
    // foreach rather than map: the loop runs purely for its side effects.
    args.foreach { fileName =>
      // Read the raw bytes directly. Decoding to Chars with the platform charset
      // and truncating each Char to a Byte corrupts non-ASCII input, and the
      // Source used for that was never closed.
      val rawBytes = Files.readAllBytes(Paths.get(fileName))
      val myJson = Json.parse(rawBytes)
      val cleanedJson = cleanJsValue(myJson)
      Files.write(
        Paths.get(fileName + ".cleaned.json"),
        Json.prettyPrint(cleanedJson).getBytes(StandardCharsets.UTF_8)
      )
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment