Last active
May 26, 2022 01:26
-
-
Save hohonuuli/cd2204c5900b8c7e95559c35fdd3c0e7 to your computer and use it in GitHub Desktop.
Proof-of-concept to convert https://marine-imaging.com/fair/ifdos/iFDO-overview/ to FathomNet-friendly CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env -S scala-cli shebang --scala-version 3.1.2 | |
/* | |
Proof of concept to convert iFDO YAML to FathomNet CSV | |
Requirements: | |
- Install scala-cli https://scala-cli.virtuslab.org | |
Usage: | |
ifdo-to-fathomnet.sc <ifdo-yaml-file> > <fathomnet-csv-file> | |
*/ | |
//> using lib "io.circe::circe-core:0.14.1" | |
//> using lib "io.circe::circe-parser:0.14.1" | |
//> using lib "io.circe::circe-yaml:0.14.1" | |
//> using lib "io.circe::circe-generic:0.14.1" | |
import _root_.io.circe.yaml.parser | |
import _root_.io.circe.* | |
import _root_.io.circe.parser.* | |
import _root_.io.circe.syntax.* | |
import _root_.io.circe.generic.semiauto._ | |
import scala.deriving.* | |
import scala.compiletime.{summonAll} | |
/** | |
* --- Transforms to convert Scala case classes to CSV | |
*/ | |
/** Renders `a` as CSV text through the Transformer instance resolved for `A`. */
def transform[A : Transformer](a: A) =
  val instance = summon[Transformer[A]]
  instance.f(a)
/**
 * Type class that renders a value of type `T` as CSV text.
 */
trait Transformer[T]:
  /** Returns the CSV representation of `t`. */
  def f(t: T): String

/**
 * Strings are emitted verbatim unless they contain a comma, a double quote or a
 * line break. Such values are wrapped in double quotes with embedded quotes
 * doubled (RFC 4180 style) so free text cannot shift columns or split rows.
 */
given Transformer[String] with
  def f(x: String) =
    val needsQuoting = x.exists(c => c == ',' || c == '"' || c == '\n' || c == '\r')
    if needsQuoting then "\"" + x.replace("\"", "\"\"") + "\""
    else x

/** Integers are written in their plain decimal form. */
given Transformer[Int] with
  def f(x: Int) = x.toString

/**
 * Doubles are written with five decimal places. The format is pinned to
 * Locale.ROOT: default-locale formatting can emit ',' as the decimal separator,
 * which a CSV reader would treat as a column break.
 */
given Transformer[Double] with
  def f(x: Double) = "%.5f".formatLocal(java.util.Locale.ROOT, x)

/** Booleans are written as `true` / `false`. */
given Transformer[Boolean] with
  def f(x: Boolean) = x.toString
/**
 * Optional values: an absent value becomes an empty cell, a present value is
 * rendered by the Transformer of the wrapped type.
 */
given [T] (using t: Transformer[T]): Transformer[Option[T]] with
  def f(x: Option[T]) = x.fold("")(t.f)
/**
 * Lists of products render as a CSV document: one header line taken from the
 * field names of the first element, followed by one line per element.
 *
 * An empty list renders as the empty string. Without an element there is no
 * instance to read the field names from, and calling `head` would throw.
 */
given [A <: Product] (using t: Transformer[A]): Transformer[List[A]] =
  new Transformer[List[A]]:
    def f(x: List[A]) = x match
      case Nil => ""
      case first :: _ =>
        val rows = asHeader(first) :: x.map(t.f)
        rows.mkString("\n")
/**
 * Derives a Transformer for any product type that has a Mirror: every field is
 * rendered with the Transformer summoned for its declared type and the results
 * are joined with commas, in field declaration order.
 */
inline given [A <: Product] (using m: Mirror.ProductOf[A]): Transformer[A] =
  new Transformer[A]:
    // One Transformer per field type, in the same order as the fields.
    type FieldTransformers = Tuple.Map[m.MirroredElemTypes, Transformer]
    // The tuple of instances is flattened to an untyped list; each entry is
    // only ever applied to the field at the same position.
    val fieldTransformers = summonAll[FieldTransformers].toList.asInstanceOf[List[Transformer[Any]]]
    def f(a: A): String =
      a.productIterator
        .zip(fieldTransformers)
        .map((field, fieldTransformer) => fieldTransformer.f(field))
        .mkString(",")
/** Builds the CSV header line for `a`: its field names, comma separated, in declaration order. */
def asHeader[A <: Product](a: A): String =
  val fieldNames = a.productElementNames
  fieldNames.mkString(",")
/** | |
* --- Data model | |
*/ | |
/**
 * One row of the generated CSV.
 *
 * The field declaration order is the CSV column order: the header comes from
 * the field names and each row from the field values in the same order.
 * Optional fields left at `None` are written as empty cells.
 *
 * Within this script only `image`, `concept`, `x`, `y`, `width`, `height` and
 * `observer` are populated (see `Annotation.toBoundingBox`); the remaining
 * columns keep their defaults.
 *
 * Declared `final`: this is plain data and is not meant to be subclassed.
 */
final case class BoundingBox(
  image: String,
  concept: String,
  x: Int,
  y: Int,
  width: Int,
  height: Int,
  altitude: Option[Double] = None,
  altconcept: Option[String] = None,
  depth: Option[Double] = None,
  groupof: Option[Boolean] = None,
  imagingtype: Option[String] = None,
  latitude: Option[Double] = None,
  longitude: Option[Double] = None,
  observer: Option[String] = None,
  occluded: Option[Boolean] = None,
  oxygen: Option[Double] = None,
  pressure: Option[Double] = None,
  salinity: Option[Double] = None,
  temperature: Option[Double] = None,
  timestamp: Option[String] = None
)
/** An entry of `image-annotation-creators` in the image-set header: maps a creator `id` to a display `name`. */
final case class ImageAnnotationCreator(id: String, name: String)

/** An entry of `image-annotation-labels` in the image-set header: maps a label `id` to its `name`. */
final case class ImageAnnotationLabel(id: String, info: String, name: String)

/**
 * A label attached to an annotation. As decoded from the document, `annotator`
 * and `label` hold ids that `resolve` matches against the header's creator and
 * label ids.
 */
final case class Label(annotator: String, label: String)

/** One entry of an item's `image-annotations` list; only entries whose `shape` is "rectangle" are converted. */
final case class ImageAnnotation(coordinates: Seq[Int], shape: String, labels: Seq[Label])
/**
 * A rectangle annotation paired with the key (`url`) of the image item it
 * belongs to.
 */
case class Annotation(url: String, coordinates: Seq[Int], labels: Seq[Label]):

  /**
   * Converts this annotation to an axis-aligned bounding box.
   *
   * The first eight coordinates are read as four points `x, y, x, y, ...`: even
   * indices are x values, odd indices are y values. The box origin is the
   * smallest x and y; width and height are the spans to the largest x and y.
   * Only the first label is used, for both the concept and the observer.
   *
   * Throws if fewer than eight coordinates are present or if `labels` is empty.
   */
  def toBoundingBox: BoundingBox =
    val xValues = Seq(0, 2, 4, 6).map(i => coordinates(i))
    val yValues = Seq(1, 3, 5, 7).map(i => coordinates(i))
    val left = xValues.min
    val top = yValues.min
    val firstLabel = labels.head // just grabbing the first label for now
    BoundingBox(
      image = url,
      concept = firstLabel.label,
      x = left,
      y = top,
      width = xValues.max - left,
      height = yValues.max - top,
      observer = Some(firstLabel.annotator)
    )
/** | |
* --- Circe decoders to simplify JSON/YAML parsing | |
*/ | |
// Semi-automatically derived decoders; object keys are matched to the case
// class field names.
given Decoder[Label] = deriveDecoder[Label]
given Decoder[ImageAnnotation] = deriveDecoder[ImageAnnotation]
given Decoder[ImageAnnotationCreator] = deriveDecoder[ImageAnnotationCreator]
given Decoder[ImageAnnotationLabel] = deriveDecoder[ImageAnnotationLabel]
/**
 * --- Methods to extract the data we're interested in from the YAML
 */
/**
 * Reads `image-set-header` / `image-annotation-creators` from the document.
 *
 * Best effort: returns an empty list when the field is missing or cannot be
 * decoded. Decoding goes through the cursor rather than `focus.get`, so a
 * missing header or field no longer throws.
 *
 * @param json the parsed iFDO document
 * @return the declared annotation creators, possibly empty
 */
def extractImageAnnotationCreators(json: Json): Iterable[ImageAnnotationCreator] =
  json.hcursor
    .downField("image-set-header")
    .downField("image-annotation-creators")
    .as[List[ImageAnnotationCreator]]
    .getOrElse(Nil)
/**
 * Reads `image-set-header` / `image-annotation-labels` from the document.
 *
 * Best effort: returns an empty list when the field is missing or cannot be
 * decoded. Decoding goes through the cursor rather than `focus.get`, so a
 * missing header or field no longer throws.
 *
 * @param json the parsed iFDO document
 * @return the declared annotation labels, possibly empty
 */
def extractImageAnnotationLabels(json: Json): Iterable[ImageAnnotationLabel] =
  json.hcursor
    .downField("image-set-header")
    .downField("image-annotation-labels")
    .as[List[ImageAnnotationLabel]]
    .getOrElse(Nil)
/**
 * Collects the rectangle annotations of every entry under `image-set-items`.
 *
 * Each key of `image-set-items` is used as the annotation's `url`. Entries
 * whose `image-annotations` field is missing or cannot be decoded contribute
 * nothing, and a document without `image-set-items` yields an empty result.
 * Previously both cases threw through `Option.get`.
 *
 * @param json the parsed iFDO document
 * @return one Annotation per annotation whose shape is "rectangle"
 */
def extractImageAnnotations(json: Json): Iterable[Annotation] =
  val items = json.hcursor.downField("image-set-items")
  // `keys` is None when the cursor failed or does not point at an object.
  val urls = items.keys.getOrElse(Nil)
  urls.flatMap(url =>
    items
      .downField(url)
      .downField("image-annotations")
      .as[List[ImageAnnotation]]
      .getOrElse(Nil)
      .filter(_.shape == "rectangle")
      .map(x => Annotation(url, x.coordinates, x.labels))
  )
/** | |
* --- ifdo encodes relational data in a flat file. We have to unmunge relations. | |
*/ | |
/**
 * Replaces the ids carried by each annotation's first label with the names
 * declared in the header, then converts the annotation to a BoundingBox.
 *
 * Only the first label of an annotation is used. Annotations without any label
 * produce no row. If an annotator or label id is not declared in the header,
 * the raw id is kept as the name, so the row is still emitted instead of the
 * whole conversion aborting on `None.get`.
 *
 * @param labels      label definitions from the header (id -> name)
 * @param creators    creator definitions from the header (id -> name)
 * @param annotations annotations whose labels still carry ids
 * @return one BoundingBox per annotation that has at least one label
 */
def resolve(labels: Seq[ImageAnnotationLabel],
            creators: Seq[ImageAnnotationCreator],
            annotations: Seq[Annotation]): Seq[BoundingBox] =
  for
    a <- annotations
    first <- a.labels.take(1) // zero or one element: skips label-less annotations
  yield
    val observerName = creators.find(_.id == first.annotator).fold(first.annotator)(_.name)
    val conceptName = labels.find(_.id == first.label).fold(first.label)(_.name)
    val resolved = a.copy(labels = Seq(Label(observerName, conceptName)))
    resolved.toBoundingBox
/** | |
* --- Main method. Converts a YAML string to a CSV string | |
*/ | |
/**
 * Converts an iFDO YAML document into CSV text.
 *
 * @param yaml the YAML document as a string
 * @return the CSV text, or the literal "Parsing failed" when the YAML cannot be parsed
 */
def yamlToCsv(yaml: String): String =
  // Extracts annotations, creators and labels, resolves ids to names and renders the rows.
  def render(doc: Json): String =
    val rawAnnotations = extractImageAnnotations(doc).toSeq
    val headerCreators = extractImageAnnotationCreators(doc).toSeq
    val headerLabels = extractImageAnnotationLabels(doc).toSeq
    val boxes = resolve(headerLabels, headerCreators, rawAnnotations).toList
    transform(boxes)

  parser.parse(yaml).fold(_ => "Parsing failed", render)
// Script entry point: reads the YAML file named by the first argument and
// prints the CSV to stdout.
if args.isEmpty then
  // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
  Console.err.println("Usage: ifdo-to-fathomnet.sc <ifdo-yaml-file> > <fathomnet-csv-file>")
  sys.exit(1)
// Using.resource closes the Source even if reading fails.
val yaml = scala.util.Using.resource(scala.io.Source.fromFile(args(0)))(_.mkString)
val csv = yamlToCsv(yaml)
println(csv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Input:
Output: