こちらのツイート https://twitter.com/matarillo/status/1302048512771645440 をみて foldMap か foldLeft で簡単に書けるのでは?と思ったのが発端
case class OrganizationRecord(
id: Long,
organizationLv1: String,
organizationLv2: String,
organizationLv3: String,
organizationLv4: String,
)
case class Organization(
id: Long,
name: String,
children: Vector[Organization] = Vector(),
)
CSV のデータ順が保証されているなら foldLeft
使うとこんな感じに
records.foldLeft(Vector.empty[Organization]) { (acuum, r) =>
val lv4 = if (r.organizationLv4 != null) Vector(Organization(r.id, r.organizationLv4)) else Vector()
val lv3 = if (r.organizationLv3 != null) Vector(Organization(r.id, r.organizationLv3, lv4)) else Vector()
val lv2 = if (r.organizationLv2 != null) Vector(Organization(r.id, r.organizationLv2, lv3)) else Vector()
val lv1 = Organization(r.id, r.organizationLv1, lv2)
concat(acuum, lv1)
}
def concat(orgs: Vector[Organization], org: Organization): Vector[Organization] = {
if (orgs.isEmpty) return Vector(org)
val init :+ last = orgs
if (last.name == org.name) init :+ Organization(last.id, last.name, org.children.foldLeft(last.children)(concat))
else orgs :+ org
}
// データ
val records: Vector[OrganizationRecord] = Vector(
OrganizationRecord(1, "org1", null, null, null),
OrganizationRecord(2, "org1", "org1-1", null, null),
OrganizationRecord(3, "org1", "org1-1", "org1-1-1", null),
OrganizationRecord(4, "org2", null, null, null),
OrganizationRecord(5, "org2", "org2-1", null, null),
OrganizationRecord(6, "org2", "org2-1", "org2-1-1", null),
OrganizationRecord(7, "org2", "org2-2", "org2-2-1", null),
OrganizationRecord(8, "org2", "org2-2", "org2-2-1", "org2-2-1-1"),
OrganizationRecord(9, "org2", "org2-2", "org2-2-1", "org2-2-1-2"),
OrganizationRecord(10, "org3", null, null, null),
)
// 結果
Organization(1,org1,Vector(
Organization(2,org1-1,Vector(
Organization(3,org1-1-1,Vector()))))),
Organization(4,org2,Vector(
Organization(5,org2-1,Vector(
Organization(6,org2-1-1,Vector()))),
Organization(7,org2-2,Vector(
Organization(7,org2-2-1,Vector(
Organization(8,org2-2-1-1,Vector()),
Organization(9,org2-2-1-2,Vector()))))))),
Organization(10,org3,Vector())
もしCSVデータの順序が保証されないのであれば以下のようにすればOK
def concat(orgs: Vector[Organization], org: Organization): Vector[Organization] = {
val (same, remaining) = orgs.partition(_.name == org.name)
if (same.isEmpty) orgs :+ org
else remaining :+
Organization(
same.head.id min org.id,
org.name,
org.children.foldLeft(same.head.children)(concat),
)
}
val records: Vector[OrganizationRecord] = Vector(
OrganizationRecord(1, "org1", null, null, null),
OrganizationRecord(9, "org2", "org2-2", "org2-2-1", "org2-2-1-2"),
OrganizationRecord(2, "org1", "org1-1", null, null),
OrganizationRecord(3, "org1", "org1-1", "org1-1-1", null),
OrganizationRecord(10, "org3", null, null, null),
OrganizationRecord(4, "org2", null, null, null),
OrganizationRecord(5, "org2", "org2-1", null, null),
OrganizationRecord(6, "org2", "org2-1", "org2-1-1", null),
OrganizationRecord(7, "org2", "org2-2", "org2-2-1", null),
OrganizationRecord(8, "org2", "org2-2", "org2-2-1", "org2-2-1-1"),
)
// 結果
Organization(1,org1,Vector(
Organization(2,org1-1,Vector(
Organization(3,org1-1-1,Vector()))))),
Organization(4,org2,Vector(
Organization(5,org2-1,Vector(
Organization(6,org2-1-1,Vector()))),
Organization(7,org2-2,Vector(
Organization(7,org2-2-1,Vector(
Organization(9,org2-2-1-2,Vector()),
Organization(8,org2-2-1-1,Vector()))))))),
Organization(10,org3,Vector())
name
でのソートが必要かも?
groupBy
使う場合と比較して無駄になるオブジェクトの生成が多い。
case class OrganizationRecord(
id: Long,
organizationLv1: String,
organizationLv2: Option[String],
organizationLv3: Option[String],
organizationLv4: Option[String],
)
case class Organization(
id: Long,
name: String,
children: Vector[Organization] = Vector(),
)
def concat(orgs: Vector[Organization], org: Organization): Vector[Organization] = {
val Name = org.name
orgs match {
case init :+ Organization(id, Name, cs) => init :+ Organization(id, Name, org.children.foldLeft(cs)(concat))
case rest => rest :+ org
}
}
def main(args: Array[String]) {
val records: Vector[OrganizationRecord] = Vector(
OrganizationRecord(1, "org1", None, None, None),
OrganizationRecord(2, "org1", Some("org1-1"), None, None),
OrganizationRecord(3, "org1", Some("org1-1"), Some("org1-1-1"), None),
OrganizationRecord(4, "org2", None, None, None),
OrganizationRecord(5, "org2", Some("org2-1"), None, None),
OrganizationRecord(6, "org2", Some("org2-1"), Some("org2-1-1"), None),
OrganizationRecord(7, "org2", Some("org2-2"), Some("org2-2-1"), None),
OrganizationRecord(8, "org2", Some("org2-2"), Some("org2-2-1"), Some("org2-2-1-1")),
OrganizationRecord(9, "org2", Some("org2-2"), Some("org2-2-1"), Some("org2-2-1-2")),
OrganizationRecord(10, "org3", None, None, None),
)
val r = records.foldLeft(Vector.empty[Organization]) { (acuum, record) =>
val lv4 = record.organizationLv4.map(Organization(record.id, _)).toVector
val lv3 = record.organizationLv3.map(Organization(record.id, _, lv4)).toVector
val lv2 = record.organizationLv2.map(Organization(record.id, _, lv3)).toVector
val lv1 = Organization(record.id, record.organizationLv1, lv2)
concat(acuum, lv1)
}
r.foreach(println)
}
こうすると foldMap
で書けるけど Vectorの連結を上書くと誤解をうむ可能性もあるので OrganizationList
みたいな専用型用意してもいいかも?
foldMap にすると元のCSVデータが巨大な時にparallelに処理することで効率化できる可能性がある。
import cats.syntax.foldable._
import cats.instances.vector.catsStdInstancesForVector
implicit val monoid: Monoid[Vector[Organization]] = new Monoid[Vector[Organization]] {
def empty: Vector[Organization] = Vector()
def combine(x: Vector[Organization], y: Vector[Organization]): Vector[Organization] = (x, y) match {
case (i :+ l, h +: t) if l.name == h.name => i :+ Organization(l.id, l.name, combine(l.children, h.children)) :++ t
case _ => x :++ y
}
}
val r = records.foldMap { record =>
val lv4 = record.organizationLv4.map(Organization(record.id, _)).toVector
val lv3 = record.organizationLv3.map(Organization(record.id, _, lv4)).toVector
val lv2 = record.organizationLv2.map(Organization(record.id, _, lv3)).toVector
val lv1 = Organization(record.id, record.organizationLv1, lv2)
Vector(lv1)
}
using System;
using System.Linq;
using System.Collections.Generic;
using System.Collections.Immutable;
public class OrganizationRecord
{
public long Id { get; }
public string OrganizationLv1 { get; }
public string OrganizationLv2 { get; }
public string OrganizationLv3 { get; }
public string OrganizationLv4 { get; }
public OrganizationRecord(long id, string lv1, string lv2, string lv3, string lv4)
{
Id = id;
OrganizationLv1 = lv1;
OrganizationLv2 = lv2;
OrganizationLv3 = lv3;
OrganizationLv4 = lv4;
}
}
public class Organization
{
public long Id { get; }
public string Name { get; }
public IReadOnlyList<Organization> Children { get; }
public Organization(long Id, string Name, IReadOnlyList<Organization> Children)
{
this.Id = Id;
this.Name = Name;
this.Children = Children;
}
}
public class Hoge
{
public static void Main(string[] args)
{
var organizations = new []
{
new OrganizationRecord(1, "org1", null, null, null),
new OrganizationRecord(2, "org1", "org1-1", null, null),
new OrganizationRecord(3, "org1", "org1-1", "org1-1-1", null),
new OrganizationRecord(4, "org2", null, null, null),
new OrganizationRecord(5, "org2", "org2-1", null, null),
new OrganizationRecord(6, "org2", "org2-1", "org2-1-1", null),
new OrganizationRecord(7, "org2", "org2-2", "org2-2-1", null),
new OrganizationRecord(8, "org2", "org2-2", "org2-2-1", "org2-2-1-1"),
new OrganizationRecord(9, "org2", "org2-2", "org2-2-1", "org2-2-1-2"),
new OrganizationRecord(10, "org3", null, null, null)
};
IReadOnlyList<Organization> empty = ImmutableList.Create<Organization>();
IReadOnlyList<Organization> single(Organization org) => ImmutableList.Create<Organization>(org);
var r = organizations.Aggregate(empty, (accum, r) =>
{
var lv4 = r.OrganizationLv4 != null ? single(new Organization(r.Id, r.OrganizationLv4, empty)) : empty;
var lv3 = r.OrganizationLv3 != null ? single(new Organization(r.Id, r.OrganizationLv3, lv4)) : empty;
var lv2 = r.OrganizationLv2 != null ? single(new Organization(r.Id, r.OrganizationLv2, lv3)) : empty;
var lv1 = new Organization(r.Id, r.OrganizationLv1, lv2);
return Concat(accum, lv1);
});
Console.WriteLine(r);
}
private static IReadOnlyList<Organization> Concat(IReadOnlyList<Organization> orgs, Organization org)
{
if (orgs.Count == 0) return ImmutableList.Create(org);
var last = orgs.LastOrDefault();
if (last.Name != org.Name) return orgs.ToImmutableList().Add(org);
var newOrg = new Organization(last.Id, last.Name, org.Children.Aggregate(last.Children, Concat));
return orgs.Take(orgs.Count - 1).ToImmutableList().Add(newOrg);
}
}