Skip to content

Instantly share code, notes, and snippets.

@gakuzzzz
Last active December 22, 2022 06:30
Show Gist options
  • Save gakuzzzz/adbd82325de86923cdb75acb098e9d0e to your computer and use it in GitHub Desktop.
Save gakuzzzz/adbd82325de86923cdb75acb098e9d0e to your computer and use it in GitHub Desktop.
https://twitter.com/matarillo/status/1302048512771645440 フラットなデータ集合からネストしたグルーピングするのに foldMap が便利だよというお話

前提

こちらのツイート https://twitter.com/matarillo/status/1302048512771645440 をみて foldMap か foldLeft で簡単に書けるのでは?と思ったのが発端

データ構造がこんな感じだとして

  case class OrganizationRecord(
    id: Long,
    organizationLv1: String,
    organizationLv2: String,
    organizationLv3: String,
    organizationLv4: String,
  )
  case class Organization(
    id: Long,
    name: String,
    children: Vector[Organization] = Vector(),
  )

実装

CSV のデータ順が保証されているなら foldLeft 使うとこんな感じに

  records.foldLeft(Vector.empty[Organization]) { (acuum, r) =>
    val lv4 = if (r.organizationLv4 != null) Vector(Organization(r.id, r.organizationLv4)) else Vector()
    val lv3 = if (r.organizationLv3 != null) Vector(Organization(r.id, r.organizationLv3, lv4)) else Vector()
    val lv2 = if (r.organizationLv2 != null) Vector(Organization(r.id, r.organizationLv2, lv3)) else Vector()
    val lv1 = Organization(r.id, r.organizationLv1, lv2)
    concat(acuum, lv1)
  }

  def concat(orgs: Vector[Organization], org: Organization): Vector[Organization] = {
    if (orgs.isEmpty) return Vector(org)
    val init :+ last = orgs
    if (last.name == org.name) init :+ Organization(last.id, last.name, org.children.foldLeft(last.children)(concat))
    else orgs :+ org
  }

実行例

    // データ
    val records: Vector[OrganizationRecord] = Vector(
      OrganizationRecord(1,  "org1", null,     null,       null),
      OrganizationRecord(2,  "org1", "org1-1", null,       null),
      OrganizationRecord(3,  "org1", "org1-1", "org1-1-1", null),
      OrganizationRecord(4,  "org2", null,     null,       null),
      OrganizationRecord(5,  "org2", "org2-1", null,       null),
      OrganizationRecord(6,  "org2", "org2-1", "org2-1-1", null),
      OrganizationRecord(7,  "org2", "org2-2", "org2-2-1", null),
      OrganizationRecord(8,  "org2", "org2-2", "org2-2-1", "org2-2-1-1"),
      OrganizationRecord(9,  "org2", "org2-2", "org2-2-1", "org2-2-1-2"),
      OrganizationRecord(10, "org3", null,     null,       null),
    )
// 結果
Organization(1,org1,Vector(
  Organization(2,org1-1,Vector(
    Organization(3,org1-1-1,Vector()))))),
Organization(4,org2,Vector(
  Organization(5,org2-1,Vector(
    Organization(6,org2-1-1,Vector()))), 
  Organization(7,org2-2,Vector(
    Organization(7,org2-2-1,Vector(
      Organization(8,org2-2-1-1,Vector()), 
      Organization(9,org2-2-1-2,Vector()))))))),
Organization(10,org3,Vector())

補足

もしCSVデータの順序が保証されないのであれば以下のようにすればOK

  def concat(orgs: Vector[Organization], org: Organization): Vector[Organization] = {
    val (same, remaining) = orgs.partition(_.name == org.name)
    if (same.isEmpty) orgs :+ org
    else remaining :+ 
      Organization(
        same.head.id min org.id, 
        org.name,
        org.children.foldLeft(same.head.children)(concat),
      )
  }
    val records: Vector[OrganizationRecord] = Vector(
      OrganizationRecord(1,  "org1", null,     null,       null),
      OrganizationRecord(9,  "org2", "org2-2", "org2-2-1", "org2-2-1-2"),
      OrganizationRecord(2,  "org1", "org1-1", null,       null),
      OrganizationRecord(3,  "org1", "org1-1", "org1-1-1", null),
      OrganizationRecord(10, "org3", null,     null,       null),
      OrganizationRecord(4,  "org2", null,     null,       null),
      OrganizationRecord(5,  "org2", "org2-1", null,       null),
      OrganizationRecord(6,  "org2", "org2-1", "org2-1-1", null),
      OrganizationRecord(7,  "org2", "org2-2", "org2-2-1", null),
      OrganizationRecord(8,  "org2", "org2-2", "org2-2-1", "org2-2-1-1"),
    )
// 結果
Organization(1,org1,Vector(
  Organization(2,org1-1,Vector(
    Organization(3,org1-1-1,Vector()))))),
Organization(4,org2,Vector(
  Organization(5,org2-1,Vector(
    Organization(6,org2-1-1,Vector()))), 
  Organization(7,org2-2,Vector(
    Organization(7,org2-2-1,Vector(
      Organization(9,org2-2-1-2,Vector()), 
      Organization(8,org2-2-1-1,Vector()))))))),
Organization(10,org3,Vector())

name でのソートが必要かも?

デメリット

groupBy 使う場合と比較して無駄になるオブジェクトの生成が多い。

より Scala っぽく書くと

  case class OrganizationRecord(
    id: Long,
    organizationLv1: String,
    organizationLv2: Option[String],
    organizationLv3: Option[String],
    organizationLv4: Option[String],
  )
  case class Organization(
    id: Long,
    name: String,
    children: Vector[Organization] = Vector(),
  )

  def concat(orgs: Vector[Organization], org: Organization): Vector[Organization] = {
    val Name = org.name
    orgs match {
      case init :+ Organization(id, Name, cs) => init :+ Organization(id, Name, org.children.foldLeft(cs)(concat))
      case rest                               => rest :+ org
    }
  }

  def main(args: Array[String]) {
    val records: Vector[OrganizationRecord] = Vector(
      OrganizationRecord(1,  "org1", None,           None,             None),
      OrganizationRecord(2,  "org1", Some("org1-1"), None,             None),
      OrganizationRecord(3,  "org1", Some("org1-1"), Some("org1-1-1"), None),
      OrganizationRecord(4,  "org2", None,           None,             None),
      OrganizationRecord(5,  "org2", Some("org2-1"), None,             None),
      OrganizationRecord(6,  "org2", Some("org2-1"), Some("org2-1-1"), None),
      OrganizationRecord(7,  "org2", Some("org2-2"), Some("org2-2-1"), None),
      OrganizationRecord(8,  "org2", Some("org2-2"), Some("org2-2-1"), Some("org2-2-1-1")),
      OrganizationRecord(9,  "org2", Some("org2-2"), Some("org2-2-1"), Some("org2-2-1-2")),
      OrganizationRecord(10, "org3", None,           None,             None),
    )

    val r = records.foldLeft(Vector.empty[Organization]) { (acuum, record) =>
      val lv4 = record.organizationLv4.map(Organization(record.id, _)).toVector
      val lv3 = record.organizationLv3.map(Organization(record.id, _, lv4)).toVector
      val lv2 = record.organizationLv2.map(Organization(record.id, _, lv3)).toVector
      val lv1 = Organization(record.id, record.organizationLv1, lv2)
      concat(acuum, lv1)
    }

    r.foreach(println)
  }

foldMap

こうすると foldMap で書けるけど Vectorの連結を上書くと誤解をうむ可能性もあるので OrganizationList みたいな専用型用意してもいいかも?

foldMap にすると元のCSVデータが巨大な時にparallelに処理することで効率化できる可能性がある。

  import cats.syntax.foldable._
  import cats.instances.vector.catsStdInstancesForVector

  implicit val monoid: Monoid[Vector[Organization]] = new Monoid[Vector[Organization]] {
    def empty: Vector[Organization] = Vector()

    def combine(x: Vector[Organization], y: Vector[Organization]): Vector[Organization] = (x, y) match {
      case (i :+ l, h +: t) if l.name == h.name => i :+ Organization(l.id, l.name, combine(l.children, h.children)) :++ t
      case _                                    => x :++ y
    }
  }
  
  val r = records.foldMap { record =>
    val lv4 = record.organizationLv4.map(Organization(record.id, _)).toVector
    val lv3 = record.organizationLv3.map(Organization(record.id, _, lv4)).toVector
    val lv2 = record.organizationLv2.map(Organization(record.id, _, lv3)).toVector
    val lv1 = Organization(record.id, record.organizationLv1, lv2)
    Vector(lv1)
  }

C# で書くとこんな感じ?

using System;
using System.Linq;
using System.Collections.Generic;
using System.Collections.Immutable;

public class OrganizationRecord
{
    public long Id { get; }
    public string OrganizationLv1 { get; }
    public string OrganizationLv2 { get; }
    public string OrganizationLv3 { get; }
    public string OrganizationLv4 { get; }

    public OrganizationRecord(long id, string lv1, string lv2, string lv3, string lv4)
    {
        Id = id;
        OrganizationLv1 = lv1;
        OrganizationLv2 = lv2;
        OrganizationLv3 = lv3;
        OrganizationLv4 = lv4;
    }

}
public class Organization
{
    public long Id { get; }
    public string Name { get; }
    public IReadOnlyList<Organization> Children { get; }

    public Organization(long Id, string Name, IReadOnlyList<Organization> Children)
    {
        this.Id = Id;
        this.Name = Name;
        this.Children = Children;
    }
}

public class Hoge
{
    public static void Main(string[] args)
    {
        var organizations = new []
        {
            new OrganizationRecord(1,  "org1", null,     null,       null),
            new OrganizationRecord(2,  "org1", "org1-1", null,       null),
            new OrganizationRecord(3,  "org1", "org1-1", "org1-1-1", null),
            new OrganizationRecord(4,  "org2", null,     null,       null),
            new OrganizationRecord(5,  "org2", "org2-1", null,       null),
            new OrganizationRecord(6,  "org2", "org2-1", "org2-1-1", null),
            new OrganizationRecord(7,  "org2", "org2-2", "org2-2-1", null),
            new OrganizationRecord(8,  "org2", "org2-2", "org2-2-1", "org2-2-1-1"),
            new OrganizationRecord(9,  "org2", "org2-2", "org2-2-1", "org2-2-1-2"),
            new OrganizationRecord(10, "org3", null,     null,       null)
        };
        
        IReadOnlyList<Organization> empty = ImmutableList.Create<Organization>();
        IReadOnlyList<Organization> single(Organization org) => ImmutableList.Create<Organization>(org);

        var r = organizations.Aggregate(empty, (accum, r) =>
        {
            var lv4 = r.OrganizationLv4 != null ? single(new Organization(r.Id, r.OrganizationLv4, empty)) : empty;
            var lv3 = r.OrganizationLv3 != null ? single(new Organization(r.Id, r.OrganizationLv3, lv4)) : empty;
            var lv2 = r.OrganizationLv2 != null ? single(new Organization(r.Id, r.OrganizationLv2, lv3)) : empty;
            var lv1 = new Organization(r.Id, r.OrganizationLv1, lv2);
            return Concat(accum, lv1);
        });
        
        Console.WriteLine(r);
    }

    private static IReadOnlyList<Organization> Concat(IReadOnlyList<Organization> orgs, Organization org)
    {
        if (orgs.Count == 0) return ImmutableList.Create(org);
        var last = orgs.LastOrDefault();
        if (last.Name != org.Name) return orgs.ToImmutableList().Add(org);
        var newOrg = new Organization(last.Id, last.Name, org.Children.Aggregate(last.Children, Concat));
        return orgs.Take(orgs.Count - 1).ToImmutableList().Add(newOrg);
    }

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment