Created
February 14, 2020 06:05
-
-
Save clintval/72e78280f64a10db691f01f921bd1245 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object SampleUtil {

  /** Merges a collection of libraries that all belong to the same sample into a single [[Sample]].
    *
    * The sample IDs, library IDs, projects, descriptions, i7/i5 index bases and every extended attribute are
    * joined, in input order, with `LibraryJoinDelimiter` (presumably `";"` -- it is defined outside this
    * object). Regardless of the lanes the libraries were sequenced on, the merged sample has its lane cleared
    * to [[None]] and its ordinal set to zero.
    *
    * @param samples the libraries to merge; must be non-empty and must all share one sample name
    * @return the merged sample
    * @throws IllegalStateException when there are no samples to merge
    * @throws IllegalArgumentException when the samples do not all have the same sample name
    */
  def merge(samples: Seq[Sample]): Sample = {
    val sampleName = samples.map(_.sampleName) match {
      case Seq() => throw new IllegalStateException("Must attempt to join at at least one sample.")
      case Seq(head, tail @ _*) =>
        // Join the names themselves; calling mkString on each name would interleave its characters instead.
        require(
          tail.forall(_ == head),
          s"All libraries must be the same sample, found: ${samples.map(_.sampleName).mkString(", ")}"
        )
        head
    }

    /** Joins one field across all samples with the delimiter; [[None]] when the joined string is blank. */
    def concat(f: Sample => String): Option[String] = {
      Option(samples.map(f).mkString(LibraryJoinDelimiter)).filter(_.trim.nonEmpty)
    }

    val sampleId     = samples.map(_.sampleId).mkString(LibraryJoinDelimiter)
    val libraryId    = samples.map(_.libraryId).mkString(LibraryJoinDelimiter)
    val project      = concat(_.project.getOrElse(""))
    val description  = concat(_.description.getOrElse(""))
    val i7IndexBases = concat(_.i7IndexBases.getOrElse(""))
    val i5IndexBases = concat(_.i5IndexBases.getOrElse(""))

    // Combine the extended attributes across all libraries. Not every library defines every key, so a library
    // that lacks a key contributes an empty string as a placeholder; each joined value therefore always has
    // one entry per input library, in the order of the input collection.
    val extendedKeys       = samples.flatMap(_.extendedAttributes.keys).distinct
    val extendedAttributes = extendedKeys.map { key =>
      key -> samples.map(_.extendedAttributes.getOrElse(key, "")).mkString(LibraryJoinDelimiter)
    }.toMap

    new Sample(
      sampleOrdinal      = 0,
      sampleId           = sampleId,
      sampleName         = sampleName,
      libraryId          = libraryId,
      project            = project,
      description        = description,
      lane               = None,
      i7IndexBases       = i7IndexBases,
      i5IndexBases       = i5IndexBases,
      extendedAttributes = extendedAttributes
    )
  }
}
/** Behavioural tests for `SampleUtil.merge`.
  *
  * NOTE(review): the `should`/`in`/`thrownBy` syntax presumably comes from a ScalaTest spec trait that is
  * mixed in where this code really lives -- confirm, since no such trait is visible here.
  */
object SampleUtilTest {

  /** Extended-attribute keys used by the fixtures, upper-cased once. */
  private val ReferenceKey: String         = ReferenceNameKey.toUpperCase
  private val GenomeEquivalentsAttr: String = GenomeEquivalentsKey.toUpperCase

  /** Builds one library of the shared sample "SamplePrime" in "project1"; only per-library fields vary. */
  private def library(
    id: String,
    libraryName: String,
    text: String,
    laneNumber: Int,
    i7: String,
    i5: String,
    attributes: Map[String, String]
  ): Sample = new Sample(
    sampleOrdinal      = 0,
    sampleId           = id,
    sampleName         = "SamplePrime",
    libraryId          = libraryName,
    project            = Some("project1"),
    description        = Some(text),
    lane               = Some(laneNumber),
    i7IndexBases       = Some(i7),
    i5IndexBases       = Some(i5),
    extendedAttributes = attributes
  )

  private val LibraryOne: Sample =
    library("1", "LibraryOne", "Description 1", 1, "CAT", "TCA", Map(ReferenceKey -> "mm10"))

  // The only fixture that carries the genome-equivalents attribute, so the attribute maps mismatch.
  private val LibraryTwo: Sample =
    library(
      "2", "LibraryTwo", "Description 2", 2, "TTT", "CCC",
      Map(ReferenceKey -> "mm10", GenomeEquivalentsAttr -> "2000")
    )

  private val LibraryThree: Sample =
    library("3", "LibraryThree", "Description 3", 5, "AAA", "AAA", Map(ReferenceKey -> "mm10"))

  "SampleUtil.merge" should "raise an exception when no libraries are defined" in {
    an[IllegalStateException] should be thrownBy { SampleUtil.merge(Seq.empty) }
  }

  it should "raise an exception when samples have different names" in {
    val renamedLibrary = LibraryOne.copy(sampleName = LibraryOne.sampleName + "-different")
    an[IllegalArgumentException] should be thrownBy { SampleUtil.merge(Seq(LibraryOne, renamedLibrary)) }
  }

  it should "return a single completely defined sample unchanged except for the sample ordinal and lane field" in {
    val expected = LibraryOne.copy(lane = None, sampleOrdinal = 0)
    SampleUtil.merge(Seq(LibraryOne)) should be (expected)
  }

  it should "merge three libraries which have all of their fields set, and mismatching dictionaries" in {
    // Libraries without the genome-equivalents attribute contribute empty placeholders, hence ";2000;".
    val expected: Sample = LibraryOne.copy(
      sampleOrdinal      = 0,
      sampleId           = "1;2;3",
      libraryId          = "LibraryOne;LibraryTwo;LibraryThree",
      project            = Some("project1;project1;project1"),
      description        = Some("Description 1;Description 2;Description 3"),
      lane               = None,
      i7IndexBases       = Some("CAT;TTT;AAA"),
      i5IndexBases       = Some("TCA;CCC;AAA"),
      extendedAttributes = Map(ReferenceKey -> "mm10;mm10;mm10", GenomeEquivalentsAttr -> ";2000;")
    )
    SampleUtil.merge(Seq(LibraryOne, LibraryTwo, LibraryThree)) should be (expected)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment