Created
March 1, 2017 01:08
-
-
Save drdozer/5595c41b9b9e122f91dc27a0ee1d12ff to your computer and use it in GitHub Desktop.
A small Scala Ammonite (amm) script that generates perfect telomeric repeats and then mutates them.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.util.Random | |
/** The nucleotide alphabet that random bases are drawn from. */
val DNA: String = "AGCT"

/** The repeat unit that telomere sequences are built from. */
val telRep: String = "TTAGG"

/** The codon that opens an open reading frame. */
val startCodon: String = "ATG"

/** The codons that terminate an open reading frame. */
val stopCodons: Set[String] = Set("TAG", "TAA", "TGA")
/**
 * Builds a telomere sequence by concatenating `reps` copies of `telRep`.
 *
 * @param reps number of repeat units; zero or a negative count yields the empty string
 * @return the concatenated repeat units
 */
def telSeq(reps: Int) = Seq.fill(reps)(telRep).mkString
/**
 * A codon together with the span of the sequence it was read from.
 *
 * @param beginsAt   position of the codon's first base (`dnaToCodons` numbers positions from 1)
 * @param endsBefore position immediately after the codon's last base (exclusive end)
 * @param codon      the bases making up the codon
 */
case class CodonAt(
  beginsAt: Int,
  endsBefore: Int,
  codon: String
)

/**
 * An open reading frame: a run of codons that begins with a start codon.
 *
 * @param beginsAt   position of the first base of the first codon
 * @param endsBefore position immediately after the last base of the last codon (exclusive end)
 * @param terminates true when the frame is closed by a stop codon, false when it runs off the
 *                   end of the available codons
 * @param codons     the codons in the frame, including the closing stop codon when there is one
 */
case class OrfAt(
  beginsAt: Int,
  endsBefore: Int,
  terminates: Boolean,
  codons: Seq[CodonAt]
)
/**
 * Returns the complementary base of a single upper-case DNA nucleotide
 * (A <-> T, G <-> C).
 *
 * @param nuc one of 'A', 'C', 'G' or 'T'
 * @return the complementary nucleotide
 * @throws IllegalArgumentException if `nuc` is not one of the four upper-case bases
 */
def reverseComplementN(nuc: Char): Char = nuc match {
  case 'A' => 'T'
  case 'G' => 'C'
  case 'C' => 'G'
  case 'T' => 'A'
  // Fail with a message naming the offending character instead of a bare MatchError.
  case other =>
    throw new IllegalArgumentException(
      s"Not a DNA nucleotide (expected one of A, C, G, T): '$other'")
}
/**
 * Splits a DNA string into consecutive codons, reading in the given phase.
 *
 * Phase 1 starts at the first base, phase 2 at the second and phase 3 at the third.
 * Positions in the result are numbered from 1, and `endsBefore` is exclusive.
 * A trailing group of fewer than three bases is dropped.
 *
 * @param dna   the sequence to split
 * @param phase reading phase; must be 1, 2 or 3
 * @return the complete codons in reading order, each tagged with its position
 * @throws IllegalArgumentException if `phase` is outside 1 to 3
 */
def dnaToCodons(dna: String, phase: Int): Seq[CodonAt] = {
  require(phase >= 1 && phase <= 3, s"phase must be 1, 2 or 3 but was $phase")

  dna
    .drop(phase - 1)
    .grouped(3)
    .zipWithIndex
    .collect { case (codon, i) if codon.length == 3 =>
      val begin = i * 3 + phase
      CodonAt(begin, begin + 3, codon)
    }
    // toList rather than to[Seq]: the same strict List, and it also compiles on Scala 2.13+.
    .toList
}
/**
 * Finds every open reading frame in a codon sequence.
 *
 * A frame is opened at each occurrence of `startCodon` (so a start codon inside
 * another frame opens a further, overlapping frame) and runs up to and including
 * the first codon in `stopCodons`. If no stop codon follows, the frame runs to the
 * last codon and is marked as not terminating.
 *
 * @param codons codons in reading order
 * @return one `OrfAt` per start codon, in order of appearance
 */
def codonsToORF(codons: Seq[CodonAt]): Seq[OrfAt] = {
  // Every suffix that opens with the start codon is a candidate frame.
  val frames = codons.tails.filter(_.headOption.exists(_.codon == startCodon)).toList

  frames.map { frame =>
    val start = frame.head // non-empty: the filter above guarantees a leading start codon
    frame.indexWhere(c => stopCodons.contains(c.codon)) match {
      case -1 =>
        OrfAt(start.beginsAt, frame.last.endsBefore, false, frame)
      case stopIdx =>
        val stop = frame(stopIdx)
        OrfAt(start.beginsAt, stop.endsBefore, true, frame.take(stopIdx + 1))
    }
  }
}
/**
 * Draws one nucleotide uniformly at random from the `DNA` alphabet.
 *
 * @param random source of randomness
 * @return a character of `DNA`
 */
def randomNuc(random: Random): Char =
  // Bound by the alphabet's own length so the two cannot drift apart.
  DNA.charAt(random.nextInt(DNA.length))
/**
 * Picks a replacement nucleotide that is guaranteed to differ from `nuc`.
 *
 * The replacement is drawn uniformly from the characters of `DNA` other than `nuc`.
 * Drawing from the reduced alphabet, rather than redrawing once on a collision,
 * ensures the result never equals the input.
 *
 * @param nuc    the nucleotide being replaced
 * @param random source of randomness
 * @return a character of `DNA` different from `nuc`
 */
def nucMutate(nuc: Char, random: Random): Char = {
  val alternatives = DNA.filter(_ != nuc)
  alternatives.charAt(random.nextInt(alternatives.length))
}
/**
 * Applies a single substitution at a uniformly chosen position of `dna`.
 *
 * @param dna    the sequence to mutate; an empty sequence is returned unchanged
 * @param random source of randomness
 * @return a sequence of the same length with one position passed through `nucMutate`
 */
def substitutionMutate(dna: String, random: Random): String =
  if (dna.isEmpty) dna // nothing to substitute; nextInt(0) would throw
  else {
    val mutateAt = random.nextInt(dna.length)
    val replacement = nucMutate(dna.charAt(mutateAt), random)
    s"${dna.substring(0, mutateAt)}$replacement${dna.substring(mutateAt + 1)}"
  }
/**
 * Applies `substitutionMutate` to `dna` `n` times in succession.
 *
 * Positions are chosen independently each round, so the same position may be hit
 * more than once.
 *
 * @param n      number of substitution rounds; zero or a negative value returns `dna` unchanged
 * @param dna    the starting sequence
 * @param random source of randomness
 * @return the sequence after `n` rounds of substitution
 */
def substitutionMutateN(n: Int, dna: String, random: Random): String =
  // An empty range for n <= 0 makes negative counts a no-op instead of a runaway recursion.
  (0 until n).foldLeft(dna) { (current, _) => substitutionMutate(current, random) }
// Build an unmutated telomere of 20 repeat units and show it.
val myTel = telSeq(20)
println(s"Got telomere: $myTel")

// Read the telomere as codons in each of the three phases.
val Seq(phase1, phase2, phase3) = (1 to 3).map(phase => dnaToCodons(myTel, phase))

// Look for open reading frames in each phase.
val Seq(phase1Orfs, phase2Orfs, phase3Orfs) = Seq(phase1, phase2, phase3).map(codonsToORF)

// Report all codon lists first, then all ORF lists.
println(s"Phase 1 codons: $phase1")
println(s"Phase 2 codons: $phase2")
println(s"Phase 3 codons: $phase3")

println(s"Phase 1 orfs: $phase1Orfs")
println(s"Phase 2 orfs: $phase2Orfs")
println(s"Phase 3 orfs: $phase3Orfs")
// Run 50 independent trials: apply 4 substitutions to the perfect telomere and
// report any open reading frames that appear in each of the three phases.
val rand = new Random()
for { _ <- 0 until 50 } {
  val mutatedTel = substitutionMutateN(4, myTel, rand)
  println(s"Got telomere: $mutatedTel")

  // Phases are reported in order 1, 2, 3; a phase without ORFs prints nothing.
  for { phase <- 1 to 3 } {
    val orfs = codonsToORF(dnaToCodons(mutatedTel, phase))
    if (orfs.nonEmpty) println(s"Phase $phase orfs: $orfs")
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment