Last active
December 21, 2016 08:22
-
-
Save tflori/1542cb898b07bbe0879aa9954d2c11db to your computer and use it in GitHub Desktop.
fasta php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* The Computer Language Benchmarks Game | |
http://benchmarksgame.alioth.debian.org/ | |
contributed by Thomas Flori | |
*/ | |
while (ob_get_level()) ob_end_clean(); | |
ini_set('memory_limit', '512M'); | |
define('LINE_LENGTH', 60); | |
define('NUM_THREADS', 16); | |
define('BUFFER_LINE_COUNT', 1024); | |
define('BUFFER_SIZE', LINE_LENGTH * BUFFER_LINE_COUNT); | |
define('IA', 3877); | |
define('IC', 29573); | |
define('IM', 139968); | |
$workers = []; | |
$n = 1000; | |
if ($_SERVER['argc'] > 1) { | |
$n = (int)$_SERVER['argv'][1]; | |
} | |
echo ">ONE Homo sapiens alu\n"; | |
$alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" | |
. "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" | |
. "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" | |
. "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" | |
. "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" | |
. "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" | |
. "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; | |
$length = $n * 2; | |
$buffer = strlen($alu) * 60; | |
while ($length > 0) { | |
$c = $length > $buffer ? $buffer : $length; | |
echo chunk_split(str_pad('', $c, $alu), LINE_LENGTH, "\n"); // yes a one liner | |
$length -= $c; | |
} | |
echo ">TWO IUB ambiguity codes\n"; | |
$iub = [ | |
'a' => 0.27, 'c' => 0.12, 'g' => 0.12, 't' => 0.27, 'B' => 0.02, 'D' => 0.02, 'H' => 0.02, 'K' => 0.02, | |
'M' => 0.02, 'N' => 0.02, 'R' => 0.02, 'S' => 0.02, 'V' => 0.02, 'W' => 0.02, 'Y' => 0.02 | |
]; | |
accumulatePropabilities($iub); | |
writeDNA(function ($randoms) use ($iub) { | |
return getDNA($randoms, $iub); | |
}, $n * 3); | |
echo ">THREE Homo sapiens frequency\n"; | |
$homosapiens = [ | |
'a' => 0.3029549426680, 'c' => 0.1979883004921, | |
'g' => 0.1975473066391, 't' => 0.3015094502008 | |
]; | |
accumulatePropabilities($homosapiens); | |
writeDNA(function ($randoms) use ($homosapiens) { | |
return getDNA($randoms, $homosapiens); | |
}, $n * 5); | |
function writeDNA($callback, $length) { | |
static $seed = 42; | |
$threads = []; | |
$i = 0; | |
$j = 0; | |
// here we need a lot of memory how to avoid this? | |
$dna = shmop_open(ftok(__FILE__, chr(0)), 'c', 0644, $length); | |
while ($length > 0) { | |
$c = BUFFER_SIZE; | |
if ($c + $i > $length) { | |
$c = $length - $i; | |
} | |
if (count($threads) === NUM_THREADS) { | |
$pid = pcntl_wait($status); | |
array_splice($threads, array_search($pid, $threads), 1); | |
} | |
$randoms = generateRandoms($seed, $c); | |
if (!$pid = pcntl_fork()) { | |
$offset = BUFFER_SIZE * $j; | |
shmop_write($dna, $callback($randoms), $offset); | |
exit(); | |
} else { | |
$threads[] = $pid; | |
} | |
$length -= $c; | |
$j++; | |
} | |
while (count($threads)) { | |
$pid = pcntl_wait($status); | |
array_splice($threads, array_search($pid, $threads), 1); | |
} | |
echo chunk_split(shmop_read($dna, 0, $length), LINE_LENGTH, "\n"); | |
shmop_delete($dna); | |
} | |
function generateRandoms(&$seed, $count) { | |
$randoms = array_fill(0, $count, 0); | |
foreach ($randoms as &$r) { | |
$r = $seed = ($seed * IA + IC) % IM; | |
} | |
return $randoms; | |
} | |
function getDNA($randoms, $genelist) { | |
$dna = ''; | |
foreach ($randoms as $r) { | |
foreach ($genelist as $nucleoid => $v) { | |
if ($r < $v) { | |
$dna .= $nucleoid; | |
break; | |
} | |
} | |
} | |
return $dna; | |
} | |
function accumulatePropabilities(&$genelist) { | |
$cumul = 0; | |
foreach($genelist as $k=>&$v) { | |
$cumul = $v = $v * IM + $cumul; | |
} | |
$genelist = array_map('intval', array_map('ceil', $genelist)); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* The Computer Language Benchmarks Game | |
http://benchmarksgame.alioth.debian.org/ | |
contributed by Wing-Chung Leung | |
modified by Isaac Gouy | |
modified by anon | |
modified by Thomas Flori | |
*/ | |
ob_implicit_flush(1); | |
ob_start(NULL, 4096); | |
define('IA', 3877); | |
define('IC', 29573); | |
define('IM', 139968); | |
$last = 42; | |
function gen_random(&$last, &$randoms) { | |
foreach($randoms as &$r) { | |
$r = $last = ($last * IA + IC) % IM; | |
} | |
} | |
/* Weighted selection from alphabet */ | |
function makeCumulative(&$genelist) { | |
$cumul = 0; | |
foreach($genelist as $k=>&$v) { | |
$cumul = $v = $v * IM + $cumul; | |
} | |
$genelist = array_map('intval', array_map('ceil', $genelist)); | |
} | |
/* Generate and write FASTA format */ | |
function makeRandomFasta(&$genelist, $n) { | |
$width = 60; | |
$lines = (int) ($n / $width); | |
$pick = str_repeat('?', $width)."\n"; | |
$randoms = array_fill(0, $width, 0.0); | |
global $last; | |
// full lines | |
for ($i = 0; $i < $lines; ++$i) { | |
gen_random($last, $randoms); | |
$j = 0; | |
foreach ($randoms as $r) { | |
foreach($genelist as $k=>$v) { | |
if ($r < $v) { | |
break; | |
} | |
} | |
$pick[$j++] = $k; | |
} | |
echo $pick; | |
} | |
// last, partial line | |
$w = $n % $width; | |
if ($w !== 0) { | |
$randoms = array_fill(0, $w, 0.0); | |
gen_random($last, $randoms); | |
$j = 0; | |
foreach ($randoms as $r) { | |
foreach($genelist as $k=>$v) { | |
if ($r < $v) { | |
break; | |
} | |
} | |
$pick[$j++] = $k; | |
} | |
$pick[$w] = "\n"; | |
echo substr($pick, 0, $w+1); | |
} | |
} | |
function makeRepeatFasta($s, $n) { | |
$i = 0; $sLength = strlen($s); $lineLength = 60; | |
while ($n > 0) { | |
if ($n < $lineLength) $lineLength = $n; | |
if ($i + $lineLength < $sLength){ | |
print(substr($s,$i,$lineLength)); print("\n"); | |
$i += $lineLength; | |
} else { | |
print(substr($s,$i)); | |
$i = $lineLength - ($sLength - $i); | |
print(substr($s,0,$i)); print("\n"); | |
} | |
$n -= $lineLength; | |
} | |
} | |
/* Main -- define alphabets, make 3 fragments */ | |
$iub=array( | |
'a' => 0.27, | |
'c' => 0.12, | |
'g' => 0.12, | |
't' => 0.27, | |
'B' => 0.02, | |
'D' => 0.02, | |
'H' => 0.02, | |
'K' => 0.02, | |
'M' => 0.02, | |
'N' => 0.02, | |
'R' => 0.02, | |
'S' => 0.02, | |
'V' => 0.02, | |
'W' => 0.02, | |
'Y' => 0.02 | |
); | |
$homosapiens = array( | |
'a' => 0.3029549426680, | |
'c' => 0.1979883004921, | |
'g' => 0.1975473066391, | |
't' => 0.3015094502008 | |
); | |
$alu = | |
'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG' . | |
'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA' . | |
'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT' . | |
'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA' . | |
'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG' . | |
'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC' . | |
'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA'; | |
$n = 1000; | |
if ($_SERVER['argc'] > 1) $n = $_SERVER['argv'][1]; | |
makeCumulative($iub); | |
makeCumulative($homosapiens); | |
echo ">ONE Homo sapiens alu\n"; | |
makeRepeatFasta($alu, $n*2); | |
echo ">TWO IUB ambiguity codes\n"; | |
makeRandomFasta($iub, $n*3); | |
echo ">THREE Homo sapiens frequency\n"; | |
makeRandomFasta($homosapiens, $n*5); | |
file_put_contents('php://stderr', "memory peak: " . memory_get_peak_usage(true) . "\n"); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment