Created
August 23, 2016 17:23
-
-
Save lh3/a1ef88085b2dc5454b694ccbf52630aa to your computer and use it in GitHub Desktop.
Preliminary and incomplete PAF->SAM converter (for wgsim_eval.pl)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Minimal getopt(3)-style command-line option scanner.
 *
 * Scanning state is kept on the function object itself between calls:
 *   getopt.ind   - index in `args` of the argument currently being scanned;
 *                  once null is returned it is the index of the first
 *                  non-option argument
 *   getopt.arg   - argument string of the last option that takes one, else null
 *   getopt.place - character offset inside args[getopt.ind], or -1 when the
 *                  next call must start on a fresh argument
 *
 * @param {string[]} args  argument vector to scan
 * @param {string}   ostr  option letters; a letter followed by ':' takes an argument
 * @returns {?string} the option letter found; '?' for an unknown option or a
 *                    missing argument (':' for a missing argument when `ostr`
 *                    starts with ':'); null when there are no more options
 */
var getopt = function(args, ostr) {
	var oli; // index of the current option letter inside ostr
	if (typeof(getopt.place) == 'undefined')
		getopt.ind = 0, getopt.arg = null, getopt.place = -1;
	if (getopt.place == -1) { // start scanning a new argument
		if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
			getopt.place = -1;
			return null;
		}
		if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
			++getopt.ind;
			getopt.place = -1;
			return null;
		}
	}
	var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
	if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
		if (optopt == '-') { // '-' is not a declared option: treat it as the end of options
			getopt.place = -1; // do not leave a stale offset behind
			return null;
		}
		// Unknown option letter. If it was the last character of this argument,
		// move on to the next argument; otherwise a later call would read past
		// the end of the string and report a bogus empty-string option.
		if (getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
		return '?';
	}
	if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // option takes no argument
		getopt.arg = null;
		if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
	} else { // option takes an argument
		if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
			getopt.arg = args[getopt.ind].substr(getopt.place); // attached: "-a5"
		else if (args.length <= ++getopt.ind) { // argument is missing
			getopt.place = -1;
			if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
			return '?';
		} else getopt.arg = args[getopt.ind]; // separated by white space: "-a 5"
		getopt.place = -1;
		++getopt.ind;
	}
	return optopt;
}
// Tunables read by process():
//   max_ins   - a forward and a reverse hit on the same target are paired only if
//               their target start coordinates differ by less than this
//   chi_pen   - penalty subtracted from the best unpaired score combination before
//               it is compared with the best paired score
//   max_close - a hit counts as "close" to the best hit of its read when its match
//               count is within this distance of the best match count
// output_paf  - set by -p: print the selected hits as PAF lines instead of SAM lines
var c, max_ins = 800, chi_pen = 45, max_close = 30, output_paf = false;
while ((c = getopt(arguments, "p")) != null) {
	if (c == 'p') output_paf = true;
}
if (getopt.ind == arguments.length) { // no input file given
	print("Usage: k8 paf2sam.js [-p] <in.paf>");
	print("Options:");
	print("  -p    output the selected hits in PAF format instead of SAM");
	exit(1);
}
var buf = new Bytes(); // line buffer reused by the main read loop
var file = new File(arguments[getopt.ind]);
/**
 * Process all hits of one fragment (one single-end read, or the two reads of a
 * pair) and print one output line per read.
 *
 * Each element of `a` is one input line split on TABs. Columns used here
 * (0-based): 0 read name, 1 query length, 2 query start, 3 query end, 4 strand,
 * 5 target name, 7 target start, 9 number of matches. Columns 7, 9 and 10 are
 * converted to integers in place, and column 11 of each selected hit is
 * overwritten with the computed mapping quality. Hits of the same read must be
 * adjacent in `a`; a change of name in column 0 starts a new read.
 *
 * Reads the globals max_ins, chi_pen, max_close and output_paf, and writes with
 * print().
 *
 * @param {Array<Array>} a  hits of one fragment; modified (and possibly re-sorted) in place
 */
function process(a)
{
	var n_reads = 0, max_match = [], max_match2 = [], mapq_se = [], n_close = [], best_read = [];
	if (a.length == 0) return;
	for (var i = 0; i < a.length; ++i) {
		a[i][7] = parseInt(a[i][7], 10);
		a[i][9] = parseInt(a[i][9], 10);
		a[i][10] = parseInt(a[i][10], 10);
	}

	// Pass 1: per read, find the best and second best match counts and the best hit.
	// Per-read state is created when the read is first seen, so any number of
	// reads and a first hit with zero matches are both handled.
	for (var i = 0; i < a.length; ++i) {
		if (i == 0 || a[i][0] != a[i-1][0]) { // first hit of a new read
			max_match[n_reads] = a[i][9];
			max_match2[n_reads] = 0;
			n_close[n_reads] = 0;
			best_read[n_reads] = a[i];
			++n_reads;
		} else if (max_match[n_reads - 1] < a[i][9]) {
			max_match2[n_reads - 1] = max_match[n_reads - 1];
			max_match[n_reads - 1] = a[i][9];
			best_read[n_reads - 1] = a[i];
		} else if (max_match2[n_reads - 1] < a[i][9]) {
			max_match2[n_reads - 1] = a[i][9];
		}
	}

	// Pass 2: per read, count the hits whose match count is within max_close of the best.
	var r = -1;
	for (var i = 0; i < a.length; ++i) {
		if (i == 0 || a[i][0] != a[i-1][0]) ++r;
		if (max_match[r] - a[i][9] < max_close) ++n_close[r];
	}

	// Single-end mapping quality: gap between best and second best, reduced when
	// several hits are close to the best; clamped at 0.
	for (var i = 0; i < n_reads; ++i) {
		mapq_se[i] = Math.floor(max_match[i] - max_match2[i] - 40 * Math.log(n_close[i]));
		if (mapq_se[i] < 0) mapq_se[i] = 0;
		best_read[i][11] = mapq_se[i];
	}

	if (n_reads == 2) { // paired-end
		// Sort by target name, then target start, so candidate mates are adjacent.
		a.sort(function(x,y) { return x[5]<y[5]? -1 : x[5]>y[5]? 1 : x[7]-y[7] });
		var paired = [], forward = [];
		for (var i = 0; i < a.length; ++i) {
			if (a[i][4] == '+') {
				forward.push(i);
			} else {
				// Pair this reverse hit with the preceding forward hits on the same
				// target that start less than max_ins before it.
				// NOTE(review): this does not check that the two hits come from
				// different reads of the pair - confirm whether that is intended.
				for (var j = forward.length - 1; j >= 0; --j) {
					var k = forward[j];
					if (a[k][5] == a[i][5] && a[i][7] - a[k][7] < max_ins)
						paired.push([k, i, a[k][9] + a[i][9]]);
					else break;
				}
			}
		}
		paired.sort(function(x,y) { return y[2]-x[2] }); // best combined score first
		var mapq_pe = 0;
		if (paired.length > 0) {
			// Best score obtainable by combining the top hit of one read with the
			// second best hit of the other.
			var tmp = max_match[0] + max_match2[1];
			if (tmp < max_match[1] + max_match2[0])
				tmp = max_match[1] + max_match2[0];
			if (paired[0][2] > tmp - chi_pen) { // the best pair beats the unpaired alternative
				mapq_pe = paired[0][2] - (tmp - chi_pen);
				if (paired.length > 1) {
					if (mapq_pe > paired[0][2] - paired[1][2])
						mapq_pe = paired[0][2] - paired[1][2];
				}
				for (var i = 0; i < 2; ++i)
					if (mapq_se[i] < mapq_pe) mapq_se[i] = mapq_pe;
				// NOTE(review): slot 0 receives the forward-strand hit and slot 1 the
				// reverse-strand hit, while mapq_se[] is indexed by read order -
				// confirm that mixing the two indexings is intended.
				for (var i = 0; i < 2; ++i) {
					best_read[i] = a[paired[0][i]];
					best_read[i][11] = mapq_se[i];
				}
			}
		}
	}

	if (output_paf) {
		for (var i = 0; i < n_reads; ++i)
			print(best_read[i].join("\t"));
	} else {
		for (var i = 0; i < n_reads; ++i) {
			var m, flag = 0, t = best_read[i];
			if (n_reads == 2) flag |= 1; // 0x1: paired
			if ((m = /\/([12])$/.exec(t[0])) != null) {
				t[0] = t[0].replace(/\/([12])$/, "");
				// m is the match array; the captured digit is m[1].
				flag |= m[1] == '1'? 64 : 128; // 0x40: first in pair, 0x80: second in pair
			}
			if (t[4] == '-') flag |= 16; // 0x10: reverse strand
			if (n_reads == 2 && best_read[1 - i][4] == '-') flag |= 32; // 0x20: mate on reverse strand
			// Approximate CIGAR: soft clips for the unaligned query ends, one M run between.
			var cigar = '';
			if (t[2] != '0') cigar = t[2] + 'S';
			cigar += (parseInt(t[3], 10) - parseInt(t[2], 10)) + 'M';
			if (t[3] != t[1]) cigar += (parseInt(t[1], 10) - parseInt(t[3], 10)) + 'S';
			// Target start is printed 1-based (input value + 1).
			var s = [t[0], flag, t[5], t[7]+1, t[11], cigar, '*', 0, 0, '*', '*'];
			print(s.join("\t"));
		}
	}
}
// Stream the input line by line, collecting consecutive lines that share a read
// name (ignoring a trailing "/1" or "/2") and handing each completed group to
// process().
var last_name = null, a = [];
while (file.readline(buf) >= 0) {
	var t = buf.toString().split("\t");
	var name = t[0].replace(/\/[12]$/, "");
	if (name == last_name) { // same fragment as the previous line
		a.push(t);
		continue;
	}
	// A new fragment starts: flush the previous group (a no-op while it is empty).
	process(a);
	a = [t];
	last_name = name;
}
process(a); // flush the final group
file.close();
buf.destroy();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment