Created
November 30, 2011 22:06
-
-
Save ship561/1411240 to your computer and use it in GitHub Desktop.
update on sto reading
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn join-sto-fasta-lines
  "Read INFILESPEC with sto-GC-and-seq-lines and join sequence lines
   that share a name into a single entry per name.

   ORIGIN, when it is neither nil nor the empty string, is inserted as
   the second element of the returned gc-lines (directly after the
   first line).

   Returns a vector [gc-lines seq-lines cons-lines] where seq-lines and
   cons-lines are sequences of [name [uid joined-text]] entries:
   cons-lines holds the entries whose name starts with '//' or '#',
   seq-lines holds all the others.  Either may be nil when no entry
   falls in that group."
  [infilespec origin]
  (let [[seq-lines gc-lines] (sto-GC-and-seq-lines infilespec)
        ;; A nil origin used to pass the (not= origin "") guard and was
        ;; spliced into the header, later printing as the text "nil".
        gc-lines (if (and origin (not= origin ""))
                   (concat (take 1 gc-lines) [origin] (drop 1 gc-lines))
                   gc-lines)
        ;; Lines starting with '#' are split on runs of two or more
        ;; whitespace characters, so a single space inside the tag part
        ;; does not split it; every other line is split on any
        ;; whitespace run.
        ;; NOTE(review): the regex-first argument order matches
        ;; clojure.contrib.string/split -- confirm the `str` alias.
        split-line (fn [^String l]
                     (if (.startsWith l "#")
                       (str/split #"\s{2,}+" l)
                       (str/split #"\s+" l)))
        ;; name -> [uid accumulated-text]; the uid stored for a name is
        ;; the one generated when that name was first seen.
        joined (reduce
                (fn [m l]
                  (let [[nm sq] (split-line l)
                        prev (get m nm [(gen-uid) ""])]
                    (assoc m nm [(first prev)
                                 (str (second prev) sq)])))
                {} seq-lines)
        ;; Order entries by uid.
        ;; NOTE(review): presumably gen-uid yields increasing values so
        ;; this restores first-seen order -- confirm.
        recombined-seqs (sort-by #(-> % second first) (vec joined))
        {seq-lines false cons-lines true}
        (group-by (fn [entry]
                    (let [^String nm (first entry)]
                      (or (.startsWith nm "//")
                          (.startsWith nm "#"))))
                  recombined-seqs)]
    [gc-lines seq-lines cons-lines]))
(defn join-sto-fasta-file
  "Rewrite the sto or fasta file IN-FILESPEC to OUT-FILESPEC with the
   pieces of each named entry joined onto a single line, as computed by
   join-sto-fasta-lines.

   The optional keyword argument :origin is a #=GF line naming the tool
   the file came from, for example '#=GF AU Infernal 1.0.2'.  It
   defaults to the empty string, meaning no such line is added.

   Output order: the header lines as returned, then the sequence
   entries, then the entries whose name starts with '//' or '#'.  Each
   entry is written as its name, a tab stop at column 40, then its
   joined text."
  [in-filespec out-filespec
   & {origin :origin :or {origin ""}}]
  (let [[header-lines entries annotations]
        (join-sto-fasta-lines in-filespec origin)
        ;; Each entry is [name [uid text]]; the uid is not written out.
        emit (fn [[nm [_ sq]]]
               (cl-format true "~A~40T~A~%" nm sq))]
    (io/with-out-writer (fs/fullpath out-filespec)
      (doseq [hl header-lines]
        (println hl))
      ;; Sequence entries first, then the '//' and '#' entries.
      (doseq [group [entries annotations]
              entry group]
        (emit entry)))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment