Created
June 5, 2012 18:32
-
-
Save ship561/2876766 to your computer and use it in GitHub Desktop.
Implementation of Nussinov RNA folding algorithm in Clojure. This algorithm will find the optimal structure with the max number of base pairs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn nussinov | |
"Uses the Nussinov algorithm to compute an optimal RNA structure by | |
maximizing base pairs in the structure. The function requires an | |
input string s. The output is a list of base pair locations [i | |
j]. It will also print out the sequence and the structure so that it | |
can be visually inspected. An example sequence of 'GGGAAAUCC' will | |
give the answer ([2 6] [1 7] [0 8]). Locations are 0 based (ie seq | |
goes from 0 to n-1)." | |
[s] | |
(let [s (.toUpperCase s) | |
n (count s) | |
delta (fn [i j] ;;base pairing score | |
(let [bp {"AU" 1 "UA" 1 "GC" 1 "CG" 1 "GU" 1 "UG" 1} | |
b1 (subs s i (inc i)) | |
b2 (subs s j (inc j))] | |
(get bp (str b1 b2) 0))) | |
loc (filter #(and (< (second %) n) ;possible locations in the table to fill in | |
(> (- (second %) (first %)) 3)) | |
(for [k (range (- n 1)) ;;fill in table diagonally | |
i (range n)] | |
[i (+ i k 1)])) | |
gamma (reduce (fn [m [i j]] ;;table of values | |
(let [g (fn [i j] ;;determines where the | |
;;current gamma(i,j) score comes from | |
(max (get m [(inc i) j] 0) ;;i unpaired | |
(get m [i (dec j)] 0) ;;j unpaired | |
(+ (get m [(inc i) (dec j)] 0) (delta i j)) ;;i j paired | |
(apply max (if (empty? (range (inc i) j)) ;;bifurcation | |
'(-1) | |
(for [k (range (inc i) j)] | |
(+ (get m [i k] 0) (get m [(inc k) j] 0)))))))] | |
(assoc m [i j] (g i j)))) | |
{} loc) | |
;;traceback starts at [0 n-1] | |
traceback (loop [x (list [0 (dec n)]) ;;x is stack of positions to check | |
bp (list)] | |
(if-not (empty? x) | |
(let [[i j] (first x) | |
x (pop x)] | |
(if (< i j) | |
(let [ks (filter (fn [k] | |
(= (+ (get gamma [i k] 0) (get gamma [(inc k) j] 0)) (get gamma [i j] 0))) | |
(range (inc i) j))] | |
;(prn i j (get gamma [i j] 0) ks) | |
(cond | |
(= (get gamma [(inc i) j] 0) (get gamma [i j] 0)) ;;i unpaired | |
(recur (conj x [(inc i) j]) | |
bp) | |
(= (get gamma [i (dec j)] 0) (get gamma [i j] 0)) ;;j unpaired | |
(recur (conj x [i (dec j)]) | |
bp) | |
(= (+ (get gamma [(inc i) (dec j)] 0) (delta i j)) (get gamma [i j] 0)) ;;i j base paire | |
(recur (conj x [(inc i) (dec j)]) | |
(conj bp [i j])) | |
(not (empty? ks)) ;;bifurcation | |
(recur (conj x [i (first ks)] [(inc (first ks)) j]) | |
bp) | |
;:else ;;not sure if necessary. only 4 possible conditions | |
;(recur x | |
; bp) | |
)) | |
(recur x | |
bp))) | |
bp)) | |
structure (loop [bp traceback | |
st (apply str (repeat n "."))] | |
(if-not (empty? bp) | |
(let [[i j] (first bp)] | |
(recur (rest bp) | |
(str (subs st 0 i) "(" (subs st (inc i) j) ")" (subs st (inc j) n)))) | |
st))] | |
(prn s) | |
(prn structure) | |
traceback)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment