Last active
August 29, 2015 14:08
-
-
Save lytze/296b4cae5bde9605c5e5 to your computer and use it in GitHub Desktop.
A 'Find k-mer' solution in RScript (Running under shell/terminal)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/Rscript | |
| # Lytze Nov 4 2014 | |
| # An example of visit stdin (the real stdin, not the console) using RScript | |
| # NO details about seting opts and args | |
| # Usage: | |
| ## $ chmod u+x find_k_mer.R | |
| ## $ ./find_k_mer.R | |
| # OR | |
| ## $ ./find_k_mer.R < inputfile | |
| # Note: | |
| ## The 'Find k-mer' problem is a model problem in bioinfomatics. The promblem is | |
| ## about taking a DNA sequence and a short motif as input (strings), and find the | |
| ## number of the motif in the sequense. This is really simple and served as one of | |
| ## the rudimentals in bioinfomatics. | |
| stdin <- file('stdin'); # Note that the default 'stdin' is not actually the standard input stream | |
| open(stdin); | |
| sequence <- readLines(stdin, 1); | |
| tofind <- readLines(stdin, 1); | |
| close(stdin); | |
| sequence <- strsplit(sequence, '')[[1]]; | |
| tofind <- strsplit(tofind, '')[[1]]; | |
| ## R treat char and string all as 'character', these lines will separete the string | |
| if (length(sequence) > 70) { | |
| cat('Input Seq (heading):', paste(head(sequence, 20), collapse = '')); | |
| cat(' ...', paste(tail(sequence, 20), collapse = ''), '\n'); | |
| } else { | |
| cat('Input Seq (heading):', paste(head(sequence, 50), collapse = ''), '\n'); | |
| } | |
| cat('Input Mer (heading):', paste(head(tofind), collapse = ''), '\n\n'); | |
| ## Echo back the inputs | |
| ## May I should add optings to earase these echos? :) | |
| count <- 0; | |
| for (i in 1:(length(sequence) - length(tofind) + 1)) { | |
| if (sequence[i] == tofind[1]) { | |
| # start matching when the heading bases are the same | |
| j <- 1; | |
| while (j < length(tofind)) { | |
| if (sequence[i + j] != tofind[1 + j]) break; | |
| # go on matching if current base is the same | |
| j <- j + 1; | |
| } | |
| if (j == length(tofind)) count <- count + 1; | |
| } | |
| } | |
| cat('#mer =', count, '\n'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment