Created
April 28, 2016 07:03
-
-
Save inutano/44fa8bd01c12465b322df5b058b8abe0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# :)
#
# Trim FASTQ reads at an adapter/marker pattern and print the result to stdout.
#
# Usage: ruby trim.rb <input.fastq> <pattern> [HEAD|TAIL]
#
#   HEAD (default)  drop everything up to and including the FIRST match
#   TAIL            drop everything after the LAST match (the match is kept)
#
# <pattern> is compiled as a Ruby regular expression. Reads in which the
# pattern does not occur are passed through unchanged.

TRIM_USAGE = "Usage: ruby trim.rb <input.fastq> <pattern> [HEAD|TAIL]"
TRIM_MODES = %w[HEAD TAIL].freeze

# Trim a single FASTQ record.
#
# @param record [Array<String>] the four chomped lines of one read:
#   read id, sequence, comment ("+") line, quality string
# @param pattern [Regexp] pattern to search for in the sequence
# @param mode [String] "HEAD" or "TAIL"
# @return [Array<String>] the record in FASTQ line order, with sequence and
#   quality cut at the same offset
def trim_read(record, pattern, mode)
  read_id, sequence, comment, phred_score = record

  # End offset of every non-overlapping match. Using the real match end
  # (instead of start + pattern.size) keeps the cut correct when the pattern
  # contains regex syntax and its text length differs from the match length.
  match_ends = sequence.to_enum(:scan, pattern).map { Regexp.last_match.end(0) }
  return record if match_ends.empty?

  if mode == "HEAD"
    cut = match_ends.first
    # to_s: the quality string may be shorter than the cut offset in a
    # malformed record, in which case slicing returns nil.
    [read_id, sequence[cut..-1].to_s, comment, phred_score[cut..-1].to_s]
  else
    cut = match_ends.last
    [read_id, sequence[0, cut], comment, phred_score[0, cut]]
  end
end

# Trim every read of a FASTQ file given as an array of lines.
#
# @param lines [Array<String>] raw FASTQ lines (trailing newlines allowed)
# @param trimming_sequence [String] regular expression source to search for
# @param head_or_tail [String] "HEAD" or "TAIL"
# @return [Array<String>] trimmed FASTQ lines, four per read, without newlines
# @raise [ArgumentError] if the mode is unknown or the last record is truncated
# @raise [RegexpError] if trimming_sequence is not a valid regular expression
def trim_fastq(lines, trimming_sequence, head_or_tail = "HEAD")
  # Validate up front so a bad mode is reported even when no read matches.
  raise ArgumentError, "Specify HEAD or TAIL" unless TRIM_MODES.include?(head_or_tail)

  pattern = Regexp.new(trimming_sequence)

  lines.map(&:chomp).each_slice(4).flat_map do |record|
    if record.size < 4
      raise ArgumentError, "incomplete FASTQ record starting at #{record.first.inspect}"
    end

    trim_read(record, pattern, head_or_tail)
  end
end

if __FILE__ == $0
  if ARGV.empty?
    # Bare invocation: show how to call the script instead of crashing.
    warn TRIM_USAGE
  else
    # Arguments
    input_fastq, trimming_sequence, head_or_tail = ARGV

    begin
      raise ArgumentError, "missing trimming sequence\n#{TRIM_USAGE}" if trimming_sequence.nil?

      # File.readlines closes the file and, unlike Kernel#open, never treats
      # a leading "|" in the path as a command to execute.
      trimmed = trim_fastq(File.readlines(input_fastq), trimming_sequence, head_or_tail || "HEAD")

      # output trimmed fastq data (nothing at all for an empty input file)
      puts trimmed unless trimmed.empty?
    rescue ArgumentError, RegexpError, SystemCallError => e
      warn "ERROR: #{e.message}"
      exit 1
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to execute:
Remove the head of each sequence up to and including the first match of "ATCC" (the quality string is trimmed to match):
$ ruby trim.rb data.fastq "ATCC" "HEAD"
Remove the tail of each sequence after the last match of "ATCC" (the match itself is kept; the quality string is trimmed to match):
$ ruby trim.rb data.fastq "ATCC" "TAIL"