Created
November 29, 2012 17:04
-
-
Save radaniba/4170418 to your computer and use it in GitHub Desktop.
A simple script to draw sequence logos from residue frequency data. It doesn't do everything, but can at least serve as a basis for improvements. Usage is 'drawlogo.rb [options] FILE1 [FILE2 ...]'. Options can can be listed with the '-h' option, but it in
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # Draw a sequence logo in SVG. | |
| ### IMPORTS | |
| require 'test/unit/assertions' | |
| require 'optparse' | |
| require 'pp' | |
| require 'csv' | |
| require 'ostruct' | |
| require 'date' | |
| require 'time' | |
| require 'rvg/rvg' | |
| include Test::Unit::Assertions | |
| include Magick | |
| ### CONSTANTS & DEFINES | |
| GOLDEN_RATIO = 1.62 | |
| $RESIDUE_ALPHABET = "ACDEFGHIKLMNPQRSTVWYBXZJUO" | |
| $RESIDUE_PALETTE = { | |
| "A" => 'gray', | |
| "C" => 'lawngreen', | |
| "D" => 'red', | |
| "E" => 'red', | |
| "F" => 'blue', | |
| "H" => 'orchid', | |
| "I" => 'green', | |
| "N" => 'lightseagreen', | |
| "P" => 'peachpuff', | |
| "S" => 'orange', | |
| "X" => 'black', | |
| "G" => 'gray', | |
| "M" => 'lawngreen', | |
| "K" => 'blue', | |
| "R" => 'blue', | |
| "Y" => 'blue', | |
| "W" => 'orchid', | |
| "L" => 'green', | |
| "V" => 'green', | |
| "Q" => 'lightseagreen', | |
| "T" => 'orange', | |
| } | |
| $RESIDUE_PALETTE.default = "pink" | |
| $AMINO_PALETTE = $RESIDUE_PALETTE | |
| $DNA_PALETTE = $RESIDUE_PALETTE | |
| $DNA_PALETTE.default = "pink" | |
| $DEFAULT_COLOR = 'brown' | |
| $RESIDUE_IMGS = {} | |
| $SCRATCH_IMG_SIZE = 60 | |
| $SCRATCH_FONT_SIZE = 50 | |
| CANVAS_SCALE = 2 | |
| ### IMPLEMENTATION | |
| def prep_residue_images() | |
| $RESIDUE_ALPHABET.scan(/./m) { |r| | |
| # generate image for this char | |
| scratch = Image.new($SCRATCH_IMG_SIZE, $SCRATCH_IMG_SIZE) { |img| | |
| img.background_color = "transparent" | |
| } | |
| title = Draw.new.annotate(scratch, 0,0,0,0, r) { | |
| self.fill = $RESIDUE_PALETTE.fetch(r, $DEFAULT_COLOR) | |
| self.stroke = 'transparent' | |
| self.pointsize = $SCRATCH_FONT_SIZE | |
| self.font_weight = BoldWeight | |
| self.gravity = CenterGravity | |
| } | |
| scratch.trim!() | |
| $RESIDUE_IMGS[r] = scratch | |
| #scratch.write("#{r}.png") | |
| } | |
| end | |
| class LogoDrawer | |
| attr_accessor(:site_cnt, :row_cnt, :col_cnt) | |
| attr_accessor(:canvas_ht, :canvas_wt) | |
| attr_accessor(:margin, :inter_row_dist, :inter_cell_dist) | |
| attr_accessor(:cell_wt, :cell_ht) | |
| attr_accessor(:text_ht, :text_dist, :text_face) | |
| attr_accessor(:row_ht) | |
| attr_accessor(:canvas) | |
| attr_accessor(:palette) | |
| attr_accessor(:site_freqs) | |
| def initialize(site_freqs, opts={}) | |
| options = OpenStruct.new( | |
| { | |
| :col_cnt => 0, | |
| :height => GOLDEN_RATIO, | |
| :ordering => 'decreasing', | |
| :palette => $RESIDUE_PALETTE, | |
| }.merge(opts) | |
| ) | |
| # calc rows numbers and such | |
| @site_cnt = site_freqs.length | |
| if [nil, 0].member?(options.col_cnt) | |
| options.col_cnt = @site_cnt | |
| end | |
| @col_cnt = options.col_cnt | |
| @row_cnt = (@site_cnt / Float(@col_cnt)).ceil() | |
| # calc dimensions of page elements | |
| @margin = 10 | |
| @inter_row_dist = 10 | |
| @inter_cell_dist = 4 | |
| @cell_wt = 40 | |
| @cell_ht = options.height * @cell_wt | |
| @text_dist = 4 | |
| @text_ht = 12 # FIXME: how do we work out text height? | |
| @row_ht = @cell_ht + @text_ht + @text_dist + @inter_row_dist | |
| @row_wt = @col_cnt * (@cell_wt + @inter_cell_dist) - @inter_cell_dist | |
| # calc canvas size | |
| @canvas_wt = (@margin * 2) + @row_wt | |
| @canvas_ht = (@margin * 2) + (@row_ht * @row_cnt) | |
| # init canvas | |
| RVG::dpi = 72 | |
| @canvas = RVG.new(@canvas_wt*CANVAS_SCALE, @canvas_ht*CANVAS_SCALE) | |
| @canvas.background_fill = 'white' | |
| @canvas.viewbox(0,0,@canvas_wt, @canvas_ht) | |
| # store data | |
| @site_freqs = site_freqs | |
| end | |
| def draw() | |
| @site_freqs.each_with_index { |s, i| | |
| row_no = i / @col_cnt | |
| col_no = i % @col_cnt | |
| x = @margin + col_no * (@inter_cell_dist + @cell_wt) | |
| y = @margin + row_no * @row_ht | |
| @canvas.g.translate(x, y) { |group| | |
| draw_site(group, s, (i + 1).to_s) | |
| } | |
| } | |
| end | |
| def draw_site(group, site_data, label=nil) | |
| # draw the cell outline, mostly for debugging | |
| group.rect(@cell_wt, @cell_ht).styles(:fill=>'ghostwhite', | |
| :stroke=>'lightgray', :stroke_width=>1) | |
| # write label below | |
| if label && (0 < label.length) | |
| group.text(@cell_wt/2, @cell_ht + @text_dist + @text_ht) { |tbox| | |
| tbox.tspan(label).styles( | |
| :text_anchor=>'middle', :font_size=>@text_ht, | |
| :font_family=>'times', :font_style=>'normal', | |
| :font_weight=>'lighter', | |
| :stroke=>'transparent', :fill=>'black') | |
| } | |
| end | |
| # do the site data | |
| cumulative = 0.0 | |
| site_data.each { |sd| | |
| residue, freq = sd[0], sd[1] | |
| res_text = residue.upcase() | |
| res_image = $RESIDUE_IMGS[res_text] | |
| group.image(res_image, width=@cell_wt, height=(@cell_ht * freq), | |
| x=0, y=(@cell_ht * cumulative)).preserve_aspect_ratio('none') | |
| cumulative += freq | |
| } | |
| end | |
| def save(fpath) | |
| @canvas.draw.write(fpath) | |
| end | |
| end | |
| def interpolate(str, sub_hash) | |
| return str.gsub(/\{([^}]+)\}/) { |m| | |
| sub_hash[$1] | |
| } | |
| end | |
| ### MAIN | |
| # Parse commandline arguments. | |
| # | |
| def parse_clargs(arg_arr) | |
| clopts = { | |
| :col_cnt => 0, | |
| :height => GOLDEN_RATIO, | |
| :ordering => 'decreasing', | |
| :format => "png", | |
| :save => "{root}", | |
| :overwrite => false, | |
| :palette => $RESIDUE_PALETTE, | |
| :add_colors => {}, | |
| } | |
| OptionParser.new { |opts| | |
| opts.program_name = "drawlogo" | |
| opts.banner = "Draw a sequence logo in SVG." | |
| opts.separator("") | |
| opts.separator("The input is a CSV file with one row for each ") | |
| opts.separator("different residue at each site, of the form:") | |
| opts.separator(" <site index>, <residue>, <proportion>") | |
| opts.separator("") | |
| opts.separator("Usage: drawlogo.rb [options] FILE1 [FILE2 ...]") | |
| opts.on('-h', '--help', 'Display this screen') { | |
| puts opts | |
| exit | |
| } | |
| # row width, 0 to have all in one row | |
| opts.on('', '--columns INT', | |
| Integer, | |
| "How many sites to draw in every row") { |v| | |
| clopts[:col_cnt] = Integer(v) | |
| } | |
| # set the shape of the cell height | |
| opts.on('', '--relative-cell-height FLOAT', | |
| Float, | |
| "How high to make a site relative to its width") { |v| | |
| clopts[:height] = Float(v) | |
| } | |
| # set colors to use | |
| opts.on('', '--palette STR', | |
| [:amino, :dna], | |
| "Which palette to use") { |v| | |
| if (v == :amino) | |
| clopts[:palette] = $AMINO_PALETTE | |
| elsif (v == :dna) | |
| clopts[:palette] = $DNA_PALETTE | |
| end | |
| } | |
| # set colors to use | |
| opts.on('', '--add-to-palette STR', | |
| "Add these colors to the palette") { |v| | |
| new_colors = {} | |
| v.split(',').each { |c| | |
| clr = c.split("=") | |
| new_colors[clr[0]] = clr[1] | |
| } | |
| clopts[:add_colors] = new_colors | |
| } | |
| # save parameters | |
| opts.on('', '--format STR', | |
| "Save output in this format") { |v| | |
| clopts[:format] = v.downcase() | |
| } | |
| opts.on('', '--save STR', | |
| "Name output files according this template") { |v| | |
| clopts[:save] = v | |
| } | |
| opts.on('-o', '--overwrite', | |
| "Overwrite pre-existing files") { | |
| clopts[:overwrite] = true | |
| } | |
| begin | |
| opts.parse!(arg_arr) | |
| rescue OptionParser::InvalidOption => e | |
| puts e | |
| puts opts | |
| exit 1 | |
| end | |
| } | |
| pargs = arg_arr | |
| assert(0 < pargs.length, "need a file to work on") | |
| assert(pargs.length == 1, "currently run on only one file at a time") | |
| return clopts, pargs | |
| end | |
| # Main script functionality. | |
| # | |
| def main() | |
| clopts, infiles = parse_clargs(ARGV) | |
| infiles.each { |f| | |
| puts "== Reading '#{f}' ..." | |
| root_name = File.basename(f, File.extname(f)) | |
| # read in csv file and make site data | |
| raw_seq_data = {} | |
| in_hndl = File.open(f, 'rb') | |
| CSV::Reader.parse(in_hndl) { |row| | |
| assert_equal(3, row.length, "row length should be 3") | |
| site_indx, residue, frac = Integer(row[0]), row[1], Float(row[2]) | |
| new_data = [residue, frac] | |
| prev_data = raw_seq_data[site_indx] | |
| if prev_data | |
| raw_seq_data[site_indx] << new_data | |
| else | |
| raw_seq_data[site_indx] = [new_data] | |
| end | |
| } | |
| in_hndl.close() | |
| # post-process site data | |
| puts "Sorting site data ..." | |
| # sort sites by index, sort residues by freq | |
| sorted_seq_data = [] | |
| site_indices = raw_seq_data.keys.sort() | |
| site_indices.each { |e| | |
| site_data = raw_seq_data[e] | |
| # TODO: later introduce other sorting options | |
| sorted_site_data = site_data.sort { |a,b| b[1] <=> a[1] } | |
| sorted_seq_data << sorted_site_data | |
| } | |
| # draw the damn thing | |
| puts "Preparing residue images ..." | |
| prep_residue_images() | |
| drwr = LogoDrawer.new(sorted_seq_data, clopts) | |
| puts "Drawing logo ..." | |
| drwr.draw() | |
| puts "Constructing pallete ..." | |
| pal = clopts[:palette] | |
| clopts[:add_colors].each_pair { |k,v| | |
| pal[k] = v | |
| } | |
| # write output | |
| # make filename | |
| ext = File.extname(f) | |
| subs = { | |
| "base" => File.basename(f), | |
| "root" => File.basename(f, ext), | |
| "date" => Date.today.to_s(), | |
| "time" => Time.now.strftime(fmt='%T'), | |
| "datetime" => DateTime.now.strftime(fmt='%F T%T'), | |
| } | |
| out_name = "#{interpolate(clopts[:save], subs)}.#{clopts[:format]}" | |
| puts "Saving drawing to '#{out_name}' ..." | |
| # do the writing | |
| if File.exists?(out_name) | |
| if ! clopts[:overwrite] | |
| puts "Can't overwrite existing file" | |
| next | |
| end | |
| end | |
| drwr.save(out_name) | |
| puts "Saved." | |
| } | |
| puts "== Finished." | |
| end | |
| if $0 == __FILE__ | |
| main() | |
| end | |
| ### END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment