Skip to content

Instantly share code, notes, and snippets.

@radaniba
Created November 29, 2012 17:04
Show Gist options
  • Select an option

  • Save radaniba/4170418 to your computer and use it in GitHub Desktop.

Select an option

Save radaniba/4170418 to your computer and use it in GitHub Desktop.
A simple script to draw sequence logos from residue frequency data. It doesn't do everything, but can at least serve as a basis for improvements. Usage is 'drawlogo.rb [options] FILE1 [FILE2 ...]'. Options can can be listed with the '-h' option, but it in
#!/usr/bin/env ruby
# Draw a sequence logo in SVG.
### IMPORTS
require 'test/unit/assertions'
require 'optparse'
require 'pp'
require 'csv'
require 'ostruct'
require 'date'
require 'time'
require 'rvg/rvg'
include Test::Unit::Assertions
include Magick
### CONSTANTS & DEFINES
GOLDEN_RATIO = 1.62
$RESIDUE_ALPHABET = "ACDEFGHIKLMNPQRSTVWYBXZJUO"
$RESIDUE_PALETTE = {
"A" => 'gray',
"C" => 'lawngreen',
"D" => 'red',
"E" => 'red',
"F" => 'blue',
"H" => 'orchid',
"I" => 'green',
"N" => 'lightseagreen',
"P" => 'peachpuff',
"S" => 'orange',
"X" => 'black',
"G" => 'gray',
"M" => 'lawngreen',
"K" => 'blue',
"R" => 'blue',
"Y" => 'blue',
"W" => 'orchid',
"L" => 'green',
"V" => 'green',
"Q" => 'lightseagreen',
"T" => 'orange',
}
$RESIDUE_PALETTE.default = "pink"
$AMINO_PALETTE = $RESIDUE_PALETTE
$DNA_PALETTE = $RESIDUE_PALETTE
$DNA_PALETTE.default = "pink"
$DEFAULT_COLOR = 'brown'
$RESIDUE_IMGS = {}
$SCRATCH_IMG_SIZE = 60
$SCRATCH_FONT_SIZE = 50
CANVAS_SCALE = 2
### IMPLEMENTATION
def prep_residue_images()
$RESIDUE_ALPHABET.scan(/./m) { |r|
# generate image for this char
scratch = Image.new($SCRATCH_IMG_SIZE, $SCRATCH_IMG_SIZE) { |img|
img.background_color = "transparent"
}
title = Draw.new.annotate(scratch, 0,0,0,0, r) {
self.fill = $RESIDUE_PALETTE.fetch(r, $DEFAULT_COLOR)
self.stroke = 'transparent'
self.pointsize = $SCRATCH_FONT_SIZE
self.font_weight = BoldWeight
self.gravity = CenterGravity
}
scratch.trim!()
$RESIDUE_IMGS[r] = scratch
#scratch.write("#{r}.png")
}
end
class LogoDrawer
attr_accessor(:site_cnt, :row_cnt, :col_cnt)
attr_accessor(:canvas_ht, :canvas_wt)
attr_accessor(:margin, :inter_row_dist, :inter_cell_dist)
attr_accessor(:cell_wt, :cell_ht)
attr_accessor(:text_ht, :text_dist, :text_face)
attr_accessor(:row_ht)
attr_accessor(:canvas)
attr_accessor(:palette)
attr_accessor(:site_freqs)
def initialize(site_freqs, opts={})
options = OpenStruct.new(
{
:col_cnt => 0,
:height => GOLDEN_RATIO,
:ordering => 'decreasing',
:palette => $RESIDUE_PALETTE,
}.merge(opts)
)
# calc rows numbers and such
@site_cnt = site_freqs.length
if [nil, 0].member?(options.col_cnt)
options.col_cnt = @site_cnt
end
@col_cnt = options.col_cnt
@row_cnt = (@site_cnt / Float(@col_cnt)).ceil()
# calc dimensions of page elements
@margin = 10
@inter_row_dist = 10
@inter_cell_dist = 4
@cell_wt = 40
@cell_ht = options.height * @cell_wt
@text_dist = 4
@text_ht = 12 # FIXME: how do we work out text height?
@row_ht = @cell_ht + @text_ht + @text_dist + @inter_row_dist
@row_wt = @col_cnt * (@cell_wt + @inter_cell_dist) - @inter_cell_dist
# calc canvas size
@canvas_wt = (@margin * 2) + @row_wt
@canvas_ht = (@margin * 2) + (@row_ht * @row_cnt)
# init canvas
RVG::dpi = 72
@canvas = RVG.new(@canvas_wt*CANVAS_SCALE, @canvas_ht*CANVAS_SCALE)
@canvas.background_fill = 'white'
@canvas.viewbox(0,0,@canvas_wt, @canvas_ht)
# store data
@site_freqs = site_freqs
end
def draw()
@site_freqs.each_with_index { |s, i|
row_no = i / @col_cnt
col_no = i % @col_cnt
x = @margin + col_no * (@inter_cell_dist + @cell_wt)
y = @margin + row_no * @row_ht
@canvas.g.translate(x, y) { |group|
draw_site(group, s, (i + 1).to_s)
}
}
end
def draw_site(group, site_data, label=nil)
# draw the cell outline, mostly for debugging
group.rect(@cell_wt, @cell_ht).styles(:fill=>'ghostwhite',
:stroke=>'lightgray', :stroke_width=>1)
# write label below
if label && (0 < label.length)
group.text(@cell_wt/2, @cell_ht + @text_dist + @text_ht) { |tbox|
tbox.tspan(label).styles(
:text_anchor=>'middle', :font_size=>@text_ht,
:font_family=>'times', :font_style=>'normal',
:font_weight=>'lighter',
:stroke=>'transparent', :fill=>'black')
}
end
# do the site data
cumulative = 0.0
site_data.each { |sd|
residue, freq = sd[0], sd[1]
res_text = residue.upcase()
res_image = $RESIDUE_IMGS[res_text]
group.image(res_image, width=@cell_wt, height=(@cell_ht * freq),
x=0, y=(@cell_ht * cumulative)).preserve_aspect_ratio('none')
cumulative += freq
}
end
def save(fpath)
@canvas.draw.write(fpath)
end
end
def interpolate(str, sub_hash)
return str.gsub(/\{([^}]+)\}/) { |m|
sub_hash[$1]
}
end
### MAIN
# Parse commandline arguments.
#
def parse_clargs(arg_arr)
clopts = {
:col_cnt => 0,
:height => GOLDEN_RATIO,
:ordering => 'decreasing',
:format => "png",
:save => "{root}",
:overwrite => false,
:palette => $RESIDUE_PALETTE,
:add_colors => {},
}
OptionParser.new { |opts|
opts.program_name = "drawlogo"
opts.banner = "Draw a sequence logo in SVG."
opts.separator("")
opts.separator("The input is a CSV file with one row for each ")
opts.separator("different residue at each site, of the form:")
opts.separator(" <site index>, <residue>, <proportion>")
opts.separator("")
opts.separator("Usage: drawlogo.rb [options] FILE1 [FILE2 ...]")
opts.on('-h', '--help', 'Display this screen') {
puts opts
exit
}
# row width, 0 to have all in one row
opts.on('', '--columns INT',
Integer,
"How many sites to draw in every row") { |v|
clopts[:col_cnt] = Integer(v)
}
# set the shape of the cell height
opts.on('', '--relative-cell-height FLOAT',
Float,
"How high to make a site relative to its width") { |v|
clopts[:height] = Float(v)
}
# set colors to use
opts.on('', '--palette STR',
[:amino, :dna],
"Which palette to use") { |v|
if (v == :amino)
clopts[:palette] = $AMINO_PALETTE
elsif (v == :dna)
clopts[:palette] = $DNA_PALETTE
end
}
# set colors to use
opts.on('', '--add-to-palette STR',
"Add these colors to the palette") { |v|
new_colors = {}
v.split(',').each { |c|
clr = c.split("=")
new_colors[clr[0]] = clr[1]
}
clopts[:add_colors] = new_colors
}
# save parameters
opts.on('', '--format STR',
"Save output in this format") { |v|
clopts[:format] = v.downcase()
}
opts.on('', '--save STR',
"Name output files according this template") { |v|
clopts[:save] = v
}
opts.on('-o', '--overwrite',
"Overwrite pre-existing files") {
clopts[:overwrite] = true
}
begin
opts.parse!(arg_arr)
rescue OptionParser::InvalidOption => e
puts e
puts opts
exit 1
end
}
pargs = arg_arr
assert(0 < pargs.length, "need a file to work on")
assert(pargs.length == 1, "currently run on only one file at a time")
return clopts, pargs
end
# Main script functionality.
#
def main()
clopts, infiles = parse_clargs(ARGV)
infiles.each { |f|
puts "== Reading '#{f}' ..."
root_name = File.basename(f, File.extname(f))
# read in csv file and make site data
raw_seq_data = {}
in_hndl = File.open(f, 'rb')
CSV::Reader.parse(in_hndl) { |row|
assert_equal(3, row.length, "row length should be 3")
site_indx, residue, frac = Integer(row[0]), row[1], Float(row[2])
new_data = [residue, frac]
prev_data = raw_seq_data[site_indx]
if prev_data
raw_seq_data[site_indx] << new_data
else
raw_seq_data[site_indx] = [new_data]
end
}
in_hndl.close()
# post-process site data
puts "Sorting site data ..."
# sort sites by index, sort residues by freq
sorted_seq_data = []
site_indices = raw_seq_data.keys.sort()
site_indices.each { |e|
site_data = raw_seq_data[e]
# TODO: later introduce other sorting options
sorted_site_data = site_data.sort { |a,b| b[1] <=> a[1] }
sorted_seq_data << sorted_site_data
}
# draw the damn thing
puts "Preparing residue images ..."
prep_residue_images()
drwr = LogoDrawer.new(sorted_seq_data, clopts)
puts "Drawing logo ..."
drwr.draw()
puts "Constructing pallete ..."
pal = clopts[:palette]
clopts[:add_colors].each_pair { |k,v|
pal[k] = v
}
# write output
# make filename
ext = File.extname(f)
subs = {
"base" => File.basename(f),
"root" => File.basename(f, ext),
"date" => Date.today.to_s(),
"time" => Time.now.strftime(fmt='%T'),
"datetime" => DateTime.now.strftime(fmt='%F T%T'),
}
out_name = "#{interpolate(clopts[:save], subs)}.#{clopts[:format]}"
puts "Saving drawing to '#{out_name}' ..."
# do the writing
if File.exists?(out_name)
if ! clopts[:overwrite]
puts "Can't overwrite existing file"
next
end
end
drwr.save(out_name)
puts "Saved."
}
puts "== Finished."
end
if $0 == __FILE__
main()
end
### END
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment