Created
September 16, 2010 19:58
-
-
Save vjt/583061 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HTML to RTF conversion routine
# Requires panmind-rtf gem, version 0.4.1 and up
# Really quick & dirty, needs love and documentation
# Injects a .to_rtf method into any Nokogiri NodeSet
# and returns an RTF::Document instance
#
# If you find this code useful, contribute back to the
# panmind-rtf gem! http://github.com/Panmind/rtf
#
# (C) 2010 mind2mind.is, spun off http://panmind.org/
# Released under the terms of the Ruby license.
#
# Author: Marcello Barnaba <[email protected]>
#
require 'nokogiri' | |
require 'rtf' | |
module PM
  # HTML-to-RTF conversion helpers.
  #
  # The module extends itself, so every method defined directly in it is
  # callable on the module, e.g. <tt>PM::RTF.from_html(markup)</tt>.
  module RTF
    extend self

    # Parses +html+ and converts the children of its <body> element.
    #
    # html - markup handed to ::Nokogiri::HTML::Document.parse.
    #
    # Returns the result of calling +to_rtf+ (without arguments) on the
    # body's children, i.e. a document freshly created by PM::RTF.new.
    def from_html(html)
      document = ::Nokogiri::HTML::Document.parse(html)
      document.css('body').children.to_rtf
    end

    # Creates a new ::RTF::Document.
    #
    # font - key understood by #font (defaults to :default).
    #
    # Returns the new ::RTF::Document.
    def new(font = :default)
      # Explicit receiver: the parameter has the same name as the helper.
      ::RTF::Document.new(self.font(font))
    end

    # Builds the ::RTF::Font registered under +key+.
    #
    # key - :default (roman family, 'Times New Roman') or
    #       :monospace (modern family, 'Courier New').
    #
    # Returns a new ::RTF::Font.
    # Raises ArgumentError when +key+ is not one of the known keys.
    def font(key)
      family, face =
        case key
        when :default   then [::RTF::Font::ROMAN,  'Times New Roman']
        when :monospace then [::RTF::Font::MODERN, 'Courier New']
        else
          # Fail with a message naming the key instead of the arity error
          # that splatting +nil+ into Font.new used to produce.
          raise ArgumentError, "unknown font key: #{key.inspect}"
        end

      ::RTF::Font.new(family, face)
    end

    # Builds the ::RTF::CharacterStyle used for a heading.
    #
    # key - :h2 or :h3; any other key yields an unmodified style.
    #
    # Returns a new ::RTF::CharacterStyle.
    def style(key)
      # Object#tap is core Ruby; the previous +returning+ helper is only
      # available when ActiveSupport is loaded, which this file never requires.
      ::RTF::CharacterStyle.new.tap do |char_style|
        case key
        when :h2
          # NOTE(review): sizes are presumably half-points, as in RTF's \fs - confirm.
          char_style.font_size = 40
          char_style.bold      = true
        when :h3
          char_style.font_size = 28
          char_style.bold      = true
        end
      end
    end

    # Mixins that are included into the Nokogiri XML classes.
    module Nokogiri
      # Adds +to_rtf+ to node collections.
      module NodeSet
        # Converts every node of the set, in order, into +rtf+.
        #
        # rtf - target object the nodes write into; when nil a new
        #       document is created through PM::RTF.new.
        #
        # Returns the target object.
        def to_rtf(rtf = nil)
          (rtf || PM::RTF.new).tap do |target|
            each { |node| node.to_rtf(target) }
          end
        end
      end

      # Adds +to_rtf+ to single nodes.
      module Node
        # Emits this node into +rtf+, dispatching on the node name.
        #
        # Text nodes are appended, <br> becomes a line break, and the
        # element names listed below open the matching construct on +rtf+
        # and render their children inside it. Any other element is
        # skipped together with its descendants.
        #
        # rtf - object receiving the formatting calls.
        #
        # Returns +rtf+.
        def to_rtf(rtf)
          case name
          when 'text'                   then rtf << text
          when 'br'                     then rtf.line_break
          when 'b', 'strong'            then rtf.bold(&recurse)
          when 'i', 'em', 'cite'        then rtf.italic(&recurse)
          when 'u'                      then rtf.underline(&recurse)
          when 'blockquote', 'p', 'div' then rtf.paragraph(&recurse)
          # <sup> is superscript and <sub> is subscript; the two calls
          # used to be swapped.
          when 'sup'                    then rtf.superscript(&recurse)
          when 'sub'                    then rtf.subscript(&recurse)
          when 'ul'                     then rtf.list(:bullets, &recurse)
          when 'ol'                     then rtf.list(:decimal, &recurse)
          when 'li'                     then rtf.item(&recurse)
          when 'a'                      then rtf.link(self[:href], &recurse)
          when 'h2'                     then rtf.apply(PM::RTF.style(:h2), &recurse)
          when 'h3'                     then rtf.apply(PM::RTF.style(:h3), &recurse)
          when 'code'                   then rtf.font(PM::RTF.font(:monospace), &recurse)
          end

          rtf
        end

        # Returns a lambda that renders this node's children into the
        # object it is called with; it is passed as the block of the
        # formatting calls made by #to_rtf.
        def recurse
          lambda { |target| children.to_rtf(target) }
        end
      end
    end
  end
end
# Mix the converters into Nokogiri so node sets and single nodes gain +to_rtf+.
# +send+ is used so the call also works where Module#include is private.
Nokogiri::XML::NodeSet.send(:include, PM::RTF::Nokogiri::NodeSet)
Nokogiri::XML::Node.send(:include, PM::RTF::Nokogiri::Node)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
what version of nokogiri are you using?