| title | date | file |
|---|---|---|
| Podcast Number One | 2014-12-12 12:40 CST | one.mp3 |
Blah Blah.
# Register the podcast feed page with its layout disabled.
page('/podcast.xml', :layout => false)

# Methods defined in the helpers block are available in templates
helpers do
  # Builds the path of an article's audio file under source/audio.
  #
  # article - object exposing +data.file+ (the audio file name).
  #
  # Returns the path as a String.
  def podcast_source_path(article)
    audio_file = article.data.file
    "source/audio/#{audio_file}"
  end
end
| title | date | file |
|---|---|---|
| Podcast Number One | 2014-12-12 12:40 CST | one.mp3 |
Blah Blah.
| # -*- coding: utf-8 -*- | |
| require 'mp3info' | |
| require 'nokogiri' | |
# Converts a string of HTML into a plain text approximation.
#
# The main entry point is +HtmlToPlainText.plain_text+; +HtmlToPlainText.truncate+
# shortens a string to a maximum length. Parsing relies on Nokogiri, which the
# enclosing file is expected to have loaded.
class HtmlToPlainText
  # Tag-name lookup tables (tag name => true).
  IGNORE_TAGS = %w(script style object applet iframe).each_with_object({}) { |tag, set| set[tag] = true }.freeze
  PARAGRAPH_TAGS = %w(p h1 h2 h3 h4 h5 h6 table ol ul dl dd blockquote dialog figure aside section).each_with_object({}) { |tag, set| set[tag] = true }.freeze
  BLOCK_TAGS = %w(div address li dt center del article header footer nav pre legend tr).each_with_object({}) { |tag, set| set[tag] = true }.freeze
  WHITESPACE = [" ", "\n", "\r"].freeze
  PLAINTEXT = "plaintext".freeze
  PRE = "pre".freeze
  BR = "br".freeze
  HR = "hr".freeze
  TD = "td".freeze
  TH = "th".freeze
  TR = "tr".freeze
  OL = "ol".freeze
  UL = "ul".freeze
  LI = "li".freeze
  A = "a".freeze
  TABLE = "table".freeze
  # Starting labels for ordered lists; nesting levels alternate between them.
  NUMBERS = ["1", "a"].freeze
  ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
  HTML_PATTERN = /[<&]/.freeze
  TRAILING_WHITESPACE = /[ \t]+$/.freeze
  BODY_TAG_XPATH = "/html/body".freeze
  CARRIDGE_RETURN_PATTERN = /\r(\n?)/.freeze
  LINE_BREAK_PATTERN = /[\n\r]/.freeze
  NON_PROTOCOL_PATTERN = /:\/?\/?(.*)/.freeze
  NOT_WHITESPACE_PATTERN = /\S/.freeze
  SPACE = " ".freeze
  EMPTY = "".freeze
  NEWLINE = "\n".freeze
  HREF = "href".freeze
  TABLE_SEPARATOR = " | ".freeze

  class << self
    # Shorten +str+ so that, including the omission marker, it is at most
    # +truncate_at+ characters long.
    #
    # str         - the String to shorten.
    # truncate_at - maximum length of the result.
    # options     - Hash of options (never modified):
    #               :omission  - marker appended to a shortened string (default '...').
    #               :separator - String or Regexp; when given, the cut is moved back
    #                            to the last occurrence at or before the limit.
    #
    # Returns a new String. Strings that already fit are returned as a copy.
    # When +truncate_at+ is smaller than the omission itself, only the omission
    # is returned.
    def truncate(str, truncate_at, options = {})
      return str.dup unless str.length > truncate_at

      # Read the default locally instead of writing it into the caller's hash.
      omission = options[:omission] || '...'
      # Clamp at zero: a negative end index in str[0...stop] would count from
      # the end of the string and keep almost all of it.
      room = [truncate_at - omission.length, 0].max
      stop = if options[:separator]
               str.rindex(options[:separator], room) || room
             else
               room
             end
      "#{str[0...stop]}#{omission}"
    end

    # Convert some HTML into a plain text approximation.
    #
    # html - String of HTML, or nil.
    #
    # Returns nil for nil input, a copy of the input when it contains neither
    # '<' nor '&', nil when the parsed document has no body element, and the
    # converted text otherwise.
    def plain_text(html)
      return nil if html.nil?
      return html.dup unless html =~ HTML_PATTERN

      body = Nokogiri::HTML::Document.parse(html).xpath(BODY_TAG_XPATH).first
      return unless body

      convert_node_to_plain_text(body).strip.gsub(CARRIDGE_RETURN_PATTERN, NEWLINE)
    end

    private

    # Convert an HTML node to plain text. This method is called recursively with the
    # output buffer and the formatting options for special tags.
    #
    # parent  - the node whose children are rendered.
    # out     - String buffer that is appended to in place.
    # options - Hash of formatting state (:pre, :list, :ul, :ol, :number).
    #
    # Returns +out+.
    def convert_node_to_plain_text(parent, out = '', options = {})
      if PARAGRAPH_TAGS.include?(parent.name)
        append_paragraph_breaks(out)
      elsif BLOCK_TAGS.include?(parent.name)
        append_block_breaks(out)
      end

      format_list_item(out, options) if parent.name == LI
      out << "| " if parent.name == TR && data_table?(parent.parent)

      parent.children.each do |node|
        if node.text? || node.cdata?
          text = node.text
          unless options[:pre]
            # Outside <pre>, collapse line breaks and runs of spaces, and avoid
            # doubling whitespace that the buffer already ends with.
            text = text.gsub(LINE_BREAK_PATTERN, SPACE).squeeze(SPACE)
            text.lstrip! if WHITESPACE.include?(out[-1, 1])
          end
          out << text
        elsif node.name == PLAINTEXT
          out << node.text
        elsif node.element? && !IGNORE_TAGS.include?(node.name)
          convert_node_to_plain_text(node, out, child_options(node, options))

          # Closing formatting that follows the element's own content.
          if node.name == BR
            out.sub!(TRAILING_WHITESPACE, EMPTY)
            out << NEWLINE
          elsif node.name == HR
            out.sub!(TRAILING_WHITESPACE, EMPTY)
            out << NEWLINE unless out.end_with?(NEWLINE)
            out << "-------------------------------\n"
          elsif node.name == TD || node.name == TH
            out << (data_table?(parent.parent) ? TABLE_SEPARATOR : SPACE)
          elsif node.name == A
            href = node[HREF]
            out << " (#{href}) " if show_link_url?(node, href)
          elsif PARAGRAPH_TAGS.include?(node.name)
            append_paragraph_breaks(out)
          elsif BLOCK_TAGS.include?(node.name)
            append_block_breaks(out)
          end
        end
      end

      out
    end

    # Decide whether a link's URL should be written after its text: only for
    # absolute URLs whose visible text is non-blank and is not the URL itself,
    # with or without its scheme prefix.
    def show_link_url?(node, href)
      return false unless href && href =~ ABSOLUTE_URL_PATTERN

      label = node.text
      return false unless label =~ NOT_WHITESPACE_PATTERN

      label != href && label != href[NON_PROTOCOL_PATTERN, 1]
    end

    # Set formatting options that will be passed to child elements for a tag.
    # List and pre tags get a new merged hash; every other tag passes the
    # incoming hash through unchanged (the same object).
    def child_options(node, options)
      if node.name == UL
        level = options[:ul] || -1
        level += 1
        options.merge(:list => :ul, :ul => level)
      elsif node.name == OL
        level = options[:ol] || -1
        level += 1
        options.merge(:list => :ol, :ol => level, :number => NUMBERS[level % 2])
      elsif node.name == PRE
        options.merge(:pre => true)
      else
        options
      end
    end

    # Add double line breaks between paragraph elements. If line breaks already exist,
    # new ones will only be added to get to two.
    def append_paragraph_breaks(out)
      out.sub!(TRAILING_WHITESPACE, EMPTY)
      if out.end_with?(NEWLINE)
        out << NEWLINE unless out.end_with?("\n\n")
      else
        out << "\n\n"
      end
    end

    # Add a single line break between block elements. If a line break already exists,
    # none will be added.
    def append_block_breaks(out)
      out.sub!(TRAILING_WHITESPACE, EMPTY)
      out << NEWLINE unless out.end_with?(NEWLINE)
    end

    # Add an appropriate bullet or number to a list element.
    def format_list_item(out, options)
      if options[:list] == :ul
        out << "#{'*' * (options[:ul] + 1)} "
      elsif options[:list] == :ol
        number = options[:number]
        # Deliberate in-place update: list items receive the same options hash
        # that was built for their list, so the next item sees the next label.
        options[:number] = number.next
        out << "#{number}. "
      end
    end

    # A table counts as a data table when its border attribute is a positive number.
    def data_table?(table)
      table.attributes['border'].to_s.to_i > 0
    end
  end
end
# Builder template for the podcast RSS feed (RSS 2.0 with iTunes and Atom
# extensions). `xml`, `config`, `blog`, `image_path`, `tracked_url` and
# `podcast_url` are not defined here; presumably they come from the template
# environment (verify against the site configuration).
xml.instruct!
xml.rss 'xmlns:itunes' => 'http://www.itunes.com/dtds/podcast-1.0.dtd', 'xmlns:atom' => 'http://www.w3.org/2005/Atom', :version => '2.0' do
  xml.channel do
    # Computed once; used for both the channel link and the atom self link.
    feed_url = URI.join(config[:blog_url], blog.options.prefix.to_s, 'podcast.xml')
    newest_article = blog.articles.first

    xml.title config[:blog_title]
    xml.description config[:blog_description]
    xml.link feed_url
    xml.atom :link, 'rel' => 'self', 'href' => feed_url
    xml.language 'en-CA'
    # With no articles there is no date to report; skip both date elements
    # instead of calling #date on nil.
    if newest_article
      xml.lastBuildDate newest_article.date.rfc2822
      xml.pubDate newest_article.date.rfc2822
    end
    xml.itunes :author, 'John Frank'
    xml.itunes :keywords, config[:blog_keywords].join(', ')
    xml.itunes :explicit, (config[:blog_clean] ? 'clean' : 'yes')
    xml.itunes :image, :href => URI.join(config[:blog_url], image_path('icon.png'))
    xml.itunes :summary, HtmlToPlainText.truncate(HtmlToPlainText.plain_text(config[:blog_description]), 1950)
    xml.itunes :owner do
      xml.itunes :name, 'John Frank'
      xml.itunes :email, '[email protected]'
    end
    xml.itunes :category, :text => 'Science & Medicine' do
      xml.itunes :category, :text => 'Medicine'
    end

    blog.articles.each do |article|
      # Both values are needed twice per item, so compute each once.
      audio_path = podcast_source_path(article)
      article_url = URI.join(config[:blog_url], article.url)

      xml.item do
        xml.title article.title
        xml.pubDate article.date.rfc2822
        xml.enclosure :url => tracked_url(podcast_url(article)), :length => File.size(audio_path), :type => 'audio/mpeg'
        xml.link article_url
        xml.guid({ :isPermaLink => true }, article_url)
        xml.itunes :author, 'John Frank'
        # Plain-text summary of the article body, capped at 3950 characters.
        xml.itunes :summary do
          xml.cdata! HtmlToPlainText.truncate(HtmlToPlainText.plain_text(article.body), 3950)
        end
        xml.itunes :duration, Mp3Info.new(audio_path).length.to_i
        # xml.description do
        #   xml.cdata! article.body + partial(:audio_tag, :locals => { :article => article })
        # end
        # Most RSS readers will pull out the link to the enclosure, so no need to include it here.
        xml.description do
          xml.cdata! article.body
        end
      end
    end
  end
end