bouchard · December 26, 2017 05:43
diff --git a/config.rb b/config.rb
 page '/podcast.xml', layout: false

 # Methods defined in the helpers block are available in templates
 helpers do
  # Returns the path, relative to the project root, of the audio file that
  # belongs to +article+. The file name is read from the article's +file+
  # data attribute.
  def podcast_source_path(article)
    audio_file = article.data.file
    "source/audio/#{audio_file}"
  end
 end
diff --git a/podcast-number-one.html.markdown b/podcast-number-one.html.markdown
diff --git a/podcast.xml.builder b/podcast.xml.builder
 # -*- coding: utf-8 -*-
 require 'mp3info'
 require 'nokogiri'

 # Converts a string of HTML into a plain text approximation. The main entry
 # point is +HtmlToPlainText.plain_text+; +HtmlToPlainText.truncate+ shortens
 # the resulting text to a maximum length.
 class HtmlToPlainText
  # Elements with these names are skipped entirely (never descended into).
  IGNORE_TAGS = %w(script style object applet iframe).each_with_object({}) { |tag, set| set[tag] = true }.freeze
  # Elements that are surrounded by a blank line (two line breaks).
  PARAGRAPH_TAGS = %w(p h1 h2 h3 h4 h5 h6 table ol ul dl dd blockquote dialog figure aside section).each_with_object({}) { |tag, set| set[tag] = true }.freeze
  # Elements that are surrounded by a single line break.
  BLOCK_TAGS = %w(div address li dt center del article header footer nav pre legend tr).each_with_object({}) { |tag, set| set[tag] = true }.freeze
  WHITESPACE = [" ", "\n", "\r"].freeze
  PLAINTEXT = "plaintext".freeze
  PRE = "pre".freeze
  BR = "br".freeze
  HR = "hr".freeze
  TD = "td".freeze
  TH = "th".freeze
  TR = "tr".freeze
  OL = "ol".freeze
  UL = "ul".freeze
  LI = "li".freeze
  A = "a".freeze
  TABLE = "table".freeze
  # First label of an ordered list; chosen by nesting level modulo 2.
  NUMBERS = ["1", "a"].freeze
  ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
  HTML_PATTERN = /[<&]/.freeze
  TRAILING_WHITESPACE = /[ \t]+$/.freeze
  BODY_TAG_XPATH = "/html/body".freeze
  CARRIDGE_RETURN_PATTERN = /\r(\n?)/.freeze
  LINE_BREAK_PATTERN = /[\n\r]/.freeze
  NON_PROTOCOL_PATTERN = /:\/?\/?(.*)/.freeze
  NOT_WHITESPACE_PATTERN = /\S/.freeze
  SPACE = " ".freeze
  EMPTY = "".freeze
  NEWLINE = "\n".freeze
  HREF = "href".freeze
  TABLE_SEPARATOR = " | ".freeze

  class << self
    # Shortens +str+ so that the result, including the omission marker, is
    # +truncate_at+ characters long (or shorter when a separator is given).
    #
    # str         - the String to shorten; +nil+ is passed through as +nil+ so
    #               the result of +plain_text+ can be fed in directly.
    # truncate_at - maximum length of the returned string.
    # options     - :omission  - marker appended to a shortened string
    #                            (default '...').
    #               :separator - String or Regexp; when given, the cut is made
    #                            at its last occurrence at or before the limit.
    #
    # Returns a new String; a copy of +str+ when it already fits. The
    # +options+ hash is never modified.
    def truncate(str, truncate_at, options = {})
      return nil if str.nil?
      return str.dup unless str.length > truncate_at

      # Read the default into a local instead of writing it back into the
      # caller's hash (which may be shared or frozen).
      omission = options[:omission] || '...'
      length_with_room_for_omission = truncate_at - omission.length
      stop =
        if options[:separator]
          str.rindex(options[:separator], length_with_room_for_omission) || length_with_room_for_omission
        else
          length_with_room_for_omission
        end

      "#{str[0...stop]}#{omission}"
    end

    # Convert some HTML into a plain text approximation.
    #
    # Returns +nil+ when +html+ is +nil+ or the parsed document has no body,
    # a copy of +html+ when it contains neither '<' nor '&', and otherwise the
    # stripped text produced by walking the parsed body.
    def plain_text(html)
      return nil if html.nil?
      return html.dup unless html =~ HTML_PATTERN
      body = Nokogiri::HTML::Document.parse(html).xpath(BODY_TAG_XPATH).first
      return unless body
      convert_node_to_plain_text(body).strip.gsub(CARRIDGE_RETURN_PATTERN, NEWLINE)
    end

    private

    # Convert an HTML node to plain text. This method is called recursively with the output and
    # formatting options for special tags.
    #
    # parent  - the node whose children are rendered.
    # out     - the String being built; it is appended to in place.
    # options - formatting state inherited from ancestors (:pre, :list, :ul,
    #           :ol, :number).
    #
    # Returns +out+.
    def convert_node_to_plain_text(parent, out = '', options = {})
      # Opening break for the element itself.
      if PARAGRAPH_TAGS.include?(parent.name)
        append_paragraph_breaks(out)
      elsif BLOCK_TAGS.include?(parent.name)
        append_block_breaks(out)
      end

      format_list_item(out, options) if parent.name == LI
      out << "| " if parent.name == TR && data_table?(parent.parent)

      parent.children.each do |node|
        if node.text? || node.cdata?
          text = node.text
          unless options[:pre]
            # Outside <pre>, collapse line breaks and runs of spaces, and avoid
            # doubling up whitespace that is already at the end of the output.
            text = node.text.gsub(LINE_BREAK_PATTERN, SPACE).squeeze(SPACE)
            text.lstrip! if WHITESPACE.include?(out[-1, 1])
          end
          out << text
        elsif node.name == PLAINTEXT
          out << node.text
        elsif node.element? && !IGNORE_TAGS.include?(node.name)
          convert_node_to_plain_text(node, out, child_options(node, options))

          # Closing decoration for the element that was just rendered.
          if node.name == BR
            out.sub!(TRAILING_WHITESPACE, EMPTY)
            out << NEWLINE
          elsif node.name == HR
            out.sub!(TRAILING_WHITESPACE, EMPTY)
            out << NEWLINE unless out.end_with?(NEWLINE)
            out << "-------------------------------\n"
          elsif node.name == TD || node.name == TH
            out << (data_table?(parent.parent) ? TABLE_SEPARATOR : SPACE)
          elsif node.name == A
            href = node[HREF]
            # Append the target only for absolute URLs whose link text is not
            # blank and does not already show the URL, with or without its
            # protocol (e.g. <a href="http://example.com">example.com</a>).
            if href &&
                href =~ ABSOLUTE_URL_PATTERN &&
                node.text =~ NOT_WHITESPACE_PATTERN &&
                node.text != href &&
                node.text != href[NON_PROTOCOL_PATTERN, 1]
              out << " (#{href}) "
            end
          elsif PARAGRAPH_TAGS.include?(node.name)
            append_paragraph_breaks(out)
          elsif BLOCK_TAGS.include?(node.name)
            append_block_breaks(out)
          end
        end
      end
      out
    end

    # Set formatting options that will be passed to child elements for a tag.
    # Lists and <pre> get a merged copy; every other tag passes the same hash
    # object through, so list numbering state is shared between sibling items.
    def child_options(node, options)
      if node.name == UL
        level = options[:ul] || -1
        level += 1
        options.merge(:list => :ul, :ul => level)
      elsif node.name == OL
        level = options[:ol] || -1
        level += 1
        options.merge(:list => :ol, :ol => level, :number => NUMBERS[level % 2])
      elsif node.name == PRE
        options.merge(:pre => true)
      else
        options
      end
    end

    # Add double line breaks between paragraph elements. If line breaks already exist,
    # new ones will only be added to get to two.
    def append_paragraph_breaks(out)
      out.sub!(TRAILING_WHITESPACE, EMPTY)
      if out.end_with?(NEWLINE)
        out << NEWLINE unless out.end_with?("\n\n")
      else
        out << "\n\n"
      end
    end

    # Add a single line break between block elements. If a line break already exists,
    # none will be added.
    def append_block_breaks(out)
      out.sub!(TRAILING_WHITESPACE, EMPTY)
      out << NEWLINE unless out.end_with?(NEWLINE)
    end

    # Add an appropriate bullet or number to a list element. Unordered items
    # get one '*' per nesting level; ordered items get the current label, and
    # the label stored in +options+ is advanced for the next item.
    def format_list_item(out, options)
      if options[:list] == :ul
        out << "#{'*' * (options[:ul] + 1)} "
      elsif options[:list] == :ol
        number = options[:number]
        options[:number] = number.next
        out << "#{number}. "
      end
    end

    # True when the node's +border+ attribute is a number greater than zero.
    def data_table?(table)
      table.attributes['border'].to_s.to_i > 0
    end
  end
 end

 # Builds the podcast RSS feed (RSS 2.0 with the iTunes and Atom namespaces):
 # one <channel> describing the show, then one <item> per blog article with
 # its audio enclosure, duration and plain-text summary.
 xml.instruct!
 xml.rss 'xmlns:itunes' => 'http://www.itunes.com/dtds/podcast-1.0.dtd', 'xmlns:atom' => 'http://www.w3.org/2005/Atom', :version => '2.0' do
  xml.channel do
    author = 'John Frank'
    feed_url = URI.join(config[:blog_url], blog.options.prefix.to_s, 'podcast.xml')
    latest_article = blog.articles.first

    xml.title config[:blog_title]
    xml.description config[:blog_description]
    xml.link feed_url
    xml.atom :link, 'rel' => 'self', 'href' => feed_url
    xml.language 'en-CA'
    # A blog without articles has no build date; skip the tags instead of
    # failing on nil.
    if latest_article
      build_date = latest_article.date.rfc2822
      xml.lastBuildDate build_date
      xml.pubDate build_date
    end
    xml.itunes :author, author
    xml.itunes :keywords, Array(config[:blog_keywords]).join(', ')
    xml.itunes :explicit, (config[:blog_clean] ? 'clean' : 'yes')
    xml.itunes :image, :href => URI.join(config[:blog_url], image_path('icon.png'))
    # plain_text returns nil for nil input; to_s keeps truncate working.
    xml.itunes :summary, HtmlToPlainText.truncate(HtmlToPlainText.plain_text(config[:blog_description]).to_s, 1950)
    xml.itunes :owner do
      xml.itunes :name, author
      xml.itunes :email, '[email protected]'
    end
    xml.itunes :category, :text => 'Science & Medicine' do
      xml.itunes :category, :text => 'Medicine'
    end

    blog.articles.each do |article|
      source_path = podcast_source_path(article)
      article_url = URI.join(config[:blog_url], article.url)

      xml.item do
        xml.title article.title
        # Same date format as the channel-level dates above.
        xml.pubDate article.date.rfc2822
        xml.enclosure :url => tracked_url(podcast_url(article)), :length => File.size(source_path), :type => 'audio/mpeg'
        xml.link article_url
        xml.guid({ :isPermaLink => true }, article_url)
        xml.itunes :author, author
        xml.itunes :summary do
          xml.cdata! HtmlToPlainText.truncate(HtmlToPlainText.plain_text(article.body).to_s, 3950)
        end
        xml.itunes :duration, Mp3Info.new(source_path).length.to_i
        # xml.description do
        #   xml.cdata! article.body + partial(:audio_tag, :locals => { :article => article })
        # end
        # Most RSS readers will pull out the link to the enclosure, so no need to include it here.
        xml.description do
          xml.cdata! article.body
        end
      end
    end
  end
 end
	page '/podcast.xml', layout: false

	# Methods defined in the helpers block are available in templates
	helpers do
	# Returns the path, relative to the project root, of the audio file that
	# belongs to +article+. The file name is read from the article's +file+
	# data attribute.
	def podcast_source_path(article)
	  audio_file = article.data.file
	  "source/audio/#{audio_file}"
	end
	end
	# -*- coding: utf-8 -*-
	require 'mp3info'
	require 'nokogiri'

	# Converts a string of HTML into a plain text approximation. The main entry
	# point is +HtmlToPlainText.plain_text+; +HtmlToPlainText.truncate+ shortens
	# the resulting text to a maximum length.
	class HtmlToPlainText
	  # Elements with these names are skipped entirely (never descended into).
	  IGNORE_TAGS = %w(script style object applet iframe).each_with_object({}) { |tag, set| set[tag] = true }.freeze
	  # Elements that are surrounded by a blank line (two line breaks).
	  PARAGRAPH_TAGS = %w(p h1 h2 h3 h4 h5 h6 table ol ul dl dd blockquote dialog figure aside section).each_with_object({}) { |tag, set| set[tag] = true }.freeze
	  # Elements that are surrounded by a single line break.
	  BLOCK_TAGS = %w(div address li dt center del article header footer nav pre legend tr).each_with_object({}) { |tag, set| set[tag] = true }.freeze
	  WHITESPACE = [" ", "\n", "\r"].freeze
	  PLAINTEXT = "plaintext".freeze
	  PRE = "pre".freeze
	  BR = "br".freeze
	  HR = "hr".freeze
	  TD = "td".freeze
	  TH = "th".freeze
	  TR = "tr".freeze
	  OL = "ol".freeze
	  UL = "ul".freeze
	  LI = "li".freeze
	  A = "a".freeze
	  TABLE = "table".freeze
	  # First label of an ordered list; chosen by nesting level modulo 2.
	  NUMBERS = ["1", "a"].freeze
	  ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
	  HTML_PATTERN = /[<&]/.freeze
	  TRAILING_WHITESPACE = /[ \t]+$/.freeze
	  BODY_TAG_XPATH = "/html/body".freeze
	  CARRIDGE_RETURN_PATTERN = /\r(\n?)/.freeze
	  LINE_BREAK_PATTERN = /[\n\r]/.freeze
	  NON_PROTOCOL_PATTERN = /:\/?\/?(.*)/.freeze
	  NOT_WHITESPACE_PATTERN = /\S/.freeze
	  SPACE = " ".freeze
	  EMPTY = "".freeze
	  NEWLINE = "\n".freeze
	  HREF = "href".freeze
	  TABLE_SEPARATOR = " | ".freeze

	  class << self
	    # Shortens +str+ so that the result, including the omission marker, is
	    # +truncate_at+ characters long (or shorter when a separator is given).
	    #
	    # str         - the String to shorten; +nil+ is passed through as +nil+ so
	    #               the result of +plain_text+ can be fed in directly.
	    # truncate_at - maximum length of the returned string.
	    # options     - :omission  - marker appended to a shortened string
	    #                            (default '...').
	    #               :separator - String or Regexp; when given, the cut is made
	    #                            at its last occurrence at or before the limit.
	    #
	    # Returns a new String; a copy of +str+ when it already fits. The
	    # +options+ hash is never modified.
	    def truncate(str, truncate_at, options = {})
	      return nil if str.nil?
	      return str.dup unless str.length > truncate_at

	      # Read the default into a local instead of writing it back into the
	      # caller's hash (which may be shared or frozen).
	      omission = options[:omission] || '...'
	      length_with_room_for_omission = truncate_at - omission.length
	      stop =
	        if options[:separator]
	          str.rindex(options[:separator], length_with_room_for_omission) || length_with_room_for_omission
	        else
	          length_with_room_for_omission
	        end

	      "#{str[0...stop]}#{omission}"
	    end

	    # Convert some HTML into a plain text approximation.
	    #
	    # Returns +nil+ when +html+ is +nil+ or the parsed document has no body,
	    # a copy of +html+ when it contains neither '<' nor '&', and otherwise the
	    # stripped text produced by walking the parsed body.
	    def plain_text(html)
	      return nil if html.nil?
	      return html.dup unless html =~ HTML_PATTERN
	      body = Nokogiri::HTML::Document.parse(html).xpath(BODY_TAG_XPATH).first
	      return unless body
	      convert_node_to_plain_text(body).strip.gsub(CARRIDGE_RETURN_PATTERN, NEWLINE)
	    end

	    private

	    # Convert an HTML node to plain text. This method is called recursively with the output and
	    # formatting options for special tags.
	    #
	    # parent  - the node whose children are rendered.
	    # out     - the String being built; it is appended to in place.
	    # options - formatting state inherited from ancestors (:pre, :list, :ul,
	    #           :ol, :number).
	    #
	    # Returns +out+.
	    def convert_node_to_plain_text(parent, out = '', options = {})
	      # Opening break for the element itself.
	      if PARAGRAPH_TAGS.include?(parent.name)
	        append_paragraph_breaks(out)
	      elsif BLOCK_TAGS.include?(parent.name)
	        append_block_breaks(out)
	      end

	      format_list_item(out, options) if parent.name == LI
	      out << "| " if parent.name == TR && data_table?(parent.parent)

	      parent.children.each do |node|
	        if node.text? || node.cdata?
	          text = node.text
	          unless options[:pre]
	            # Outside <pre>, collapse line breaks and runs of spaces, and avoid
	            # doubling up whitespace that is already at the end of the output.
	            text = node.text.gsub(LINE_BREAK_PATTERN, SPACE).squeeze(SPACE)
	            text.lstrip! if WHITESPACE.include?(out[-1, 1])
	          end
	          out << text
	        elsif node.name == PLAINTEXT
	          out << node.text
	        elsif node.element? && !IGNORE_TAGS.include?(node.name)
	          convert_node_to_plain_text(node, out, child_options(node, options))

	          # Closing decoration for the element that was just rendered.
	          if node.name == BR
	            out.sub!(TRAILING_WHITESPACE, EMPTY)
	            out << NEWLINE
	          elsif node.name == HR
	            out.sub!(TRAILING_WHITESPACE, EMPTY)
	            out << NEWLINE unless out.end_with?(NEWLINE)
	            out << "-------------------------------\n"
	          elsif node.name == TD || node.name == TH
	            out << (data_table?(parent.parent) ? TABLE_SEPARATOR : SPACE)
	          elsif node.name == A
	            href = node[HREF]
	            # Append the target only for absolute URLs whose link text is not
	            # blank and does not already show the URL, with or without its
	            # protocol (e.g. <a href="http://example.com">example.com</a>).
	            if href &&
	                href =~ ABSOLUTE_URL_PATTERN &&
	                node.text =~ NOT_WHITESPACE_PATTERN &&
	                node.text != href &&
	                node.text != href[NON_PROTOCOL_PATTERN, 1]
	              out << " (#{href}) "
	            end
	          elsif PARAGRAPH_TAGS.include?(node.name)
	            append_paragraph_breaks(out)
	          elsif BLOCK_TAGS.include?(node.name)
	            append_block_breaks(out)
	          end
	        end
	      end
	      out
	    end

	    # Set formatting options that will be passed to child elements for a tag.
	    # Lists and <pre> get a merged copy; every other tag passes the same hash
	    # object through, so list numbering state is shared between sibling items.
	    def child_options(node, options)
	      if node.name == UL
	        level = options[:ul] || -1
	        level += 1
	        options.merge(:list => :ul, :ul => level)
	      elsif node.name == OL
	        level = options[:ol] || -1
	        level += 1
	        options.merge(:list => :ol, :ol => level, :number => NUMBERS[level % 2])
	      elsif node.name == PRE
	        options.merge(:pre => true)
	      else
	        options
	      end
	    end

	    # Add double line breaks between paragraph elements. If line breaks already exist,
	    # new ones will only be added to get to two.
	    def append_paragraph_breaks(out)
	      out.sub!(TRAILING_WHITESPACE, EMPTY)
	      if out.end_with?(NEWLINE)
	        out << NEWLINE unless out.end_with?("\n\n")
	      else
	        out << "\n\n"
	      end
	    end

	    # Add a single line break between block elements. If a line break already exists,
	    # none will be added.
	    def append_block_breaks(out)
	      out.sub!(TRAILING_WHITESPACE, EMPTY)
	      out << NEWLINE unless out.end_with?(NEWLINE)
	    end

	    # Add an appropriate bullet or number to a list element. Unordered items
	    # get one '*' per nesting level; ordered items get the current label, and
	    # the label stored in +options+ is advanced for the next item.
	    def format_list_item(out, options)
	      if options[:list] == :ul
	        out << "#{'*' * (options[:ul] + 1)} "
	      elsif options[:list] == :ol
	        number = options[:number]
	        options[:number] = number.next
	        out << "#{number}. "
	      end
	    end

	    # True when the node's +border+ attribute is a number greater than zero.
	    def data_table?(table)
	      table.attributes['border'].to_s.to_i > 0
	    end
	  end
	end

	# Builds the podcast RSS feed (RSS 2.0 with the iTunes and Atom namespaces):
	# one <channel> describing the show, then one <item> per blog article with
	# its audio enclosure, duration and plain-text summary.
	xml.instruct!
	xml.rss 'xmlns:itunes' => 'http://www.itunes.com/dtds/podcast-1.0.dtd', 'xmlns:atom' => 'http://www.w3.org/2005/Atom', :version => '2.0' do
	  xml.channel do
	    author = 'John Frank'
	    feed_url = URI.join(config[:blog_url], blog.options.prefix.to_s, 'podcast.xml')
	    latest_article = blog.articles.first

	    xml.title config[:blog_title]
	    xml.description config[:blog_description]
	    xml.link feed_url
	    xml.atom :link, 'rel' => 'self', 'href' => feed_url
	    xml.language 'en-CA'
	    # A blog without articles has no build date; skip the tags instead of
	    # failing on nil.
	    if latest_article
	      build_date = latest_article.date.rfc2822
	      xml.lastBuildDate build_date
	      xml.pubDate build_date
	    end
	    xml.itunes :author, author
	    xml.itunes :keywords, Array(config[:blog_keywords]).join(', ')
	    xml.itunes :explicit, (config[:blog_clean] ? 'clean' : 'yes')
	    xml.itunes :image, :href => URI.join(config[:blog_url], image_path('icon.png'))
	    # plain_text returns nil for nil input; to_s keeps truncate working.
	    xml.itunes :summary, HtmlToPlainText.truncate(HtmlToPlainText.plain_text(config[:blog_description]).to_s, 1950)
	    xml.itunes :owner do
	      xml.itunes :name, author
	      xml.itunes :email, '[email protected]'
	    end
	    xml.itunes :category, :text => 'Science & Medicine' do
	      xml.itunes :category, :text => 'Medicine'
	    end

	    blog.articles.each do |article|
	      source_path = podcast_source_path(article)
	      article_url = URI.join(config[:blog_url], article.url)

	      xml.item do
	        xml.title article.title
	        # Same date format as the channel-level dates above.
	        xml.pubDate article.date.rfc2822
	        xml.enclosure :url => tracked_url(podcast_url(article)), :length => File.size(source_path), :type => 'audio/mpeg'
	        xml.link article_url
	        xml.guid({ :isPermaLink => true }, article_url)
	        xml.itunes :author, author
	        xml.itunes :summary do
	          xml.cdata! HtmlToPlainText.truncate(HtmlToPlainText.plain_text(article.body).to_s, 3950)
	        end
	        xml.itunes :duration, Mp3Info.new(source_path).length.to_i
	        # xml.description do
	        #   xml.cdata! article.body + partial(:audio_tag, :locals => { :article => article })
	        # end
	        # Most RSS readers will pull out the link to the enclosure, so no need to include it here.
	        xml.description do
	          xml.cdata! article.body
	        end
	      end
	    end
	  end
	end