Skip to content

Instantly share code, notes, and snippets.

@StephenFiser
Created November 25, 2015 00:45
Show Gist options
  • Save StephenFiser/c4ded819638826e5b037 to your computer and use it in GitHub Desktop.
Save StephenFiser/c4ded819638826e5b037 to your computer and use it in GitHub Desktop.
require 'open-uri'
# Reads the document found at +address+ and exposes the headlines it contains.
class Webpage
  attr_reader :address

  # address - the location of the page to read; it is handed to URI.open
  #           unchanged (normally an http/https URL).
  def initialize(address)
    @address = address
  end

  # Returns the result of running HeadlineFinder over the page's content.
  def headlines
    HeadlineFinder.new(page_content).results
  end

  private

  # Reads the page body on first use and caches it for later calls.
  #
  # URI.open is called explicitly: since Ruby 3.0, requiring open-uri no
  # longer makes Kernel#open fetch URLs, so a bare `open(address)` treats the
  # URL as a file name and fails. The block form lets the handle be closed as
  # soon as the body has been read instead of leaving it open.
  #
  # NOTE(review): for strings that do not look like URLs, URI.open hands the
  # call on to Kernel#open, so +address+ should not come from untrusted input
  # without validation - confirm how callers obtain it.
  def page_content
    @content ||= URI.open(address, &:read)
  end
end
# Gathers, for every heading tag listed in TAG_NAMES, whatever
# TagBuilder.all_content_for reports for that tag inside +text+.
class HeadlineFinder
  # Heading tags that are scanned, in the order their results are returned.
  TAG_NAMES = %w[h1 h2 h3 h4]

  attr_reader :text

  # text - the markup to scan.
  def initialize(text)
    @text = text
  end

  # Returns one flat array with the h1 results first, then h2, h3 and h4;
  # nil entries are dropped.
  def results
    per_tag = TAG_NAMES.flat_map { |tag| TagBuilder.all_content_for(tag, text) }
    per_tag.compact
  end
end
# Pairs one opening "<name ...>" tag with the "</name>" that follows it in
# +context+ and exposes the text between the two.
class TagBuilder
  # Value returned by #next_offset when no further scanning is possible.
  END_OF_CONTEXT = "at the end"

  class << self
    # Returns an array with the inner text of every complete +name+ element
    # found in +context+, in document order. The array is empty when there is
    # none. An opening tag without a matching closing tag ends the scan.
    def all_content_for(name, context)
      contents = []
      offset = 0
      until offset == END_OF_CONTEXT
        tag = new(name, context, offset)
        contents << tag.content if tag.content_available?
        offset = tag.next_offset
      end
      contents
    end
  end

  attr_reader :name, :context, :opening, :closing

  # name    - tag name to look for, e.g. "h1".
  # context - the markup being scanned.
  # offset  - index in +context+ at which the search starts.
  def initialize(name, context, offset = 0)
    @name = name
    @context = context
    @opening = OpeningTag.new(name, context, offset)
    # Look for the closing tag only after the opening tag, so a stray
    # "</name>" sitting before the opening tag cannot be paired with it.
    closing_offset = @opening.present? ? @opening.finish : offset
    @closing = ClosingTag.new(name, context, closing_offset)
  end

  # Returns the text between the opening and closing tag, or nil when either
  # tag is missing.
  def content
    return nil unless content_available?

    context[content_start..content_finish]
  end

  # Index of the first character after the opening tag's ">".
  # Only meaningful when #content_available? is true.
  def content_start
    opening.finish + 1
  end

  # Index of the last character before the closing tag's "<".
  # Only meaningful when #content_available? is true.
  def content_finish
    closing.start - 1
  end

  # Returns the index just past the closing tag, where the next search should
  # start, or "at the end" when no complete element was found here. A missing
  # closing tag means no later element can be complete either.
  def next_offset
    content_available? ? closing.final_position : END_OF_CONTEXT
  end

  # True when both the opening and the closing tag were found.
  def content_available?
    opening.present? && closing.present?
  end
end
# Locates an opening tag "<name ...>" in +context+, searching from +offset+.
class OpeningTag
  attr_reader :name, :context, :offset

  # name    - tag name to look for, e.g. "h1" (matched case-sensitively).
  # context - the markup being scanned.
  # offset  - index in +context+ at which the search starts.
  def initialize(name, context, offset = 0)
    @name = name
    @context = context
    @offset = offset
  end

  # True when both the tag's "<" and its terminating ">" were found.
  def present?
    !start.nil? && !finish.nil?
  end

  # Index of the tag's "<", or nil when no such tag exists at or after
  # +offset+.
  def start
    context.index(pattern, offset)
  end

  # Index of the first ">" at or after #start, or nil when the tag is absent
  # or never terminated.
  def finish
    from = start
    from && context.index(">", from)
  end

  private

  # Matches "<name" only when the name ends there (followed by whitespace,
  # "/" or ">"), so that looking for "b" does not hit "<body>".
  def pattern
    /<#{Regexp.escape(name.to_s)}(?=[\s\/>])/
  end
end
# Locates the literal closing tag "</name>" in +context+, searching from
# +offset+.
class ClosingTag
  attr_reader :name, :context, :offset

  # name    - tag name to look for, e.g. "h1".
  # context - the markup being scanned.
  # offset  - index in +context+ at which the search starts.
  def initialize(name, context, offset = 0)
    @name, @context, @offset = name, context, offset
  end

  # True when a closing tag exists at or after +offset+.
  def present?
    !start.nil?
  end

  # Index of the closing tag's "<", or nil when there is none.
  def start
    context.index(marker, offset)
  end

  # Index of the first character after the closing tag's ">".
  # Raises NoMethodError when the tag is absent, because #start is nil.
  def final_position
    start + marker.length
  end

  private

  # The exact text being searched for.
  def marker
    "</#{name}>"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment