Skip to content

Instantly share code, notes, and snippets.

@bootcoder
Created October 28, 2015 19:11
Show Gist options
  • Save bootcoder/c7bdde66b9e2df2e1a8a to your computer and use it in GitHub Desktop.
Save bootcoder/c7bdde66b9e2df2e1a8a to your computer and use it in GitHub Desktop.
require_relative 'html_whitespace_cleaner'
require 'nokogiri'
require 'awesome_print'
# Extracts post metadata and comments from a saved Hacker News thread page.
#
# Call #open_file first to load and parse the HTML; #post_args and
# #comment_args then read from the parsed document. The CSS selectors used
# below presumably match the Hacker News markup this was written against —
# verify against the page being parsed.
class Parser
  attr_accessor :parsed_data

  def initialize
    @parsed_data = []
  end

  # Reads the HTML file at +location+, passes it through
  # HTMLWhitespaceCleaner.clean and parses the result with Nokogiri.
  # Prints "FILE LOADED" once the document is stored.
  #
  # @param location [String] path of the HTML file to load
  def open_file(location)
    html = File.read(location)
    clean_html = HTMLWhitespaceCleaner.clean(html)
    @nokogiri_document = Nokogiri.parse(clean_html)
    puts "FILE LOADED"
  end

  # Collects the attributes of the post itself.
  #
  # @return [Hash] with keys :title, :url, :post_author, :points and
  #   :post_days_ago; :url and :points are nil when the page has no match
  def post_args
    {
      title: title,
      url: url,
      post_author: post_author,
      points: points,
      post_days_ago: post_days_ago
    }
  end

  # Collects one hash per comment by pairing authors, contents and ages
  # positionally. The author list drives the length; missing contents or
  # ages show up as nil.
  #
  # @return [Array<Hash>] hashes with keys :author, :content and :days_ago
  def comment_args
    comment_authors.zip(comment_content, comment_days_ago).map do |author, content, days_ago|
      { author: author, content: content, days_ago: days_ago }
    end
  end

  private

  # Page <title> text with the trailing ' | Hacker News' removed.
  def title
    @nokogiri_document.css('title').inner_text.chomp(' | Hacker News')
  end

  # First parenthesised chunk of the title cell text, or nil when the text
  # contains no parentheses (previously this raised NoMethodError).
  def url
    @nokogiri_document.css('td.title').inner_text[/\(([^\)]+)\)/, 1]
  end

  # Text of the link immediately following the score span.
  def post_author
    @nokogiri_document.css("span.score + a").text
  end

  # Text of the first score span, or nil when the page has none
  # (previously this raised NoMethodError).
  def points
    score = @nokogiri_document.css("span.score").first
    score && score.text
  end

  # Text of the second link after the score span with a trailing
  # ' days ago' removed; any other wording is returned unchanged.
  def post_days_ago
    @nokogiri_document.css("span.score + a + a").text.chomp(' days ago')
  end

  # Text of the first link in every comment header.
  def comment_authors
    @nokogiri_document.css(".comhead a:first-child").map(&:inner_text)
  end

  # Preview of every comment: the first 80 characters wrapped as "-> ...".
  def comment_content
    @nokogiri_document.css('.c00').map do |comment|
      "-> #{comment.inner_text[0, 80]}..."
    end
  end

  # Text of the second link in every comment header with a trailing
  # ' days ago' removed; any other wording is returned unchanged.
  def comment_days_ago
    @nokogiri_document.css(".comhead a:nth-child(2)").map do |link|
      link.text.chomp(" days ago")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment