Created
October 28, 2015 19:11
-
-
Save bootcoder/c7bdde66b9e2df2e1a8a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative 'html_whitespace_cleaner' | |
require 'nokogiri' | |
require 'awesome_print' | |
# Scrapes a post and its comments out of a saved Hacker News thread page.
#
# Typical use: call #open_file with the path of an HTML file, then read the
# scraped values through #post_args and #comment_args. Both readers query the
# document loaded by #open_file, so it must be called first.
class Parser
  # Trailing " day ago" / " days ago" of an age label. The singular form is
  # matched too so that "1 day ago" is reduced to "1" just like "5 days ago"
  # is reduced to "5".
  DAYS_AGO_SUFFIX = / days? ago\z/.freeze
  private_constant :DAYS_AGO_SUFFIX

  # Starts out as an empty array; nothing in this class writes to it.
  attr_accessor :parsed_data

  def initialize
    @parsed_data = []
    # Set by #open_file; initialised here so the ivar always exists.
    @nokogiri_document = nil
  end

  # Reads the file at +location+, passes its contents through
  # HTMLWhitespaceCleaner.clean and parses the result with Nokogiri.
  # Prints "FILE LOADED" once the document is ready.
  #
  # @param location [String] path of the HTML file to load
  # @return [nil]
  def open_file(location)
    html = File.read(location)
    clean_html = HTMLWhitespaceCleaner.clean(html)
    @nokogiri_document = Nokogiri.parse(clean_html)
    puts "FILE LOADED"
  end

  # Collects the post-level fields of the loaded page.
  #
  # @return [Hash] with keys :title, :url, :post_author, :points and
  #   :post_days_ago; :url and :points are nil when the page lacks the
  #   corresponding markup
  def post_args
    {
      title: title,
      url: url,
      post_author: post_author,
      points: points,
      post_days_ago: post_days_ago
    }
  end

  # Collects one hash per comment by pairing up, position by position, the
  # author names, the truncated bodies and the age labels found on the page.
  # When the three lists differ in length, the missing values are nil
  # (Array#zip pads to the length of the author list).
  #
  # @return [Array<Hash>] hashes with keys :author, :content and :days_ago
  def comment_args
    comment_authors.zip(comment_content, comment_days_ago).map do |author, content, days_ago|
      { author: author, content: content, days_ago: days_ago }
    end
  end

  private

  # Page title without the trailing " | Hacker News".
  def title
    @nokogiri_document.css('title').inner_text.chomp(' | Hacker News')
  end

  # Text inside the first pair of parentheses in the title cells, or nil when
  # there is none (previously this raised NoMethodError on nil).
  def url
    @nokogiri_document.css('td.title').inner_text[/\(([^)]+)\)/, 1]
  end

  # Text of the link that directly follows the score element.
  def post_author
    @nokogiri_document.css("span.score + a").text
  end

  # Text of the first score element, or nil when the page has none
  # (previously this raised NoMethodError on nil).
  def points
    @nokogiri_document.css("span.score").first&.text
  end

  # Age label of the post with its " day(s) ago" suffix removed.
  def post_days_ago
    strip_days_ago(@nokogiri_document.css("span.score + a + a").text)
  end

  # Text of the first link in every comment header.
  def comment_authors
    @nokogiri_document.css(".comhead a:first-child").map(&:inner_text)
  end

  # Every comment body cut down to its first 80 characters and wrapped as
  # "-> ...".
  def comment_content
    @nokogiri_document.css('.c00').map do |comment|
      "-> #{comment.inner_text[0, 80]}..."
    end
  end

  # Age label of every comment with its " day(s) ago" suffix removed.
  def comment_days_ago
    @nokogiri_document.css(".comhead a:nth-child(2)").map do |link|
      strip_days_ago(link.text)
    end
  end

  # Removes a trailing " day ago" or " days ago" from +label+; any other
  # label is returned unchanged.
  def strip_days_ago(label)
    label.sub(DAYS_AGO_SUFFIX, '')
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment