Created
August 18, 2011 20:57
-
-
Save MelanieS/1155184 to your computer and use it in GitHub Desktop.
Checks posts in a blog and returns posts that aren't long enough
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri'
require 'open-uri'
# Collects the alternate-link URLs advertised by the page at +url+.
#
# @param url [String] address of the page to fetch and scan
# @return [Array] the +href+ attribute of every <tt>link[rel=alternate]</tt>
#   element except the first, in document order
def get_posts(url)
  # URI.open (provided by open-uri) replaces the bare Kernel#open call, which
  # no longer fetches URLs on Ruby 3.0+; the block form closes the stream.
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }
  links = doc.css('link[rel=alternate]').map { |link| link['href'] }
  # The first alternate link is skipped; presumably it points at the feed
  # itself rather than at a post -- TODO confirm.
  links.drop(1)
end
# Fetches each URL and extracts the text of its matching elements.
#
# @param urls [Array<String>] addresses of the pages to fetch
# @return [Array<String>] one extracted text per URL, in the same order
def pull_text(urls)
  urls.map do |url|
    # URI.open (provided by open-uri) replaces the bare Kernel#open call,
    # which no longer fetches URLs on Ruby 3.0+; the block closes the stream.
    doc = URI.open(url) { |page| Nokogiri::HTML(page) }
    # NOTE(review): 'fantasmagore' looks like a placeholder for the element
    # that wraps the post body -- confirm the intended selector.
    doc.search('fantasmagore').inner_text
  end
end
# Counts the whitespace-separated words in each string.
#
# Commas are removed before counting, so a comma standing on its own is not
# counted as a word. Any run of whitespace (spaces, tabs, newlines) separates
# words, so line breaks no longer merge neighbouring words and repeated
# spaces no longer produce empty "words".
#
# @param arr [Array<String>] texts to measure
# @return [Array<Integer>] word count of each text, in the same order
def count_text(arr)
  arr.map { |text| text.delete(',').split.size }
end
# Pairs each key with the value at the same index.
#
# A repeated key keeps the value of its last occurrence; a key with no
# value at its index maps to whatever +values[index]+ returns (nil for an
# Array that is too short).
#
# @param keys [Array] hash keys, in order
# @param values [#[]] values looked up by the key's index
# @return [Hash] keys mapped to their positional values
def match_arrs(keys, values)
  keys.each_with_index.each_with_object({}) do |(key, index), matches|
    matches[key] = values[index]
  end
end
# Lists the keys whose value does not exceed the limit.
#
# The given hash is left untouched (the earlier version deleted the
# passing entries from the caller's hash as a side effect).
#
# @param hash [Hash] keys mapped to comparable values (e.g. word counts)
# @param limit [Integer] entries with a value greater than this are dropped
# @return [Array] keys whose value is not greater than +limit+, in hash order
def find_rejects(hash, limit = 300)
  hash.reject { |_key, value| value > limit }.keys
end
# Prints a report entry for every url/content pair.
#
# @param matches [Hash] urls mapped to the content found at each url
# @return [Hash] the +matches+ hash that was passed in
def create_list(matches)
  matches.each do |url, content|
    puts "The content at the following url needs to be fixed:"
    puts url
    puts
    puts "The current content is:"
    puts content
  end
end
# Driver: ask for a url, find the linked posts, and report the ones whose
# word count does not exceed the limit used by find_rejects.
puts "Enter feed url:"
# gets returns nil at end of input; to_s avoids a NoMethodError in that case.
url = gets.to_s.chomp
abort "No feed url given." if url.empty?

post_urls = get_posts(url)
word_counts = count_text(pull_text(post_urls))
rejects = find_rejects(match_arrs(post_urls, word_counts))

# The short posts are fetched a second time so that their text can be
# printed next to their url.
short_posts = pull_text(rejects)
create_list(match_arrs(rejects, short_posts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment