Created
August 7, 2010 07:20
-
-
Save watura/512552 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
require "rexml/document" | |
require 'open-uri' | |
require './cityofhypes' | |
include REXML | |
# Ranks the articles linked from a set of RSS feeds by their CityOfHypes score.
class TopArticles
  # Fetches one RSS feed and scores every article it links to.
  #
  # rss_url - location of the RSS feed
  # id      - feed ID copied into every entry so callers can filter per feed
  #
  # Returns one hash per rss/channel/item/link element:
  #   [{:id => feed ID, :score => CityOfHypes.score_of(url), :url => article URL}, ...]
  # Returns [] when the feed cannot be fetched or parsed, so a single broken
  # feed never injects nil into the combined ranking.
  def self.scoring(rss_url, id)
    rss = begin
      REXML::Document.new(read_feed(rss_url))
    rescue StandardError
      # Best effort: an unreachable or malformed feed contributes no articles.
      return []
    end

    links = rss.elements.collect("rss/channel/item/link") { |element| element.text }
    links.map { |url| { :id => id, :score => CityOfHypes.score_of(url), :url => url } }
  end

  # Selects articles across several feeds.
  #
  # urls      - array of RSS feed locations; the array index becomes the feed ID
  # each_feed - maximum number of articles taken from any single feed
  # max       - maximum number of articles returned overall
  #
  # Returns an array of hashes:
  #   [{:id => feed ID, :score => score given by CityOfHypes, :url => article URL}, ...]
  def self.top_articles(urls, each_feed, max)
    scored = urls.each_with_index.flat_map { |url, id| scoring(url, id) }

    # NOTE(review): articles are taken in ascending score order, exactly as
    # before. Confirm against CityOfHypes whether a lower score really means
    # a "hotter" article; if not, this ranking should be reversed.
    ranked = scored.sort_by { |info| info[:score].to_i }

    taken_per_feed = Hash.new(0)
    selected = []
    ranked.each do |info|
      break if selected.size >= max
      next if taken_per_feed[info[:id]] >= each_feed

      taken_per_feed[info[:id]] += 1
      selected << info
    end
    selected
  end

  # Reads the whole feed body and closes the underlying handle afterwards.
  # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is preferred
  # whenever open-uri provides it; older Rubies fall back to Kernel#open.
  def self.read_feed(rss_url)
    if URI.respond_to?(:open)
      URI.open(rss_url) { |io| io.read }
    else
      open(rss_url) { |io| io.read }
    end
  end
  private_class_method :read_feed
end
# Sample feeds, in order: TechCrunch JP, Gizmodo JP, Engadget JP, GIGAZINE,
# Lifehacker JP, Lifehacker US, Gizmodo US.
feed_urls = %w[
  http://www.pheedo.jp/f/JapaneseTechCrunch
  http://www.gizmodo.jp/index.xml
  http://japanese.engadget.com/rss.xml
  http://gigazine.net/index.php/news/rss_2.0/
  http://www.lifehacker.jp/index.xml
  http://lifehacker.com/tag/top/index.xml
  http://gizmodo.com/tag/top/index.xml
]

# Print at most 10 articles overall, with no more than 2 from any one feed.
p(TopArticles.top_articles(feed_urls, 2, 10))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment