Created
June 28, 2015 16:50
-
-
Save cheshire137/47d5484fd0eb54143e77 to your computer and use it in GitHub Desktop.
Delicious and Pocket RSS to JSON — fetches both users' RSS feeds, merges and caches the items, and emits the most recent links as JSON.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# encoding: utf-8 | |
require 'rubygems' | |
# require 'nokogiri' | |
require 'json' | |
require 'rss' | |
require 'open-uri' | |
require 'uri' | |
# Fetches one or more RSS feeds (with on-disk caching), merges their items,
# and prints the most recent links as a JSON array, preceded by a CGI-style
# Content-type header.
class RSSFetcher
  attr_reader :urls, :link_limit, :url_cache_path, :should_write_url_cache

  # How long a cached feed XML file stays fresh, in minutes (6 hours).
  FEED_CACHE_TTL_MINUTES = 360

  # urls       - Array of RSS feed URL strings.
  # link_limit - maximum number of links to emit.
  def initialize urls, link_limit
    @urls = urls
    @link_limit = link_limit
    @should_write_url_cache = false
    # JSON cache mapping URL => resolved page title (used because Pocket
    # reports "Untitled" for some links).
    @url_cache_path = File.join(File.dirname(__FILE__),
                                'rss-link-title-cache.json')
  end

  # Prints a CGI Content-type header followed by the merged, filtered links
  # as JSON. On any error, prints {"error": message} instead.
  def print_json
    print "Content-type: application/json\r\n\r\n"
    begin
      feeds = get_rss_feeds
      all_links = merge_rss_feeds(feeds)
      recent_links = filter_links(all_links)
      write_url_title_cache(recent_links) if @should_write_url_cache
      print_json_object recent_links
    rescue => e
      print_json_object({error: e.message})
    end
  end

  private

  # Sorts links newest-first, resolves missing titles, drops links with
  # blank titles, and keeps at most @link_limit entries.
  # FIX: the original `[0..@link_limit]` kept link_limit + 1 entries (a
  # range slice is inclusive) — `first` keeps exactly link_limit.
  def filter_links all_links
    all_links.sort! {|a, b| b[:date] <=> a[:date] } # newest first
    all_links.each do |link|
      link[:title] = get_link_title(link[:title], link[:url])
    end
    all_links.select {|link|
      title = link[:title]
      title && title.strip != ''
    }.first(@link_limit)
  end

  # Path of the on-disk XML cache file for the given feed URL, keyed by host.
  def get_cache_path url
    domain = URI.parse(url).host
    File.join(File.dirname(__FILE__), "rss-cache-#{domain}.xml")
  end

  # Pocket uses the literal title "Untitled" for some links; fall back to
  # scraping the page's <title> in that case.
  def get_link_title original_title, url
    (original_title == 'Untitled') ? get_title_for_url(url) : original_title
  end

  # Returns a parsed RSS feed for the URL, reading from the on-disk cache
  # when it is fresher than FEED_CACHE_TTL_MINUTES, refreshing it otherwise.
  def get_rss_feed url
    rss = nil
    cache_path = get_cache_path(url)
    # FIX: File.exists? was deprecated and removed in Ruby 3.2.
    if File.exist?(cache_path)
      minutes_old = (Time.now - File.mtime(cache_path)).to_i / 60
      rss = File.read(cache_path) if minutes_old < FEED_CACHE_TTL_MINUTES
    end
    write_cache = false
    unless rss
      # FIX: Kernel#open no longer dispatches URLs as of Ruby 3.0, and is
      # unsafe on untrusted strings (a leading "|" spawns a subprocess);
      # URI.open is the supported open-uri entry point.
      rss = URI.open(url)
      write_cache = true
    end
    parsed_rss = RSS::Parser.parse(rss)
    if write_cache
      File.open(cache_path, 'w:UTF-8') {|file| file.puts parsed_rss.to_s }
    end
    parsed_rss
  end

  def get_rss_feeds
    @urls.map {|url| get_rss_feed(url) }
  end

  # Resolves a page title for the URL, consulting the JSON title cache
  # first. Returns nil when the page cannot be fetched.
  def get_title_for_url url
    if File.exist?(@url_cache_path)
      json_str = File.read(@url_cache_path, encoding: 'UTF-8')
      begin
        url_titles = JSON.parse(json_str)
        if title = url_titles[url]
          return title
        end
      rescue JSON::ParserError
        # Corrupt cache file: treat as a miss; it is rewritten after fetch.
      end
    end
    @should_write_url_cache = true
    source = URI.open(url).read
    # FIX: tolerate tag attributes, mixed case, and titles spanning lines;
    # non-greedy so only the first <title> element is captured.
    source[/<title[^>]*>(.*?)<\/title>/mi, 1]
  rescue OpenURI::HTTPError
    nil
  end

  # Flattens feeds into an array of {url:, date:, source:, title:} hashes,
  # tagging each link with its feed's channel title.
  def merge_rss_feeds feeds
    links = []
    feeds.each do |feed|
      source = feed.channel.title
      feed.items.each do |item|
        links << {url: item.link, date: item.pubDate, source: source,
                  title: item.title}
      end
    end
    links
  end

  # Emits the object as JSON followed by CRLF (CGI response body).
  def print_json_object obj
    print JSON.generate(obj) + "\r\n"
  end

  # Persists the url => title mapping so resolved titles survive restarts.
  def write_url_title_cache links
    url_titles = {}
    links.each do |link|
      url_titles[link[:url]] = link[:title]
    end
    File.open(@url_cache_path, 'w:UTF-8') do |file|
      file.puts JSON.generate(url_titles)
    end
  end
end
# Feed URLs to merge — replace your_user_name with your Pocket and
# Delicious usernames.
feed_urls = [
  'http://getpocket.com/users/your_user_name/feed/all',
  'http://feeds.delicious.com/v2/rss/your_user_name',
]
RSSFetcher.new(feed_urls, 30).print_json
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment