Skip to content

Instantly share code, notes, and snippets.

<root>
<schedule>
<cinema>奧海城</cinema>
<house>1</house>
<movie>未來戰士2018</movie>
<cat>IIB</cat>
<time>Jun 03 2009 (Wed) 10:20PM</time>
</schedule>
</root>
require 'rubygems'
require 'scrubyt'
require 'nokogiri'
require 'iconv'
# Fetch the seat plan for one schedule (show).
# NOTE(review): this snippet is cut off in the listing; the extractor block
# opened below is not closed in the visible excerpt.
movies = Scrubyt::Extractor.define do
# Use a proper schedule link here: the show_id query parameter in the URL
# below is a hard-coded example value.
# NOTE(review): presumably show_id should come from the schedule ids found by
# the schedule extractor — confirm.
fetch "http://www2.cinema.com.hk/revamp/html/show_seat.php?lang=c&show_id=13105241"
require 'rubygems'
require 'scrubyt'
require 'nokogiri'
require 'iconv'
# Fetch the seat plan for one schedule (show).
# NOTE(review): this snippet is cut off in the listing; the extractor block
# opened below is not closed in the visible excerpt.
movies = Scrubyt::Extractor.define do
# Use a proper schedule link here: the show_id query parameter in the URL
# below is a hard-coded example value.
# NOTE(review): presumably show_id should come from the schedule ids found by
# the schedule extractor — confirm.
fetch "http://www2.cinema.com.hk/revamp/html/show_seat.php?lang=c&show_id=13105241"
<?xml version="1.0"?>
<root>
<schedule>
<cinema>&#x5967;&#x6D77;&#x57CE;</cinema>
<house>1</house>
<movie>&#x672A;&#x4F86;&#x6230;&#x58EB;2018</movie>
<cat>IIB</cat>
<time>Jun 03 2009 (Wed) 10:20PM</time>
<seats>210</seats></schedule>
require 'rubygems'
require 'scrubyt'
# Find available schedules.
# Defines a scRUBYt extractor over the movie ticketing page with a `cinema`
# pattern and a nested `schedule` pattern.
schedules = Scrubyt::Extractor.define do
fetch "http://www.cinema.com.hk/revamp/html/movie_ticketing.php?lang=c&mode=ticketing"
# `cinema` pattern: <select> elements whose class is "show_pulldown".
cinema "//select[@class='show_pulldown']" do
# `schedule` pattern: the `value` attribute of <option> elements.
# NOTE(review): the sample output in this listing contains a "0" schedule,
# presumably a placeholder option — confirm whether it should be filtered out.
schedule "//option/@value"
end
end
<root>
<cinema>
<schedule>10065063</schedule>
<schedule>0</schedule>
<schedule>10065064</schedule>
<schedule>10065065</schedule>
<schedule>10065066</schedule>
<schedule>10065067</schedule>
</cinema>
<cinema>
# Parse the feed document as XML and select every <item> element.
@xml = Hpricot.XML(doc)
items = (@xml/"//item")
# Process at most @item_limit items.
# NOTE(review): this snippet is cut off in the listing; the loop body below is
# not closed in the visible excerpt.
items.to_a.first(@item_limit).each do |item|
link = (item/"link")
desc = (item/"description")
guid = (item/"guid")
# Fetch the full-text article that the item's link points to. Per the original
# note, the item description is to be replaced with that content (the
# replacement itself is not shown in this excerpt).
# NOTE(review): `link` is the result of an Hpricot search, not a String —
# confirm that `open` accepts it, or whether the link's text should be passed.
# NOTE(review): this assignment reuses the name `doc`, which was already used
# for the feed document parsed above.
doc = open(link).read
hdoc = Hpricot(doc)
# Inner HTML of the elements matching the ".livewords" selector.
text = (hdoc.search(".livewords")).inner_html
module Fullfeed
module Extractor
# Extractor for Yahoo! News Hong Kong articles.
#
# Supplies XpathExtractor with two things: a pattern for the Yahoo! HK RSS
# redirect links (http://hk.rd.yahoo.com/news/rss/*http://....html) and the
# ".livewords" selector.
class YahooNewsHongKongExtractor < XpathExtractor
  # Register this extractor to the system.
  # (`register` is defined outside this listing.)
  register

  # Passes the link pattern and the content selector to the superclass.
  #
  # Every literal dot in the host name is escaped. The previous pattern left
  # the dot in "yahoo.com" unescaped, so it matched any character in that
  # position (for example "yahooXcom").
  def initialize
    super(%r{http://hk\.rd\.yahoo\.com/news/rss/\*http://.+\.html}, ".livewords")
  end
end
require "rubygems"
require "fullfeed"
require "yahoo_news_hong_kong_extractor"
# Build a full-text RSS feed from the Yahoo! News HK feed and print what
# Feed#fetch returns to standard output.
rss_url = "http://hk.news.yahoo.com/rss/hongkong/rss.xml"
feed = Fullfeed::Feed.new(rss_url)
$stdout.puts(feed.fetch)