Skip to content

Instantly share code, notes, and snippets.

@mattn
Created January 15, 2009 01:16
Show Gist options
  • Save mattn/47205 to your computer and use it in GitHub Desktop.
Save mattn/47205 to your computer and use it in GitHub Desktop.
use strict;
use warnings;
use URI;
use Web::Scraper;
use YAML;
# Scraper definition for an animate.tv web-radio detail page.
#
# Keys requested from the page:
#   entries     - one item per width-565 table that contains an .asx link
#                 (see $entry below for the per-item fields)
#   title       - text of the <title> element
#   image       - { url => ... } built from the @src of the img under
#                 span.main_title3 > strong
#   description - text of td.main_txt1 inside the width-573 table

# XPath used for both the enclosure and the link of an entry.
my $asx_anchor = '//a[contains(@href, ".asx")]';

# Filter callbacks: wrap the extracted attribute value ($_) in a hash.
my $as_enclosure = sub { return +{ url => $_, type => 'video/x-ms-asf' } };
my $as_image     = sub { return +{ url => $_ } };

# Per-entry scraper: title, body text, date and the .asx link in two forms
# (as an enclosure hash and as a plain link).
my $entry = scraper {
    process 'td.main_title2 > p', title => 'TEXT';
    process '//table[@width="553"]//td[@class="main_txt1"]', body => 'TEXT';
    process 'td.main_txt2', date => 'TEXT';
    process $asx_anchor, enclosure => [ '@href', $as_enclosure ];
    process $asx_anchor, link => '@href';
};

my $staff = scraper {
    process '//table[@width="565" and descendant::a[contains(@href,".asx")] ]',
        'entries[]' => $entry;
    process 'title', title => 'TEXT';
    process 'span.main_title3 > strong > img', image => [ '@src', $as_image ];
    process '//table[@width="573"]//td[@class="main_txt1"]',
        'description' => 'TEXT';
};
# Scrape each web-radio detail page and emit the result as YAML via warn
# (i.e. on STDERR).
#
# Every page id appears exactly once, so no page is fetched and dumped twice.
# detail_106 is deliberately excluded: 404 video not found.
my $detail_url_format = 'http://www.animate.tv/digital/web_radio/detail_%d.html';

for my $detail_id ( 104, 107, 108 ) {
    my $page_url = sprintf $detail_url_format, $detail_id;
    warn Dump $staff->scrape( URI->new($page_url) );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment