Skip to content

Instantly share code, notes, and snippets.

@beppu
Created August 3, 2010 18:45
Show Gist options
  • Select an option

  • Save beppu/506902 to your computer and use it in GitHub Desktop.

Select an option

Save beppu/506902 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use common::sense;
use Web::Scraper;
use Data::Dump 'pp';
use URI;
use IO::All;
use aliased 'Squatting::H';
# URL template for one page of the site; the %d placeholder is filled in with
# a page id from @id.
my $url_pattern = 'http://www.enterprisecorruption.com/?page_id=%d';

# Page ids to fetch, listed in the order they are processed. The position in
# this list (starting at 1) becomes the numeric prefix of the output files.
my @id = qw(
    41 42 11 43 44 50 51
    45  9 46 47 48 10 49
);

# Scraper that pulls two text fields out of each page: the text of the
# <title> element (stored as "title") and the text of the element matching
# the "#primary" selector (stored as "body").
my $scraper = scraper {
    process 'title',    title => 'TEXT';
    process '#primary', body  => 'TEXT';
};
# Another way of converting from HTML to text:
# w3m -no-graph -dump http://www.enterprisecorruption.com/\?page_id=41 | sed '/^Jump to comment/,$d' | sed -n -e :a -e '1,4!{P;N;D;};N;ba'
# Better text-to-speech options?
# Use Vocaloid software?
# Use OS X's say command?
# Prototype object for a scraped article. Every per-page article is made by
# cloning this object, so each clone carries the slots defined here alongside
# the scraped "title" and "body" fields.
#
# The filename_* slots hold code refs that are called as methods (for example
# $article->filename_txt) and receive the article as their first argument.
# Each one returns "<two-digit index>-<title>.<extension>".
my $Article = H->new({
    # Position of the article in the series; set by the caller after cloning.
    i => 0,

    # Name of the plain-text file holding the article body.
    filename_txt => sub {
        my ($self) = @_;
        return sprintf('%02d-%s.txt', $self->i, $self->title);
    },

    # Name of the intermediate WAV file. It has to differ from filename_txt:
    # with the same name the speech synthesizer would be told to write its
    # output over its own input file.
    filename_wav => sub {
        my ($self) = @_;
        return sprintf('%02d-%s.wav', $self->i, $self->title);
    },

    # Name of the final MP3 file.
    filename_mp3 => sub {
        my ($self) = @_;
        return sprintf('%02d-%s.mp3', $self->i, $self->title);
    },
});
# Main loop: for every page id, scrape the page, save its body as text,
# synthesize speech from the text with text2wave, and encode the result to
# MP3 with lame. Output files are numbered in processing order, starting at 1.
my $i = 1;
for my $id (@id) {
    my $page    = URI->new(sprintf($url_pattern, $id));
    my $article = $Article->clone($scraper->scrape($page));
    $article->i($i++);

    # Strip the first three characters and the trailing site name from the
    # page title, leaving just the article's own title.
    $article->{title} =~ s/^...//;
    $article->{title} =~ s/\s*EnterpriseCorruption\.com$//;

    # The title comes from a remote page and ends up inside file names, so a
    # "/" in it must not be allowed to act as a path separator.
    $article->{title} =~ tr{/}{-};

    print "$article->{title}\n";

    my $txt = $article->filename_txt;
    my $wav = $article->filename_wav;
    my $mp3 = $article->filename_mp3;

    # IO::All's overloaded "<" writes the right-hand string into the file.
    io($txt) < $article->body;

    # List-form system() bypasses the shell, so quotes, apostrophes or other
    # shell metacharacters in a scraped title cannot break or hijack the
    # command line.
    if (system('text2wave', '-scale', '50', '-o', $wav, $txt) != 0) {
        warn "text2wave failed for '$txt' (status $?)\n";
        next;    # no WAV to encode, move on to the next article
    }

    if (system('lame', $wav, $mp3) != 0) {
        warn "lame failed for '$wav' (status $?)\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment