Created
May 12, 2012 15:35
-
-
Save keiya/2667184 to your computer and use it in GitHub Desktop.
www.traileraddict.com downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/perl | |
| # | |
| # by Keiya Chinen | |
| use strict; | |
| use warnings; | |
| use utf8; | |
| use LWP; | |
| use LWP::ConnCache; | |
| use Data::Dumper; | |
| use Text::CSV_XS; | |
| use Digest::SHA qw(sha256_hex); | |
# Crawl the traileraddict.com film index, download every trailer that is
# found, and append one tab-separated metadata row per downloaded trailer to
# trailer_db.tsv.  Anything that cannot be fetched or parsed is reported in
# fail.log and skipped, so a single bad page does not abort the whole crawl.

$| = 1;    # unbuffered STDOUT so progress lines show up immediately

# http keepalive
my $conncache = LWP::ConnCache->new;
my $ua = LWP::UserAgent->new;
my $base_url = 'http://www.traileraddict.com/';
my $list_base_url = $base_url.'thefilms/';

# Number of index pages under thefilms/ that are walked.
my $last_list_page = 95;

# Labels scraped from each movie detail page; they double as hash keys.
my @detail_fields = qw(Studio Release Director Writer Cast Genre);

# $FH and $ER stay file-scoped: access() and save() log through $ER.
open(my $FH, '>>', 'trailer_db.tsv') or die "Could not open trailer_db.tsv: $!";
open(my $ER, '>>', 'fail.log') or die "Could not open fail.log: $!";

# 2 is the traditional numeric value of LOCK_EX (exclusive lock).
flock($FH, 2) or warn "Could not lock trailer_db.tsv: $!";

for my $i (1 .. $last_list_page) {
    my $movie_list = access($list_base_url.$i,$ua,$conncache);
    next unless defined $movie_list;    # failure already logged by access()

    my @movie_detail_paths = $movie_list =~ m|<a href="/tags/(.+?)">|g;
    # scalar(@array) is the element count; $#array would be one too low.
    print('movie list page '.$i.' : '.scalar(@movie_detail_paths)." movies detected.\n");

    foreach my $movie_detail_path (@movie_detail_paths) {
        print('movie page : '.$movie_detail_path."\n");
        my $movie_detail = access($base_url.'tags/'.$movie_detail_path,$ua,$conncache);
        next unless defined $movie_detail;

        # Capture each field via list assignment so that a field missing from
        # the page becomes undef instead of inheriting a stale $1 from the
        # previous successful match.
        my %detail_metadata;
        for my $field (@detail_fields) {
            my ($value) = $movie_detail =~ m|<span style="color:#848D57;">\Q$field\E:</span> (.+?)</div>|;
            $detail_metadata{$field} = $value;
        }

        # \Q...\E: the path is scraped text, match it literally.
        my @movie_trailer_paths = $movie_detail =~ m|<a href="/trailer/\Q$movie_detail_path\E/(.+?)"|g;
        my @movie_trailer_paths_uniq = uniqArray(\@movie_trailer_paths);

        foreach my $movie_trailer_path (@movie_trailer_paths_uniq) {
            my $movie_trailer = access($base_url.'trailer/'.$movie_detail_path.'/'.$movie_trailer_path,$ua,$conncache);
            next unless defined $movie_trailer;

            my ($tid) = $movie_trailer =~ m/var tkey = (\d+);/;
            unless (defined $tid) {
                print $ER "Could not find trailer id : $movie_detail_path/$movie_trailer_path\n";
                next;
            }

            my $movie_metadata_raw = access($base_url.'fvare.php?tid='.$tid,$ua,$conncache);
            next unless defined $movie_metadata_raw;

            # Start from the page-level fields for every trailer so that keys
            # returned for a previous trailer cannot leak into this row.
            my %movie_metadata = %detail_metadata;
            foreach my $movie_metadata_val (split(/&/, $movie_metadata_raw)) {
                # Limit 2: keep any '=' that is part of the value.
                my ($key, $value) = split(/=/, $movie_metadata_val, 2);
                next unless defined $key && length $key;
                $value = '' unless defined $value;
                # Form-style decoding: '+' is a space, %XX is a raw byte.
                $value =~ tr/+/ /;
                $value =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;
                $movie_metadata{$key} = $value;
            }

            # The local file name is a digest over the identifying fields.
            my $digest_key = join('||',
                map { defined $_ ? $_ : '' }
                @movie_metadata{qw(filmtitle title Studio Release Director Writer Cast)});
            # sha256_hex croaks on characters above 0xFF; encode only in that
            # case so digests of byte-only keys stay unchanged.
            utf8::encode($digest_key) if $digest_key =~ /[^\x00-\xFF]/;
            $movie_metadata{'filename'} = Digest::SHA::sha256_hex($digest_key);

            unless (defined $movie_metadata{'fileurl'} && length $movie_metadata{'fileurl'}) {
                print $ER "Could not find file url : tid $tid\n";
                next;
            }

            my $res = save($movie_metadata{'fileurl'},$ua,$movie_metadata{'filename'},$conncache);
            next unless defined $res;    # failure already logged by save()

            # content_type returns (type, params) in list context; force
            # scalar so the row keeps exactly one column for it.
            my @row = (
                @movie_metadata{qw(filmtitle title trailerd filename)},
                scalar($res->content_type),
                @movie_metadata{qw(Studio Release Director Writer Cast Genre)},
            );
            print {$FH} join("\t", map { defined $_ ? nonl($_) : '' } @row), "\n";
        }
    }
}

close($FH) or warn "Could not close trailer_db.tsv: $!";
close($ER) or warn "Could not close fail.log: $!";
# Fetch a URL and hand back its decoded body.
#
# Positional arguments:
#   1. URL to request
#   2. user agent object used to issue the GET
#   3. connection cache attached to the agent before the request
#
# Returns the decoded content on success.  On failure a
# "Could not access" line is written to the file-level $ER handle and
# undef is returned.
sub access {
    my ($url, $agent, $cache) = @_;

    print('[GET] '.$url."\n");
    $agent->conn_cache($cache);

    my $response = $agent->get($url);
    unless ($response->is_success) {
        #die "Failed\n";
        print $ER "Could not access : $url\n";
        return undef;
    }

    my $body = $response->decoded_content;
    return $body;
}
# Download a URL straight into a local file.
#
# Positional arguments:
#   1. URL to request
#   2. user agent object used to issue the GET
#   3. name of the file the response body is written to
#   4. connection cache attached to the agent before the request
#
# Returns the response object on success.  On failure a
# "Could not save" line is written to the file-level $ER handle and
# undef is returned.
sub save {
    my ($url, $agent, $target_file, $cache) = @_;

    print('[SAV] '.$url."\n");
    $agent->conn_cache($cache);

    my $response = $agent->get($url, ':content_file' => $target_file);
    return $response if $response->is_success;

    print $ER "Could not save : $url\n";
    return undef;
}
# Strip every line-feed and carriage-return character from a string.
#
# Argument: the string to clean (may be undef).
# Returns the cleaned copy; the caller's variable is not modified.  An
# undefined argument yields the empty string, so the result can always be
# concatenated without "uninitialized value" warnings.
sub nonl {
    my $str = shift;
    return '' unless defined $str;
    $str =~ tr/\n\r//d;    # delete-only transliteration, no regex needed
    return $str;
}
# http://katsubemakito.net/cgiperl/variable/array/post-44.html
#
# Remove duplicate values from an array.
#
# Argument: reference to the array to de-duplicate (not modified).
# Returns the distinct values as a list, in the order in which each value
# first appears.  Values are compared as strings (they are used as hash
# keys).  The order is deterministic, unlike the order of "keys %hash".
sub uniqArray{
    my $array = shift;
    my %seen = ();
    # Post-increment is false only the first time a value is encountered.
    return grep { !$seen{$_}++ } @$array;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment