Last active
August 29, 2015 14:07
-
-
Save ksc91u/73f9ee5fd722c6eedd0c to your computer and use it in GitHub Desktop.
Extract subtitles from a zip/rar archive and convert them to UTF-8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use File::Basename; | |
use Encode; | |
require Encode::Detect; | |
use base qw(Encode::Encoding); | |
use Encode qw(find_encoding); | |
use Encode::Detect::Detector; | |
use URL::Encode qw(url_encode_utf8 url_encode); | |
# Entry point.
#
# Usage:
#   script VIDEO_FILE ARCHIVE             (a .rar/.zip holding subtitles)
#   script VIDEO_FILE SUBTITLE [...]      (one or more loose subtitle files)
#
# Subtitles are staged in /tmp/sub, converted to UTF-8, renamed after
# VIDEO_FILE, moved next to it, and finally the staging directory is removed.
if (@ARGV < 2) {
    die "Usage: $0 VIDEO_FILE ARCHIVE_OR_SUBTITLE [SUBTITLE ...]\n";
}

$dest = $ARGV[0];

# Only treat the second argument as an archive when its *extension* says so;
# an unanchored match would misfire on names such as "rarity.srt".
if ($ARGV[1] =~ m/\.(?:rar|zip)$/i) {
    $zip = $ARGV[1];
    extract($zip);
}
else {
    system('mkdir', '/tmp/sub');
    for my $j (1 .. $#ARGV) {
        # List form bypasses the shell, so quotes/spaces in names are safe.
        system('cp', '--', $ARGV[$j], '/tmp/sub');
    }
}

#getSub($ARGV[0]);
conv();
moveTo($ARGV[0]);
system('rm', '-rf', '/tmp/sub');
# Run "curl -k URL" without involving a shell and return the response body
# (empty string when curl cannot be started or produces no output).
sub _curl_get {
    my ($url) = @_;
    open(my $fh, '-|', 'curl', '-k', $url) or return '';
    my $body = do { local $/; <$fh> };
    close $fh;
    return defined $body ? $body : '';
}

# Query shooter.cn for subtitles matching the episode named in a video path.
#
# Parameter: path of the video file; its basename must contain an
#            "SxxEyy" episode tag, everything after the tag is ignored.
#
# Creates /tmp/sub, prints the episode key, fetches the search page and then
# every ".xml" link found under an "introtitle" element.  The fetched
# documents are not stored anywhere yet (the only call site is commented out).
# Returns nothing; returns early when no episode tag is present.
sub getSub {
    my $f = shift;
    system('mkdir', '/tmp/sub');

    my ($name) = fileparse($f);

    # Without this check a failed match would silently reuse a stale $1.
    return unless $name =~ m/(.*S\d\dE\d\d)/;
    my $episode = $1;
    print $episode;

    my $url = "http://shooter.cn/search2/" . url_encode($episode);
    my $out = _curl_get($url);

    my @matches = $out =~ /class="introtitle".*?href="(.*?\.xml)"/g;
    for my $m (@matches) {
        # $m comes from remote HTML: it must never reach a shell command line.
        $out = _curl_get("http://shooter.cn/$m");
    }
    return;
}
# Move every staged subtitle from /tmp/sub next to the given video file.
#
# Parameter: path of the video file.
#
# Each subtitle is renamed to
#   <video name without extension>.<second-to-last part>.<extension>
# where the parts come from splitting the subtitle's own name on ".", so
# "Show.S01E01.cht.srt" becomes "<video>.cht.srt".
# Dies if /tmp/sub cannot be opened.
sub moveTo {
    my $f = shift;
    my ($filename, $directories) = fileparse($f);

    # Strip the extension, but keep the name intact when there is no dot
    # (otherwise the episode name would end up empty).
    my @filename_parts = split /\./, $filename;
    pop @filename_parts if @filename_parts > 1;
    my $episode_name = join(".", @filename_parts);

    opendir(my $dh, "/tmp/sub") || die "Error in opendir";
    my @files = grep { /\.(ssa|ass|aas|srt)$/ } readdir($dh);
    closedir $dh;

    foreach my $subname (@files) {
        my @subname_parts = split /\./, $subname;
        my $s = $#subname_parts;
        my $sub_name = join(".", $episode_name, $subname_parts[$s - 1], $subname_parts[$s]);

        # List form: names extracted from archives may contain quotes,
        # dollar signs or backticks and must not be parsed by a shell.
        system('mv', '-f', '--', "/tmp/sub/$subname", "$directories/$sub_name");
    }
}
# Unpack an archive into /tmp/sub and flatten it.
#
# Parameter: path of the archive (anything "unar" can read).
#
# After extraction, subtitle files found in sub-directories are moved up
# into /tmp/sub itself, and files whose name contains "简体" are deleted.
sub extract {
    my $z = shift;
    system('mkdir', '/tmp/sub');

    # List form keeps archive paths with spaces or shell metacharacters intact.
    system('unar', '-no-directory', '-output-directory', '/tmp/sub', $z);

    # Collect subtitles that live below the top level.  Doing the walk in Perl
    # (instead of a shell "for i in `find`" loop) copes with whitespace in
    # directory names.  ".ssa" is included because moveTo handles it as well.
    require File::Find;
    my @nested;
    File::Find::find(
        {
            no_chdir => 1,
            wanted   => sub {
                return unless -f $_;
                return if $File::Find::dir eq '/tmp/sub';    # already top level
                push @nested, $_ if /\.(?:ssa|ass|aas|srt)$/;
            },
        },
        '/tmp/sub'
    );
    system('mv', '--', @nested, '/tmp/sub/') if @nested;

    system("rm -f /tmp/sub/*简体*");
    #system("rm -f /tmp/sub/*gb*");
    #system("rm -f /tmp/sub/*lol*");
}
# Guess the character encoding of a file.
#
# Parameter: path of the file to inspect.
# Returns:   the name reported by detect() for the file's full content, or
#            "ISO8859-1" when detect() returns nothing or a name shorter
#            than three characters.
# Dies when the file cannot be opened.
sub guess_encoding {
    my $filename = shift;

    # Three-argument open: the name is never interpreted as a mode or a pipe.
    open(my $fh, '<', $filename) or die "Couldn't open file: $!";
    binmode $fh;                                # detection works on raw bytes
    my $string = do { local $/; <$fh> };        # slurp the whole file
    close $fh;
    $string = '' unless defined $string;

    my $encoding = detect($string);
    if (!defined($encoding) || length($encoding) < 3) {
        return "ISO8859-1";
    }
    return $encoding;
}
# Convert every staged subtitle in /tmp/sub to UTF-8, in place.
#
# For each subtitle the encoding is guessed with guess_encoding(); any
# encoding whose upper-cased name contains "BIG" is treated as BIG5-HKSCS.
# The conversion is done by iconv into the temporary file /tmp/sub/123, which
# replaces the subtitle only when iconv succeeded; on failure the subtitle is
# left untouched and a warning is printed, so a wrong guess cannot truncate
# or destroy the file.
# Dies if /tmp/sub cannot be opened.
sub conv {
    opendir(my $dh, "/tmp/sub") || die "Error in opendir";
    # ".ssa" is listed so that everything moveTo moves has been converted.
    my @files = grep { /\.(ssa|ass|aas|srt)$/ } readdir($dh);
    closedir $dh;

    my $tmp = "/tmp/sub/123";
    foreach my $filename (@files) {
        my $path     = "/tmp/sub/$filename";
        my $encoding = uc(guess_encoding($path));
        if ($encoding =~ /BIG/) {
            $encoding = "BIG5-HKSCS";
        }
        print "Encoding ... $encoding\n";

        # Run iconv without a shell and capture its output ourselves.
        my $ok = 0;
        if (open(my $in, '-|', 'iconv', '-f', $encoding, '-t', 'utf-8', $path)) {
            binmode $in;
            my $converted = do { local $/; <$in> };
            $converted = '' unless defined $converted;

            # close() on a pipe is false when the command exited non-zero.
            if (close $in) {
                if (open(my $out, '>', $tmp)) {
                    binmode $out;
                    $ok = 1 if print {$out} $converted;
                    close $out or $ok = 0;
                }
            }
        }

        if ($ok) {
            rename($tmp, $path)
                or warn "Could not replace $path: $!\n";
        }
        else {
            warn "Conversion of $path from $encoding failed; file left unchanged\n";
            unlink $tmp;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment