Last active
November 17, 2020 23:23
-
-
Save lestrrat/ee562b47cdbe6e2916281b1a3a50f37d to your computer and use it in GitHub Desktop.
開始時間/終了時間/英語/日本語 の4つのカラムのTSVファイルから、日本語訳文をSRTファイル形式に整形するツール。1行目にヘッダがある想定。使い方はコメントの中に。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Preparation:
#
#   1. Install perl5 and cpanm
#   2. cpanm -L local -n Text::CSV_XS
#
# Execution (the SRT text is written to standard output):
#
#   perl script.pl source.tsv
#
use strict;
use warnings;
use utf8;
use lib "local/lib/perl5";
use Text::CSV_XS;
use Encode qw(encode_utf8);
# Convert a four-column TSV file into SRT subtitle entries on STDOUT.
#
# TSV file format (the first line is a header and is discarded):
#   col 1: start time
#   col 2: end time
#   col 3: English
#   col 4: Japanese
#
# Only the Japanese column is used as subtitle text; the start/end values
# are written exactly as they appear in the TSV.
my $filename = shift @ARGV;
defined $filename
    or die "Usage: perl $0 source.tsv\n";

my $csv = Text::CSV_XS->new({binary => 1, sep_char => "\t"})
    or die "Cannot create TSV parser: " . Text::CSV_XS->error_diag();

open my $fh, "<:encoding(utf8)", $filename
    or die "Cannot open '$filename': $!";

# Discard the header line.
my $header = <$fh>;

# SRT entries are numbered consecutively from 1; rows that are skipped
# below do not consume a number.
my $entry_no = 1;

while (my $row = $csv->getline($fh)) {
    my ($start, $end, undef, $text) = @$row;

    # Skip rows without a Japanese translation. This also covers short
    # rows where the fourth column is missing entirely.
    next unless defined $text && length $text;

    # Entry format:
    # ====
    # $entry_no
    # $start_timestamp --> $end_timestamp
    # $lines_of_text
    # <empty new line>
    # ====
    # The fields were decoded while reading, so encode the complete entry
    # once to keep every part of the output consistently UTF-8.
    print encode_utf8("$entry_no\n$start --> $end\n$text\n\n");
    $entry_no++;
}

# getline() returns false both at end of file and on a parse error.
# Without this check a malformed row would silently truncate the output.
unless ($csv->eof) {
    my ($code, $message) = $csv->error_diag();
    die "Failed to parse '$filename': $message (code $code)\n";
}

close $fh
    or die "Cannot close '$filename': $!";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment