-
-
Save dohyunkim/240afe4b21f9d0aed465 to your computer and use it in GitHub Desktop.
romanization of hangul pronunciation -- 제1차 라텍 스터디 과제
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env perl | |
# ---------------------------------------------------------------------------
# Romanization tables.
#
# @LC / @MV / @TC are indexed by the initial-consonant, medial-vowel and
# final-consonant index of a Hangul syllable.  A "-" entry in the word lists
# stands for "no letter" and is turned into the empty string where noted.
# ---------------------------------------------------------------------------

# Initial consonants, pronunciation-based romanization ("-" becomes "").
my @LC = map { $_ ne "-" ? $_ : "" } qw(
    g kk n d tt r m b pp s ss - j jj ch k t p h
);

# Medial vowels (shared by both romanization modes).
my @MV = qw(
    a ae ya yae eo e yeo ye o wa wae oe yo u wo we wi yu eu ui i
);

# Final consonants, pronunciation-based romanization ("-" becomes "").
# Upper-case G, D, B, R are placeholders that romanize() resolves afterwards:
# lower-cased when a vowel follows, otherwise mapped through %TCNV.
my @TC = map { $_ ne "-" ? $_ : "" } qw(
    - G kk ks n nj nh D R lG lm lB ls lt lp lh m B ps s ss ng j ch k t p h
);

# Placeholder final => romanization when it is not followed by a vowel.
my %TCNV = (
    G => "k",
    D => "t",
    B => "p",
    R => "l",
);

# Initial consonants for the "academy" mode; here "-" is kept literally.
my @LC_A = qw(
    g kk n d tt l m b pp s ss - j jj ch k t p h
);

# Final consonants for the "academy" mode ("-" becomes "").
my @TC_A = map { $_ ne "-" ? $_ : "" } qw(
    - g kk gs n nj nh d l lg lm lb ls lt lp lh m b bs s ss ng j ch k t p h
);

# Concatenations of (previous final . next initial) before which romanize()
# inserts a hyphen in academy mode.
my %HYPH = map { $_ => 1 } qw(
    bss gss jj kk kkk lpp lss ltt njj pp ss sss tt
);

# Reverse lookups used by hangulize(): academy romanization => jamo index.
my (%R2LC, %R2MV, %R2TC);
@R2LC{@LC_A} = 0 .. $#LC_A;
@R2MV{@MV}   = 0 .. $#MV;
@R2TC{@TC_A} = 0 .. $#TC_A;
# Compose one precomposed Hangul syllable from its three jamo indices.
#
#   $cho  - index of the initial consonant
#   $jung - index of the medial vowel
#   $jong - index of the final consonant (0 means "no final")
#
# Returns the character at 0xAC00 + (($cho * 21) + $jung) * 28 + $jong.
# A missing (undef) component is treated as index 0.
sub jamo2syllable {
    # Unpack explicitly instead of calling shift three times inside one
    # arithmetic expression, which relied on operand evaluation order.
    my ($cho, $jung, $jong) = @_;

    for my $index ($cho, $jung, $jong) {
        $index = 0 unless defined $index;    # aliases the lexical above
    }

    return chr( ($cho * 21 + $jung) * 28 + $jong + 0xAC00 );
}
# Convert an academy-style romanization back into Hangul.
#
#   $input - romanized text; it is lower-cased and split on "-", and each
#            piece is then split into alternating consonant / vowel groups.
#
# Returns the concatenated Hangul syllables.
#
# Consonant groups are looked up in %R2LC (initial) and %R2TC (final), vowel
# groups in %R2MV.  A piece that starts with a vowel gets the "-" initial.
sub hangulize {
    my ($input) = @_;
    my @hanguls;

    for my $roman (split '-', lc $input) {
        my ($cho, $jung);

        # The capturing split yields consonant groups at even positions and
        # vowel groups at odd positions.
        my @roms = split /([aeiouwy]+)/, $roman;

        for my $i (0 .. $#roms) {
            my $rom = $roms[$i];

            if ($i % 2 == 1) {              # medial vowel
                $jung = $R2MV{$rom};
                # A piece ending in a vowel closes an open syllable.
                push @hanguls, jamo2syllable($cho, $jung, 0) if $i == $#roms;
            }
            elsif ($i == 0) {               # initial consonant of the piece
                $rom = "-" unless $rom;
                $cho = $R2LC{$rom};
            }
            elsif ($i == $#roms) {          # trailing final consonant
                push @hanguls, jamo2syllable($cho, $jung, $R2TC{$rom});
            }
            else {                          # final of this syllable + initial of the next
                # Try every cut position, shortest final first, and stop at
                # the first valid one.  Scanning the two hashes instead
                # emitted a syllable for *every* valid split of an ambiguous
                # cluster such as "kk" or "ss", in hash order.  Shortest-final
                # matches what romanize() leaves unhyphenated in academy mode
                # (an empty final followed by a doubled initial).
                for my $cut (0 .. length($rom)) {
                    my $tc = substr $rom, 0, $cut;
                    my $lc = substr $rom, $cut;
                    next unless exists $R2TC{$tc} and exists $R2LC{$lc};

                    push @hanguls, jamo2syllable($cho, $jung, $R2TC{$tc});
                    $cho = $R2LC{$lc};
                    last;
                }
                # An unrecognised cluster emits nothing here, as before.
            }
        }
    }

    return join '', @hanguls;
}
# Romanize a string containing Hangul syllables.
#
#   $academy - true:  letter-for-letter transliteration using @LC_A / @TC_A,
#                     with "-" for the silent initial and for the ambiguous
#                     joins listed in %HYPH;
#              false: pronunciation-based romanization using @LC / @TC.
#   $capital - true to upper-case the first character of the result.
#   $text    - the text to convert; characters outside U+AC00..U+D7A3 are
#              copied through unchanged.
#
# Returns the romanized string.
sub romanize {
    my ($academy, $capital, $text) = @_;
    my $initials = $academy ? \@LC_A : \@LC;
    my $finals   = $academy ? \@TC_A : \@TC;

    my $roman   = '';    # finished output
    my $run     = '';    # current Hangul run, placeholders not yet resolved
    my $last    = '';    # final consonant of the previous syllable
    my $in_word = 0;     # true while the previous character was a syllable

    # Resolve the G/D/B/R placeholders and the l+r assimilation inside the
    # pending Hangul run only, so that literal Latin text copied from the
    # input is never rewritten.
    my $flush = sub {
        unless ($academy) {
            $run =~ s/([GDBR])(\-?[aeiouwy])/lc($1).$2/ge;
            $run =~ s/([GDBR])/$TCNV{$1}/g;
            $run =~ s/lr/ll/g;
        }
        $roman .= $run;
        $run = '';
    };

    for my $char (split //, $text) {
        my $code = ord $char;

        if ($code >= 0xAC00 and $code <= 0xD7A3) {
            my $index = $code - 0xAC00;
            my $cho   = $initials->[ int($index / 588) ];
            my $jung  = $MV[ int(($index % 588) / 28) ];
            my $jong  = $finals->[ $index % 28 ];

            if ($academy) {
                $run .= "-" if $last and $HYPH{ $last . $cho };
                # The silent initial is written "-" but omitted at the start
                # of a word, not only at the start of the whole string.
                $cho = '' if $cho eq "-" and not $in_word;
            }

            $run .= $cho . $jung . $jong;
            $last    = $jong;
            $in_word = 1;
        }
        elsif ($char eq "-") {
            # Keep a literal hyphen inside the run so that a placeholder
            # final followed by "-" and a vowel is still lower-cased.
            $run .= $char;
            $last    = '';
            $in_word = 0;
        }
        else {
            $flush->();
            $roman .= $char;
            $last    = '';
            $in_word = 0;
        }
    }
    $flush->();

    $roman = ucfirst $roman if $capital;
    return $roman;
}
# ---------------------------------------------------------------------------
# Main: read the TeX file named on the command line, expand the \Romanize,
# \RomanizeA and \Hangulize macros in place, and pipe the result to xelatex.
# ---------------------------------------------------------------------------
my $texfile = $ARGV[0];
die "usage: $0 file.tex\n" unless defined $texfile;

my @lines;
open my $fh, "<:encoding(UTF-8)", $texfile
    or die "cannot open $texfile: $!\n";
while (<$fh>) {
    next if /^\s*\%/;    # drop whole-line TeX comments

    # Literal braces are escaped: an unescaped "{" in a pattern is deprecated
    # or fatal on newer perls.  The optional "*" is captured as $1 and passed
    # on as the $capital flag.
    s/\\Romanize\s*(\*?)\{(.*?)\}\[(.*?)\]/"$1$2=".romanize(0,$1,$3)/ge;
    s/\\Romanize\s*(\*?)\{(.*?)\}/"$1$2=".romanize(0,$1,$2)/ge;
    s/\\RomanizeA\s*(\*?)\{(.*?)\}/"$1$2=".romanize(1,$1,$2)/ge;
    s/\\Hangulize\s*\{(.*?)\}/"$1=".hangulize($1)/ge;

    push @lines, $_;
    last if /\\end\{document\}/;
}
close $fh;

# Job name = input file name without its extension.
(my $jobname = $texfile) =~ s/\.\w+$//;

# List-form pipe open: no shell is involved, so a file name containing spaces
# or shell metacharacters is passed to xelatex as a single argument.
open $fh, "|-", "xelatex", "-jobname=$jobname"
    or die "cannot run xelatex: $!\n";
binmode $fh, ":encoding(UTF-8)";

# NOTE(review): the leading \relax presumably stops TeX from reading the first
# stdin line as a file name - confirm.
print $fh "\\relax\n", @lines;

# Closing a pipe waits for the child; report a failure instead of hiding it.
close $fh
    or warn $! ? "error closing pipe to xelatex: $!\n"
               : "xelatex exited with status $?\n";
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Sample input for the preprocessing script. The script rewrites the
% \Romanize, \RomanizeA and \Hangulize macros before the document is passed
% to xelatex, and skips whole-line comments such as these.
| \documentclass{article} | |
| \usepackage{kotex} | |
| \begin{document} | |
% \Romanize{spelling}[pronunciation]: the bracketed text, when present, is
% what gets romanized; the starred form capitalizes the first letter.
| \Romanize{벚꽃}[벋꼳]\par | |
| \Romanize*{영동}\par | |
| \Romanize{백암}\par | |
| \Romanize{월곶}[월곧]\par | |
| \Romanize{한밭}[한받]\par | |
| \Romanize{구리}\par | |
| \Romanize{설악}\par | |
| \Romanize{울릉}\par | |
| \Romanize{대관령}[대괄령]\par | |
| \Romanize{학여울}[항녀울]\par | |
| \Romanize*{해-운대}\par | |
| \bigskip | |
% \RomanizeA{text}: romanized with the script's "academy" tables.
| \RomanizeA{벚꽃}\par | |
| \RomanizeA{영동}\par | |
| \RomanizeA{백암}\par | |
| \RomanizeA*{옥천}\par | |
| \RomanizeA{월곶}\par | |
| \RomanizeA{한밭}\par | |
| \RomanizeA{칠곡}\par | |
| \RomanizeA{대관령}\par | |
| \RomanizeA{학여울}\par | |
| \RomanizeA{없었습니다.}\par | |
| \bigskip | |
% \Hangulize{romanization}: converted back to Hangul by the script.
| \Hangulize{eobs-eoss-seubnida}\par | |
| \end{document} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
컴파일은...
perl r.pl t.tex