-
-
Save niratama/afa4ca12961c2ca232be to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use utf8;
use strict;
use warnings;
use Encode qw(encode_utf8 decode_utf8);
# Truncate a character string so that it satisfies both a byte limit
# (measured on its UTF-8 encoding) and a character limit.
#
# Arguments:
#   $string   - decoded (character) string to truncate
#   $byte_len - maximum size of the UTF-8 encoding, in bytes
#   $str_len  - maximum length, in characters
#
# Returns the longest prefix of $string that fits both limits. Characters
# are removed whole, so the result never ends in a partial character.
# A negative limit is treated as "nothing fits" and yields the empty string.
sub kirisute_gomen {
    my ($string, $byte_len, $str_len) = @_;

    # Without this guard the trimming loop below would never terminate:
    # even an empty string is "longer" than a negative limit.
    return '' if $byte_len < 0 || $str_len < 0;

    # Fast path: the string already fits both limits.
    return $string
        if length($string) <= $str_len
        && length(encode_utf8($string)) <= $byte_len;

    # Strategy: cut to the character limit first, then discard trailing
    # characters one at a time until the encoded size fits.
    my $result = substr($string, 0, $str_len);

    # Track the encoded size incrementally instead of re-encoding the whole
    # string on every iteration.
    my $bytes = length(encode_utf8($result));
    while ($bytes > $byte_len) {
        my $dropped = chop($result);    # chop removes and returns the last character
        $bytes -= length(encode_utf8($dropped));
    }

    return $result;
}
# Truncate a character string to a character limit and a UTF-8 byte limit
# by cutting the encoded byte string and then repairing the cut point.
#
# Arguments:
#   $string   - decoded (character) string to truncate
#   $byte_len - maximum size of the UTF-8 encoding, in bytes
#   $str_len  - maximum length, in characters
#
# Returns the truncated string, decoded back to characters. A character
# that straddles the byte limit is dropped entirely. A negative limit is
# treated as "nothing fits" and yields the empty string.
sub re_kirisute_gomen {
    my ($string, $byte_len, $str_len) = @_;

    # substr() interprets a negative length as "leave that many off the
    # end", which would return an over-long result instead of an empty one.
    return '' if $byte_len < 0 || $str_len < 0;

    # Apply the character limit first, then switch to a byte string.
    my $byte_str = encode_utf8(substr($string, 0, $str_len));

    if (length($byte_str) > $byte_len) {
        # Hard cut at the byte limit; this may land in the middle of a
        # multi-byte character.
        $byte_str = substr($byte_str, 0, $byte_len);

        # Remove a trailing incomplete sequence: a lead byte followed by
        # fewer continuation bytes than that lead byte announces.
        $byte_str =~ s/(?:
            [\xc0-\xdf]
          | [\xe0-\xef][\x80-\xbf]{0,1}
          | [\xf0-\xf7][\x80-\xbf]{0,2}
          | [\xf8-\xfb][\x80-\xbf]{0,3}
          | [\xfc-\xfd][\x80-\xbf]{0,4}
        )\z//msx;
    }

    # Decode the bytes back into a character string.
    return decode_utf8($byte_str);
}
# Print a one-line summary of a character string to the default output
# handle: the string itself (UTF-8 encoded, in double quotes), its length in
# characters, and the length of its UTF-8 encoding in bytes.
#
# Arguments:
#   $str - decoded (character) string to describe
sub show_string_detail {
    my ($str) = @_;

    # Encode once: the bytes are both what gets printed and what is measured.
    my $utf8_str = encode_utf8($str);

    printf '"%s" %d characters, %d bytes', $utf8_str, length($str), length($utf8_str);
    print "\n";
}
# Demo: truncate to at most 5 characters and 10 bytes of UTF-8.

# Character-by-character trimming.
show_string_detail(kirisute_gomen('あいうえお', 10, 5));       # あいう, 9 bytes
show_string_detail(kirisute_gomen('あいueお', 10, 5));         # あいue, 8 bytes
print "\n";

# Byte-level cut followed by removal of a trailing partial character.
show_string_detail(re_kirisute_gomen('あいうえお', 10, 5));    # あいう, 9 bytes
show_string_detail(re_kirisute_gomen('あいueお', 10, 5));      # あいue, 8 bytes

# Boundary cases around the 10-byte limit.
show_string_detail(re_kirisute_gomen('あいuえお', 10, 5));     # あいuえ, 10 bytes (cut lands on a character boundary)
show_string_detail(re_kirisute_gomen('あいうДお', 10, 5));     # あいう, 9 bytes (2-byte Д is split and dropped)
show_string_detail(re_kirisute_gomen('あいueДお', 10, 5));     # あいueД, 10 bytes (character limit alone is enough)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment