Skip to content

Instantly share code, notes, and snippets.

@tokuhirom
Created February 17, 2009 04:12
Show Gist options
  • Save tokuhirom/65573 to your computer and use it in GitHub Desktop.
Save tokuhirom/65573 to your computer and use it in GitHub Desktop.
From 699d735d94897a2b281b6924535bff573c47e5d6 Mon Sep 17 00:00:00 2001
From: Tokuhiro Matsuno <[email protected]>
Date: Tue, 17 Feb 2009 12:36:22 +0900
Subject: [PATCH] use perl5.8's unicode way
---
lib/FormValidator/Simple/Plugin/Japanese.pm | 78 +++++++++------------------
1 files changed, 25 insertions(+), 53 deletions(-)
diff --git a/lib/FormValidator/Simple/Plugin/Japanese.pm b/lib/FormValidator/Simple/Plugin/Japanese.pm
index ea6da51..1105f32 100644
--- a/lib/FormValidator/Simple/Plugin/Japanese.pm
+++ b/lib/FormValidator/Simple/Plugin/Japanese.pm
@@ -4,76 +4,48 @@ use base qw/
FormValidator::Simple::Plugin::Number::Phone::JP
/;
use FormValidator::Simple::Exception;
+use FormValidator::Simple::Validator;
use FormValidator::Simple::Constants;
-use Unicode::RecursiveDowngrade;
use Mail::Address::MobileJp;
-use Jcode;
+use Encode ();
our $VERSION = '0.05';
-# plugin specific method
-sub __japanese_check_charset {
- my ($self, $charset) = @_;
- my @charsets = (
- [qw/UTF-8 utf8 utf8/],
- [qw/EUC-JP euc euc /],
- [qw/Shift_JIS shiftjis sjis/],
- );
- foreach my $set ( @charsets ) {
- foreach my $name ( @$set ) {
- if( $charset eq $name ) {
- return $set->[2];
- }
- }
- }
- FormValidator::Simple::Exception->throw(
- qq/wrong charset "$charset"./
- );
-}
-
-# plugin specific method
-sub __japanese_encode2euc {
- my ($self, $value) = @_;
- my $charset = $self->options->{charset} || 'utf8';
- $charset = $self->__japanese_check_charset($charset);
-
- my $rd = Unicode::RecursiveDowngrade->new;
- $rd->filter( sub { Jcode->new($value, $charset)->euc } );
- return $rd->downgrade($value);
-}
-
-# plugin specific method
-sub __japanese_delete_sp {
- my ($self, $value) = @_;
- $value = $self->__japanese_encode2euc($value);
- $value =~ s/ //g;
- my $ascii = '[\x00-\x7F]';
- my $two_bytes = '[\x8E\xA1-\xFE][\xA1-\xFE]';
- my $three_bytes = '\x8F[\xA1-\xFE][\xA1-\xFE]';
- $value =~ s/\G((?:$ascii|$two_bytes|$three_bytes)*?)(?:\xA1\xA1)/$1/g;
- return $value;
-}
+our ($DECODE, $DELETE_SP);
+BEGIN {
+ $DECODE = sub {
+ my ($self, $str) = @_;
+ return $str if Encode::is_utf8($str);
+
+ my $charset = $self->options->{charset} || 'utf8';
+ return Encode::decode($charset, $str);
+ };
+ $DELETE_SP = sub {
+ local $_ = shift;
+ s/\s//g;
+ $_;
+ };
+};
sub HIRAGANA {
my ($self, $params, $args) = @_;
- my $value = $self->__japanese_delete_sp( $params->[0] );
- return $value =~ /^(?:\xA4[\x00-\xFF]|\xA1\xBC)+$/ ? TRUE : FALSE;
+ my $v = $DELETE_SP->($DECODE->($self, $params->[0]));
+ return $v =~ /^\p{InHiragana}+$/ ? TRUE : FALSE;
}
sub KATAKANA {
my ($self, $params, $args) = @_;
- my $value = $self->__japanese_delete_sp( $params->[0] );
- return $value =~ /^(?:\xA5[\x00-\xFF]|\xA1\xBC)+$/ ? TRUE : FALSE;
+ my $v = $DELETE_SP->($DECODE->($self, $params->[0]));
+ return $v =~ /^\p{InKatakana}+$/ ? TRUE : FALSE;
}
+# This method is kept only for backward compatibility.
+# Once the value is decoded, Perl 5.8's Unicode support counts the length of multi-byte strings correctly.
+# Please use LENGTH instead of this method.
sub JLENGTH {
my ($self, $params, $args) = @_;
- my $min = $args->[0] || 0;
- my $max = $args->[1] || 0;
- my $value = $self->__japanese_encode2euc( $params->[0] );
- my $length = Jcode->new($value, 'euc')->jlength;
- $min += 0; $max += 0; $max ||= $min;
- return ($min <= $length and $length <= $max) ? TRUE : FALSE;
+ my $v = $DELETE_SP->($DECODE->($self, $params->[0]));
+ FormValidator::Simple::Validator::LENGTH( $self, [$v], $args );
}
sub ZIP_JP {
--
1.6.1.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment