Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vovkasm/3856881 to your computer and use it in GitHub Desktop.
Save vovkasm/3856881 to your computer and use it in GitHub Desktop.
HTML::Tidy: fix inverted utf8::encode logic
diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
index 26ee3b0..6393b00 100644
--- a/lib/HTML/Tidy.pm
+++ b/lib/HTML/Tidy.pm
@@ -223,7 +223,7 @@ sub parse {
}
my $html = join( '', @_ );
- utf8::encode($html) unless utf8::is_utf8($html);
+ utf8::encode($html) if utf8::is_utf8($html);
my ($errorblock,$newline) = _tidy_messages( $html,
$self->{config_file},
$self->{tidy_options}
@@ -308,7 +308,7 @@ sub clean {
}
my $text = join( '', @_ );
- utf8::encode($text) unless utf8::is_utf8($text);
+ utf8::encode($text) if utf8::is_utf8($text);
if ( defined $text ) {
$text .= "\n";
}
diff --git a/t/unicode.t b/t/unicode.t
index 811b6fa..b88dcfd 100644
--- a/t/unicode.t
+++ b/t/unicode.t
@@ -4,7 +4,8 @@
use warnings;
use strict;
-use Test::More tests => 7;
+use Test::More tests => 10;
+use Encode ();
BEGIN {
use_ok( 'HTML::Tidy' );
@@ -43,6 +44,15 @@ $tidy->parse( '', $html );
@messages = $tidy->messages;
is( scalar @messages, 0, q{There still shouldn't be any errors});
+# try sending raw bytes to the clean method
+my $html2 = Encode::encode('utf8',$html);
+ok(!utf8::is_utf8($html2), 'html2 is row bytes');
+my $clean2 = $tidy->clean( $html2 );
+ok(utf8::is_utf8($clean2), 'but cleaned output is string');
+$clean2 =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
+$clean2 =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
+is($clean2, $reference, q{Cleanup didn't break anything});
+
__DATA__
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
<html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment