Created
June 11, 2013 15:22
-
-
Save shalk/5757771 to your computer and use it in GitHub Desktop.
Fix a bug in LWP: the 'euc-cn' charset is not recognized correctly.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fix a bug in LWP: the 'euc-cn' charset is not recognized correctly.
#
# When I call HTTP::Response::decoded_content, it does not decode 'euc-cn' HTML as euc-cn;
# instead it uses 'iso-8859-1' to decode the HTML.
# Because 'iso-8859-1' accepts any byte sequence, the decode step itself does not fail,
# but HTTP::Response::content_charset does not return the right charset name.
#
# There are two causes:
# one is that Encode::Encoding::mime_name cannot return a name for euc-cn, because euc-cn is not in the IANA charset name list;
# the other is that HTTP::Response::decoded_content hasn't implemented euc-cn decoding, so we can only use the iso-8859-1 path as a substitute.
# by shalk 2013-6-11
#
# example code | |
# Example script: fetch an IP-lookup page with LWP, then print the charset
# that HTTP::Response reports and the first <li> element of the decoded body.
use 5.010;
use strict;
use warnings;
use LWP;
use Encode;
use IO::HTML;

# IP address to look up; it is interpolated into the query string below.
my $line = '69.123.123.123';
my $url  = "http://ip138.com/ips138.asp?ip=$line&action=2";

my $browser  = LWP::UserAgent->new;
my $response = $browser->get($url);

if ($response->is_success) {
    #print "res:",$response,"\n";
    my $html1 = $response->decoded_content;
    say "----------------";

    # content_charset may yield undef when no charset can be determined;
    # print a placeholder rather than triggering an "uninitialized" warning.
    say 'content_charset:', $response->content_charset // '(unknown)';

    # decoded_content returns undef when the body cannot be decoded, so
    # guard the match. The pattern captures the text of the first <li>.
    if (defined $html1 && $html1 =~ m{<li>(.*?)</li>}) {
        print "line=$1\n";
    }
}
else {
    # Report the failure instead of finishing silently with no output.
    warn "request for $url failed: ", $response->status_line, "\n";
}
#########################################################################
# fix code in lib
# we can make a little modification to the library code
# first place: HTTP/Message.pm
# about line 204
#########################################################################
my $encoding = IO::HTML::find_charset_in($cref, { encoding => 1, | |
need_pragma => 0 }); | |
return $encoding->mime_name if $encoding; | |
# changed to
return IO::HTML::find_charset_in($cref, { encoding => 0, | |
need_pragma => 0 }); | |
######################################################################### | |
# second place /HTTP/Message.pm | |
# about line 364 | |
######################################################################### | |
elsif ($charset eq "us-ascii" || $charset eq "iso-8859-1") { | |
# changed to | |
elsif ($charset eq "us-ascii" || $charset eq "iso-8859-1"||$charset eq "euc-cn") { | |
# now we can |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment