Skip to content

Instantly share code, notes, and snippets.

@hoehrmann
Created May 31, 2013 11:23
Show Gist options
  • Save hoehrmann/5684369 to your computer and use it in GitHub Desktop.
Save hoehrmann/5684369 to your computer and use it in GitHub Desktop.
HTML Validation script using only CPAN modules. Originally http://www.w3.org/mid/[email protected]
#!perl -w
# Configure OpenSP through its environment variables.  This is done in a
# BEGIN block so the values are in place at compile time, before the `use`
# lines further down load SGML::Parser::OpenSP (presumably the library reads
# them when it initialises -- confirm against the OpenSP documentation).
BEGIN {
    # Fixed document character set flag.
    $ENV{SP_CHARSET_FIXED} = 1;

    # Both the default encoding and the BCTF are set to UTF-8.
    @ENV{qw(SP_ENCODING SP_BCTF)} = ("UTF-8") x 2;
}
# Constructor: ErrorHandler->new($parser).
# Returns a hash-based object blessed into the invoking class, keeping the
# given parser under the key `p` for later use by the `error` callback.
sub ErrorHandler::new {
    my ($class, $parser) = @_;
    return bless { p => $parser }, $class;
}
# Error callback: $handler->error($message).
# Passes the message to the stored parser's split_message() and appends
# whatever that call returns to the handler's `errors` list (created on
# first use).  The value of the push is the sub's return value.
sub ErrorHandler::error {
    my ($self, $message) = @_;
    my $parser = $self->{p};
    push @{ $self->{errors} }, $parser->split_message($message);
}
use strict;
use warnings;
use SGML::Parser::OpenSP 0.99 qw();
use HTML::Encoding qw();
use HTML::Doctype qw();
use LWP::UserAgent qw();
use Encode qw();
use I18N::Charset qw();
# Catalog files handed to the parser's catalogs() method: one for the
# doctype-detection pass and HTML documents, one for XHTML documents.
# Both names are relative paths.
our ($HTML_CATALOG, $XML_CATALOG) = ("sgml.soc", "xml.soc");

# Options passed to the parser's warnings() method for the validation pass.
our @SP_OPTS = ("non-sgml-char-ref", "valid", "no-duplicate");
# Main script: fetch a document over HTTP, decode it to characters, detect
# whether it is XHTML, validate it with OpenSP and print one line per
# reported message.
#
# Usage: script [URL]
# The optional first command-line argument is the document to validate; when
# it is omitted the historical default URL is used.
my $url = @ARGV ? $ARGV[0] : "http://www.sun.com";

my $u = LWP::UserAgent->new;
my $p = SGML::Parser::OpenSP->new;
my $h = HTML::Doctype::Detector->new($p);
my $e = ErrorHandler->new($p);

# Fetch the document and stop early on a failed request instead of silently
# validating an error page or an empty body.
my $r = $u->get($url);
die "Cannot fetch $url: " . $r->status_line . "\n"
    unless $r->is_success;

# Work out the character encoding from the HTTP message and map it to a name
# Encode understands.
my $name1 = HTML::Encoding::encoding_from_http_message($r);
die "Cannot determine the character encoding of $url\n"
    unless defined $name1;
my $name2 = I18N::Charset::enco_charset_name($name1);
# If I18N::Charset has no mapping, let Encode try the detected name itself;
# Encode::decode reports an unknown encoding with a clear message.
$name2 = $name1 unless defined $name2;
my $text = Encode::decode($name2 => $r->content);

# XHTML detection
# First pass: only the doctype detector listens, using the HTML catalog.
$p->handler($h);
$p->catalogs($HTML_CATALOG);
$p->parse_string($text);
my $is_xhtml = $h->is_xhtml;

# Validation
# Second pass: collect messages with the error handler, using the catalog
# that matches the detected document type; 'xml' is added to the warning
# options for XHTML documents.
$p->handler($e);
$p->catalogs($is_xhtml ? $XML_CATALOG : $HTML_CATALOG);
$p->warnings(@SP_OPTS, $is_xhtml ? 'xml' : ());
$p->parse_string($text);

# Report: "[line column severity]: text" for every collected message.  The
# list does not exist when no error was recorded, hence the empty default.
foreach my $error (@{ $e->{errors} || [] })
{
    my $prim = $error->{primary_message};
    printf "[%4d %4d %s]: %s\n",
        $prim->{LineNumber},
        $prim->{ColumnNumber},
        $prim->{Severity},
        $prim->{Text};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment