Skip to content

Instantly share code, notes, and snippets.

@sharifulin
Created October 8, 2010 13:46
Show Gist options
  • Save sharifulin/616824 to your computer and use it in GitHub Desktop.
Save sharifulin/616824 to your computer and use it in GitHub Desktop.
Duty clean code and html
# Reduce an HTML fragment to a small whitelist of tags and attributes.
#
# Kept tags:       p, a, b, strong, br, img (matched case-insensitively).
# Kept attributes: href, alt, src, and only when they carry a value.
#
# Everything else is removed:
#   * <xml>, <style> and <script> elements together with their contents,
#     whatever their case or attributes;
#   * HTML comments and <!...> declarations;
#   * any other tag (its text content is left in place);
#   * any other attribute, whether its value is double-quoted, single-quoted
#     or bare; bare values that are kept are re-emitted double-quoted;
#   * attribute-less empty elements such as <p></p>, including nested ones.
# Finally, runs of blank lines are squeezed to a single newline.
#
# Takes the HTML string and returns the cleaned copy; undef is returned
# unchanged. The argument itself is never modified.
#
# NOTE(review): this is a regex-based best-effort filter, not a full HTML
# parser. In particular the *values* of href/src are not inspected, so a
# "javascript:" URL passes through untouched. Do not rely on it as the only
# defence when rendering untrusted input.
sub clean_code($) {    # prototype kept so existing call sites parse as before
    my $html = shift;

    # Nothing to clean; hand undef straight back instead of warning on it.
    return $html unless defined $html;

    my %tag_allowed  = map { $_ => 1 } qw(p a b strong br img);
    my %attr_allowed = map { $_ => 1 } qw(href alt src);

    # One attribute: a name, optionally followed by "=" and a double-quoted,
    # single-quoted or bare value. $1 is the name, $2 the value (undef when
    # the attribute has none).
    my $attr_re = qr{
        ( [^\s"'=<>/]+ )
        (?: \s* = \s* ( "[^"]*" | '[^']*' | [^\s"'>]+ ) )?
    }x;

    # Rebuild a single tag from its parts: '' for a tag that is not on the
    # whitelist, otherwise the tag with only the permitted attributes.
    my $rewrite_tag = sub {
        my ( $slash, $name, $rest ) = @_;

        return '' unless $tag_allowed{ lc $name };
        return "</$name>" if $slash;    # closing tags carry no attributes

        my @parts = ("<$name");
        while ( $rest =~ /$attr_re/g ) {
            my ( $attr, $value ) = ( $1, $2 );
            next unless defined $value && $attr_allowed{ lc $attr };

            # Normalise bare values so the output is uniformly quoted.
            $value = qq{"$value"} unless $value =~ /\A["']/;
            push @parts, "$attr=$value";
        }
        return join( ' ', @parts ) . '>';
    };

    # Removing one piece of markup can splice its neighbours into a brand new
    # tag (e.g. "<<div>script>" becomes "<script>"), so every removal step is
    # repeated until the text stops changing.
    my $previous;
    do {
        $previous = $html;

        # Whole elements whose content must never reach the output.
        $html =~ s{<(xml|style|script)(?=[\s/>])[^>]*>.*?</\1\s*>}{}gis;

        # Comments.
        $html =~ s{<!--.*?-->}{}gs;

        # Drop non-whitelisted tags and filter attributes of the rest.
        $html =~ s{<(/?)([a-z0-9:]+)([^>]*)>}{ $rewrite_tag->( $1, $2, $3 ) }gie;

        # Empty elements such as <p></p>.
        $html =~ s{<([^>]+)></\1>}{}g;

        # Declarations such as <!DOCTYPE ...>.
        $html =~ s{<![^>]+>}{}g;
    } until $html eq $previous;

    # Squeeze blank lines last so gaps left by removed markup collapse too.
    $html =~ s/(?:\n\s*){2,}/\n/g;

    return $html;
}
# Strip markup from a string by replacing every tag with a single space.
#
# A "tag" is anything from "<" up to the next ">" with at least one character
# in between (newlines included). Takes the string and returns the modified
# copy; the argument itself is left untouched.
sub clean_html($) {
    my ($markup) = @_;

    $markup =~ s{<[^>]+>}{ }sg;

    return $markup;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment