Skip to content

Instantly share code, notes, and snippets.

@leedo
Created March 19, 2012 22:30
Show Gist options
  • Save leedo/2127708 to your computer and use it in GitHub Desktop.
Save leedo/2127708 to your computer and use it in GitHub Desktop.
truncate-html.pl
#!/usr/bin/env perl
use strict;
use warnings;
use HTML::Parser;
# Shared state used by the handlers below:
#   $len   - number of text characters printed so far
#   $max   - character budget; output is truncated once it would be exceeded
#   @close - names of the elements whose start tag has been seen
#   @open  - start-tag text seen but not yet printed
my ($len, $max) = (0, 85);
my (@close, @open);

# Route start tags, text and end tags to the handler subs defined below.
# The argspec strings select which values HTML::Parser passes to each one.
my $parser = HTML::Parser->new(
    end_h   => [ \&end_tag,   'text' ],
    text_h  => [ \&tag_text,  'self,text' ],
    start_h => [ \&start_tag, 'tag,text' ],
);

# Feed standard input to the parser one line at a time, then signal end of
# document so any buffered input is processed.
while (defined(my $chunk = <STDIN>)) {
    $parser->parse($chunk);
}
$parser->eof;
# Start-tag handler (registered with the 'tag,text' argspec).
#
#   $tag  - element name as reported by HTML::Parser
#   $text - original source text of the start tag
#
# The tag text is buffered in @open instead of being printed; the text
# handler prints the buffer when text follows. The element name is recorded
# in @close so a closing tag can be produced if output is truncated.
sub start_tag {
    my ($tag, $text) = @_;

    push @open, $text;

    # A self-closing form such as <br/> may be reported with the slash as
    # part of the name; strip it so the name is usable in a closing tag.
    (my $name = $tag) =~ s{/\z}{};

    # Void elements never receive an end tag, so they must not be put on the
    # stack of elements awaiting closure.
    return
        if $name =~ m{\A(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\z}i;

    push @close, $name;
    return;
}
# Text handler (registered with the 'self,text' argspec).
#
#   $parser - the parser object (not used here)
#   $text   - source text of the text node
#
# Prints the text token by token, adding each token's length to $len. When
# the next token would push $len past $max, prints " &hellip;" followed by a
# closing tag for every entry in @close and terminates the program with
# exit status 0.
sub tag_text {
    my ($parser, $text) = @_;

    # This must be a regex literal: inside a double-quoted string "\b" is a
    # backspace character, so split "\b" only split on literal backspaces
    # and never at word boundaries.
    my @words = split /\b/, $text;

    # Iterate with for instead of testing the truth of shift(), so a token
    # such as "0" does not end the loop early and drop the rest of the text.
    for my $word (@words) {
        if (length($word) + $len > $max) {
            print " &hellip;";

            # Close the innermost element first so the output nests properly.
            # NOTE(review): names whose start tag is still buffered in @open
            # are closed here as well, although that start tag is not printed.
            print "</$_>" for reverse @close;
            exit 0;
        }

        # Start tags are held back until text follows them; emit them now.
        if (@open) {
            print join "", @open;
            @open = ();
        }

        $len += length $word;
        print $word;
    }

    return;
}
# End-tag handler (registered with the 'text' argspec).
#
#   $text - original source text of the end tag, e.g. "</b>"
#
# Prints the end tag and removes the element it closes from @close.
sub end_tag {
    my ($text) = @_;

    # Emit start tags still waiting in @open first; otherwise an element
    # with no text content (<b></b>) would have its end tag printed before
    # its start tag.
    if (@open) {
        print join "", @open;
        @open = ();
    }

    print $text;

    my ($name) = $text =~ m{\A</\s*([^\s>]+)};
    if (!defined $name) {
        # Name could not be extracted; fall back to dropping the top entry.
        pop @close;
        return;
    }

    # Unwind to the nearest matching entry, discarding anything opened
    # inside it that was never explicitly closed. An end tag with no
    # matching entry leaves the stack untouched.
    for my $i (reverse 0 .. $#close) {
        if (lc $close[$i] eq lc $name) {
            splice @close, $i;
            last;
        }
    }

    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment