Last active
December 19, 2015 23:59
-
-
Save MattOates/6038521 to your computer and use it in GitHub Desktop.
Filter out a set of tags based on their unique text contents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Filter out a set of tags based on their unique text contents.
#
# Usage: <script> --tag=SELECTOR   (short form: -t SELECTOR)
#
# Every element matching SELECTOR is keyed by its text with each run of
# whitespace collapsed to a single underscore. When several elements share
# a key, the last one seen wins. The surviving elements are printed one per
# line, ordered by where each unique text first appeared in the document.
use strict;
use warnings;
use 5.10.0;

# Lets you define the --tag=rule selector on the command line.
use Getopt::Long;
# Depends on Mojo::DOM because it is lightweight and nice to use.
use Mojo::DOM;

my $usage = "Usage: $0 --tag=SELECTOR\n";

my $tag;
GetOptions('tag|t=s' => \$tag) or die $usage;

# Without a selector there is nothing meaningful to search for, so fail
# loudly instead of handing an undefined value to Mojo::DOM->find.
die $usage unless defined $tag && length $tag;

# Get a DOM for the XML; change <DATA> to <> for command line use.
my $dom = Mojo::DOM->new(do { local $/; <DATA> });

# Map the compacted text of each tag to the last tag seen with that text,
# remembering first-appearance order so the output is stable between runs
# (plain hash ordering is randomised by Perl).
my %tag_for;
my @key_order;
for my $element ($dom->find($tag)->each) {
    (my $key = $element->text) =~ s/\s+/_/g;
    push @key_order, $key unless exists $tag_for{$key};
    $tag_for{$key} = $element;
}

# Print back out one tag per unique text value, each on its own line.
say "$_" for @tag_for{@key_order};
__DATA__
<?xml version="1.0"?>
<rule>
something goes here whoa
</rule>
<rule>
something goes there
</rule>
<rule>
something goes here whoa
</rule>
<rule>
something goes there
</rule>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment