Created
September 26, 2012 06:23
-
-
Save comewalk/3786418 to your computer and use it in GitHub Desktop.
get permalinks from sitemap.xml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Print the <loc> value of every <url> element found in a sitemap XML file,
# one per line, on standard output.
use strict;
use warnings;
use XML::LibXML;

# Fail early with a usage hint instead of handing an undefined or empty
# file name to the parser.
my $filename = $ARGV[0];
die "Usage: $0 sitemap.xml\n" unless defined $filename && length $filename;

# XML::LibXML returns decoded character strings; encode them on output so
# non-ASCII locations are written as UTF-8 without "Wide character" warnings.
binmode STDOUT, ':encoding(UTF-8)';

my $parser = XML::LibXML->new;
my $doc    = $parser->parse_file($filename);    # throws on parse errors

# Match on local-name() so the lookup does not depend on which namespace
# (if any) the document declares for its elements.
for my $item ($doc->findnodes("//*[local-name()='url']")) {
    my $loc = $item->findvalue("*[local-name()='loc']");
    print "$loc\n";
}

__END__

=head1 NAME

parse_sitemap.pl - get permalinks from sitemap.xml

=head1 SYNOPSIS

    $ perl parse_sitemap.pl sitemap.xml

=cut
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example usage (download the sitemap, then run the script on it):
$ curl -s -L -O http://example.com/sitemap.xml && perl -I cpan-lib parse_sitemap.pl sitemap.xml