Skip to content

Instantly share code, notes, and snippets.

@Ovid
Last active November 12, 2021 12:33
Show Gist options
  • Save Ovid/010e2a42511bb6c8706d3ad702a1730a to your computer and use it in GitHub Desktop.
Save Ovid/010e2a42511bb6c8706d3ad702a1730a to your computer and use it in GitHub Desktop.
Sample of PPR-based dependency scanner
#!/usr/bin/env perl
# I decided it was time to learn PPR::X (Damian Conway's excellent regex-based Perl parser)
#
# This code is still heuristic in nature, but the challenge seemd fun.
#
# I thought it would be an interesting project to try to extract dependencies from
# Perl code. Thanks to `haj` giving me the clue needed to find a parsing error
use strict;
use warnings;
use String::Util 'trim';
use List::Util 'uniq';
use Text::Balanced qw(
extract_multiple
extract_quotelike
);
use Test::Most;
use 5.018;
use PPR::X; # extensible PPR
# These track the dependencies of the respective commands...
my %DEPENDENCIES;
# Augment standard PPR grammar to recognize the three commands and accumulate their arguments...
my $dependencies_matcher = qr{
# Match an entire Perl document...
(?&PerlDocument)
(?(DEFINE)
# Override how 'use'/'no' statements are matched...
(?<PerlUseStatement>
(?: use ) (?>(?&PerlNWS))
(?>
# use aliased "Some::Module" => "Module";
aliased (?&PerlNWS)
(?>
((?>(?&PerlQuotelike))) # Match and capture module name
(?{ push @{$DEPENDENCIES{use}}, 'aliased' }) # Remember it
(?{ push @{$DEPENDENCIES{aliased}}, $^N }) # Remember it
)
(?&PerlOWS)
(?&PerlComma)
(?&PerlOWS)
(?>
(?>(?&PerlQuotelike)) # Match identifier name
)
|
# use parent 'Some::Module';
# use parent qw(Foo Bar::Baz);
# use parent -norequire, 'Tie::StdHash';
parent (?&PerlNWS) (?:-norequire (?&PerlOWS) (?&PerlComma) (?&PerlOWS))?
(?:
(?>
((?>(?&PerlQuotelike))) # Match and capture module name
(?{ push @{$DEPENDENCIES{use}}, 'parent' }) # Remember it
(?{ push @{$DEPENDENCIES{parent}}, $^N }) # Remember it
)
|
(?>
((?>(?&PerlParenthesesList))) # Match and capture module name
(?{ push @{$DEPENDENCIES{use}}, 'parent' }) # Remember it
(?{ push @{$DEPENDENCIES{parent}}, $^N }) # Remember it
)
)
|
# use base 'Some::Module';
# use base qw(Foo Bar::Baz);
base (?&PerlNWS)
(?:
(?>
((?>(?&PerlQuotelike))) # Match and capture module name
(?{ push @{$DEPENDENCIES{use}}, 'base' }) # Remember it
(?{ push @{$DEPENDENCIES{base}}, $^N }) # Remember it
)
|
(?>
((?>(?&PerlList))) # Match and capture module name
(?{ push @{$DEPENDENCIES{use}}, 'base' }) # Remember it
(?{ push @{$DEPENDENCIES{base}}, $^N }) # Remember it
)
)
|
((?>(?&PerlQualifiedIdentifier))) # Match and capture module name
(?{ push @{$DEPENDENCIES{use}}, $^N }) # Remember it
# Not sure if we should extract the optional module version number too
# (currently not doing so; would have to change the %DEPENDENCIES data structure to accommodate this extra info)...
(?: (?>(?&PerlNWS)) (?&PerlVersionNumber)
(?! (?>(?&PerlOWS)) (?> (?&PerlInfixBinaryOperator) | (?&PerlComma) | \? ) )
)?+
(?: (?>(?&PerlNWS)) (?&PerlPodSequence) )?+
(?: (?>(?&PerlOWS)) (?&PerlExpression) )?+
|
((?&PerlVersionNumber)) # I guess version specifiers are a kind of dependency (?)
(?{ push @{$DEPENDENCIES{use}}, $^N }) # So remember them too
)
(?>(?&PerlOWSOrEND)) (?> ; | (?= \} | \z ))
)
# Augment how Perl function/sub calls are matched...
(?<PerlCall>
(?>
# Treat 'require' calls specially...
require (?&PerlNWS)
((?&PerlExpression)) # Match and capture argument
(?{ push @{$DEPENDENCIES{require}}, $^N }) # Remember it
|
# Treat 'extends' calls specially...
extends (?&PerlNWS)
(?:
(?>
# XXX Here's where it goes wrong. For some reason, if I have
# `extends 'Foo', 'Bar';`, it only matches 'Foo'
(((?&PerlList))) # Match and capture module name
(?{ push @{$DEPENDENCIES{extends}}, $^N }) # Remember it
)
|
(?>
((?>(?&PerlQuotelike))) # Match and capture module name
(?{ push @{$DEPENDENCIES{extends}}, $^N }) # Remember it
)
)
|
# Treat 'do' calls specially...
do (?&PerlNWS) ((?&PerlExpression)) # Match and capture argument
(?{ push @{$DEPENDENCIES{do}}, $^N }) # Remember it
|
# Otherwise, fall back to standard matching behaviour...
(?&PerlStdCall)
)
)
)
$PPR::X::GRAMMAR
}xms;
my $code = <<'END';
# yes, this code is rubbish ...
use parent qw(Base::Class);
use aliased q'Preferred::Customer' => 'Jerk';
use aliased "Some::Module" => 'DoesntMatter';
use parent -norequire, ('Base1', 'Base2');
use base ('Whee1', 'Wee::Ble');
use Foo::Bar;
use This::That 3.14;
use parent qw(ThisParent That::Parent);
use 5.001;
if ( require Bar::Baz ) {
require 'Bar/Baz2.pm';
require v5.34;
do 'Baz.qux';
}
END
ok my $deps = dependencies_from_string($code),
'We should be able to fetch dependencies';
my $expected = {
aliased => [ 'Preferred::Customer', 'Some::Module' ],
base => [ 'Whee1', 'Wee::Ble' ],
do => ['Baz.qux'],
require => [ 'Bar::Baz', "Bar/Baz2.pm", 'v5.34' ],
use => [ 'parent', 'aliased', 'base', 'Foo::Bar', 'This::That', '5.001', ],
parent => [ 'Base::Class', 'Base1', 'Base2', 'ThisParent', 'That::Parent' ],
};
eq_or_diff $deps, $expected, '... and they should match our expectations';
ok $deps = dependencies_from_string($code),
'We should be able to fetch dependencies again';
eq_or_diff $deps, $expected, '... and they should match our expectations';
my $moose = <<'END';
package Foo;
use Moose;
extends 'Foo::Bar', 'Baz';
with (
'Role1',
'Role2' => { -exclude => 'some_method' },
);
with qw(
Role3
Role4
);
END
$deps = dependencies_from_string($moose);
$expected = { use => ['Moose'], extends => [ 'Foo::Bar', 'Baz' ] };
eq_or_diff $deps, $expected, 'We can handle the moose';
sub dependencies_from_string {
my $string = shift;
my $identifier_re = qr/(?:[A-Z_a-z][0-9A-Z_a-z]*)/;
my $contains_package = qr/ $identifier_re (?::: $identifier_re ) * /x;
%DEPENDENCIES = ();
if ( $string =~ $dependencies_matcher ) {
my $uses_moose;
foreach my $key ( keys %DEPENDENCIES ) {
my $deps = $DEPENDENCIES{$key};
$deps->@* = uniq $deps->@*;
my @new_deps;
foreach my $dep ( $deps->@* ) {
$dep = trim($dep);
my @next = $dep;
if (
# it must look like a list
$dep =~ /\A \s* (?&PerlList) \s* \Z $PPR::X::GRAMMAR/x
&&
# but don't include `use 5.001` and things like that
$dep !~
/\A \s* (?&PerlVersionNumber) \s* \Z $PPR::X::GRAMMAR/x
)
{
# we have something like ('Base1', 'Base2'), so let's split it
# apart and match the bits having what looks like package
# names
@next =
grep { /$contains_package/ } extract_multiple($dep);
}
foreach my $next (@next) {
# takes quotelike strings ('', "", q{}, and so on) and returns
# the unquoted content in element 5
my @quote_data = extract_quotelike($next);
my $name_without_quotes = $quote_data[5] // $next;
foreach my $result ( split /\s+/ => $name_without_quotes ) {
if ( $result =~ /^(?:Moose|Moo|Mo)$/ ) {
$uses_moose = 1;
}
push @new_deps => $result;
}
}
}
$DEPENDENCIES{$key} = \@new_deps;
}
unless ($uses_moose) { delete $DEPENDENCIES{extends} }
}
return \%DEPENDENCIES;
}
done_testing;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment