Last active
November 12, 2021 12:33
-
-
Save Ovid/010e2a42511bb6c8706d3ad702a1730a to your computer and use it in GitHub Desktop.
Sample of PPR-based dependency scanner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# I decided it was time to learn PPR::X (Damian Conway's excellent regex-based Perl parser) | |
# | |
# This code is still heuristic in nature, but the challenge seemd fun. | |
# | |
# I thought it would be an interesting project to try to extract dependencies from | |
# Perl code. Thanks to `haj` giving me the clue needed to find a parsing error | |
use strict; | |
use warnings; | |
use String::Util 'trim'; | |
use List::Util 'uniq'; | |
use Text::Balanced qw( | |
extract_multiple | |
extract_quotelike | |
); | |
use Test::Most; | |
use 5.018; | |
use PPR::X; # extensible PPR | |
# These track the dependencies of the respective commands... | |
my %DEPENDENCIES; | |
# Augment standard PPR grammar to recognize the three commands and accumulate their arguments... | |
my $dependencies_matcher = qr{ | |
# Match an entire Perl document... | |
(?&PerlDocument) | |
(?(DEFINE) | |
# Override how 'use'/'no' statements are matched... | |
(?<PerlUseStatement> | |
(?: use ) (?>(?&PerlNWS)) | |
(?> | |
# use aliased "Some::Module" => "Module"; | |
aliased (?&PerlNWS) | |
(?> | |
((?>(?&PerlQuotelike))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{use}}, 'aliased' }) # Remember it | |
(?{ push @{$DEPENDENCIES{aliased}}, $^N }) # Remember it | |
) | |
(?&PerlOWS) | |
(?&PerlComma) | |
(?&PerlOWS) | |
(?> | |
(?>(?&PerlQuotelike)) # Match identifier name | |
) | |
| | |
# use parent 'Some::Module'; | |
# use parent qw(Foo Bar::Baz); | |
# use parent -norequire, 'Tie::StdHash'; | |
parent (?&PerlNWS) (?:-norequire (?&PerlOWS) (?&PerlComma) (?&PerlOWS))? | |
(?: | |
(?> | |
((?>(?&PerlQuotelike))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{use}}, 'parent' }) # Remember it | |
(?{ push @{$DEPENDENCIES{parent}}, $^N }) # Remember it | |
) | |
| | |
(?> | |
((?>(?&PerlParenthesesList))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{use}}, 'parent' }) # Remember it | |
(?{ push @{$DEPENDENCIES{parent}}, $^N }) # Remember it | |
) | |
) | |
| | |
# use base 'Some::Module'; | |
# use base qw(Foo Bar::Baz); | |
base (?&PerlNWS) | |
(?: | |
(?> | |
((?>(?&PerlQuotelike))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{use}}, 'base' }) # Remember it | |
(?{ push @{$DEPENDENCIES{base}}, $^N }) # Remember it | |
) | |
| | |
(?> | |
((?>(?&PerlList))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{use}}, 'base' }) # Remember it | |
(?{ push @{$DEPENDENCIES{base}}, $^N }) # Remember it | |
) | |
) | |
| | |
((?>(?&PerlQualifiedIdentifier))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{use}}, $^N }) # Remember it | |
# Not sure if we should extract the optional module version number too | |
# (currently not doing so; would have to change the %DEPENDENCIES data structure to accommodate this extra info)... | |
(?: (?>(?&PerlNWS)) (?&PerlVersionNumber) | |
(?! (?>(?&PerlOWS)) (?> (?&PerlInfixBinaryOperator) | (?&PerlComma) | \? ) ) | |
)?+ | |
(?: (?>(?&PerlNWS)) (?&PerlPodSequence) )?+ | |
(?: (?>(?&PerlOWS)) (?&PerlExpression) )?+ | |
| | |
((?&PerlVersionNumber)) # I guess version specifiers are a kind of dependency (?) | |
(?{ push @{$DEPENDENCIES{use}}, $^N }) # So remember them too | |
) | |
(?>(?&PerlOWSOrEND)) (?> ; | (?= \} | \z )) | |
) | |
# Augment how Perl function/sub calls are matched... | |
(?<PerlCall> | |
(?> | |
# Treat 'require' calls specially... | |
require (?&PerlNWS) | |
((?&PerlExpression)) # Match and capture argument | |
(?{ push @{$DEPENDENCIES{require}}, $^N }) # Remember it | |
| | |
# Treat 'extends' calls specially... | |
extends (?&PerlNWS) | |
(?: | |
(?> | |
# XXX Here's where it goes wrong. For some reason, if I have | |
# `extends 'Foo', 'Bar';`, it only matches 'Foo' | |
(((?&PerlList))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{extends}}, $^N }) # Remember it | |
) | |
| | |
(?> | |
((?>(?&PerlQuotelike))) # Match and capture module name | |
(?{ push @{$DEPENDENCIES{extends}}, $^N }) # Remember it | |
) | |
) | |
| | |
# Treat 'do' calls specially... | |
do (?&PerlNWS) ((?&PerlExpression)) # Match and capture argument | |
(?{ push @{$DEPENDENCIES{do}}, $^N }) # Remember it | |
| | |
# Otherwise, fall back to standard matching behaviour... | |
(?&PerlStdCall) | |
) | |
) | |
) | |
$PPR::X::GRAMMAR | |
}xms; | |
my $code = <<'END'; | |
# yes, this code is rubbish ... | |
use parent qw(Base::Class); | |
use aliased q'Preferred::Customer' => 'Jerk'; | |
use aliased "Some::Module" => 'DoesntMatter'; | |
use parent -norequire, ('Base1', 'Base2'); | |
use base ('Whee1', 'Wee::Ble'); | |
use Foo::Bar; | |
use This::That 3.14; | |
use parent qw(ThisParent That::Parent); | |
use 5.001; | |
if ( require Bar::Baz ) { | |
require 'Bar/Baz2.pm'; | |
require v5.34; | |
do 'Baz.qux'; | |
} | |
END | |
ok my $deps = dependencies_from_string($code), | |
'We should be able to fetch dependencies'; | |
my $expected = { | |
aliased => [ 'Preferred::Customer', 'Some::Module' ], | |
base => [ 'Whee1', 'Wee::Ble' ], | |
do => ['Baz.qux'], | |
require => [ 'Bar::Baz', "Bar/Baz2.pm", 'v5.34' ], | |
use => [ 'parent', 'aliased', 'base', 'Foo::Bar', 'This::That', '5.001', ], | |
parent => [ 'Base::Class', 'Base1', 'Base2', 'ThisParent', 'That::Parent' ], | |
}; | |
eq_or_diff $deps, $expected, '... and they should match our expectations'; | |
ok $deps = dependencies_from_string($code), | |
'We should be able to fetch dependencies again'; | |
eq_or_diff $deps, $expected, '... and they should match our expectations'; | |
my $moose = <<'END'; | |
package Foo; | |
use Moose; | |
extends 'Foo::Bar', 'Baz'; | |
with ( | |
'Role1', | |
'Role2' => { -exclude => 'some_method' }, | |
); | |
with qw( | |
Role3 | |
Role4 | |
); | |
END | |
$deps = dependencies_from_string($moose); | |
$expected = { use => ['Moose'], extends => [ 'Foo::Bar', 'Baz' ] }; | |
eq_or_diff $deps, $expected, 'We can handle the moose'; | |
sub dependencies_from_string { | |
my $string = shift; | |
my $identifier_re = qr/(?:[A-Z_a-z][0-9A-Z_a-z]*)/; | |
my $contains_package = qr/ $identifier_re (?::: $identifier_re ) * /x; | |
%DEPENDENCIES = (); | |
if ( $string =~ $dependencies_matcher ) { | |
my $uses_moose; | |
foreach my $key ( keys %DEPENDENCIES ) { | |
my $deps = $DEPENDENCIES{$key}; | |
$deps->@* = uniq $deps->@*; | |
my @new_deps; | |
foreach my $dep ( $deps->@* ) { | |
$dep = trim($dep); | |
my @next = $dep; | |
if ( | |
# it must look like a list | |
$dep =~ /\A \s* (?&PerlList) \s* \Z $PPR::X::GRAMMAR/x | |
&& | |
# but don't include `use 5.001` and things like that | |
$dep !~ | |
/\A \s* (?&PerlVersionNumber) \s* \Z $PPR::X::GRAMMAR/x | |
) | |
{ | |
# we have something like ('Base1', 'Base2'), so let's split it | |
# apart and match the bits having what looks like package | |
# names | |
@next = | |
grep { /$contains_package/ } extract_multiple($dep); | |
} | |
foreach my $next (@next) { | |
# takes quotelike strings ('', "", q{}, and so on) and returns | |
# the unquoted content in element 5 | |
my @quote_data = extract_quotelike($next); | |
my $name_without_quotes = $quote_data[5] // $next; | |
foreach my $result ( split /\s+/ => $name_without_quotes ) { | |
if ( $result =~ /^(?:Moose|Moo|Mo)$/ ) { | |
$uses_moose = 1; | |
} | |
push @new_deps => $result; | |
} | |
} | |
} | |
$DEPENDENCIES{$key} = \@new_deps; | |
} | |
unless ($uses_moose) { delete $DEPENDENCIES{extends} } | |
} | |
return \%DEPENDENCIES; | |
} | |
done_testing; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment