Created
June 12, 2018 14:49
-
-
Save dakkar/a0259ef2a231e7047e6d886b8f3e052c to your computer and use it in GitHub Desktop.
How to check what caused a negative lookahead to make a regex fail (i.e. what the lookahead actually matched)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
use strict;
use warnings;
use 5.020;
use experimental 'signatures';

# Offsets (start, end) into the target string covered by the most recent
# successful match of the lookahead body.  (-1,-1) means the lookahead
# body has not matched since the markers were last reset.
my @span=(-1,-1);

# Scratch slot remembering where the lookahead was entered.  It is a
# package variable (`our`) because the regex assigns it with `local`,
# and `local` cannot be applied to a `my` variable.
our $start=-1;

# this regex matches a series of -, some non-dashes that do not match
# 'some.{0,3}thing', and more dashes
#
# when the pattern does not match, we want to know if it failed
# because of the inner negative lookahead, or other reasons
#
# also, if it failed because of the lookahead, we want to know what it
# matched
my $re = qr{
    ^
    [-]+    # series of dashes
    (?!     # negative lookahead
        (?{ local $start=pos() })   # save (backtracking-safe) where we start
        some.{0,3}thing
        # if we get here, we matched the lookahead, save where it
        # started and where it stopped
        (?{ @span=($start,pos()) })
    )
    [^-]+   # the non-dashes
    [-]+    # final dashes
    $
}smx;
# Match $str against $re and print the outcome on STDOUT.
#
# When the match fails and the lookahead body matched somewhere during the
# attempt, the offsets recorded in @span and the text they cover are
# printed as well.  Nothing useful is returned.
sub test($str) {
    # initialise the markers, so a hit left over from a previous call
    # cannot be mistaken for one produced by this string
    @span=(-1,-1);
    $start=-1;

    if ($str =~ $re) {
        say "<$str> matches";
        # The span is deliberately not reported here: with /m the regex
        # can record a lookahead hit on one line and still succeed on a
        # later line, and that hit did not cause any failure.
        return;
    }

    say "<$str> does not match";

    # did the lookahead match?
    if ($span[0]>=0) {
        say " negative span = @span";
        say ' negative match = ',
            substr($str,$span[0],$span[1]-$span[0]);
    }

    return;
}
# Sample inputs: two that match, two rejected by the negative lookahead,
# and one that fails before the lookahead is ever reached.
test('------foo bar---');
test('-----a--');
test('----some things--');
test('----someBADthings--');
test('abcdef');
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<------foo bar---> matches
<-----a--> matches
<----some things--> does not match
 negative span = 4 14
 negative match = some thing
<----someBADthings--> does not match
 negative span = 4 16
 negative match = someBADthing
<abcdef> does not match
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment