Created
February 24, 2012 10:56
-
-
Save leoliu/1900111 to your computer and use it in GitHub Desktop.
lookaround regexp extension for Emacs 23.4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 5aa9e8dfef5a801ad4a9dd9e01a72c4386e3baa3 Mon Sep 17 00:00:00 2001 | |
From: Leo <[email protected]> | |
Date: Sun, 22 May 2011 14:17:52 +0800 | |
Subject: [PATCH] Merge patch for lookaround regexp extension | |
Patch extracted from the bzr repo on | |
http://cx4a.org/hack/emacs-regexp-extension.html and also available on | |
http://paste.pocoo.org/show/393041. | |
--- | |
src/regex.c | 324 ++++++++++++++++++++++++++++++++++++++++++++-- | |
test/regexp-testsuite.el | 280 +++++++++++++++++++++++++++++++++++++++ | |
2 files changed, 591 insertions(+), 13 deletions(-) | |
create mode 100644 test/regexp-testsuite.el | |
diff --git a/src/regex.c b/src/regex.c | |
index a3a4d97c..740e99bf 100644 | |
--- a/src/regex.c | |
+++ b/src/regex.c | |
@@ -736,7 +736,14 @@ typedef enum | |
syntaxspec, | |
/* Matches any character whose syntax is not that specified. */ | |
- notsyntaxspec | |
+ notsyntaxspec, | |
+ | |
+ lookahead, | |
+ lookahead_not, | |
+ lookbehind, | |
+ lookbehind_not, | |
+ lookaround_succeed, | |
+ lookaround_fail | |
#ifdef emacs | |
,before_dot, /* Succeeds if before point. */ | |
@@ -1034,6 +1041,36 @@ print_partial_compiled_pattern (start, end) | |
fprintf (stderr, "/stop_memory/%d", *p++); | |
break; | |
+ case lookahead: | |
+ extract_number_and_incr (&mcnt, &p); | |
+ fprintf (stderr, "/lookahead/%d", mcnt); | |
+ break; | |
+ | |
+ case lookahead_not: | |
+ extract_number_and_incr (&mcnt, &p); | |
+ fprintf (stderr, "/lookahead_not/%d", mcnt); | |
+ break; | |
+ | |
+ case lookbehind: | |
+ extract_number_and_incr (&mcnt, &p); | |
+ extract_number_and_incr (&mcnt2, &p); | |
+ fprintf (stderr, "/lookbehind/%d/%d", mcnt, mcnt2); | |
+ break; | |
+ | |
+ case lookbehind_not: | |
+ extract_number_and_incr (&mcnt, &p); | |
+ extract_number_and_incr (&mcnt2, &p); | |
+ fprintf (stderr, "/lookbehind_not/%d/%d", mcnt, mcnt2); | |
+ break; | |
+ | |
+ case lookaround_succeed: | |
+ fprintf (stderr, "/lookaround_succeed"); | |
+ break; | |
+ | |
+ case lookaround_fail: | |
+ fprintf (stderr, "/lookaround_fail"); | |
+ break; | |
+ | |
case duplicate: | |
fprintf (stderr, "/duplicate/%d", *p++); | |
break; | |
@@ -1601,11 +1638,17 @@ do { \ | |
} \ | |
else \ | |
{ \ | |
- regend[reg] = POP_FAILURE_POINTER (); \ | |
- regstart[reg] = POP_FAILURE_POINTER (); \ | |
- DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ | |
- reg, regstart[reg], regend[reg]); \ | |
- } \ | |
+ re_char *start, *end; \ | |
+ end = POP_FAILURE_POINTER (); \ | |
+ start = POP_FAILURE_POINTER (); \ | |
+ if (!discard_saved_regs) \ | |
+ { \ | |
+ regstart[reg] = start; \ | |
+ regend[reg] = end; \ | |
+ DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ | |
+ reg, regstart[reg], regend[reg]); \ | |
+ } \ | |
+ } \ | |
} while (0) | |
/* Check that we are not stuck in an infinite loop. */ | |
@@ -1703,7 +1746,7 @@ do { \ | |
while (fail_stack.frame < fail_stack.avail) \ | |
POP_FAILURE_REG_OR_COUNT (); \ | |
\ | |
- pat = POP_FAILURE_POINTER (); \ | |
+ pat = POP_FAILURE_POINTER (); \ | |
DEBUG_PRINT2 (" Popping pattern %p: ", pat); \ | |
DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ | |
\ | |
@@ -1725,6 +1768,29 @@ do { \ | |
} while (0) /* POP_FAILURE_POINT */ | |
+#define FINISH_LOOKAROUND() /* Unwind the failure stack down to the innermost pending lookaround frame. */ \ | |
+ do { \ | |
+ re_char *str, *pat; \ | |
+ re_opcode_t op; \ | |
+ discard_saved_regs = 1; /* Saved register values are popped but not restored. */ \ | |
+ while (!FAIL_STACK_EMPTY ()) \ | |
+ { \ | |
+ POP_FAILURE_POINT (str, pat); \ | |
+ op = (re_opcode_t) *pat; \ | |
+ if (op == lookahead \ | |
+ || op == lookahead_not \ | |
+ || op == lookbehind \ | |
+ || op == lookbehind_not) \ | |
+ { \ | |
+ d = str; /* Resume at the text position saved when the lookaround started. */ \ | |
+ dend = ((d >= string1 && d <= end1) \ | |
+ ? end_match_1 : end_match_2); \ | |
+ break; \ | |
+ } \ | |
+ } \ | |
+ discard_saved_regs = 0; \ | |
+ } while (0) /* No trailing semicolon here: each call site supplies its own. */ | |
+ | |
/* Registers are set to a sentinel when they haven't yet matched. */ | |
#define REG_UNSET(e) ((e) == NULL) | |
@@ -1923,6 +1989,7 @@ typedef struct | |
pattern_offset_t fixup_alt_jump; | |
pattern_offset_t laststart_offset; | |
regnum_t regnum; | |
+ int lookaround; | |
} compile_stack_elt_t; | |
@@ -2523,6 +2590,8 @@ static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type | |
compile_stack, | |
regnum_t regnum)); | |
+static int exact_chars_in_pattern_buffer _RE_ARGS ((struct re_pattern_buffer *bufp, re_char *p, re_char *pend)); | |
+ | |
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. | |
Returns one of error codes defined in `regex.h', or zero for success. | |
@@ -3269,6 +3338,7 @@ regex_compile (pattern, size, syntax, bufp) | |
handle_open: | |
{ | |
int shy = 0; | |
+ int lookaround = 0; | |
regnum_t regnum = 0; | |
if (p+1 < pend) | |
{ | |
@@ -3290,6 +3360,27 @@ regex_compile (pattern, size, syntax, bufp) | |
case '1': case '2': case '3': case '4': | |
case '5': case '6': case '7': case '8': case '9': | |
regnum = 10*regnum + (c - '0'); break; | |
+ case '=': | |
+ /* Positive lookahead assertion. */ | |
+ shy = lookaround = 1; | |
+ break; | |
+ case '!': | |
+ /* Negative lookahead assertion. */ | |
+ shy = lookaround = 2; | |
+ break; | |
+ case '<': | |
+ { | |
+ PATFETCH (c); | |
+ if (c == '=') | |
+ /* Positive lookbehind assertion. */ | |
+ shy = lookaround = -1; | |
+ else if (c == '!') | |
+ /* Negative lookbehind assertion. */ | |
+ shy = lookaround = -2; | |
+ else | |
+ FREE_STACK_RETURN (REG_BADPAT); | |
+ } | |
+ break; | |
default: | |
/* Only (?:...) is supported right now. */ | |
FREE_STACK_RETURN (REG_BADPAT); | |
@@ -3336,6 +3427,7 @@ regex_compile (pattern, size, syntax, bufp) | |
= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | |
COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | |
COMPILE_STACK_TOP.regnum = regnum; | |
+ COMPILE_STACK_TOP.lookaround = lookaround; | |
/* Do not push a start_memory for groups beyond the last one | |
we can represent in the compiled pattern. */ | |
@@ -3385,6 +3477,7 @@ regex_compile (pattern, size, syntax, bufp) | |
later groups should continue to be numbered higher, | |
as in `(ab)c(de)' -- the second group is #2. */ | |
regnum_t regnum; | |
+ int lookaround; | |
compile_stack.avail--; | |
begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; | |
@@ -3397,13 +3490,40 @@ regex_compile (pattern, size, syntax, bufp) | |
/* If we've reached MAX_REGNUM groups, then this open | |
won't actually generate any code, so we'll have to | |
clear pending_exact explicitly. */ | |
+ lookaround = COMPILE_STACK_TOP.lookaround; | |
pending_exact = 0; | |
/* We're at the end of the group, so now we know how many | |
groups were inside this one. */ | |
if (regnum <= MAX_REGNUM && regnum > 0) | |
BUF_PUSH_2 (stop_memory, regnum); | |
- } | |
+ else if (lookaround) | |
+ { | |
+ if (lookaround > 0) | |
+ { | |
+ /* Positive/negative lookahead assertion. */ | |
+ GET_BUFFER_SPACE (3); | |
+ INSERT_JUMP (lookaround == 1 ? lookahead : lookahead_not, laststart, b + 4); | |
+ b += 3; | |
+ } | |
+ else | |
+ { | |
+ /* Positive/negative lookbehind assertion. */ | |
+ int count = exact_chars_in_pattern_buffer (bufp, laststart, b); | |
+ if (count == -1) /* variable length */ | |
+ FREE_STACK_RETURN (REG_BADPAT); | |
+ | |
+ GET_BUFFER_SPACE (5); | |
+ INSERT_JUMP2 (lookaround == -1 ? lookbehind : lookbehind_not, laststart, b + 6, count); | |
+ b += 5; | |
+ } | |
+ | |
+ /* Negative form. */ | |
+ if (lookaround > 1 || lookaround < -1) | |
+ BUF_PUSH (lookaround_fail); | |
+ BUF_PUSH (lookaround_succeed); | |
+ } | |
+ } | |
break; | |
@@ -3957,10 +4077,16 @@ at_begline_loc_p (pattern, p, syntax) | |
/* After an alternative? */ | |
|| (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)) | |
/* After a shy subexpression? */ | |
- || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern | |
- && prev[-1] == '?' && prev[-2] == '(' | |
- && (syntax & RE_NO_BK_PARENS | |
- || (prev - 3 >= pattern && prev[-3] == '\\'))); | |
+ || ((syntax & RE_SHY_GROUPS) | |
+ && ((prev - 2 >= pattern | |
+ && prev[-1] == '?' && prev[-2] == '(' | |
+ && (syntax & RE_NO_BK_PARENS | |
+ || (prev - 3 >= pattern && prev[-3] == '\\'))) | |
+ || (prev - 3 >= pattern | |
+ && (*prev == '=' || *prev == '!') | |
+ && prev[-1] == '<' && prev[-2] == '?' && prev[-3] == '(' | |
+ && (syntax & RE_NO_BK_PARENS | |
+ || (prev - 4 >= pattern && prev[-4] == '\\'))))); | |
} | |
@@ -4205,6 +4331,13 @@ analyse_first (p, pend, fastmap, multibyte) | |
} | |
break; | |
+ case lookahead: | |
+ case lookahead_not: | |
+ case lookbehind: | |
+ case lookbehind_not: | |
+ if (!fastmap) break; | |
+ return -1; | |
+ | |
/* All cases after this match the empty string. These end with | |
`continue'. */ | |
@@ -4829,7 +4962,7 @@ skip_noops (p, pend) | |
{ | |
case start_memory: | |
case stop_memory: | |
- p += 2; break; | |
+ p += 2; break; | |
case no_op: | |
p += 1; break; | |
case jump: | |
@@ -4845,6 +4978,93 @@ skip_noops (p, pend) | |
return p; | |
} | |
+static int | |
+exact_chars_in_pattern_buffer (bufp, p, pend) /* Count the characters matched by the compiled code in [P, PEND); return -1 on any opcode not handled below. */ | |
+ struct re_pattern_buffer *bufp; | |
+ re_char *p, *pend; | |
+{ | |
+ int count = 0; | |
+ while (p < pend) | |
+ { | |
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) | |
+ { | |
+ case exactn: | |
+ { | |
+ int mcnt = *p++; /* Operand byte; consumed below as a byte length. */ | |
+ int buf_charlen; | |
+ while (mcnt > 0) { /* One COUNT per character, however many bytes it occupies. */ | |
+ STRING_CHAR_AND_LENGTH (p, buf_charlen); | |
+ p += buf_charlen; | |
+ mcnt -= buf_charlen; | |
+ count++; | |
+ } | |
+ } | |
+ break; | |
+ case start_memory: | |
+ case stop_memory: | |
+ p++; /* Skip the one-byte operand; groups add no width. */ | |
+ break; | |
+#ifdef emacs | |
+ case categoryspec: | |
+ case notcategoryspec: | |
+#endif /* emacs */ | |
+ case syntaxspec: | |
+ case notsyntaxspec: | |
+ p++; /* Skip the one-byte operand, then fall through to count one character. */ | |
+ case anychar: | |
+ count++; | |
+ break; | |
+ | |
+ case charset: | |
+ case charset_not: | |
+ if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1)) /* P - 1 is the charset opcode itself. */ | |
+ { | |
+ int mcnt; | |
+ p = CHARSET_RANGE_TABLE (p - 1); | |
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
+ p = CHARSET_RANGE_TABLE_END (p, mcnt); | |
+ } | |
+ else | |
+ p += 1 + CHARSET_BITMAP_SIZE (p - 1); | |
+ count++; /* A charset matches exactly one character. */ | |
+ break; | |
+ | |
+#ifdef emacs | |
+ case before_dot: | |
+ case at_dot: | |
+ case after_dot: | |
+#endif /* emacs */ | |
+ case no_op: | |
+ case begline: | |
+ case endline: | |
+ case begbuf: | |
+ case endbuf: | |
+ case wordbound: | |
+ case notwordbound: | |
+ case wordbeg: | |
+ case wordend: | |
+ case symbeg: | |
+ case symend: | |
+ /* Zero width. */ | |
+ continue; | |
+ case lookahead: | |
+ case lookahead_not: | |
+ case lookbehind: | |
+ case lookbehind_not: | |
+ /* Skip to lookaround_succeed. */ | |
+ while (p < pend) /* NOTE(review): raw byte scan starting at the operands; an operand byte equal to lookaround_succeed would end it early -- confirm. */ | |
+ { | |
+ if ((re_opcode_t) *p++ == lookaround_succeed) | |
+ break; | |
+ } | |
+ break; /* The skipped lookaround adds nothing to COUNT. */ | |
+ default: | |
+ return -1; /* The caller at the lookbehind close treats -1 as variable length. */ | |
+ } | |
+ } | |
+ return count; | |
+} | |
+ | |
/* Non-zero if "p1 matches something" implies "p2 fails". */ | |
static int | |
mutually_exclusive_p (bufp, p1, p2) | |
@@ -5202,6 +5422,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |
re_char **best_regstart, **best_regend; | |
#endif | |
+ /* Non-zero while FINISH_LOOKAROUND unwinds the failure stack: saved registers are popped but not restored. */ | |
+ boolean discard_saved_regs = 0; | |
+ | |
/* Logically, this is `best_regend[0]'. But we don't want to have to | |
allocate space for that if we're not allocating space for anything | |
else (see below). Also, we never need info about register 0 for | |
@@ -5774,6 +5997,77 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |
p += 1; | |
break; | |
+ case lookahead: | |
+ case lookahead_not: | |
+ DEBUG_PRINT1 ((re_opcode_t) *(p - 1) == lookahead ? "EXECUTING lookahead.\n" : "EXECUTING lookahead_not.\n"); | |
+ | |
+ p += 2; | |
+ PUSH_FAILURE_POINT (p - 3, d); | |
+ break; | |
+ | |
+ case lookbehind: | |
+ case lookbehind_not: | |
+ { | |
+ int mcnt, count; | |
+ boolean not = (re_opcode_t) *(p - 1) != lookbehind; | |
+ | |
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
+ EXTRACT_NUMBER_AND_INCR (count, p); | |
+ | |
+ DEBUG_PRINT2 (not | |
+ ? "EXECUTING lookbehind_not %d.\n" | |
+ : "EXECUTING lookbehind %d.\n", count); | |
+ | |
+ dfail = d; | |
+ while (d != string1 && count > 0) | |
+ { | |
+ if (d == string2) | |
+ { | |
+ if (!string1) | |
+ break; | |
+ d = end1; | |
+ dend = end_match_1; | |
+ } | |
+ | |
+ if (target_multibyte) | |
+ { | |
+ re_char *dhead = (d >= string1 && d <= end1) ? string1 : string2; | |
+ PREV_CHAR_BOUNDARY (d, dhead); | |
+ } | |
+ else | |
+ d--; | |
+ count--; | |
+ } | |
+ | |
+ if (count > 0) | |
+ { | |
+ if (not) | |
+ { | |
+ /* There is not enough text before the current position to match. | |
+ So just treat the negative lookbehind as having succeeded here. */ | |
+ d = dfail; | |
+ p = p - 2 + mcnt; | |
+ break; | |
+ } | |
+ else | |
+ goto fail; | |
+ } | |
+ | |
+ PUSH_FAILURE_POINT (p - 5, dfail); | |
+ } | |
+ break; | |
+ | |
+ case lookaround_succeed: | |
+ DEBUG_PRINT1 ("EXECUTING lookaround_succeed.\n"); | |
+ | |
+ FINISH_LOOKAROUND(); | |
+ break; | |
+ | |
+ case lookaround_fail: | |
+ DEBUG_PRINT1 ("EXECUTING lookaround_fail.\n"); | |
+ | |
+ FINISH_LOOKAROUND(); | |
+ goto fail; | |
/* \<digit> has been turned into a `duplicate' command which is | |
followed by the numeric value of <digit> as the register number. */ | |
@@ -6415,12 +6709,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |
case on_failure_jump_loop: | |
case on_failure_jump: | |
case succeed_n: | |
+ case lookahead_not: | |
+ case lookbehind_not: | |
d = str; | |
continue_failure_jump: | |
EXTRACT_NUMBER_AND_INCR (mcnt, pat); | |
p = pat + mcnt; | |
break; | |
+ case lookahead: | |
+ case lookbehind: | |
case no_op: | |
/* A special frame used for nastyloops. */ | |
goto fail; | |
diff --git a/test/regexp-testsuite.el b/test/regexp-testsuite.el | |
new file mode 100644 | |
index 00000000..c79d6fdb | |
--- /dev/null | |
+++ b/test/regexp-testsuite.el | |
@@ -0,0 +1,280 @@ | |
+;; -*-coding:utf-8-*- | |
+ | |
+(require 'cl) | |
+ | |
+(defvar regexp-testsuite-success nil) | |
+ | |
+(defmacro regexp-testsuite-test (name &rest form) ; Run FORM as the test NAME and print "NAME ... ok", "fail" or "invalid". | |
+ (declare (indent 1)) | |
+ `(,(if noninteractive ; the output function is chosen when the macro is expanded | |
+ 'princ-list | |
+ 'message) | |
+ (format "%s ... %s" | |
+ ,name | |
+ (condition-case nil | |
+ (if (progn ,@form) | |
+ 'ok | |
+ (setq regexp-testsuite-success nil) ; a nil result marks the whole run as failed | |
+ 'fail) | |
+ (error (progn ; an error signalled by FORM is reported as `invalid' | |
+ (setq regexp-testsuite-success nil) | |
+ 'invalid)))))) | |
+ | |
+(defun regexp-testsuite-expect-invalid (regexp) ; Pass only if matching REGEXP against "" signals an error. | |
+ (regexp-testsuite-test (format "expect-invalid %S" regexp) | |
+ (condition-case nil | |
+ (prog1 nil (string-match regexp "")) ; no error signalled: the test fails | |
+ (error t)))) | |
+ | |
+(defun regexp-testsuite-expect-match (regexp string &optional group-number group-string) ; Pass if REGEXP matches STRING and, when GROUP-NUMBER is given, that group equals GROUP-STRING. | |
+ (regexp-testsuite-test (format "expect-match %S %S" regexp string) | |
+ (and (string-match regexp string) | |
+ (if group-number | |
+ (equal (match-string group-number string) group-string) | |
+ t)))) | |
+ | |
+(defun regexp-testsuite-expect-not-match (regexp string) ; Pass if REGEXP does not match anywhere in STRING. | |
+ (regexp-testsuite-test (format "expect-not-match %S %S" regexp string) | |
+ (not (string-match regexp string)))) | |
+ | |
+(defun regexp-testsuite-run () ; Run every lookaround test, then report overall success or failure. | |
+ (interactive) | |
+ (setq regexp-testsuite-success t) ; cleared by any failing or erroring test | |
+ (regexp-testsuite-expect-match "\\(?=\\)" "") | |
+ (regexp-testsuite-expect-not-match "\\(?=a\\)" "") | |
+ (regexp-testsuite-expect-match "a\\(?=b\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "a\\(?=b\\)c" "ab") | |
+ (regexp-testsuite-expect-match "\\(?=a\\)a" "a") | |
+ (regexp-testsuite-expect-not-match "\\(?=b\\)a" "a") | |
+ (regexp-testsuite-expect-match "\\(?=^\\)a" "a") | |
+ (regexp-testsuite-expect-match "a\\(?=$\\)$" "a") | |
+ (regexp-testsuite-expect-match "a\\(?=\\)$" "a") | |
+ (regexp-testsuite-expect-match "a\\(?=.*c\\)b" "abc") | |
+ (regexp-testsuite-expect-not-match "a\\(?=.*d\\)b" "abc") | |
+ (regexp-testsuite-expect-match "a\\(?=b\\|c\\|d\\|e\\)" "ae") | |
+ (regexp-testsuite-expect-not-match "a\\(?=b\\|c\\|d\\|e\\)" "af") | |
+ (regexp-testsuite-expect-match "a\\(?=\\(b\\)\\)b" "ab" 1 "b") | |
+ (regexp-testsuite-expect-match "a\\(\\(?=b\\)\\)" "ab" 1 "") | |
+ (regexp-testsuite-expect-match "a\\(?=\\(b\\)\\)" "ab" 1 "b") | |
+ (regexp-testsuite-expect-match "\\(a\\(?=\\(b\\)\\)\\2\\)\\1" "abab" 1 "ab") | |
+ (regexp-testsuite-expect-not-match "\\(a\\)\\(?=\\(b\\)\\)\\1" "ab") | |
+ (regexp-testsuite-expect-match "\\(a\\(?=b\\(?=c\\)\\)\\)" "abc" 1 "a") | |
+ (regexp-testsuite-expect-not-match "\\(a\\(?=b\\(?=c\\)\\)\\)" "abd") | |
+ (regexp-testsuite-expect-not-match "\\(?!\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?!a\\)" "") | |
+ (regexp-testsuite-expect-not-match "a\\(?!b\\)b" "ab") | |
+ (regexp-testsuite-expect-match "a\\(?!b\\)c" "ac") | |
+ (regexp-testsuite-expect-not-match "\\(?!a\\)a" "a") | |
+ (regexp-testsuite-expect-match "\\(?!b\\)a" "a") | |
+ (regexp-testsuite-expect-match "\\(?!^\\)a" "ba") | |
+ (regexp-testsuite-expect-not-match "\\(?!^\\)a" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?!$\\)$" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?!\\)$" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?!.*c\\)b" "abc") | |
+ (regexp-testsuite-expect-match "a\\(?!.*d\\)b" "abc") | |
+ (regexp-testsuite-expect-not-match "a\\(?!b\\|c\\|d\\|e\\)" "ae") | |
+ (regexp-testsuite-expect-match "a\\(?!b\\|c\\|d\\|e\\)" "af") | |
+ (regexp-testsuite-expect-match "a\\(?!\\(b\\)\\)c" "ac") | |
+ (regexp-testsuite-expect-match "a\\(\\(?!b\\)\\)" "ac") | |
+ (regexp-testsuite-expect-match "a\\(?!b\\(?!c\\)\\)" "abc") | |
+ (regexp-testsuite-expect-not-match "a\\(?!b\\(?=\\(c\\)\\)\\)" "abc") | |
+ (regexp-testsuite-expect-not-match "a\\(?!b\\(?!c\\)\\)" "abd") | |
+ (regexp-testsuite-expect-match "\\(?<=\\)" "") | |
+ (regexp-testsuite-expect-not-match "\\(?<=a\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?<=a\\)" "a") | |
+ (regexp-testsuite-expect-not-match "\\(?<=b\\)" "a") | |
+ (regexp-testsuite-expect-match "\\(?<=^\\)" "") | |
+ (regexp-testsuite-expect-not-match "a\\(?<=^\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?<=$\\)" "") | |
+ (regexp-testsuite-expect-not-match "\\(?<=$\\)a" "") | |
+ (regexp-testsuite-expect-match "\\(?<=a\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<=c\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<=\\(?<=a\\)\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<=\\(?<=b\\)\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<=\\(?=a\\).\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<=\\(a\\)\\)b\\1" "aba" 1 "a") | |
+ (regexp-testsuite-expect-match "\\(?<=.\\)a" "aa") | |
+ (regexp-testsuite-expect-match "\\(?<=\\(.\\)\\)a" "aa") | |
+ (regexp-testsuite-expect-match "\\(?<=\\w\\)a" "aa") | |
+ (regexp-testsuite-expect-not-match "\\(?<=\\w\\)a" "!a") | |
+ (regexp-testsuite-expect-match "\\(?<=\\sw\\)a" "aa") | |
+ (regexp-testsuite-expect-not-match "\\(?<=\\sw\\)a" "!a") | |
+ (regexp-testsuite-expect-match "\\(?<=\\cg\\)a" "λa") | |
+ (regexp-testsuite-expect-not-match "\\(?<=\\Cg\\)a" "λa") | |
+ (regexp-testsuite-expect-match "\\(?<=[a-z]\\)" "aa") | |
+ (regexp-testsuite-expect-not-match "\\(?<=[a-z]\\)a" "1a") | |
+ (regexp-testsuite-expect-match "\\(?<=[^a-z]\\)" "1a") | |
+ (regexp-testsuite-expect-not-match "\\(?<=[^a-z]\\)" "aa") | |
+ (regexp-testsuite-expect-match "\\(?<=[:ascii:]\\)a" "aa") | |
+ (regexp-testsuite-expect-match "\\(?<=\\`\\)" "") | |
+ (regexp-testsuite-expect-not-match "a\\(?<=\\`\\)" "a") | |
+ (regexp-testsuite-expect-match "\\(?<=\\'\\)" "") | |
+ (regexp-testsuite-expect-not-match "\\(?<=\\'\\)a" "a") | |
+ (regexp-testsuite-expect-not-match "\\(?<=\\=\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?<=\\b\\)a" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?<=\\b\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<=\\B\\)a" "aa") | |
+ (regexp-testsuite-expect-not-match "\\(?<=\\B\\)a" " a") | |
+ (regexp-testsuite-expect-match "\\(?<=\\<\\)a" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?<=\\<\\)b" "ab") | |
+ (regexp-testsuite-expect-match "a\\(?<=\\>\\)" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?<=\\>\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<=\\_<\\)a" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?<=\\_<\\)b" "ab") | |
+ (regexp-testsuite-expect-match "a\\(?<=\\_>\\)" "a") | |
+ (regexp-testsuite-expect-not-match "a\\(?<=\\_>\\)b" "ab") | |
+ (regexp-testsuite-expect-invalid "\\(?<=\\(.\\)\\1\\)") ; duplicate | |
+ (regexp-testsuite-expect-invalid "\\(?<=a*\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a*?\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a+\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a+?\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a?\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a??\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a\\{1,4\\}\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a\\|bb\\|ccc\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<=a\\{4\\}\\)") ; fixed width but not supported yet | |
+ (regexp-testsuite-expect-invalid "\\(?<=a\\|\\b\\c\\)") ; fixed width but not supported yet | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?<!a\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?<!a\\)" "a") | |
+ (regexp-testsuite-expect-not-match "\\(?<!a\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<!b\\)" "a") | |
+ (regexp-testsuite-expect-not-match "\\(?<!^\\)" "") | |
+ (regexp-testsuite-expect-not-match "a\\(?<!^\\)" "") | |
+ (regexp-testsuite-expect-not-match "\\(?<!$\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?<=a\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<!c\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<!\\(?<!a\\)\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\(?<!b\\)\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<!\\(?!a\\).\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<!.\\)a" "aa") | |
+ (regexp-testsuite-expect-not-match "\\(?<!.\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\(.\\)\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\w\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\w\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\sw\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<!\\sw\\)a" "!a") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\cg\\)a" "λa") | |
+ (regexp-testsuite-expect-match "\\(?<!\\Cg\\)a" "λa") | |
+ (regexp-testsuite-expect-match "\\(?<![a-z]\\)" "aa") | |
+ (regexp-testsuite-expect-match "\\(?<![a-z]\\)a" "1a") | |
+ (regexp-testsuite-expect-not-match "\\(?<![^a-z]\\)a" "1a") | |
+ (regexp-testsuite-expect-not-match "\\(?<![:ascii:]\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\`\\)" "") | |
+ (regexp-testsuite-expect-match "a\\(?<!\\`\\)" "a") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\'\\)" "") | |
+ (regexp-testsuite-expect-match "\\(?<!\\'\\)a" "a") | |
+ (regexp-testsuite-expect-match "\\(?<!\\=\\)" "") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\b\\)a" "a") | |
+ (regexp-testsuite-expect-match "a\\(?<!\\b\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\B\\)b" "ab") | |
+ (regexp-testsuite-expect-match "\\(?<!\\B\\)a" " a") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\<\\)a" "a") | |
+ (regexp-testsuite-expect-match "a\\(?<!\\<\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "a\\(?<!\\>\\)" "a") | |
+ (regexp-testsuite-expect-match "a\\(?<!\\>\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "\\(?<!\\_<\\)a" "a") | |
+ (regexp-testsuite-expect-match "a\\(?<!\\_<\\)b" "ab") | |
+ (regexp-testsuite-expect-not-match "a\\(?<!\\_>\\)" "a") | |
+ (regexp-testsuite-expect-match "a\\(?<!\\_>\\)b" "ab") | |
+ (regexp-testsuite-expect-invalid "\\(?<!\\(.\\)\\1\\)") ; duplicate | |
+ (regexp-testsuite-expect-invalid "\\(?<!a*\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a*?\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a+\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a+?\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a?\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a??\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a\\{1,4\\}\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a\\|bb\\|ccc\\)") ; variable width | |
+ (regexp-testsuite-expect-invalid "\\(?<!a\\{4\\}\\)") ; fixed width but not supported yet | |
+ (regexp-testsuite-expect-invalid "\\(?<!a\\|\\b\\c\\)") ; fixed width but not supported yet | |
+ | |
+ (regexp-testsuite-expect-match "Hello, \\(?=世界\\)" "Hello, 世界!") | |
+ (regexp-testsuite-expect-not-match "Hello, \\(?=せかい\\)" "Hello, 世界!") | |
+ (regexp-testsuite-expect-match "Hello, \\(?!せかい\\)" "Hello, 世界!") | |
+ (regexp-testsuite-expect-not-match "Hello, \\(?!世界\\)" "Hello, 世界!") | |
+ (regexp-testsuite-expect-match "\\(?<=こんにちは\\), World!" "こんにちは, World!") | |
+ (regexp-testsuite-expect-not-match "\\(?<=こんにちわ\\), World!" "こんにちは, World!") | |
+ (regexp-testsuite-expect-match "\\(?<!こんにちわ\\), World!" "こんにちは, World!") | |
+ (regexp-testsuite-expect-not-match "\\(?<!こんにちは\\), World!" "こんにちは, World!") | |
+ | |
+ (with-temp-buffer | |
+ (insert "abracadabra") | |
+ (goto-char (point-min)) | |
+ (regexp-testsuite-test "re-search-forward lookahead" | |
+ (equal | |
+ (loop while (re-search-forward "a\\(?=b\\)" nil t) | |
+ collect (point)) | |
+ '(2 9)))) | |
+ | |
+ (with-temp-buffer | |
+ (insert "abracadabra") | |
+ (regexp-testsuite-test "re-search-backward lookahead" | |
+ (equal | |
+ (loop while (re-search-backward "a\\(?=b\\)" nil t) | |
+ collect (point)) | |
+ '(8 1)))) | |
+ | |
+ (with-temp-buffer | |
+ (insert "abracadabra") | |
+ (goto-char (point-min)) | |
+ (regexp-testsuite-test "re-search-forward lookbehind" | |
+ (equal | |
+ (loop while (re-search-forward "\\(?<=a\\)b" nil t) | |
+ collect (point)) | |
+ '(3 10)))) | |
+ | |
+ (with-temp-buffer | |
+ (insert "abracadabra") | |
+ (regexp-testsuite-test "re-search-backward lookbehind" | |
+ (equal | |
+ (loop while (re-search-backward "\\(?<=a\\)b" nil t) | |
+ collect (point)) | |
+ '(9 2)))) | |
+ | |
+ (with-temp-buffer | |
+ (insert "abcdebc") | |
+ (goto-char 3) ; the b required by the lookbehind lies before point | |
+ (regexp-testsuite-test "re-search-forward lookbehind 2" | |
+ (eq (re-search-forward "\\(?<=b\\)c" nil t) 4))) | |
+ | |
+ (with-temp-buffer | |
+ (insert "abcdebc") | |
+ (goto-char 7) | |
+ ;; search-backward with lookahead over bound is not supported yet | |
+ (regexp-testsuite-test "re-search-backward not supported" | |
+ (eq (re-search-backward "b\\(?=c\\)" nil t) 2))) | |
+ | |
+ (if regexp-testsuite-success | |
+ (message "Test success!") | |
+ (message "Test failed."))) | |
+ | |
+(defun regexp-testsuite-benchmark (file) ; Time shy-group and lookaround regexps searching FILE in both directions. | |
+ (interactive (list (read-file-name "Large file: " | |
+ nil | |
+ (progn | |
+ (require 'find-func) | |
+ (let ((file (concat (or find-function-C-source-directory "~/src/emacs") "/src/xdisp.c"))) ; default offered only if this xdisp.c exists | |
+ (if (file-exists-p file) | |
+ file)))))) | |
+ (require 'benchmark) | |
+ (let (count) | |
+ (with-temp-buffer | |
+ (insert-file-contents file) | |
+ (dolist (pair '((point-min . re-search-forward) (point-max . re-search-backward))) ; (start-position function . search function) | |
+ (dolist (regexp '("unsigned \\(?:char\\|int\\|long\\)" "unsigned \\(?=char\\|int\\|long\\)" | |
+ "\\(?:unsigned \\)int" "\\(?<=unsigned \\)int")) | |
+ (setq count 0) | |
+ (funcall (if noninteractive | |
+ 'princ-list | |
+ 'message) | |
+ (format "%s: %s elapsed (%s found)" | |
+ regexp | |
+ (car (benchmark-run 10 ; ten repetitions; COUNT accumulates across all of them | |
+ (progn | |
+ (goto-char (funcall (car pair))) | |
+ (while (funcall (cdr pair) regexp nil t) | |
+ (setq count (1+ count)))))) | |
+ count))))))) | |
+ | |
+(provide 'regexp-testsuite) | |
-- | |
1.7.8 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment