Skip to content

Instantly share code, notes, and snippets.

@agentzh
Created April 23, 2013 06:32
Show Gist options
  • Save agentzh/5441274 to your computer and use it in GitHub Desktop.
feature: add "U" regex option to ngx_lua's ngx.re API (Version 2)
diff --git a/src/ngx_http_lua_regex.c b/src/ngx_http_lua_regex.c
index 63f0a1a..0ae60bc 100644
--- a/src/ngx_http_lua_regex.c
+++ b/src/ngx_http_lua_regex.c
@@ -38,6 +38,7 @@
#define NGX_LUA_RE_MODE_DFA (1<<1)
#define NGX_LUA_RE_MODE_JIT (1<<2)
#define NGX_LUA_RE_MODE_DUPNAMES (1<<3)
+#define NGX_LUA_RE_NO_UTF8_CHECK (1<<4)
#define NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT (100)
@@ -93,14 +94,14 @@ static void ngx_http_lua_re_collect_named_captures(lua_State *L,
unsigned flags, ngx_str_t *subj);
-#define ngx_http_lua_regex_exec(re, e, s, start, captures, size) \
- pcre_exec(re, e, (const char *) (s)->data, (s)->len, start, 0, \
+#define ngx_http_lua_regex_exec(re, e, s, start, captures, size, opts) \
+ pcre_exec(re, e, (const char *) (s)->data, (s)->len, start, opts, \
captures, size)
#define ngx_http_lua_regex_dfa_exec(re, e, s, start, captures, size, ws, \
- wscount) \
- pcre_dfa_exec(re, e, (const char *) (s)->data, (s)->len, start, 0, \
+ wscount, opts) \
+ pcre_dfa_exec(re, e, (const char *) (s)->data, (s)->len, start, opts, \
captures, size, ws, wscount)
@@ -128,6 +129,7 @@ ngx_http_lua_ngx_re_match(lua_State *L)
pcre_extra *sd = NULL;
int name_entry_size, name_count;
u_char *name_table;
+ int exec_opts;
ngx_http_lua_regex_compile_t re_comp;
@@ -429,6 +431,13 @@ exec:
}
}
+ if (flags & NGX_LUA_RE_NO_UTF8_CHECK) {
+ exec_opts = PCRE_NO_UTF8_CHECK;
+
+ } else {
+ exec_opts = 0;
+ }
+
if (flags & NGX_LUA_RE_MODE_DFA) {
#if LUA_HAVE_PCRE_DFA
@@ -436,7 +445,7 @@ exec:
int ws[NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT];
rc = ngx_http_lua_regex_dfa_exec(re_comp.regex, sd, &subj,
(int) pos, cap, ovecsize, ws,
- sizeof(ws)/sizeof(ws[0]));
+ sizeof(ws)/sizeof(ws[0]), exec_opts);
#else /* LUA_HAVE_PCRE_DFA */
@@ -447,7 +456,7 @@ exec:
} else {
rc = ngx_http_lua_regex_exec(re_comp.regex, sd, &subj, (int) pos, cap,
- ovecsize);
+ ovecsize, exec_opts);
}
if (rc == NGX_REGEX_NO_MATCHED) {
@@ -894,6 +903,7 @@ ngx_http_lua_ngx_re_gmatch_iterator(lua_State *L)
const char *msg = NULL;
int name_entry_size, name_count;
u_char *name_table;
+ int exec_opts;
/* upvalues in order: subj ctx offset */
@@ -949,6 +959,13 @@ ngx_http_lua_ngx_re_gmatch_iterator(lua_State *L)
}
}
+ if (ctx->flags & NGX_LUA_RE_NO_UTF8_CHECK) {
+ exec_opts = PCRE_NO_UTF8_CHECK;
+
+ } else {
+ exec_opts = 0;
+ }
+
if (ctx->flags & NGX_LUA_RE_MODE_DFA) {
#if LUA_HAVE_PCRE_DFA
@@ -957,7 +974,7 @@ ngx_http_lua_ngx_re_gmatch_iterator(lua_State *L)
rc = ngx_http_lua_regex_dfa_exec(ctx->regex, ctx->regex_sd, &subj,
offset, cap, ctx->captures_len, ws,
- sizeof(ws)/sizeof(ws[0]));
+ sizeof(ws)/sizeof(ws[0]), exec_opts);
#else /* LUA_HAVE_PCRE_DFA */
msg = "at least pcre 6.0 is required for the DFA mode";
@@ -967,7 +984,8 @@ ngx_http_lua_ngx_re_gmatch_iterator(lua_State *L)
} else {
rc = ngx_http_lua_regex_exec(ctx->regex, ctx->regex_sd, &subj,
- offset, cap, ctx->captures_len);
+ offset, cap, ctx->captures_len,
+ exec_opts);
}
if (rc == NGX_REGEX_NO_MATCHED) {
@@ -1099,6 +1117,11 @@ ngx_http_lua_ngx_re_parse_opts(lua_State *L, ngx_http_lua_regex_compile_t *re,
re->options |= PCRE_UTF8;
break;
+ case 'U':
+ re->options |= PCRE_UTF8;
+ flags |= NGX_LUA_RE_NO_UTF8_CHECK;
+ break;
+
case 'x':
re->options |= PCRE_EXTENDED;
break;
@@ -1193,6 +1216,7 @@ ngx_http_lua_ngx_re_sub_helper(lua_State *L, unsigned global)
pcre_extra *sd = NULL;
int name_entry_size, name_count;
u_char *name_table;
+ int exec_opts;
ngx_http_lua_regex_compile_t re_comp;
ngx_http_lua_complex_value_t *ctpl = NULL;
@@ -1572,6 +1596,13 @@ exec:
}
}
+ if (flags & NGX_LUA_RE_NO_UTF8_CHECK) {
+ exec_opts = PCRE_NO_UTF8_CHECK;
+
+ } else {
+ exec_opts = 0;
+ }
+
for (;;) {
if (flags & NGX_LUA_RE_MODE_DFA) {
@@ -1580,7 +1611,8 @@ exec:
int ws[NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT];
rc = ngx_http_lua_regex_dfa_exec(re_comp.regex, sd, &subj,
offset, cap, ovecsize, ws,
- sizeof(ws)/sizeof(ws[0]));
+ sizeof(ws)/sizeof(ws[0]),
+ exec_opts);
#else /* LUA_HAVE_PCRE_DFA */
@@ -1591,7 +1623,7 @@ exec:
} else {
rc = ngx_http_lua_regex_exec(re_comp.regex, sd, &subj, offset, cap,
- ovecsize);
+ ovecsize, exec_opts);
}
if (rc == NGX_REGEX_NO_MATCHED) {
diff --git a/t/034-match.t b/t/034-match.t
index 6efefe7..c76c520 100644
--- a/t/034-match.t
+++ b/t/034-match.t
@@ -9,7 +9,7 @@ use Test::Nginx::Socket;
repeat_each(2);
-plan tests => repeat_each() * (blocks() * 2 + 10);
+plan tests => repeat_each() * (blocks() * 2 + 14);
#no_diff();
no_long_string();
@@ -945,3 +945,71 @@ error: pcre_exec\(\) failed: -10 on "你.*?" using "你好"
--- no_error_log
[error]
+
+
+=== TEST 43: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local m = ngx.re.match("你好", ".", "U")
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
+
+
+=== TEST 44: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local m = ngx.re.match("你好", ".", "u")
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
diff --git a/t/035-gmatch.t b/t/035-gmatch.t
index 2661cdf..a5c7c82 100644
--- a/t/035-gmatch.t
+++ b/t/035-gmatch.t
@@ -9,7 +9,7 @@ use Test::Nginx::Socket;
repeat_each(5);
-plan tests => repeat_each() * (blocks() * 2 + 3);
+plan tests => repeat_each() * (blocks() * 2 + 7);
our $HtmlDir = html_dir;
@@ -741,3 +741,73 @@ error: pcre_exec\(\) failed: -10 on "你.*?"
--- no_error_log
[error]
+
+
+=== TEST 28: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local it = ngx.re.gmatch("你好", ".", "U")
+ local m = it()
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
+
+
+=== TEST 29: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local it = ngx.re.gmatch("你好", ".", "u")
+ local m = it()
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
diff --git a/t/036-sub.t b/t/036-sub.t
index 43f8830..18d2715 100644
--- a/t/036-sub.t
+++ b/t/036-sub.t
@@ -9,7 +9,7 @@ use Test::Nginx::Socket;
repeat_each(2);
-plan tests => repeat_each() * (blocks() * 2 + 9);
+plan tests => repeat_each() * (blocks() * 2 + 13);
#no_diff();
no_long_string();
@@ -507,3 +507,67 @@ error: pcre_exec\(\) failed: -10 on "你.*?" using "你好"
--- no_error_log
[error]
+
+
+=== TEST 26: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.sub("你好", ".", "a", "U")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+s: a好
+--- no_error_log
+[error]
+
+
+
+=== TEST 27: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.sub("你好", ".", "a", "u")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+s: a好
+--- no_error_log
+[error]
+
diff --git a/t/037-gsub.t b/t/037-gsub.t
index 9a26390..31b97e6 100644
--- a/t/037-gsub.t
+++ b/t/037-gsub.t
@@ -9,7 +9,7 @@ log_level('warn');
repeat_each(2);
-plan tests => repeat_each() * (blocks() * 2 + 10);
+plan tests => repeat_each() * (blocks() * 2 + 14);
#no_diff();
no_long_string();
@@ -430,3 +430,71 @@ error: pcre_exec\(\) failed: -10 on "你.*?" using "你好"
--- no_error_log
[error]
+
+
+=== TEST 21: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.gsub("你好", ".", "a", "U")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+exec opts: 2000
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+s: aa
+--- no_error_log
+[error]
+
+
+
+=== TEST 22: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.gsub("你好", ".", "a", "u")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+exec opts: 0
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+s: aa
+--- no_error_log
+[error]
+
diff --git a/t/048-match-dfa.t b/t/048-match-dfa.t
index 31ecb6e..a29c203 100644
--- a/t/048-match-dfa.t
+++ b/t/048-match-dfa.t
@@ -9,7 +9,7 @@ use Test::Nginx::Socket;
repeat_each(2);
-plan tests => repeat_each() * (blocks() * 2);
+plan tests => repeat_each() * (blocks() * 2 + 4);
#no_diff();
no_long_string();
@@ -114,3 +114,71 @@ nil
--- response_body
not matched!
+
+
+=== TEST 6: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local m = ngx.re.match("你好", ".", "Ud")
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
+
+
+=== TEST 7: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local m = ngx.re.match("你好", ".", "ud")
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
diff --git a/t/050-gmatch-dfa.t b/t/050-gmatch-dfa.t
index 2849276..a8a97d5 100644
--- a/t/050-gmatch-dfa.t
+++ b/t/050-gmatch-dfa.t
@@ -9,7 +9,7 @@ use Test::Nginx::Socket;
repeat_each(2);
-plan tests => repeat_each() * (blocks() * 2 + 1);
+plan tests => repeat_each() * (blocks() * 2 + 5);
#no_diff();
#no_long_string();
@@ -217,3 +217,73 @@ error: failed to compile regex "(abc": pcre_compile() failed: missing ) in "(abc
--- no_error_log
[error]
+
+
+=== TEST 11: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local it = ngx.re.gmatch("你好", ".", "Ud")
+ local m = it()
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
+
+
+=== TEST 12: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local it = ngx.re.gmatch("你好", ".", "ud")
+ local m = it()
+ if m then
+ ngx.say(m[0])
+ else
+ ngx.say("not matched!")
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+你
+--- no_error_log
+[error]
+
diff --git a/t/052-sub-dfa.t b/t/052-sub-dfa.t
index 62e3b07..2341863 100644
--- a/t/052-sub-dfa.t
+++ b/t/052-sub-dfa.t
@@ -9,7 +9,7 @@ use Test::Nginx::Socket;
repeat_each(2);
-plan tests => repeat_each() * (blocks() * 2 + 2);
+plan tests => repeat_each() * (blocks() * 2 + 6);
#no_diff();
no_long_string();
@@ -135,3 +135,67 @@ error: failed to compile regex "(abc": pcre_compile() failed: missing ) in "(abc
--- no_error_log
[error]
+
+
+=== TEST 7: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.sub("你好", ".", "a", "Ud")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+s: a好
+--- no_error_log
+[error]
+
+
+
+=== TEST 8: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.sub("你好", ".", "a", "ud")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+s: a好
+--- no_error_log
+[error]
+
diff --git a/t/054-gsub-dfa.t b/t/054-gsub-dfa.t
index 21f55a3..a452726 100644
--- a/t/054-gsub-dfa.t
+++ b/t/054-gsub-dfa.t
@@ -9,7 +9,7 @@ use Test::Nginx::Socket;
repeat_each(2);
-plan tests => repeat_each() * (blocks() * 2 + 1);
+plan tests => repeat_each() * (blocks() * 2 + 5);
#no_diff();
no_long_string();
@@ -132,3 +132,71 @@ error: failed to compile regex "(abc": pcre_compile() failed: missing ) in "(abc
--- no_error_log
[error]
+
+
+=== TEST 7: UTF-8 mode without UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.gsub("你好", ".", "a", "Ud")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 2000
+exec opts: 2000
+exec opts: 2000
+
+--- request
+ GET /re
+--- response_body
+s: aa
+--- no_error_log
+[error]
+
+
+
+=== TEST 8: UTF-8 mode with UTF-8 sequence checks
+--- config
+ location /re {
+ content_by_lua '
+ local s, n, err = ngx.re.gsub("你好", ".", "a", "ud")
+ if s then
+ ngx.say("s: ", s)
+ end
+ ';
+ }
+--- stap
+probe process("$LIBPCRE_PATH").function("pcre_compile") {
+ printf("compile opts: %x\n", $options)
+}
+
+probe process("$LIBPCRE_PATH").function("pcre_dfa_exec") {
+ printf("exec opts: %x\n", $options)
+}
+
+--- stap_out
+compile opts: 800
+exec opts: 0
+exec opts: 0
+exec opts: 0
+
+--- request
+ GET /re
+--- response_body
+s: aa
+--- no_error_log
+[error]
+
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment