Created
February 14, 2019 18:20
-
-
Save xeioex/35d9cc06fb9559ca32ce1e085c7f2d92 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # HG changeset patch | |
| # User Dmitry Volyntsev <[email protected]> | |
| # Date 1550168391 -10800 | |
| # Thu Feb 14 21:19:51 2019 +0300 | |
| # Node ID 518e6a7b270d8221ab7f1768f1410c419cac0078 | |
| # Parent dde9a253361e8d76639f492d7c5d81bcb9521f18 | |
| Fixed String.prototype.split() for Unicode strings. | |
| This closes issue #95 on GitHub. | |
| diff --git a/njs/njs.h b/njs/njs.h | |
| --- a/njs/njs.h | |
| +++ b/njs/njs.h | |
| @@ -232,7 +232,7 @@ NXT_EXPORT void njs_vm_retval_set(njs_vm | |
| NXT_EXPORT u_char * njs_string_alloc(njs_vm_t *vm, njs_value_t *value, | |
| uint32_t size, uint32_t length); | |
| NXT_EXPORT njs_ret_t njs_string_create(njs_vm_t *vm, njs_value_t *value, | |
| - u_char *start, uint32_t size, uint32_t length); | |
| + const u_char *start, uint32_t size, uint32_t length); | |
| NXT_EXPORT nxt_int_t njs_value_string_copy(njs_vm_t *vm, nxt_str_t *retval, | |
| const njs_value_t *value, uintptr_t *next); | |
| diff --git a/njs/njs_array.c b/njs/njs_array.c | |
| --- a/njs/njs_array.c | |
| +++ b/njs/njs_array.c | |
| @@ -184,7 +184,7 @@ njs_array_add(njs_vm_t *vm, njs_array_t | |
| njs_ret_t | |
| -njs_array_string_add(njs_vm_t *vm, njs_array_t *array, u_char *start, | |
| +njs_array_string_add(njs_vm_t *vm, njs_array_t *array, const u_char *start, | |
| size_t size, size_t length) | |
| { | |
| njs_ret_t ret; | |
| diff --git a/njs/njs_array.h b/njs/njs_array.h | |
| --- a/njs/njs_array.h | |
| +++ b/njs/njs_array.h | |
| @@ -17,8 +17,8 @@ | |
| njs_array_t *njs_array_alloc(njs_vm_t *vm, uint32_t length, uint32_t spare); | |
| njs_ret_t njs_array_add(njs_vm_t *vm, njs_array_t *array, njs_value_t *value); | |
| -njs_ret_t njs_array_string_add(njs_vm_t *vm, njs_array_t *array, u_char *start, | |
| - size_t size, size_t length); | |
| +njs_ret_t njs_array_string_add(njs_vm_t *vm, njs_array_t *array, | |
| + const u_char *start, size_t size, size_t length); | |
| njs_ret_t njs_array_expand(njs_vm_t *vm, njs_array_t *array, uint32_t prepend, | |
| uint32_t size); | |
| njs_ret_t njs_array_constructor(njs_vm_t *vm, njs_value_t *args, | |
| diff --git a/njs/njs_regexp.c b/njs/njs_regexp.c | |
| --- a/njs/njs_regexp.c | |
| +++ b/njs/njs_regexp.c | |
| @@ -389,8 +389,8 @@ njs_regexp_compile_trace_handler(nxt_tra | |
| nxt_int_t | |
| -njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex, u_char *subject, size_t len, | |
| - nxt_regex_match_data_t *match_data) | |
| +njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex, const u_char *subject, | |
| + size_t len, nxt_regex_match_data_t *match_data) | |
| { | |
| nxt_int_t ret; | |
| nxt_trace_handler_t handler; | |
| diff --git a/njs/njs_regexp.h b/njs/njs_regexp.h | |
| --- a/njs/njs_regexp.h | |
| +++ b/njs/njs_regexp.h | |
| @@ -25,8 +25,8 @@ njs_token_t njs_regexp_literal(njs_vm_t | |
| njs_value_t *value); | |
| njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm, | |
| u_char *string, size_t length, njs_regexp_flags_t flags); | |
| -nxt_int_t njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex, u_char *subject, | |
| - size_t len, nxt_regex_match_data_t *match_data); | |
| +nxt_int_t njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex, | |
| + const u_char *subject, size_t len, nxt_regex_match_data_t *match_data); | |
| njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern); | |
| njs_ret_t njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, | |
| nxt_uint_t nargs, njs_index_t unused); | |
| diff --git a/njs/njs_string.c b/njs/njs_string.c | |
| --- a/njs/njs_string.c | |
| +++ b/njs/njs_string.c | |
| @@ -77,7 +77,7 @@ static njs_ret_t njs_string_prototype_pa | |
| static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, | |
| njs_regexp_pattern_t *pattern); | |
| static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, | |
| - njs_utf8_t utf8, u_char *start, size_t size); | |
| + njs_utf8_t utf8, const u_char *start, size_t size); | |
| static njs_ret_t njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args, | |
| njs_string_replace_t *r); | |
| static njs_ret_t njs_string_replace_regexp_function(njs_vm_t *vm, | |
| @@ -111,10 +111,11 @@ static njs_ret_t njs_string_decode(njs_v | |
| njs_ret_t | |
| -njs_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start, | |
| +njs_string_create(njs_vm_t *vm, njs_value_t *value, const u_char *start, | |
| uint32_t size, uint32_t length) | |
| { | |
| - u_char *dst, *src; | |
| + u_char *dst; | |
| + const u_char *src; | |
| njs_string_t *string; | |
| value->type = NJS_STRING; | |
| @@ -153,7 +154,7 @@ njs_string_create(njs_vm_t *vm, njs_valu | |
| value->long_string.data = string; | |
| - string->start = start; | |
| + string->start = (u_char *) start; | |
| string->length = length; | |
| string->retain = 1; | |
| } | |
| @@ -2729,12 +2730,11 @@ njs_string_prototype_split(njs_vm_t *vm, | |
| njs_index_t unused) | |
| { | |
| int ret, *captures; | |
| - u_char *p, *start, *next; | |
| size_t size; | |
| uint32_t limit; | |
| njs_utf8_t utf8; | |
| njs_array_t *array; | |
| - const u_char *end; | |
| + const u_char *p, *start, *next, *end; | |
| njs_regexp_utf8_t type; | |
| njs_string_prop_t string, split; | |
| njs_regexp_pattern_t *pattern; | |
| @@ -2798,8 +2798,8 @@ njs_string_prototype_split(njs_vm_t *vm, | |
| /* Empty split string. */ | |
| if (p == next) { | |
| - p++; | |
| - next++; | |
| + p = nxt_utf8_next(p, end); | |
| + next = p; | |
| } | |
| size = p - start; | |
| @@ -2845,8 +2845,8 @@ njs_string_prototype_split(njs_vm_t *vm, | |
| /* Empty split regexp. */ | |
| if (p == next) { | |
| - p++; | |
| - next++; | |
| + p = nxt_utf8_next(p, end); | |
| + next = p; | |
| } | |
| size = p - start; | |
| @@ -2887,7 +2887,7 @@ done: | |
| static njs_ret_t | |
| njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, njs_utf8_t utf8, | |
| - u_char *start, size_t size) | |
| + const u_char *start, size_t size) | |
| { | |
| ssize_t length; | |
| diff --git a/njs/njs_string.h b/njs/njs_string.h | |
| --- a/njs/njs_string.h | |
| +++ b/njs/njs_string.h | |
| @@ -100,7 +100,7 @@ typedef enum { | |
| nxt_inline uint32_t | |
| -njs_string_length(njs_utf8_t utf8, u_char *start, size_t size) | |
| +njs_string_length(njs_utf8_t utf8, const u_char *start, size_t size) | |
| { | |
| ssize_t length; | |
| diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c | |
| --- a/njs/test/njs_unit_test.c | |
| +++ b/njs/test/njs_unit_test.c | |
| @@ -5078,6 +5078,18 @@ static njs_unit_test_t njs_test[] = | |
| { nxt_string("'abc'.split('')"), | |
| nxt_string("a,b,c") }, | |
| + { nxt_string("'αβγ'.split('')"), | |
| + nxt_string("α,β,γ") }, | |
| + | |
| + { nxt_string("'囲碁織'.split('')"), | |
| + nxt_string("囲,碁,織") }, | |
| + | |
| + { nxt_string("'𝟘𝟙𝟚𝟛'.split('')"), | |
| + nxt_string("𝟘,𝟙,𝟚,𝟛") }, | |
| + | |
| + { nxt_string("'囲α碁α織'.split('α')"), | |
| + nxt_string("囲,碁,織") }, | |
| + | |
| { nxt_string("'abc'.split('abc')"), | |
| nxt_string(",") }, | |
| diff --git a/nxt/nxt_pcre.c b/nxt/nxt_pcre.c | |
| --- a/nxt/nxt_pcre.c | |
| +++ b/nxt/nxt_pcre.c | |
| @@ -209,13 +209,13 @@ nxt_pcre_default_free(void *p, void *mem | |
| nxt_int_t | |
| -nxt_regex_match(nxt_regex_t *regex, u_char *subject, size_t len, | |
| +nxt_regex_match(nxt_regex_t *regex, const u_char *subject, size_t len, | |
| nxt_regex_match_data_t *match_data, nxt_regex_context_t *ctx) | |
| { | |
| int ret; | |
| - ret = pcre_exec(regex->code, regex->extra, (char *) subject, len, 0, 0, | |
| - match_data->captures, match_data->ncaptures); | |
| + ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len, | |
| + 0, 0, match_data->captures, match_data->ncaptures); | |
| /* PCRE_ERROR_NOMATCH is -1. */ | |
| diff --git a/nxt/nxt_regex.h b/nxt/nxt_regex.h | |
| --- a/nxt/nxt_regex.h | |
| +++ b/nxt/nxt_regex.h | |
| @@ -35,7 +35,7 @@ NXT_EXPORT nxt_regex_match_data_t *nxt_r | |
| nxt_regex_context_t *ctx); | |
| NXT_EXPORT void nxt_regex_match_data_free(nxt_regex_match_data_t *match_data, | |
| nxt_regex_context_t *ctx); | |
| -NXT_EXPORT nxt_int_t nxt_regex_match(nxt_regex_t *regex, u_char *subject, | |
| +NXT_EXPORT nxt_int_t nxt_regex_match(nxt_regex_t *regex, const u_char *subject, | |
| size_t len, nxt_regex_match_data_t *match_data, nxt_regex_context_t *ctx); | |
| NXT_EXPORT int *nxt_regex_captures(nxt_regex_match_data_t *match_data); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment