Skip to content

Instantly share code, notes, and snippets.

@xeioex
Created February 14, 2019 18:20
Show Gist options
  • Save xeioex/35d9cc06fb9559ca32ce1e085c7f2d92 to your computer and use it in GitHub Desktop.
Save xeioex/35d9cc06fb9559ca32ce1e085c7f2d92 to your computer and use it in GitHub Desktop.
# HG changeset patch
# User Dmitry Volyntsev <[email protected]>
# Date 1550168391 -10800
# Thu Feb 14 21:19:51 2019 +0300
# Node ID 518e6a7b270d8221ab7f1768f1410c419cac0078
# Parent dde9a253361e8d76639f492d7c5d81bcb9521f18
Fixed String.prototype.split() for unicode strings.
This closes issue #95 on GitHub.
diff --git a/njs/njs.h b/njs/njs.h
--- a/njs/njs.h
+++ b/njs/njs.h
@@ -232,7 +232,7 @@ NXT_EXPORT void njs_vm_retval_set(njs_vm
NXT_EXPORT u_char * njs_string_alloc(njs_vm_t *vm, njs_value_t *value,
uint32_t size, uint32_t length);
NXT_EXPORT njs_ret_t njs_string_create(njs_vm_t *vm, njs_value_t *value,
- u_char *start, uint32_t size, uint32_t length);
+ const u_char *start, uint32_t size, uint32_t length);
NXT_EXPORT nxt_int_t njs_value_string_copy(njs_vm_t *vm, nxt_str_t *retval,
const njs_value_t *value, uintptr_t *next);
diff --git a/njs/njs_array.c b/njs/njs_array.c
--- a/njs/njs_array.c
+++ b/njs/njs_array.c
@@ -184,7 +184,7 @@ njs_array_add(njs_vm_t *vm, njs_array_t
njs_ret_t
-njs_array_string_add(njs_vm_t *vm, njs_array_t *array, u_char *start,
+njs_array_string_add(njs_vm_t *vm, njs_array_t *array, const u_char *start,
size_t size, size_t length)
{
njs_ret_t ret;
diff --git a/njs/njs_array.h b/njs/njs_array.h
--- a/njs/njs_array.h
+++ b/njs/njs_array.h
@@ -17,8 +17,8 @@
njs_array_t *njs_array_alloc(njs_vm_t *vm, uint32_t length, uint32_t spare);
njs_ret_t njs_array_add(njs_vm_t *vm, njs_array_t *array, njs_value_t *value);
-njs_ret_t njs_array_string_add(njs_vm_t *vm, njs_array_t *array, u_char *start,
- size_t size, size_t length);
+njs_ret_t njs_array_string_add(njs_vm_t *vm, njs_array_t *array,
+ const u_char *start, size_t size, size_t length);
njs_ret_t njs_array_expand(njs_vm_t *vm, njs_array_t *array, uint32_t prepend,
uint32_t size);
njs_ret_t njs_array_constructor(njs_vm_t *vm, njs_value_t *args,
diff --git a/njs/njs_regexp.c b/njs/njs_regexp.c
--- a/njs/njs_regexp.c
+++ b/njs/njs_regexp.c
@@ -389,8 +389,8 @@ njs_regexp_compile_trace_handler(nxt_tra
nxt_int_t
-njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex, u_char *subject, size_t len,
- nxt_regex_match_data_t *match_data)
+njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex, const u_char *subject,
+ size_t len, nxt_regex_match_data_t *match_data)
{
nxt_int_t ret;
nxt_trace_handler_t handler;
diff --git a/njs/njs_regexp.h b/njs/njs_regexp.h
--- a/njs/njs_regexp.h
+++ b/njs/njs_regexp.h
@@ -25,8 +25,8 @@ njs_token_t njs_regexp_literal(njs_vm_t
njs_value_t *value);
njs_regexp_pattern_t *njs_regexp_pattern_create(njs_vm_t *vm,
u_char *string, size_t length, njs_regexp_flags_t flags);
-nxt_int_t njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex, u_char *subject,
- size_t len, nxt_regex_match_data_t *match_data);
+nxt_int_t njs_regexp_match(njs_vm_t *vm, nxt_regex_t *regex,
+ const u_char *subject, size_t len, nxt_regex_match_data_t *match_data);
njs_regexp_t *njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern);
njs_ret_t njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args,
nxt_uint_t nargs, njs_index_t unused);
diff --git a/njs/njs_string.c b/njs/njs_string.c
--- a/njs/njs_string.c
+++ b/njs/njs_string.c
@@ -77,7 +77,7 @@ static njs_ret_t njs_string_prototype_pa
static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args,
njs_regexp_pattern_t *pattern);
static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array,
- njs_utf8_t utf8, u_char *start, size_t size);
+ njs_utf8_t utf8, const u_char *start, size_t size);
static njs_ret_t njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args,
njs_string_replace_t *r);
static njs_ret_t njs_string_replace_regexp_function(njs_vm_t *vm,
@@ -111,10 +111,11 @@ static njs_ret_t njs_string_decode(njs_v
njs_ret_t
-njs_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start,
+njs_string_create(njs_vm_t *vm, njs_value_t *value, const u_char *start,
uint32_t size, uint32_t length)
{
- u_char *dst, *src;
+ u_char *dst;
+ const u_char *src;
njs_string_t *string;
value->type = NJS_STRING;
@@ -153,7 +154,7 @@ njs_string_create(njs_vm_t *vm, njs_valu
value->long_string.data = string;
- string->start = start;
+ string->start = (u_char *) start;
string->length = length;
string->retain = 1;
}
@@ -2729,12 +2730,11 @@ njs_string_prototype_split(njs_vm_t *vm,
njs_index_t unused)
{
int ret, *captures;
- u_char *p, *start, *next;
size_t size;
uint32_t limit;
njs_utf8_t utf8;
njs_array_t *array;
- const u_char *end;
+ const u_char *p, *start, *next, *end;
njs_regexp_utf8_t type;
njs_string_prop_t string, split;
njs_regexp_pattern_t *pattern;
@@ -2798,8 +2798,8 @@ njs_string_prototype_split(njs_vm_t *vm,
/* Empty split string. */
if (p == next) {
- p++;
- next++;
+ p = nxt_utf8_next(p, end);
+ next = p;
}
size = p - start;
@@ -2845,8 +2845,8 @@ njs_string_prototype_split(njs_vm_t *vm,
/* Empty split regexp. */
if (p == next) {
- p++;
- next++;
+ p = nxt_utf8_next(p, end);
+ next = p;
}
size = p - start;
@@ -2887,7 +2887,7 @@ done:
static njs_ret_t
njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, njs_utf8_t utf8,
- u_char *start, size_t size)
+ const u_char *start, size_t size)
{
ssize_t length;
diff --git a/njs/njs_string.h b/njs/njs_string.h
--- a/njs/njs_string.h
+++ b/njs/njs_string.h
@@ -100,7 +100,7 @@ typedef enum {
nxt_inline uint32_t
-njs_string_length(njs_utf8_t utf8, u_char *start, size_t size)
+njs_string_length(njs_utf8_t utf8, const u_char *start, size_t size)
{
ssize_t length;
diff --git a/njs/test/njs_unit_test.c b/njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c
+++ b/njs/test/njs_unit_test.c
@@ -5078,6 +5078,18 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("'abc'.split('')"),
nxt_string("a,b,c") },
+ { nxt_string("'αβγ'.split('')"),
+ nxt_string("α,β,γ") },
+
+ { nxt_string("'囲碁織'.split('')"),
+ nxt_string("囲,碁,織") },
+
+ { nxt_string("'𝟘𝟙𝟚𝟛'.split('')"),
+ nxt_string("𝟘,𝟙,𝟚,𝟛") },
+
+ { nxt_string("'囲α碁α織'.split('α')"),
+ nxt_string("囲,碁,織") },
+
{ nxt_string("'abc'.split('abc')"),
nxt_string(",") },
diff --git a/nxt/nxt_pcre.c b/nxt/nxt_pcre.c
--- a/nxt/nxt_pcre.c
+++ b/nxt/nxt_pcre.c
@@ -209,13 +209,13 @@ nxt_pcre_default_free(void *p, void *mem
nxt_int_t
-nxt_regex_match(nxt_regex_t *regex, u_char *subject, size_t len,
+nxt_regex_match(nxt_regex_t *regex, const u_char *subject, size_t len,
nxt_regex_match_data_t *match_data, nxt_regex_context_t *ctx)
{
int ret;
- ret = pcre_exec(regex->code, regex->extra, (char *) subject, len, 0, 0,
- match_data->captures, match_data->ncaptures);
+ ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
+ 0, 0, match_data->captures, match_data->ncaptures);
/* PCRE_ERROR_NOMATCH is -1. */
diff --git a/nxt/nxt_regex.h b/nxt/nxt_regex.h
--- a/nxt/nxt_regex.h
+++ b/nxt/nxt_regex.h
@@ -35,7 +35,7 @@ NXT_EXPORT nxt_regex_match_data_t *nxt_r
nxt_regex_context_t *ctx);
NXT_EXPORT void nxt_regex_match_data_free(nxt_regex_match_data_t *match_data,
nxt_regex_context_t *ctx);
-NXT_EXPORT nxt_int_t nxt_regex_match(nxt_regex_t *regex, u_char *subject,
+NXT_EXPORT nxt_int_t nxt_regex_match(nxt_regex_t *regex, const u_char *subject,
size_t len, nxt_regex_match_data_t *match_data, nxt_regex_context_t *ctx);
NXT_EXPORT int *nxt_regex_captures(nxt_regex_match_data_t *match_data);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment