Last active
February 4, 2023 11:52
-
-
Save vmarkovtsev/9b3e803efa8950c909b0b14d63532e0a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Format `value` as a decimal ASCII number into `out` and return the number of
# bytes written (no NUL terminator is appended). `out` must have room for at
# least 21 bytes: an optional "-" and up to 20 digits of a 64-bit magnitude.
@cython.cdivision(True)
cdef inline int _long_to_decimal(long value, char *out) nogil:
    cdef:
        # work on the unsigned magnitude so that negating LONG_MIN is well-defined
        unsigned long magnitude = <unsigned long> value
        unsigned long quotient
        char digits[20]
        int count = 0, length = 0
    if value < 0:
        out[0] = b"-"
        length = 1
        magnitude = 0 - magnitude
    if magnitude == 0:
        out[length] = b"0"
        return length + 1
    # digits are produced least-significant first
    while magnitude:
        quotient = magnitude // 10
        digits[count] = <char> (ord(b"0") + (magnitude - quotient * 10))
        magnitude = quotient
        count += 1
    # ... and copied out in reverse to get the usual most-significant-first order
    while count > 0:
        count -= 1
        out[length] = digits[count]
        length += 1
    return length


# Serialize `obj` to JSON according to `spec`, appending the bytes to `stream`.
#
# Returns NULL on success. On failure returns the pointer to the object that
# could not be serialized under its spec node (wrong type, unsupported NumPy
# time unit, non-UTC datetime, ...). The stream may already contain partial
# output when a non-NULL pointer is returned.
#
# Runs without the GIL, so only "exact" type checks and direct struct access
# are used.
@cython.cdivision(True)
cdef PyObject *_write_json(PyObject *obj, SpecNode &spec, chunked_stream &stream) nogil:
    cdef:
        PyObject *key = NULL
        PyObject *value = NULL
        PyObject *r
        PyObject **npdata
        Py_ssize_t pos = 0, i, item_len
        unsigned int kind
        char *data
        int aux, year, month, day, hour, minute, second
        long val_long, div
        npy_int64 obval, ticks
        double val_double
        float val_float
        bint need_comma = False
        # 32 bytes: gcvt() with 17 significant digits may need 25 bytes
        # (sign + 17 digits + "." + "e-308" + NUL); the previous 24 overflowed
        char buffer[32]
        SpecNode *nested

    if obj == Py_None:
        stream.write(b"null", 4)
        return NULL

    if spec.type == DT_MODEL:
        if Py_TYPE(obj).tp_members == NULL:
            # this is just a check for __slots__, it's hard to validate better without GIL
            return obj
        stream.write(b"{", 1)
        for i in range(<Py_ssize_t> dereference(spec.nested).size()):
            nested = &dereference(spec.nested)[i]
            # the attribute is read straight from the slot at the recorded byte offset
            value = dereference(<PyObject **>((<char *> obj) + nested.offset))
            if (nested.flags & DF_OPTIONAL) and not (nested.flags & DF_VERBATIM):
                # optional fields are omitted when unset, None, empty, or NaN
                if value == NULL or value == Py_None:
                    continue
                if PyList_CheckExact(value) and PyList_GET_SIZE(value) == 0:
                    continue
                if PyDict_CheckExact(value) and PyDict_Size(value) == 0:
                    continue
                if PyArray_CheckExact(value) and PyArray_NDIM(value) == 1 and PyArray_DIM(value, 0) == 0:
                    continue
                if nested.type == DT_FLOAT:
                    # reset so that a value of another type is never judged by
                    # a stale NaN left over from a previous field
                    val_double = 0
                    if PyFloat_CheckExact(value):
                        val_double = PyFloat_AS_DOUBLE(value)
                    elif PyObject_TypeCheck(value, &PyDoubleArrType_Type):
                        PyArray_ScalarAsCtype(value, &val_double)
                    elif PyObject_TypeCheck(value, &PyFloatArrType_Type):
                        PyArray_ScalarAsCtype(value, &val_float)
                        val_double = val_float
                    if val_double != val_double:
                        continue
            if need_comma:
                stream.write(b",", 1)
            else:
                need_comma = True
            if nested.flags & DF_KEY_UNMAPPED:
                # the key is a plain C string that is emitted verbatim between quotes
                stream.write(b'"', 1)
                stream.write(<const char *> nested.key, strlen(<const char *> nested.key))
                stream.write(b'"', 1)
            else:
                r = _write_json(<PyObject *> nested.key, fake_str_model, stream)
                if r != NULL:
                    return r
            stream.write(b":", 1)
            r = _write_json(value, dereference(nested), stream)
            if r != NULL:
                return r
        stream.write(b"}", 1)
    elif spec.type == DT_DICT:
        stream.write(b"{", 1)
        if not PyDict_CheckExact(obj):
            return obj
        # `pos` is an opaque cursor owned by PyDict_Next(): it is not the item
        # index (deleted entries leave gaps), so the separator is tracked separately
        while PyDict_Next(obj, &pos, &key, &value):
            if need_comma:
                stream.write(b",", 1)
            else:
                need_comma = True
            r = _write_json(key, dereference(spec.nested)[0], stream)
            if r != NULL:
                return r
            stream.write(b":", 1)
            r = _write_json(value, dereference(spec.nested)[1], stream)
            if r != NULL:
                return r
        stream.write(b"}", 1)
    elif spec.type == DT_LIST:
        stream.write(b"[", 1)
        nested = &dereference(spec.nested)[0]
        if not PyList_CheckExact(obj):
            if not PyArray_CheckExact(obj) or not PyArray_IS_C_CONTIGUOUS(obj) or PyArray_NDIM(obj) != 1:
                return obj
            # NOTE(review): the array dtype is not checked; the data is read as
            # PyObject pointers, so presumably only object arrays reach here - confirm
            npdata = <PyObject **> PyArray_DATA(obj)
            for i in range(PyArray_DIM(obj, 0)):
                if i != 0:
                    stream.write(b",", 1)
                r = _write_json(npdata[i], dereference(nested), stream)
                if r != NULL:
                    return r
        else:
            for i in range(PyList_GET_SIZE(obj)):
                if i != 0:
                    stream.write(b",", 1)
                r = _write_json(PyList_GET_ITEM(obj, i), dereference(nested), stream)
                if r != NULL:
                    return r
        stream.write(b"]", 1)
    elif spec.type == DT_STRING:
        stream.write(b'"', 1)
        if PyUnicode_Check(obj):
            data = <char *> PyUnicode_DATA(obj)
            kind = PyUnicode_KIND(obj)
            item_len = PyUnicode_GET_LENGTH(obj)
            # the code point width depends on the internal representation of the str
            if kind == PyUnicode_1BYTE_KIND:
                for i in range(item_len):
                    stream.write(buffer, ucs4_to_utf8_json((<uint8_t *> data)[i], buffer))
            elif kind == PyUnicode_2BYTE_KIND:
                for i in range(item_len):
                    stream.write(buffer, ucs4_to_utf8_json((<uint16_t *> data)[i], buffer))
            elif kind == PyUnicode_4BYTE_KIND:
                for i in range(item_len):
                    stream.write(buffer, ucs4_to_utf8_json((<uint32_t *> data)[i], buffer))
        elif PyBytes_Check(obj):
            # each byte is treated as one code point in the range 0..255
            data = PyBytes_AS_STRING(obj)
            item_len = PyBytes_GET_SIZE(obj)
            for i in range(item_len):
                stream.write(buffer, ucs4_to_utf8_json((<uint8_t *> data)[i], buffer))
        else:
            return obj
        stream.write(b'"', 1)
    elif spec.type == DT_DT:
        if PyDateTime_Check(obj):
            # only datetimes whose tzinfo is the UTC singleton are accepted
            if (<PyDateTime_CAPI *> PyDateTimeAPI).TimeZone_UTC != PyDateTime_DATE_GET_TZINFO(obj):
                return obj
            year = PyDateTime_GET_YEAR(obj)
            month = PyDateTime_GET_MONTH(obj)
            day = PyDateTime_GET_DAY(obj)
            hour = PyDateTime_DATE_GET_HOUR(obj)
            minute = PyDateTime_DATE_GET_MINUTE(obj)
            second = PyDateTime_DATE_GET_SECOND(obj)
        elif PyObject_TypeCheck(obj, &PyDatetimeArrType_Type):
            npy_unit = (<PyDatetimeScalarObject *> obj).obmeta.base
            ticks = (<PyDatetimeScalarObject *> obj).obval
            if npy_unit == NPY_FR_ns:
                div = 1000000000
            elif npy_unit == NPY_FR_us:
                div = 1000000
            elif npy_unit == NPY_FR_s:
                div = 1
            else:
                return obj
            # cdivision(True) truncates toward zero; floor explicitly so that
            # timestamps before the epoch land on the right second and day
            obval = ticks // div
            if obval * div > ticks:
                obval -= 1
            val_long = obval // (60 * 60 * 24)
            obval = obval - val_long * 60 * 60 * 24
            if obval < 0:
                obval += 60 * 60 * 24
                val_long -= 1
            # NOTE(review): NaT is not special-cased here - confirm callers filter it out
            year = month = day = 0
            set_datetimestruct_days(val_long, &year, &month, &day)
            # obval is now the second of the day, 0..86399
            aux = <int> obval
            hour = aux // 3600
            minute = (aux // 60) % 60
            second = aux % 60
        else:
            return obj
        # fixed layout, 22 bytes: "YYYY-MM-DDTHH:MM:SSZ" including both quotes
        buffer[0] = buffer[21] = b'"'
        aux = year
        pos = 4
        while pos > 0:
            buffer[pos] = ord(b"0") + aux % 10
            aux = aux // 10
            pos -= 1
        buffer[5] = b"-"
        buffer[6] = ord(b"0") + month // 10
        buffer[7] = ord(b"0") + month % 10
        buffer[8] = b"-"
        buffer[9] = ord(b"0") + day // 10
        buffer[10] = ord(b"0") + day % 10
        buffer[11] = b"T"
        buffer[12] = ord(b"0") + hour // 10
        buffer[13] = ord(b"0") + hour % 10
        buffer[14] = b":"
        buffer[15] = ord(b"0") + minute // 10
        buffer[16] = ord(b"0") + minute % 10
        buffer[17] = b":"
        buffer[18] = ord(b"0") + second // 10
        buffer[19] = ord(b"0") + second % 10
        buffer[20] = b"Z"
        stream.write(buffer, 22)
    elif spec.type == DT_TD:
        # timedeltas are emitted as a quoted whole number of seconds, e.g. "-90s"
        stream.write(b'"', 1)
        if not PyDelta_Check(obj):
            if PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type):
                npy_unit = (<PyDatetimeScalarObject *> obj).obmeta.base
                val_long = (<PyDatetimeScalarObject *> obj).obval
                if npy_unit == NPY_FR_ns:
                    val_long //= 1000000000
                elif npy_unit == NPY_FR_us:
                    val_long //= 1000000
                elif npy_unit != NPY_FR_s:
                    return obj
            else:
                return obj
        else:
            # the microseconds component is not included
            val_long = PyDateTime_DELTA_GET_DAYS(obj)
            val_long *= 24 * 3600
            val_long += PyDateTime_DELTA_GET_SECONDS(obj)
        stream.write(buffer, _long_to_decimal(val_long, buffer))
        stream.write(b's"', 2)
    elif spec.type == DT_LONG:
        if PyLong_CheckExact(obj):
            val_long = PyLong_AsLong(obj)
        elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type):
            # NOTE(review): PyArray_ScalarAsCtype presumably copies only the
            # scalar's own item size, so narrower negative NumPy integers may
            # not be sign-extended into val_long - confirm
            val_long = 0
            PyArray_ScalarAsCtype(obj, &val_long)
        else:
            return obj
        stream.write(buffer, _long_to_decimal(val_long, buffer))
    elif spec.type == DT_BOOL:
        if obj == Py_True:
            stream.write(b"true", 4)
        elif obj == Py_False:
            stream.write(b"false", 5)
        else:
            return obj
    elif spec.type == DT_FLOAT:
        if PyFloat_CheckExact(obj):
            val_double = PyFloat_AS_DOUBLE(obj)
        elif PyLong_CheckExact(obj):
            val_double = PyLong_AsLong(obj)
        elif PyObject_TypeCheck(obj, &PyDoubleArrType_Type):
            PyArray_ScalarAsCtype(obj, &val_double)
        elif PyObject_TypeCheck(obj, &PyFloatArrType_Type):
            PyArray_ScalarAsCtype(obj, &val_float)
            val_double = val_float
        elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type):
            val_long = 0
            PyArray_ScalarAsCtype(obj, &val_long)
            val_double = val_long
        else:
            return obj
        if val_double - val_double != 0:
            # x - x is NaN for both NaN and +/-inf; JSON cannot represent
            # either, so emit null instead of the invalid "nan" / "inf"
            stream.write(b"null", 4)
        else:
            # 17 significant digits round-trip any double and, together with
            # the sign, point, exponent and NUL, fit into `buffer`
            gcvt(val_double, 17, buffer)
            stream.write(buffer, strlen(buffer))
    elif spec.type == DT_FREEFORM:
        r = _write_freeform_json(obj, stream)
        if r != NULL:
            return r
    else:
        return obj
    return NULL
# Adapted from CPython, licensed under PSF2 (BSD-like)
#
# Encode a single code point as UTF-8 with JSON string escaping applied.
# The bytes are stored in `utf8` (at most 6 are written, no NUL terminator)
# and their count is returned. NUL and the surrogate range U+D800..U+DFFF
# produce no output at all: the function returns 0 for them.
cdef inline int ucs4_to_utf8_json(uint32_t ucs4, char *utf8) nogil:
    cdef uint32_t low_nibble
    if ucs4 == 0 or 0xD800 <= ucs4 <= 0xDFFF:
        # nothing is emitted for NUL and for lone surrogates
        return 0
    elif ucs4 < 0x20:
        # control characters become \u00XY with upper-case hex digits
        low_nibble = ucs4 & 0x0F
        utf8[0] = b"\\"
        utf8[1] = b"u"
        utf8[2] = b"0"
        utf8[3] = b"0"
        utf8[4] = b"1" if ucs4 >= 0x10 else b"0"
        if low_nibble <= 0x09:
            utf8[5] = low_nibble + ord(b"0")
        else:
            utf8[5] = (low_nibble - 0x0A) + ord(b"A")
        return 6
    elif ucs4 == b'"' or ucs4 == b"\\":
        # the two characters that need a backslash inside a JSON string
        utf8[0] = b"\\"
        utf8[1] = ucs4
        return 2
    elif ucs4 < 0x80:
        # plain ASCII is copied as is
        utf8[0] = ucs4
        return 1
    elif ucs4 < 0x0800:
        # two-byte sequence
        utf8[0] = 0xc0 | (ucs4 >> 6)
        utf8[1] = 0x80 | (ucs4 & 0x3f)
        return 2
    elif ucs4 < 0x10000:
        # three-byte sequence
        utf8[0] = 0xe0 | (ucs4 >> 12)
        utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f)
        utf8[2] = 0x80 | (ucs4 & 0x3f)
        return 3
    else:
        # four-byte sequence for code points beyond the BMP
        utf8[0] = 0xf0 | (ucs4 >> 18)
        utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f)
        utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f)
        utf8[3] = 0x80 | (ucs4 & 0x3f)
        return 4
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment