Skip to content

Instantly share code, notes, and snippets.

@vmarkovtsev
Last active February 4, 2023 11:52
Show Gist options
  • Save vmarkovtsev/9b3e803efa8950c909b0b14d63532e0a to your computer and use it in GitHub Desktop.
Save vmarkovtsev/9b3e803efa8950c909b0b14d63532e0a to your computer and use it in GitHub Desktop.
# Render `value` as ASCII decimal digits into `out` and return how many bytes were written.
# No NUL terminator is appended; `out` must have room for every digit of an unsigned long
# (20 bytes are enough for a 64-bit value).
@cython.cdivision(True)
cdef inline int _ulong_to_decimal(unsigned long value, char *out) nogil:
    cdef:
        int length = 0, left, right
        char sym
    if value == 0:
        out[0] = b"0"
        return 1
    while value:
        out[length] = <char> (value % 10 + ord(b"0"))
        value = value // 10
        length += 1
    # The digits were produced least-significant first: reverse them in place.
    left = 0
    right = length - 1
    while left < right:
        sym = out[left]
        out[left] = out[right]
        out[right] = sym
        left += 1
        right -= 1
    return length


# Fill `out` with the quoted timestamp `"YYYY-MM-DDTHH:MM:SSZ"` and return its length (22).
# Every field is zero-padded to a fixed width; the year is assumed to fit 4 digits and the
# other fields 2 digits, nothing is validated here.
@cython.cdivision(True)
cdef inline int _format_json_datetime(int year, int month, int day,
                                      int hour, int minute, int second,
                                      char *out) nogil:
    cdef int pos = 4
    out[0] = b'"'
    while pos > 0:
        out[pos] = <char> (year % 10 + ord(b"0"))
        year = year // 10
        pos -= 1
    out[5] = b"-"
    out[6] = <char> (month // 10 + ord(b"0"))
    out[7] = <char> (month % 10 + ord(b"0"))
    out[8] = b"-"
    out[9] = <char> (day // 10 + ord(b"0"))
    out[10] = <char> (day % 10 + ord(b"0"))
    out[11] = b"T"
    out[12] = <char> (hour // 10 + ord(b"0"))
    out[13] = <char> (hour % 10 + ord(b"0"))
    out[14] = b":"
    out[15] = <char> (minute // 10 + ord(b"0"))
    out[16] = <char> (minute % 10 + ord(b"0"))
    out[17] = b":"
    out[18] = <char> (second // 10 + ord(b"0"))
    out[19] = <char> (second % 10 + ord(b"0"))
    out[20] = b"Z"
    out[21] = b'"'
    return 22


# Serialize `obj` to JSON according to `spec`, appending the bytes to `stream`, without the GIL.
#
# Returns NULL on success. On failure returns a borrowed pointer to the object that could not
# be serialized (wrong type for the spec, unsupported numpy time unit, non-UTC datetime, unset
# model slot, unknown spec type). Bytes written before the failure stay in `stream`.
#
# Dispatch is by `spec.type`:
#   DT_MODEL    - object with __slots__; fields are read at `nested.offset` and written as a JSON object
#   DT_DICT     - exact dict; spec.nested[0] describes the keys, spec.nested[1] the values
#   DT_LIST     - exact list or 1-D C-contiguous numpy array; spec.nested[0] describes the items
#   DT_STRING   - str or bytes, escaped by ucs4_to_utf8_json()
#   DT_DT       - datetime with UTC tzinfo or numpy datetime64 scalar in s/us/ns
#   DT_TD       - timedelta or numpy timedelta64 scalar in s/us/ns, written as "<seconds>s"
#   DT_LONG, DT_BOOL, DT_FLOAT, DT_FREEFORM - scalars and free-form values
# None is always written as `null`, whatever the spec.
@cython.cdivision(True)
cdef PyObject *_write_json(PyObject *obj, SpecNode &spec, chunked_stream &stream) nogil:
    cdef:
        PyObject *key = NULL
        PyObject *value = NULL
        PyObject *r
        PyObject **npdata
        Py_ssize_t pos = 0, i, item_len
        unsigned int kind
        char *data
        int year, month, day, hour, minute, second
        long val_long
        unsigned long magnitude
        npy_int64 obval, unit_div, seconds
        double val_double
        float val_float
        # 32 bytes: large enough for gcvt() with 24 significant digits plus sign, decimal
        # point, exponent and the terminating NUL (the previous 24 bytes could overflow).
        char buffer[32]
        SpecNode *nested
        bint first

    if obj == Py_None:
        stream.write(b"null", 4)
        return NULL

    if spec.type == DT_MODEL:
        if Py_TYPE(obj).tp_members == NULL:
            # this is just a check for __slots__, it's hard to validate better without GIL
            return obj
        stream.write(b"{", 1)
        first = True
        for i in range(<Py_ssize_t> dereference(spec.nested).size()):
            nested = &dereference(spec.nested)[i]
            value = dereference(<PyObject **>((<char *> obj) + nested.offset))
            if (nested.flags & DF_OPTIONAL) and not (nested.flags & DF_VERBATIM):
                # Optional, non-verbatim fields are omitted when unset, None, empty or NaN.
                if value == NULL or value == Py_None:
                    continue
                if PyList_CheckExact(value) and PyList_GET_SIZE(value) == 0:
                    continue
                if PyDict_CheckExact(value) and PyDict_Size(value) == 0:
                    continue
                if PyArray_CheckExact(value) and PyArray_NDIM(value) == 1 and PyArray_DIM(value, 0) == 0:
                    continue
                if nested.type == DT_FLOAT:
                    # Reset first: when `value` matches none of the types below, a stale NaN
                    # from a previous field must not cause this field to be dropped.
                    val_double = 0
                    if PyFloat_CheckExact(value):
                        val_double = PyFloat_AS_DOUBLE(value)
                    elif PyObject_TypeCheck(value, &PyDoubleArrType_Type):
                        PyArray_ScalarAsCtype(value, &val_double)
                    elif PyObject_TypeCheck(value, &PyFloatArrType_Type):
                        PyArray_ScalarAsCtype(value, &val_float)
                        val_double = val_float
                    if val_double != val_double:
                        continue
            if value == NULL:
                # An unset slot cannot be serialized; report the model instead of
                # dereferencing a NULL pointer in the recursive call.
                return obj
            if first:
                first = False
            else:
                stream.write(b",", 1)
            if nested.flags & DF_KEY_UNMAPPED:
                # The key is stored as a raw C string.
                stream.write(b'"', 1)
                stream.write(<const char *> nested.key, strlen(<const char *> nested.key))
                stream.write(b'"', 1)
            else:
                r = _write_json(<PyObject *> nested.key, fake_str_model, stream)
                if r != NULL:
                    return r
            stream.write(b":", 1)
            r = _write_json(value, dereference(nested), stream)
            if r != NULL:
                return r
        stream.write(b"}", 1)
    elif spec.type == DT_DICT:
        stream.write(b"{", 1)
        if not PyDict_CheckExact(obj):
            return obj
        # Track the first item explicitly: PyDict_Next() skips deleted entries, so `pos`
        # is not guaranteed to be 1 after the first yielded item.
        first = True
        while PyDict_Next(obj, &pos, &key, &value):
            if first:
                first = False
            else:
                stream.write(b",", 1)
            r = _write_json(key, dereference(spec.nested)[0], stream)
            if r != NULL:
                return r
            stream.write(b":", 1)
            r = _write_json(value, dereference(spec.nested)[1], stream)
            if r != NULL:
                return r
        stream.write(b"}", 1)
    elif spec.type == DT_LIST:
        stream.write(b"[", 1)
        nested = &dereference(spec.nested)[0]
        if PyList_CheckExact(obj):
            for i in range(PyList_GET_SIZE(obj)):
                if i != 0:
                    stream.write(b",", 1)
                r = _write_json(PyList_GET_ITEM(obj, i), dereference(nested), stream)
                if r != NULL:
                    return r
        elif PyArray_CheckExact(obj) and PyArray_IS_C_CONTIGUOUS(obj) and PyArray_NDIM(obj) == 1:
            # NOTE(review): the data is reinterpreted as PyObject pointers, which presumes
            # dtype=object; the dtype is not checked here - confirm the callers guarantee it.
            npdata = <PyObject **> PyArray_DATA(obj)
            for i in range(PyArray_DIM(obj, 0)):
                if i != 0:
                    stream.write(b",", 1)
                r = _write_json(npdata[i], dereference(nested), stream)
                if r != NULL:
                    return r
        else:
            return obj
        stream.write(b"]", 1)
    elif spec.type == DT_STRING:
        stream.write(b'"', 1)
        if PyUnicode_Check(obj):
            data = <char *> PyUnicode_DATA(obj)
            kind = PyUnicode_KIND(obj)
            item_len = PyUnicode_GET_LENGTH(obj)
            # Read the code points at the width of the string's internal representation.
            if kind == PyUnicode_1BYTE_KIND:
                for i in range(item_len):
                    stream.write(buffer, ucs4_to_utf8_json((<uint8_t *> data)[i], buffer))
            elif kind == PyUnicode_2BYTE_KIND:
                for i in range(item_len):
                    stream.write(buffer, ucs4_to_utf8_json((<uint16_t *> data)[i], buffer))
            elif kind == PyUnicode_4BYTE_KIND:
                for i in range(item_len):
                    stream.write(buffer, ucs4_to_utf8_json((<uint32_t *> data)[i], buffer))
        elif PyBytes_Check(obj):
            # Every byte is treated as a separate code point.
            data = PyBytes_AS_STRING(obj)
            item_len = PyBytes_GET_SIZE(obj)
            for i in range(item_len):
                stream.write(buffer, ucs4_to_utf8_json((<uint8_t *> data)[i], buffer))
        else:
            return obj
        stream.write(b'"', 1)
    elif spec.type == DT_DT:
        if PyDateTime_Check(obj):
            if (<PyDateTime_CAPI *> PyDateTimeAPI).TimeZone_UTC != PyDateTime_DATE_GET_TZINFO(obj):
                return obj
            year = PyDateTime_GET_YEAR(obj)
            month = PyDateTime_GET_MONTH(obj)
            day = PyDateTime_GET_DAY(obj)
            hour = PyDateTime_DATE_GET_HOUR(obj)
            minute = PyDateTime_DATE_GET_MINUTE(obj)
            second = PyDateTime_DATE_GET_SECOND(obj)
        elif PyObject_TypeCheck(obj, &PyDatetimeArrType_Type):
            npy_unit = (<PyDatetimeScalarObject *> obj).obmeta.base
            obval = (<PyDatetimeScalarObject *> obj).obval
            if npy_unit == NPY_FR_ns:
                unit_div = 1000000000
            elif npy_unit == NPY_FR_us:
                unit_div = 1000000
            elif npy_unit == NPY_FR_s:
                unit_div = 1
            else:
                return obj
            # NOTE(review): NaT is not detected here and is formatted like any other value.
            # cdivision(True) makes `//` truncate toward zero, so floor manually; otherwise
            # timestamps before 1970 yield negative remainders and garbage digits.
            seconds = obval // unit_div
            if seconds * unit_div > obval:
                seconds -= 1
            val_long = <long> (seconds // (60 * 60 * 24))
            seconds -= (<npy_int64> val_long) * (60 * 60 * 24)
            if seconds < 0:
                seconds += 60 * 60 * 24
                val_long -= 1
            year = month = day = 0
            set_datetimestruct_days(val_long, &year, &month, &day)
            hour = <int> (seconds // 3600)
            minute = <int> ((seconds // 60) % 60)
            second = <int> (seconds % 60)
        else:
            return obj
        stream.write(buffer, _format_json_datetime(year, month, day, hour, minute, second, buffer))
    elif spec.type == DT_TD:
        stream.write(b'"', 1)
        if PyDelta_Check(obj):
            val_long = PyDateTime_DELTA_GET_DAYS(obj)
            val_long *= 24 * 3600
            val_long += PyDateTime_DELTA_GET_SECONDS(obj)
        elif PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type):
            npy_unit = (<PyDatetimeScalarObject *> obj).obmeta.base
            val_long = (<PyDatetimeScalarObject *> obj).obval
            if npy_unit == NPY_FR_ns:
                val_long //= 1000000000
            elif npy_unit == NPY_FR_us:
                val_long //= 1000000
            elif npy_unit != NPY_FR_s:
                return obj
        else:
            return obj
        # Negate in unsigned arithmetic so that LONG_MIN does not overflow.
        magnitude = <unsigned long> val_long
        if val_long < 0:
            stream.write(b"-", 1)
            magnitude = 0 - magnitude
        stream.write(buffer, _ulong_to_decimal(magnitude, buffer))
        stream.write(b's"', 2)
    elif spec.type == DT_LONG:
        if PyLong_CheckExact(obj):
            # NOTE(review): PyLong_AsLong() reports overflow through the Python error
            # indicator, which is not inspected here - confirm values always fit a C long.
            val_long = PyLong_AsLong(obj)
        elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type):
            # NOTE(review): this copies the scalar's own bytes into val_long; presumably only
            # long-sized signed scalars are expected, narrower or unsigned ones are not widened.
            val_long = 0
            PyArray_ScalarAsCtype(obj, &val_long)
        else:
            return obj
        # Negate in unsigned arithmetic so that LONG_MIN does not overflow.
        magnitude = <unsigned long> val_long
        if val_long < 0:
            stream.write(b"-", 1)
            magnitude = 0 - magnitude
        stream.write(buffer, _ulong_to_decimal(magnitude, buffer))
    elif spec.type == DT_BOOL:
        if obj == Py_True:
            stream.write(b"true", 4)
        elif obj == Py_False:
            stream.write(b"false", 5)
        else:
            return obj
    elif spec.type == DT_FLOAT:
        if PyFloat_CheckExact(obj):
            val_double = PyFloat_AS_DOUBLE(obj)
        elif PyLong_CheckExact(obj):
            val_double = PyLong_AsLong(obj)
        elif PyObject_TypeCheck(obj, &PyDoubleArrType_Type):
            PyArray_ScalarAsCtype(obj, &val_double)
        elif PyObject_TypeCheck(obj, &PyFloatArrType_Type):
            PyArray_ScalarAsCtype(obj, &val_float)
            val_double = val_float
        elif PyObject_TypeCheck(obj, &PyIntegerArrType_Type):
            val_long = 0
            PyArray_ScalarAsCtype(obj, &val_long)
            val_double = val_long
        else:
            return obj
        if val_double - val_double != 0:
            # NaN and +/-infinity have no JSON representation (x - x is NaN for them).
            stream.write(b"null", 4)
        else:
            gcvt(val_double, 24, buffer)
            stream.write(buffer, strlen(buffer))
    elif spec.type == DT_FREEFORM:
        r = _write_freeform_json(obj, stream)
        if r != NULL:
            return r
    else:
        return obj
    return NULL
# Adapted from CPython, licensed under PSF2 (BSD-like)
#
# Encode the single code point `ucs4` as UTF-8 with JSON string escaping into `utf8` and
# return the number of bytes written (0..6). No NUL terminator is appended.
#   - NUL and the surrogate range U+D800..U+DFFF produce no output (0 is returned);
#   - `"` and `\` are prefixed with a backslash;
#   - the other code points below U+0020 become a 6-byte `\u00XX` escape with upper-case hex;
#   - everything else is plain UTF-8 of 1 to 4 bytes.
cdef inline int ucs4_to_utf8_json(uint32_t ucs4, char *utf8) nogil:
    cdef uint32_t low_nibble
    if ucs4 == 0 or 0xD800 <= ucs4 <= 0xDFFF:
        # Nothing is emitted for NUL and for lone surrogates.
        return 0
    if ucs4 == b'"' or ucs4 == b"\\":
        utf8[0] = b"\\"
        utf8[1] = ucs4
        return 2
    if ucs4 >= 0x10000:
        # Four-byte sequence
        utf8[0] = 0xf0 | (ucs4 >> 18)
        utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f)
        utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f)
        utf8[3] = 0x80 | (ucs4 & 0x3f)
        return 4
    if ucs4 >= 0x0800:
        # Three-byte sequence
        utf8[0] = 0xe0 | (ucs4 >> 12)
        utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f)
        utf8[2] = 0x80 | (ucs4 & 0x3f)
        return 3
    if ucs4 >= 0x80:
        # Two-byte sequence
        utf8[0] = 0xc0 | (ucs4 >> 6)
        utf8[1] = 0x80 | (ucs4 & 0x3f)
        return 2
    if ucs4 >= 0x20:
        # Printable ASCII is copied as is
        utf8[0] = ucs4
        return 1
    # Remaining case: a control character, escaped as \u00XX
    low_nibble = ucs4 & 0x0F
    utf8[0] = b"\\"
    utf8[1] = b"u"
    utf8[2] = b"0"
    utf8[3] = b"0"
    utf8[4] = b"1" if ucs4 >= 0x10 else b"0"
    if low_nibble < 0x0A:
        utf8[5] = low_nibble + ord(b"0")
    else:
        utf8[5] = (low_nibble - 0x0A) + ord(b"A")
    return 6
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment