diff options
Diffstat (limited to 'third_party/simplejson/_speedups.c')
-rw-r--r-- | third_party/simplejson/_speedups.c | 215 |
1 files changed, 215 insertions, 0 deletions
diff --git a/third_party/simplejson/_speedups.c b/third_party/simplejson/_speedups.c new file mode 100644 index 0000000..8f290bb4 --- /dev/null +++ b/third_party/simplejson/_speedups.c @@ -0,0 +1,215 @@ +#include "Python.h" +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#endif + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr); +void init_speedups(void); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"') + +#define MIN_EXPANSION 6 +#ifdef Py_UNICODE_WIDE +#define MAX_EXPANSION (2 * MIN_EXPANSION) +#else +#define MAX_EXPANSION MIN_EXPANSION +#endif + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) { + Py_UNICODE x; + output[chars++] = '\\'; + switch (c) { + case '/': output[chars++] = (char)c; break; + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + x = (c & 0xf000) >> 12; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x0f00) >> 8; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x00f0) >> 4; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x000f); + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + x = (c & 0xf000) >> 12; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x0f00) >> 8; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x00f0) >> 4; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x000f); + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) { + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + /* This is an upper bound */ + if (output_size > 2 + (input_chars * MAX_EXPANSION)) { + output_size = 2 + (input_chars * MAX_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) { + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } else if (c > 0x7F) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + Py_DECREF(rval); + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "..." +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) { + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + PyErr_SetString(PyExc_TypeError, "first argument must be a string"); + return NULL; +} + +#define DEFN(n, k) \ + { \ + #n, \ + (PyCFunction)py_ ##n, \ + k, \ + pydoc_ ##n \ + } +static PyMethodDef speedups_methods[] = { + DEFN(encode_basestring_ascii, METH_O), + {} +}; +#undef DEFN + +void +init_speedups(void) +{ + PyObject *m; + m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION); +} |