Conversation
- Add Py_mod_multiple_interpreters slot to explicitly declare support - Fix resource leaks in _new/_copy when XXH*_createState() fails - Use tp_free instead of PyObject_Del for proper heap type dealloc - Add tests for sub-interpreter isolation and per-module state
There was a problem hiding this comment.
Code Review
This pull request transitions the _xxhash extension to multi-phase initialization and per-module state to support sub-interpreters. It replaces static types with heap types and implements the necessary module lifecycle functions (_exec, _traverse, and _clear). A critical bug was identified in the _exec function: PyModule_AddType steals a reference to the type objects, but since these are also stored in the module_state and explicitly cleared during module destruction, it would lead to a double-free or use-after-free error. Adding Py_INCREF after successful type addition is necessary to ensure the module state maintains its own valid reference.
| state->xxh32_type = (PyTypeObject *)PyType_FromSpec(&xxh32_spec); | ||
| if (state->xxh32_type == NULL) { | ||
| return -1; | ||
| } | ||
| if (PyModule_AddType(module, state->xxh32_type) < 0) { | ||
| return -1; | ||
| } | ||
|
|
||
| state->xxh64_type = (PyTypeObject *)PyType_FromSpec(&xxh64_spec); | ||
| if (state->xxh64_type == NULL) { | ||
| return -1; | ||
| } | ||
| if (PyModule_AddType(module, state->xxh64_type) < 0) { | ||
| return -1; | ||
| } | ||
|
|
||
| state->xxh3_64_type = (PyTypeObject *)PyType_FromSpec(&xxh3_64_spec); | ||
| if (state->xxh3_64_type == NULL) { | ||
| return -1; | ||
| } | ||
| if (PyModule_AddType(module, state->xxh3_64_type) < 0) { | ||
| return -1; | ||
| } | ||
|
|
||
| state->xxh3_128_type = (PyTypeObject *)PyType_FromSpec(&xxh3_128_spec); | ||
| if (state->xxh3_128_type == NULL) { | ||
| return -1; | ||
| } | ||
| if (PyModule_AddType(module, state->xxh3_128_type) < 0) { | ||
| return -1; | ||
| } |
There was a problem hiding this comment.
The PyModule_AddType function steals a reference to the type object on success. Since these type objects are also stored in the module_state and are explicitly decremented in the _clear function, this will lead to a double-free (or use-after-free) when the module is destroyed.
To fix this, you should increment the reference count of each type object after it has been successfully added to the module. This ensures that the module_state owns its own reference, which is then correctly managed by _traverse and _clear.
state->xxh32_type = (PyTypeObject *)PyType_FromSpec(&xxh32_spec);
if (state->xxh32_type == NULL) {
return -1;
}
if (PyModule_AddType(module, (PyObject *)state->xxh32_type) < 0) {
return -1;
}
Py_INCREF(state->xxh32_type);
state->xxh64_type = (PyTypeObject *)PyType_FromSpec(&xxh64_spec);
if (state->xxh64_type == NULL) {
return -1;
}
if (PyModule_AddType(module, (PyObject *)state->xxh64_type) < 0) {
return -1;
}
Py_INCREF(state->xxh64_type);
state->xxh3_64_type = (PyTypeObject *)PyType_FromSpec(&xxh3_64_spec);
if (state->xxh3_64_type == NULL) {
return -1;
}
if (PyModule_AddType(module, (PyObject *)state->xxh3_64_type) < 0) {
return -1;
}
Py_INCREF(state->xxh3_64_type);
state->xxh3_128_type = (PyTypeObject *)PyType_FromSpec(&xxh3_128_spec);
if (state->xxh3_128_type == NULL) {
return -1;
}
if (PyModule_AddType(module, (PyObject *)state->xxh3_128_type) < 0) {
return -1;
}
Py_INCREF(state->xxh3_128_type);…tests Upgrade the Py_mod_multiple_interpreters slot from Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED to Py_MOD_PER_INTERPRETER_GIL_SUPPORTED, declaring the module safe for sub-interpreters with their own GIL. Add TestPerInterpreterGIL test class that creates sub-interpreters with gil='own' and verifies all hash operations work correctly.
|
/gemini review |
There was a problem hiding this comment.
Code Review
This pull request refactors the _xxhash C extension to support sub-interpreters and per-interpreter GILs by implementing heap types and multi-phase module initialization, supported by a new test suite. Feedback indicates that the implementation currently suffers from memory leaks across all hash types because the tp_dealloc functions and error paths in the new and copy methods do not correctly decrement the reference counts of the heap type objects.
| static void PYXXH32_dealloc(PYXXH32Object *self) | ||
| { | ||
| XXH32_freeState(self->xxhash_state); | ||
| PyObject_Del(self); | ||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||
| } |
There was a problem hiding this comment.
For heap types (created via PyType_FromSpec), the tp_dealloc function must decrement the reference count of the type object itself, as tp_alloc (called by PyObject_New) increments it. Failing to do so results in a memory leak of the type object every time a hash object is destroyed.
static void PYXXH32_dealloc(PYXXH32Object *self)
{
PyTypeObject *tp = Py_TYPE(self);
XXH32_freeState(self->xxhash_state);
tp->tp_free((PyObject *)self);
Py_DECREF(tp);
}| if ((self->xxhash_state = XXH32_createState()) == NULL) { | ||
| return NULL; | ||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||
| return PyErr_NoMemory(); | ||
| } |
There was a problem hiding this comment.
In the error path of tp_new, if the object has already been allocated via PyObject_New, you must ensure the type's reference count is correctly decremented. Simply calling tp_free is insufficient for heap types; you must also Py_DECREF the type object.
if ((self->xxhash_state = XXH32_createState()) == NULL) {
PyTypeObject *tp = Py_TYPE(self);
tp->tp_free((PyObject *)self);
Py_DECREF(tp);
return PyErr_NoMemory();
}| if ((p->xxhash_state = XXH32_createState()) == NULL) { | ||
| return NULL; | ||
| Py_TYPE(p)->tp_free((PyObject *)p); | ||
| return PyErr_NoMemory(); | ||
| } |
There was a problem hiding this comment.
Similar to the tp_new error path, the copy method must also correctly handle the type's reference count if allocation of the internal state fails after the object itself has been allocated.
if ((p->xxhash_state = XXH32_createState()) == NULL) {
PyTypeObject *tp = Py_TYPE(p);
tp->tp_free((PyObject *)p);
Py_DECREF(tp);
return PyErr_NoMemory();
}| static void PYXXH64_dealloc(PYXXH64Object *self) | ||
| { | ||
| XXH64_freeState(self->xxhash_state); | ||
| PyObject_Del(self); | ||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||
| } |
There was a problem hiding this comment.
The tp_dealloc for xxh64 is missing the required Py_DECREF on the type object, which is necessary for heap types to avoid memory leaks.
static void PYXXH64_dealloc(PYXXH64Object *self)
{
PyTypeObject *tp = Py_TYPE(self);
XXH64_freeState(self->xxhash_state);
tp->tp_free((PyObject *)self);
Py_DECREF(tp);
}| if ((self->xxhash_state = XXH64_createState()) == NULL) { | ||
| return NULL; | ||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||
| return PyErr_NoMemory(); | ||
| } |
| if ((self->xxhash_state = XXH3_createState()) == NULL) { | ||
| return NULL; | ||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||
| return PyErr_NoMemory(); | ||
| } |
| if ((p->xxhash_state = XXH3_createState()) == NULL) { | ||
| return NULL; | ||
| Py_TYPE(p)->tp_free((PyObject *)p); | ||
| return PyErr_NoMemory(); | ||
| } |
| static void PYXXH3_128_dealloc(PYXXH3_128Object *self) | ||
| { | ||
| XXH3_freeState(self->xxhash_state); | ||
| PyObject_Del(self); | ||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||
| } |
| if ((self->xxhash_state = XXH3_createState()) == NULL) { | ||
| return NULL; | ||
| Py_TYPE(self)->tp_free((PyObject *)self); | ||
| return PyErr_NoMemory(); | ||
| } |
| if ((p->xxhash_state = XXH3_createState()) == NULL) { | ||
| return NULL; | ||
| Py_TYPE(p)->tp_free((PyObject *)p); | ||
| return PyErr_NoMemory(); | ||
| } |
No description provided.