diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 18eb97a..371432e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,6 +9,8 @@ jobs: include: - python-version: '3.14' toxenv: 'py314' + - python-version: '3.14t' + toxenv: 'py314t' steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -18,7 +20,13 @@ jobs: - name: Install build dependencies run: | brew update -q - brew install -q autoconf automake gettext gnu-sed libtool pkgconfig python@${{ matrix.python-version }} tox + brew install -q autoconf automake gettext gnu-sed libtool pkgconfig tox + # Keep the GIL python@3.14 brew install only on the GIL matrix + # entry; setup-python already supplies the interpreter that the + # build will use, but the brew python preserves prior behavior. + if [[ "${{ matrix.python-version }}" != *t ]]; then + brew install -q python@${{ matrix.python-version }} + fi brew link --force gettext ln -s /usr/local/bin/glibtoolize /usr/local/bin/libtoolize python3 -m pip install setuptools @@ -43,14 +51,32 @@ jobs: toxenv: 'py313' - python-version: '3.14' toxenv: 'py314' + - python-version: '3.14t' + apt-python-pkg: '3.14-nogil' + toxenv: 'py314t' steps: - uses: actions/checkout@v4 - name: Install build dependencies run: | - sudo add-apt-repository universe && - sudo add-apt-repository -y ppa:deadsnakes/ppa && - sudo apt-get update && - sudo apt-get install -y autoconf automake autopoint autotools-dev build-essential git libtool pkg-config python${{ matrix.python-version }} python${{ matrix.python-version }}-dev python${{ matrix.python-version }}-venv python3-pip python3-setuptools + sudo add-apt-repository universe + sudo add-apt-repository -y ppa:deadsnakes/ppa + sudo apt-get update + + # The free-threaded interpreter is shipped as `python3.14-nogil` + # (binary `python3.14t`); other matrix entries use the standard + # `python3.X` package name. + APT_PY='${{ matrix.apt-python-pkg || matrix.python-version }}' + + sudo apt-get install -y \ + autoconf automake autopoint autotools-dev build-essential git \ + libtool pkg-config python3-pip python3-setuptools \ + python${APT_PY} + + # Deadsnakes packages experimental free-threaded binaries as monolithic packages, + # so there are no separate -dev or -venv packages to install for nogil. + if [[ "${APT_PY}" != *-nogil ]]; then + sudo apt-get install -y python${APT_PY}-dev python${APT_PY}-venv + fi - name: Install tox run: | python3 -m pip install tox @@ -63,7 +89,7 @@ jobs: runs-on: windows-latest strategy: matrix: - python-version: ['3.14'] + python-version: ['3.14', '3.14t'] architecture: ['x86', 'x64'] steps: - uses: actions/checkout@v4 diff --git a/README b/README index 8c0e444..b597d47 100644 --- a/README +++ b/README @@ -4,9 +4,9 @@ This is a Python binding against the libtsk (SleuthKit library). The aim is to make the binding reflect the TSK API as much as possible in capabilities, while at the same time having a nice Pythonic OO interface: -4.12.1: https://www.sleuthkit.org/sleuthkit/docs/api-docs/4.12.1/ +4.13.0: https://www.sleuthkit.org/sleuthkit/docs/api-docs/4.13.0/ -NOTE: Currently the 4.14.0 API docs are not available, 4.12.1 is the closest. +NOTE: Currently the 4.14.0 API docs are not available, 4.13.0 is the closest. WARNING: use pytsk at your own risk. libtsk is known to have many defects. For processing data from untrusted sources it is highly recommended to add diff --git a/aff4_errors.h b/aff4_errors.h index f096ce2..a7c426e 100644 --- a/aff4_errors.h +++ b/aff4_errors.h @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef AFF4_ERRORS_H_ #define AFF4_ERRORS_H_ diff --git a/class_parser.py b/class_parser.py index 7948c99..71bd44b 100644 --- a/class_parser.py +++ b/class_parser.py @@ -371,43 +371,12 @@ def private_functions(self): */ static int TOTAL_CCLASSES=0; -/* This is a global reference to this module so classes can call each - * other. - */ -static PyObject *g_module = NULL; - #define CONSTRUCT_INITIALIZE(cclass, virt_cclass, constructor, object, ...) \\ (cclass)(((virt_cclass) (&__ ## cclass))->constructor(object, ## __VA_ARGS__)) #undef BUFF_SIZE #define BUFF_SIZE 10240 -/* Python compatibility macros - */ -#if !defined( PyMODINIT_FUNC ) -#if PY_MAJOR_VERSION >= 3 -#define PyMODINIT_FUNC PyObject * -#else -#define PyMODINIT_FUNC void -#endif -#endif /* !defined( PyMODINIT_FUNC ) */ - -#if !defined( PyVarObject_HEAD_INIT ) -#define PyVarObject_HEAD_INIT( type, size ) \\ - PyObject_HEAD_INIT( type ) \\ - size, - -#endif /* !defined( PyVarObject_HEAD_INIT ) */ - -#if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 -#endif - -#if !defined( Py_TYPE ) -#define Py_TYPE( object ) \\ - ( ( (PyObject *) object )->ob_type ) - -#endif /* !defined( Py_TYPE ) */ /* Generic wrapper type */ @@ -436,8 +405,15 @@ def private_functions(self): }} python_wrappers[{classes_length:d}]; /* Create the relevant wrapper from the item based on the lookup table. + * + * If parent is non-NULL, the wrapped child takes a strong reference to it + * via python_object1. This keeps the parent Python wrapper (and therefore + * its underlying libtsk handle) alive for as long as the child exists, + * which matters under free-threaded Python where a different thread may + * drop the parent's last visible reference while this thread is still + * using a child object derived from it. */ -Gen_wrapper new_class_wrapper(Object item, int item_is_python_object) {{ +Gen_wrapper new_class_wrapper(Object item, int item_is_python_object, PyObject *parent) {{ Gen_wrapper result = NULL; Object cls = NULL; struct python_wrapper_map_t *python_wrapper = NULL; @@ -457,12 +433,20 @@ def private_functions(self): PyErr_Clear(); result = (Gen_wrapper) _PyObject_New(python_wrapper->python_type); + if(result == NULL) {{ + return NULL; + }} result->base = item; result->base_is_python_object = item_is_python_object; result->base_is_internal = 1; result->python_object1 = NULL; result->python_object2 = NULL; + if(parent != NULL) {{ + Py_IncRef(parent); + result->python_object1 = parent; + }} + python_wrapper->initialize_proxies(result, (void *) item); return result; @@ -556,15 +540,18 @@ def private_functions(self): }} mro = ob_type->tp_mro; -#if PY_MAJOR_VERSION >= 3 py_method = PyUnicode_FromString(method); -#else - py_method = PyString_FromString(method); -#endif + if(py_method == NULL) {{ + return 0; + }} number_of_items = PySequence_Size(mro); for(item_index = 0; item_index < number_of_items; item_index++) {{ + int contains_result = 0; item_object = PySequence_GetItem(mro, item_index); + if(item_object == NULL) {{ + break; + }} // Ok - we got to the base class - finish up if(item_object == (PyObject *) type) {{ @@ -576,8 +563,11 @@ def private_functions(self): * PyDict_Contains). */ dict = PyObject_GetAttrString(item_object, "__dict__"); - if(dict != NULL && PySequence_Contains(dict, py_method)) {{ - found = 1; + if(dict != NULL) {{ + contains_result = PySequence_Contains(dict, py_method); + if(contains_result > 0) {{ + found = 1; + }} }} Py_DecRef(dict); Py_DecRef(item_object); @@ -603,38 +593,67 @@ def private_functions(self): char *error_str = NULL; int *error_type = (int *) {get_current_error:s}(&error_str); -#if PY_MAJOR_VERSION >= 3 PyObject *utf8_string_object = NULL; -#endif // Fetch the exception state and convert it to a string: PyErr_Fetch(&exception_type, &exception_value, &exception_traceback); + /* exception_value can be NULL when the original exception was + * raised via PyErr_SetNone(type) (e.g. KeyboardInterrupt). + */ + if(exception_value == NULL) {{ + if(error_str != NULL) {{ + const char *placeholder = "Python exception raised without value"; + size_t placeholder_len = strlen(placeholder); + if(placeholder_len > (size_t)(BUFF_SIZE - 1)) {{ + placeholder_len = (size_t)(BUFF_SIZE - 1); + }} + memcpy(error_str, placeholder, placeholder_len); + error_str[placeholder_len] = 0; + }} + *error_type = ERuntimeError; + PyErr_Restore(exception_type, exception_value, exception_traceback); + return; + }} + string_object = PyObject_Repr(exception_value); -#if PY_MAJOR_VERSION >= 3 - utf8_string_object = PyUnicode_AsUTF8String(string_object); + if(string_object != NULL) {{ + utf8_string_object = PyUnicode_AsUTF8String(string_object); + }} if(utf8_string_object != NULL) {{ str_c = PyBytes_AsString(utf8_string_object); }} -#else - str_c = PyString_AsString(string_object); -#endif if(str_c != NULL) {{ strncpy(error_str, str_c, BUFF_SIZE-1); error_str[BUFF_SIZE - 1] = 0; *error_type = ERuntimeError; + }} else {{ + /* Repr/encode failed; record a generic message so callers + * still observe ERuntimeError instead of EZero (which would + * make check_error() report success). + */ + if(error_str != NULL) {{ + const char *placeholder = "Python exception (repr failed)"; + size_t placeholder_len = strlen(placeholder); + if(placeholder_len > (size_t)(BUFF_SIZE - 1)) {{ + placeholder_len = (size_t)(BUFF_SIZE - 1); + }} + memcpy(error_str, placeholder, placeholder_len); + error_str[placeholder_len] = 0; + }} + *error_type = ERuntimeError; }} PyErr_Restore(exception_type, exception_value, exception_traceback); -#if PY_MAJOR_VERSION >= 3 if( utf8_string_object != NULL ) {{ Py_DecRef(utf8_string_object); }} -#endif - Py_DecRef(string_object); + if(string_object != NULL) {{ + Py_DecRef(string_object); + }} return; }} @@ -671,29 +690,19 @@ def private_functions(self): #else long_value = PyLong_AsUnsignedLong(integer_object); #endif - }} -#if PY_MAJOR_VERSION < 3 - if(result == 0) {{ - PyErr_Clear(); - - result = PyObject_IsInstance(integer_object, (PyObject *) &PyInt_Type); - - if(result == -1) {{ + /* PyLong_AsUnsignedLong / PyLong_AsUnsignedLongLong returns + * (unsigned)-1 and sets OverflowError when the value does + * not fit. Surfacing the original exception is more useful + * than continuing into the generic "out of bounds" path + * below, which would clobber the OverflowError with a fresh + * ValueError. + */ + if(PyErr_Occurred()) {{ pytsk_fetch_error(); return (uint64_t) -1; - - }} else if(result != 0) {{ - PyErr_Clear(); - -#if defined( HAVE_LONG_LONG ) - long_value = PyInt_AsUnsignedLongLongMask(integer_object); -#else - long_value = PyInt_AsUnsignedLongMask(integer_object); -#endif }} }} -#endif /* PY_MAJOR_VERSION < 3 */ if(result == 0) {{ if(PyErr_Occurred()) {{ pytsk_fetch_error(); @@ -767,8 +776,9 @@ def initialise_class(self, class_name, out, done=None): " if (PyType_Ready(&{name:s}_Type) < 0) {{\n" " goto on_error;\n" " }}\n" - " Py_IncRef((PyObject *)&{name:s}_Type);\n" - " PyModule_AddObject(module, \"{name:s}\", (PyObject *)&{name:s}_Type);\n").format( + " if (PyModule_AddType(module, &{name:s}_Type) < 0) {{\n" + " goto on_error;\n" + " }}\n").format( **values_dict)) def write(self, out): @@ -863,8 +873,7 @@ def write(self, out): " {{NULL, NULL, 0, NULL}} /* Sentinel */\n" "}};\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - "\n" + "\n" "/* The {module:s} module definition\n" " */\n" "PyModuleDef {module:s}_module_definition = {{\n" @@ -888,15 +897,10 @@ def write(self, out): " NULL,\n" "}};\n" "\n" - "#endif /* PY_MAJOR_VERSION >= 3 */\n" "\n" "/* Initializes the {module:s} module\n" " */\n" - "#if PY_MAJOR_VERSION >= 3\n" - "PyMODINIT_FUNC PyInit_{module:s}(void) {{\n" - "#else\n" - "PyMODINIT_FUNC init{module:s}(void) {{\n" - "#endif\n" + "PyObject * PyInit_{module:s}(void) {{\n" " PyGILState_STATE gil_state;\n" "\n" " PyObject *module = NULL;\n" @@ -907,21 +911,12 @@ def write(self, out): " * This function must be called before grabbing the GIL\n" " * otherwise the module will segfault on a version mismatch\n" " */\n" - "#if PY_MAJOR_VERSION >= 3\n" - " module = PyModule_Create(\n" + " module = PyModule_Create(\n" " &{module:s}_module_definition );\n" - "#else\n" - " module = Py_InitModule3(\n" - " \"{module:s}\",\n" - " {module:s}_module_methods,\n" - " \"Python {module:s} module.\" );\n" - "#endif\n" + " if (module == NULL) {{\n" - "#if PY_MAJOR_VERSION >= 3\n" - " return(NULL);\n" - "#else\n" - " return;\n" - "#endif\n" + " return(NULL);\n" + " }}\n" "\n" "#ifdef Py_GIL_DISABLED\n" @@ -937,13 +932,12 @@ def write(self, out): "\n" " d = PyModule_GetDict(module);\n" "\n" - " /* Make sure threads are enabled */\n" - "#if PY_VERSION_HEX < 0x03070000\n" - " PyEval_InitThreads();\n" - "#endif\n" " gil_state = PyGILState_Ensure();\n" "\n" - " g_module = module;\n").format(**values_dict)) + " /* Reset class registry so reimport or subinterpreter import\n" + " * repopulates from scratch and never overruns python_wrappers[].\n" + " */\n" + " TOTAL_CCLASSES = 0;\n").format(**values_dict)) # The trick is to initialise the classes in order of their # inheritance. The following code will order initializations @@ -961,19 +955,13 @@ def write(self, out): elif type == "string": if constant == "TSK_VERSION_STR": out.write(( - "#if PY_MAJOR_VERSION >= 3\n" - " tmp = PyUnicode_FromString((char *){0:s});\n" - "#else\n" - " tmp = PyString_FromString((char *){0:s});\n" - "#endif\n").format(constant)) + " tmp = PyUnicode_FromString((char *){0:s});\n" + ).format(constant)) else: out.write(( - "#if PY_MAJOR_VERSION >= 3\n" - " tmp = PyBytes_FromString((char *){0:s});\n" - "#else\n" - " tmp = PyString_FromString((char *){0:s});\n" - "#endif\n").format(constant)) + " tmp = PyBytes_FromString((char *){0:s});\n" + ).format(constant)) else: out.write( " /* I dont know how to convert {0:s} type {1:s} */\n".format( @@ -988,20 +976,14 @@ def write(self, out): out.write( " PyGILState_Release(gil_state);\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " return module;\n" - "#else\n" - " return;\n" - "#endif\n" + " return module;\n" + "\n" "on_error:\n" " PyGILState_Release(gil_state);\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " return NULL;\n" - "#else\n" - " return;\n" - "#endif\n" + " return NULL;\n" + "}\n" "\n" "#ifdef __cplusplus\n" @@ -1125,11 +1107,8 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): " Py_IncRef(Py_None);\n" " {result:s} = Py_None;\n" " }} else {{\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyBytes_FromStringAndSize((char *){name:s}, {length:s});\n" - "#else\n" - " {result:s} = PyString_FromStringAndSize((char *){name:s}, {length:s});\n" - "#endif\n" + " {result:s} = PyBytes_FromStringAndSize((char *){name:s}, {length:s});\n" + " if(!{result:s}) {{\n" " goto on_error;\n" " }}\n" @@ -1155,14 +1134,15 @@ def from_python_object(self, source, destination, method, context="NULL"): "\n" " PyErr_Clear();\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " if(PyBytes_AsStringAndSize({source:s}, &buff, &length) == -1) {{\n" - "#else\n" - " if(PyString_AsStringAndSize({source:s}, &buff, &length) == -1) {{\n" - "#endif\n" + " if(PyBytes_AsStringAndSize({source:s}, &buff, &length) == -1) {{\n" + + " goto on_error;\n" + " }}\n" + " {destination:s} = (char *) talloc_size({context:s}, length + 1);\n" + " if({destination:s} == NULL) {{\n" + " PyErr_NoMemory();\n" " goto on_error;\n" " }}\n" - " {destination:s} = talloc_size({context:s}, length + 1);\n" " memcpy({destination:s}, buff, length);\n" " {destination:s}[length] = 0;\n" "}};\n").format(**values_dict) @@ -1187,11 +1167,8 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyBytes_FromStringAndSize((char *){name:s}, {length:s});\n" - "#else\n" - " {result:s} = PyString_FromStringAndSize((char *){name:s}, {length:s});\n" - "#endif\n").format(**values_dict) + " {result:s} = PyBytes_FromStringAndSize((char *){name:s}, {length:s});\n" + ).format(**values_dict) class Char_and_Length(Type): @@ -1232,11 +1209,8 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyBytes_FromStringAndSize((char *){name:s}, {length:s});\n" - "#else\n" - " {result:s} = PyString_FromStringAndSize((char *){name:s}, {length:s});\n" - "#endif\n" + " {result:s} = PyBytes_FromStringAndSize((char *){name:s}, {length:s});\n" + "\n" " if(!{result:s}) {{\n" " goto on_error;\n" @@ -1258,13 +1232,15 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): "name": name or self.name, "result": result} - return ( + code = ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyLong_FromLong({name:s});\n" - "#else\n" - " {result:s} = PyInt_FromLong({name:s});\n" - "#endif\n").format(**values_dict) + " {result:s} = PyLong_FromLong({name:s});\n").format(**values_dict) + if kwargs.get('sense') == 'proxied': + code += ( + " if({result:s} == NULL) {{\n" + " goto on_error;\n" + " }}\n").format(**values_dict) + return code def from_python_object(self, source, destination, method, **kwargs): values_dict = { @@ -1273,11 +1249,7 @@ def from_python_object(self, source, destination, method, **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {destination:s} = PyLong_AsLongMask({source:s});\n" - "#else\n" - " {destination:s} = PyInt_AsLongMask({source:s});\n" - "#endif\n").format(**values_dict) + " {destination:s} = PyLong_AsLongMask({source:s});\n").format(**values_dict) def comment(self): return "{0:s} {1:s} ".format(self.original_type, self.name) @@ -1298,24 +1270,23 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): " PyErr_Clear();\n" " {result:s} = PyList_New(0);\n" " for(array_index = 0; array_index < {array_size:s}; array_index++) {{\n" - "#if PY_MAJOR_VERSION >= 3\n" - " PyList_Append({result:s}, PyLong_FromLong((long) {name:s}[array_index]));\n" - "#else\n" - " PyList_Append({result:s}, PyInt_FromLong((long) {name:s}[array_index]));\n" - "#endif\n" + " PyList_Append({result:s}, PyLong_FromLong((long) {name:s}[array_index]));\n" + " }}\n" ).format(**values_dict) else: values_dict = { "name": name or self.name, "result": result} - return ( + code = ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyLong_FromLong((long) {name:s});\n" - "#else\n" - " {result:s} = PyInt_FromLong((long) {name:s});\n" - "#endif\n").format(**values_dict) + " {result:s} = PyLong_FromLong((long) {name:s});\n").format(**values_dict) + if kwargs.get('sense') == 'proxied': + code += ( + " if({result:s} == NULL) {{\n" + " goto on_error;\n" + " }}\n").format(**values_dict) + return code def from_python_object(self, source, destination, method, **kwargs): values_dict = { @@ -1324,11 +1295,8 @@ def from_python_object(self, source, destination, method, **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {destination:s} = PyLong_AsUnsignedLongMask({source:s});\n" - "#else\n" - " {destination:s} = PyInt_AsUnsignedLongMask({source:s});\n" - "#endif\n").format(**values_dict) + " {destination:s} = PyLong_AsUnsignedLongMask({source:s});\n" + ).format(**values_dict) class Integer8(Integer): @@ -1364,13 +1332,19 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): "name": name or self.name, "result": result} - return ( + code = ( " PyErr_Clear();\n" "#if defined( HAVE_LONG_LONG )\n" " {result:s} = PyLong_FromLongLong({name:s});\n" "#else\n" " {result:s} = PyLong_FromLong({name:s});\n" "#endif\n").format(**values_dict) + if kwargs.get('sense') == 'proxied': + code += ( + " if({result:s} == NULL) {{\n" + " goto on_error;\n" + " }}\n").format(**values_dict) + return code def from_python_object(self, source, destination, method, **kwargs): values_dict = { @@ -1379,19 +1353,11 @@ def from_python_object(self, source, destination, method, **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" "#if defined( HAVE_LONG_LONG )\n" " {destination:s} = PyLong_AsLongLongMask({source:s});\n" "#else\n" " {destination:s} = PyLong_AsLongMask({source:s});\n" - "#endif\n" - "#else\n" - "#if defined( HAVE_LONG_LONG )\n" - " {destination:s} = PyInt_AsLongLongMask({source:s});\n" - "#else\n" - " {destination:s} = PyInt_AsLongMask({source:s});\n" - "#endif\n" - "#endif /* PY_MAJOR_VERSION >= 3 */\n").format(**values_dict) + "#endif\n").format(**values_dict) class Integer64Unsigned(Integer): @@ -1403,13 +1369,19 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): "name": name or self.name, "result": result} - return ( + code = ( " PyErr_Clear();\n" "#if defined( HAVE_LONG_LONG )\n" " {result:s} = PyLong_FromUnsignedLongLong({name:s});\n" "#else\n" " {result:s} = PyLong_FromUnsignedLong({name:s});\n" "#endif\n").format(**values_dict) + if kwargs.get('sense') == 'proxied': + code += ( + " if({result:s} == NULL) {{\n" + " goto on_error;\n" + " }}\n").format(**values_dict) + return code def from_python_object(self, source, destination, method, **kwargs): values_dict = { @@ -1420,19 +1392,11 @@ def from_python_object(self, source, destination, method, **kwargs): # long and int objects. return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" "#if defined( HAVE_LONG_LONG )\n" " {destination:s} = PyLong_AsUnsignedLongLongMask({source:s});\n" "#else\n" " {destination:s} = PyLong_AsUnsignedLongMask({source:s});\n" - "#endif\n" - "#else\n" - "#if defined( HAVE_LONG_LONG )\n" - " {destination:s} = PyInt_AsUnsignedLongLongMask({source:s});\n" - "#else\n" - " {destination:s} = PyInt_AsUnsignedLongMask({source:s});\n" - "#endif\n" - "#endif /* PY_MAJOR_VERSION >= 3 */\n").format(**values_dict) + "#endif\n").format(**values_dict) class Long(Integer): @@ -1444,10 +1408,15 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): "name": name or self.name, "result": result} - return ( + code = ( "PyErr_Clear();\n" - "{result:s} = PyLong_FromLongLong({name:s});\n").format( - **values_dict) + "{result:s} = PyLong_FromLongLong({name:s});\n").format(**values_dict) + if kwargs.get('sense') == 'proxied': + code += ( + "if({result:s} == NULL) {{\n" + " goto on_error;\n" + "}}\n").format(**values_dict) + return code def from_python_object(self, source, destination, method, **kwargs): values_dict = { @@ -1469,10 +1438,15 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): "name": name or self.name, "result": result} - return ( + code = ( "PyErr_Clear();\n" - "{result:s} = PyLong_FromUnsignedLong({name:s});\n").format( - **values_dict) + "{result:s} = PyLong_FromUnsignedLong({name:s});\n").format(**values_dict) + if kwargs.get('sense') == 'proxied': + code += ( + "if({result:s} == NULL) {{\n" + " goto on_error;\n" + "}}\n").format(**values_dict) + return code def from_python_object(self, source, destination, method, **kwargs): values_dict = { @@ -1500,11 +1474,8 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): " char *str_{name:s} = &{name:s};\n" "\n" " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyBytes_FromStringAndSize(str_{name:s}, 1);\n" - "#else\n" - " {result:s} = PyString_FromStringAndSize(str_{name:s}, 1);\n" - "#endif\n" + " {result:s} = PyBytes_FromStringAndSize(str_{name:s}, 1);\n" + "\n" " if(!{result:s}) {{\n" " goto on_error;\n" @@ -1630,28 +1601,25 @@ def pre_call(self, method, **kwargs): return ( " PyErr_Clear();\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " tmp_{name:s} = PyBytes_FromStringAndSize(NULL, {length:s});\n" - "#else\n" - " tmp_{name:s} = PyString_FromStringAndSize(NULL, {length:s});\n" - "#endif\n" + " tmp_{name:s} = PyBytes_FromStringAndSize(NULL, {length:s});\n" + " if(!tmp_{name:s}) {{\n" " goto on_error;\n" " }}\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " PyBytes_AsStringAndSize(tmp_{name:s}, &{name:s}, (Py_ssize_t *)&{length:s});\n" - "#else\n" - " PyString_AsStringAndSize(tmp_{name:s}, &{name:s}, (Py_ssize_t *)&{length:s});\n" - "#endif\n").format(**values_dict) + " PyBytes_AsStringAndSize(tmp_{name:s}, &{name:s}, (Py_ssize_t *)&{length:s});\n" + ).format(**values_dict) def to_python_object(self, name=None, result="Py_result", sense="in", **kwargs): if "results" in kwargs: kwargs["results"].pop(0) if sense == "proxied": - return "py_{0:s} = PyLong_FromLong({1:s});\n".format( - self.name, self.length) + return ( + "py_{0:s} = PyLong_FromSize_t((size_t) {1:s});\n" + "if(py_{0:s} == NULL) {{\n" + " goto on_error;\n" + "}}\n").format(self.name, self.length) values_dict = { "length": self.length, @@ -1668,17 +1636,15 @@ def to_python_object(self, name=None, result="Py_result", sense="in", **kwargs): "\n" " // Do we need to truncate the buffer for a short read?\n" " }} else if(func_return < (uint64_t) {length:s}) {{\n" - "#if PY_MAJOR_VERSION >= 3\n" - " _PyBytes_Resize(&tmp_{name:s}, (Py_ssize_t) func_return);\n" - "#else\n" - " _PyString_Resize(&tmp_{name:s}, (Py_ssize_t) func_return);\n" - "#endif\n" + " _PyBytes_Resize(&tmp_{name:s}, (Py_ssize_t) func_return);\n" + " }}\n" "\n" " {result:s} = tmp_{name:s};\n").format(**values_dict) def python_proxy_post_call(self, result="Py_result"): values_dict = { + "length": self.length, "name": self.name, "result": result} @@ -1687,16 +1653,24 @@ def python_proxy_post_call(self, result="Py_result"): " char *tmp_buff = NULL;\n" " Py_ssize_t tmp_len = 0;\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " if(PyBytes_AsStringAndSize({result:s}, &tmp_buff, &tmp_len) == -1) {{\n" - "#else\n" - " if(PyString_AsStringAndSize({result:s}, &tmp_buff, &tmp_len) == -1) {{\n" - "#endif\n" + " if(PyBytes_AsStringAndSize({result:s}, &tmp_buff, &tmp_len) == -1) {{\n" + " goto on_error;\n" " }}\n" + " /* Bound the user-controlled return length to the buffer\n" + " * size that libtsk requested; a Python override that\n" + " * returns more bytes than asked for would otherwise\n" + " * overflow the caller's buffer.\n" + " */\n" + " if((size_t) tmp_len > (size_t) {length:s}) {{\n" + " tmp_len = (Py_ssize_t) {length:s};\n" + " }}\n" " memcpy({name:s}, tmp_buff, tmp_len);\n" " Py_DecRef({result:s});\n" " {result:s} = PyLong_FromLong(tmp_len);\n" + " if({result:s} == NULL) {{\n" + " goto on_error;\n" + " }}\n" "}}\n").format(**values_dict) @@ -1725,11 +1699,8 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyBytes_FromStringAndSize((char *){name:s}->dptr, {name:s}->dsize);\n" - "#else\n" - " {result:s} = PyString_FromStringAndSize((char *){name:s}->dptr, {name:s}->dsize);\n" - "#endif\n" + " {result:s} = PyBytes_FromStringAndSize((char *){name:s}->dptr, {name:s}->dsize);\n" + " talloc_free({name:s});\n").format(**values_dict) def from_python_object(self, source, destination, method, **kwargs): @@ -1747,11 +1718,8 @@ def from_python_object(self, source, destination, method, **kwargs): "\n" " PyErr_Clear();\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " if(PyBytes_AsStringAndSize({source:s}, &buf, &tmp) == -1) {{\n" - "#else\n" - " if(PyString_AsStringAndSize({source:s}, &buf, &tmp) == -1) {{\n" - "#endif\n" + " if(PyBytes_AsStringAndSize({source:s}, &buf, &tmp) == -1) {{\n" + " goto on_error;\n" " }}\n" "\n" @@ -1781,11 +1749,8 @@ def from_python_object(self, source, destination, method, **kwargs): "\n" " PyErr_Clear();\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" - " if(PyBytes_AsStringAndSize({source:s}, &buf, &tmp) == -1) {{\n" - "#else\n" - " if(PyString_AsStringAndSize({source:s}, &buf, &tmp) == -1) {{\n" - "#endif\n" + " if(PyBytes_AsStringAndSize({source:s}, &buf, &tmp) == -1) {{\n" + " goto on_error;\n" " }}\n" " // Take a copy of the Python string - This leaks - how to fix it?\n" @@ -1802,11 +1767,8 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyBytes_FromStringAndSize((char *){name:s}.dptr, {name:s}.dsize);\n" - "#else\n" - " {result:s} = PyString_FromStringAndSize((char *){name:s}.dptr, {name:s}.dsize);\n" - "#endif\n").format(**values_dict) + " {result:s} = PyBytes_FromStringAndSize((char *){name:s}.dptr, {name:s}.dsize);\n" + ).format(**values_dict) class Void(Type): @@ -1887,11 +1849,8 @@ def from_python_object(self, source, destination, method, context="NULL"): " if(!tmp) {{\n" " goto on_error;\n" " }}\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {destination:s}[i] = PyBytes_AsString(tmp);\n" - "#else\n" - " {destination:s}[i] = PyString_AsString(tmp);\n" - "#endif\n" + " {destination:s}[i] = PyBytes_AsString(tmp);\n" + "\n" " if(!{destination:s}[i]) {{\n" " Py_DecRef(tmp);\n" @@ -2030,8 +1989,13 @@ def assign(self, call, method, target=None, **kwargs): " goto on_error;\n" " }\n") + # Pass the calling pyXxx wrapper as parent so the child holds + # a strong reference back to it. Required for free-threaded + # safety: without this, another thread could drop the parent's + # last visible reference and free the underlying libtsk handle + # while this child is still in use. result += ( - " wrapped_{name:s} = new_class_wrapper(returned_object, self->base_is_python_object);\n" + " wrapped_{name:s} = new_class_wrapper(returned_object, self->base_is_python_object, (PyObject *) self);\n" "\n" " if(wrapped_{name:s} == NULL) {{\n" " if(returned_object != NULL) {{\n" @@ -2064,8 +2028,12 @@ def to_python_object( "result": result} if sense == "proxied": + # Proxied path: wrapping a libtsk struct produced inside a + # user-overridden Python method. The caller's Python frame + # already holds the parent reference, so no additional + # parent keepalive is needed here. return ( - "{result:s} = (PyObject *) new_class_wrapper((Object){name:s}, 0);\n").format( + "{result:s} = (PyObject *) new_class_wrapper((Object){name:s}, 0, NULL);\n").format( **values_dict) return "{result:s} = (PyObject *) wrapped_{name:s};\n".format( @@ -2132,23 +2100,42 @@ def assign(self, call, method, target=None, borrowed=True, **kwargs): " PyErr_Clear();\n" "\n" " wrapped_{name:s} = (Gen_wrapper) PyObject_New(py{type:s}, &{type:s}_Type);\n" + " if(wrapped_{name:s} == NULL) {{\n" + " return NULL;\n" + " }}\n" "\n").format(**values_dict) if borrowed: + # The struct base points into memory owned by the parent + # Python wrapper. Keep the parent alive via python_object1 + # so a different thread cannot free the underlying libtsk + # handle while this borrowed wrapper is still in use. result += ( " // Base is borrowed from another object.\n" " wrapped_{name:s}->base = {call:s};\n" " wrapped_{name:s}->base_is_python_object = 0;\n" " wrapped_{name:s}->base_is_internal = 0;\n" - " wrapped_{name:s}->python_object1 = NULL;\n" + " Py_IncRef((PyObject *) self);\n" + " wrapped_{name:s}->python_object1 = (PyObject *) self;\n" " wrapped_{name:s}->python_object2 = NULL;\n" "\n").format(**values_dict) else: + # Method-return path (borrowed=False is passed by the + # method-call codegen). In practice every libtsk method + # we wrap that returns a struct hands back a pointer into + # parent-owned memory (e.g. tsk_vs_part_get returns a + # TSK_VS_PART_INFO * inside the parent TSK_VS_INFO), and + # the generated *_dealloc never frees self->base. So the + # wrapper is logically *not* the owner; mark it as such + # via base_is_internal = 0 to avoid misleading future code + # that might gate a free/close on that flag. Keep the + # parent alive via python_object1. result += ( " wrapped_{name:s}->base = {call:s};\n" " wrapped_{name:s}->base_is_python_object = 0;\n" - " wrapped_{name:s}->base_is_internal = 1;\n" - " wrapped_{name:s}->python_object1 = NULL;\n" + " wrapped_{name:s}->base_is_internal = 0;\n" + " Py_IncRef((PyObject *) self);\n" + " wrapped_{name:s}->python_object1 = (PyObject *) self;\n" " wrapped_{name:s}->python_object2 = NULL;\n" "\n").format(**values_dict) @@ -2156,6 +2143,9 @@ def assign(self, call, method, target=None, borrowed=True, **kwargs): result += ( " if(wrapped_{name:s}->base == NULL) {{\n" " Py_DecRef((PyObject *) wrapped_{name:s});\n" + " if(check_error()) {{\n" + " goto on_error;\n" + " }}\n" " return NULL;\n" " }}\n").format(**values_dict) @@ -2195,8 +2185,18 @@ def definition(self, default="NULL", sense="in", **kwargs): class PointerStructWrapper(StructWrapper): def from_python_object(self, source, destination, method, **kwargs): - return "{0:s} = ({1:s} *) ((Gen_wrapper) {2:s})->base;\n".format( - destination, self.original_type, source) + values_dict = { + "destination": destination, + "source": source, + "type": self.original_type} + return ( + " if({source:s} == NULL || !type_check({source:s}, &{type:s}_Type)) {{\n" + " PyErr_Format(PyExc_RuntimeError,\n" + " \"proxied {type:s} method returned NULL or wrong type\");\n" + " goto on_error;\n" + " }}\n" + " {destination:s} = ({type:s} *) ((Gen_wrapper) {source:s})->base;\n" + ).format(**values_dict) def byref(self): return "&wrapped_{0:s}".format(self.name) @@ -2471,6 +2471,16 @@ def error_condition(self): if "DESTRUCTOR" in self.return_type.attributes: result += "self->base = NULL;\n" + # If a Python wrapper was already allocated but check_error() fired + # in the postcall, it must be released to avoid a refcount leak. + if isinstance(self.return_type, + (StructWrapper, PointerStructWrapper, Wrapper, PointerWrapper)): + name = self.return_type.name + result += ( + " if(wrapped_{0:s} != NULL) {{\n" + " Py_DecRef((PyObject *) wrapped_{0:s});\n" + " }}\n").format(name) + if hasattr(self, "args"): for type in self.args: if hasattr(type, "error_cleanup"): @@ -2736,6 +2746,10 @@ def write_definition(self, out): out.write(( "{{\n" + " if(self->base == NULL) {{\n" + " return PyErr_Format(PyExc_RuntimeError,\n" + " \"{class_name:s}.{method:s}: object is not bound to any libtsk handle\");\n" + " }}\n" " (({class_name:s}) self->base)->{method:s}(({class_name:s}) self->base);\n" " return PyObject_SelfIter((PyObject *) self);\n" "}}\n").format(**values_dict)) @@ -2860,8 +2874,19 @@ def write_definition(self, out): # Assign the initialise_proxies handler out.write(( - " self->python_object1 = NULL;\n" - " self->python_object2 = NULL;\n" + " /* Release any state from a prior __init__ call so that\n" + " * re-initialization does not leak keepalives or libtsk handles.\n" + " */\n" + " Py_CLEAR(self->python_object1);\n" + " Py_CLEAR(self->python_object2);\n" + " if(self->base != NULL) {{\n" + " if(self->base_is_python_object != 0) {{\n" + " Py_DecRef((PyObject *) self->base);\n" + " }} else if(self->base_is_internal != 0) {{\n" + " talloc_free(self->base);\n" + " }}\n" + " self->base = NULL;\n" + " }}\n" "\n" " /* Initialise is used to keep a reference on the object?\n" " * If not called no longer valid warnings have been seen\n" @@ -2883,6 +2908,10 @@ def write_definition(self, out): "\n" " /* Allocate a new instance */\n" " self->base = ({class_name:s}) alloc_{class_name:s}();\n" + " if(self->base == NULL) {{\n" + " PyErr_NoMemory();\n" + " goto on_error;\n" + " }}\n" " self->base_is_python_object = 0;\n" " self->base_is_internal = 1;\n" " self->object_is_proxied = 0;\n" @@ -3054,12 +3083,17 @@ def built_ins(self, out): "name": attr.name} out.write(( - "#if PY_MAJOR_VERSION >= 3\n" - " string_object = PyUnicode_FromString(\"{name:s}\");\n" - "#else\n" - " string_object = PyString_FromString(\"{name:s}\");\n" - "#endif\n" - " PyList_Append(list_object, string_object);\n" + " string_object = PyUnicode_FromString(\"{name:s}\");\n" + + " if(string_object == NULL) {{\n" + " Py_DecRef(list_object);\n" + " goto on_error;\n" + " }}\n" + " if(PyList_Append(list_object, string_object) < 0) {{\n" + " Py_DecRef(string_object);\n" + " Py_DecRef(list_object);\n" + " goto on_error;\n" + " }}\n" " Py_DecRef(string_object);\n" "\n").format(**values_dict)) @@ -3067,19 +3101,22 @@ def built_ins(self, out): out.write(( "\n" " for(i = {0:s}_methods; i->ml_name; i++) {{\n" - "#if PY_MAJOR_VERSION >= 3\n" - " string_object = PyUnicode_FromString(i->ml_name);\n" - "#else\n" - " string_object = PyString_FromString(i->ml_name);\n" - "#endif\n" - " PyList_Append(list_object, string_object);\n" + " string_object = PyUnicode_FromString(i->ml_name);\n" + + " if(string_object == NULL) {{\n" + " Py_DecRef(list_object);\n" + " goto on_error;\n" + " }}\n" + " if(PyList_Append(list_object, string_object) < 0) {{\n" + " Py_DecRef(string_object);\n" + " Py_DecRef(list_object);\n" + " goto on_error;\n" + " }}\n" " Py_DecRef(string_object);\n" " }}\n" - "#if PY_MAJOR_VERSION >= 3\n" " if( utf8_string_object != NULL ) {{\n" " Py_DecRef(utf8_string_object);\n" " }}\n" - "#endif\n" " return list_object;\n" " }}\n").format(self.class_name)) @@ -3096,9 +3133,7 @@ def write_definition(self, out): " PyObject *result = NULL;\n" " char *name = NULL;\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" " PyObject *utf8_string_object = NULL;\n" - "#endif\n" "\n" " // Try to hand it off to the Python native handler first\n" " result = PyObject_GenericGetAttr((PyObject*) self, pyname);\n" @@ -3109,22 +3144,16 @@ def write_definition(self, out): "\n" " PyErr_Clear();\n" " // No - nothing interesting was found by python\n" - "#if PY_MAJOR_VERSION >= 3\n" " utf8_string_object = PyUnicode_AsUTF8String(pyname);\n" "\n" " if(utf8_string_object != NULL) {{\n" " name = PyBytes_AsString(utf8_string_object);\n" " }}\n" - "#else\n" - " name = PyString_AsString(pyname);\n" - "#endif\n" "\n" " if(!self->base) {{\n" - "#if PY_MAJOR_VERSION >= 3\n" " if( utf8_string_object != NULL ) {{\n" " Py_DecRef(utf8_string_object);\n" " }}\n" - "#endif\n" " return PyErr_Format(PyExc_RuntimeError, \"Wrapped object ({class_name:s}.{name:s}) no longer valid\");\n" " }}\n" " if(!name) {{\n" @@ -3135,22 +3164,18 @@ def write_definition(self, out): out.write( "\n" - "#if PY_MAJOR_VERSION >= 3\n" " if( utf8_string_object != NULL ) {{\n" " Py_DecRef(utf8_string_object);\n" " }}\n" - "#endif\n" " return PyObject_GenericGetAttr((PyObject *) self, pyname);\n") # Write the error part of the function. if self.error_set: out.write( "on_error:\n" - "#if PY_MAJOR_VERSION >= 3\n" " if( utf8_string_object != NULL ) {{\n" " Py_DecRef(utf8_string_object);\n" - " }}\n" - "#endif\n" + self.error_condition()) + " }}\n" + self.error_condition()) out.write("}\n\n") @@ -3176,6 +3201,13 @@ def write_definition_getters(self, out): " PyObject *Py_result = NULL;\n" "{python_def:s}\n" "\n" + " if(self->base == NULL) {{\n" + " return PyErr_Format(PyExc_RuntimeError,\n" + " \"{class_name:s}.{name:s}: object is not bound \"\n" + " \"to any libtsk handle (was it instantiated \"\n" + " \"directly?)\");\n" + " }}\n" + "\n" "{python_assign:s}\n" "{python_obj:s}\n" "\n" @@ -3272,11 +3304,16 @@ def _write_definition(self, out): " // Grab the GIL so we can do Python stuff\n" " gil_state = PyGILState_Ensure();\n" "\n" - "#if PY_MAJOR_VERSION >= 3\n" " method_name = PyUnicode_FromString(\"{0:s}\");\n" - "#else\n" - " method_name = PyString_FromString(\"{0:s}\");\n" - "#endif\n").format(self.name)) + " /* PyUnicode_FromString sets MemoryError on failure;\n" + " * propagate via the proxied error machinery rather than\n" + " * passing NULL to PyObject_CallMethodObjArgs (which would\n" + " * raise SystemError and lose the original cause).\n" + " */\n" + " if(method_name == NULL) {{\n" + " pytsk_fetch_error();\n" + " goto on_error;\n" + " }}\n").format(self.name)) out.write("\n// Obtain Python objects for all the args:\n") for arg in self.args: @@ -3293,7 +3330,6 @@ def _write_definition(self, out): out.write( "\n" " // Now call the method\n" - " PyErr_Clear();\n" " Py_result = PyObject_CallMethodObjArgs((PyObject *) ((Object) self)->extension, method_name, ") for arg in self.args: @@ -3322,12 +3358,27 @@ def _write_definition(self, out): "Py_result", self.return_type.name, self, context="self") out.write(" {0:s}".format(return_type)) - out.write( - " if(Py_result != NULL) {\n" - " Py_DecRef(Py_result);\n" - " }\n" - " Py_DecRef(method_name);\n" - "\n") + # For Wrapper return types the Python wrapper keeps the C object alive. + # Transfer ownership to the parent's python_object2 so the C pointer + # remains valid after this callback returns to libtsk. + if isinstance(self.return_type, Wrapper) and not isinstance( + self.return_type, (StructWrapper, PointerStructWrapper)): + out.write( + " if(Py_result != NULL) {\n" + " PyObject *old = ((Gen_wrapper) ((Object) self)->extension)->python_object2;\n" + " if(old != NULL) Py_DecRef(old);\n" + " ((Gen_wrapper) ((Object) self)->extension)->python_object2 = Py_result;\n" + " Py_result = NULL;\n" + " }\n" + " Py_DecRef(method_name);\n" + "\n") + else: + out.write( + " if(Py_result != NULL) {\n" + " Py_DecRef(Py_result);\n" + " }\n" + " Py_DecRef(method_name);\n" + "\n") # Decref all our Python objects: for arg in self.args: @@ -3401,6 +3452,18 @@ def write_destructor(self, out): " if(self->base != NULL) {{\n" " self->base = NULL;\n" " }}\n" + " /* Drop the parent keepalive that was attached when\n" + " * this struct wrapper was produced from a borrowed\n" + " * libtsk pointer. NULL when no parent was attached.\n" + " */\n" + " if(self->python_object2 != NULL) {{\n" + " Py_DecRef(self->python_object2);\n" + " self->python_object2 = NULL;\n" + " }}\n" + " if(self->python_object1 != NULL) {{\n" + " Py_DecRef(self->python_object1);\n" + " self->python_object1 = NULL;\n" + " }}\n" " ob_type = Py_TYPE(self);\n" " if(ob_type != NULL && ob_type->tp_free != NULL) {{\n" " ob_type->tp_free((PyObject*) self);\n" @@ -3658,8 +3721,7 @@ def numeric_protocol(self, out): args[type] = "0" out.write(( - "#if PY_MAJOR_VERSION >= 3\n" - "static PyNumberMethods {class:s}_as_number = {{\n" + "static PyNumberMethods {class:s}_as_number = {{\n" " (binaryfunc) 0, /* nb_add */\n" " (binaryfunc) 0, /* nb_subtract */\n" " (binaryfunc) 0, /* nb_multiply */\n" @@ -3698,52 +3760,7 @@ def numeric_protocol(self, out): "\n" " (unaryfunc) 0, /* nb_index */\n" "}};\n" - "#else\n" - "static PyNumberMethods {class:s}_as_number = {{\n" - " (binaryfunc) 0, /* nb_add */\n" - " (binaryfunc) 0, /* nb_subtract */\n" - " (binaryfunc) 0, /* nb_multiply */\n" - " (binaryfunc) 0, /* nb_divide */\n" - " (binaryfunc) 0, /* nb_remainder */\n" - " (binaryfunc) 0, /* nb_divmod */\n" - " (ternaryfunc) 0, /* nb_power */\n" - " (unaryfunc) 0, /* nb_negative */\n" - " (unaryfunc) 0, /* nb_positive */\n" - " (unaryfunc) 0, /* nb_absolute */\n" - " (inquiry) {nonzero:s}, /* nb_nonzero */\n" - " (unaryfunc) 0, /* nb_invert */\n" - " (binaryfunc) 0, /* nb_lshift */\n" - " (binaryfunc) 0, /* nb_rshift */\n" - " (binaryfunc) 0, /* nb_and */\n" - " (binaryfunc) 0, /* nb_xor */\n" - " (binaryfunc) 0, /* nb_or */\n" - " (coercion) 0, /* nb_coerce */\n" - " (unaryfunc) {int:s}, /* nb_int */\n" - " (unaryfunc) 0, /* nb_long */\n" - " (unaryfunc) 0, /* nb_float */\n" - " (unaryfunc) 0, /* nb_oct */\n" - " (unaryfunc) 0, /* nb_hex */\n" - "\n" - " (binaryfunc) 0, /* nb_inplace_add */\n" - " (binaryfunc) 0, /* nb_inplace_subtract */\n" - " (binaryfunc) 0, /* nb_inplace_multiply */\n" - " (binaryfunc) 0, /* nb_inplace_divide */\n" - " (binaryfunc) 0, /* nb_inplace_remainder */\n" - " (ternaryfunc) 0, /* nb_inplace_power */\n" - " (binaryfunc) 0, /* nb_inplace_lshift */\n" - " (binaryfunc) 0, /* nb_inplace_rshift */\n" - " (binaryfunc) 0, /* nb_inplace_and */\n" - " (binaryfunc) 0, /* nb_inplace_xor */\n" - " (binaryfunc) 0, /* nb_inplace_or */\n" - "\n" - " (binaryfunc) 0, /* nb_floor_divide */\n" - " (binaryfunc) 0, /* nb_true_divide */\n" - " (binaryfunc) 0, /* nb_inplace_floor_divide */\n" - " (binaryfunc) 0, /* nb_inplace_true_divide */\n" - "\n" - " (unaryfunc) 0, /* nb_index */\n" - "}};\n" - "#endif /* PY_MAJOR_VERSION >= 3 */\n" + "\n").format(**args)) return "&{class:s}_as_number".format(**args) @@ -3999,7 +4016,10 @@ def struct(self, out): "int {class_name:s}_init_type(\n" " PyTypeObject *type_object )\n" "{{\n" - " type_object->tp_dict = PyDict_New();\n").format( + " type_object->tp_dict = PyDict_New();\n" + " if(type_object->tp_dict == NULL) {{\n" + " return 0;\n" + " }}\n").format( **values_dict)) if self.values: @@ -4010,11 +4030,23 @@ def struct(self, out): "class_name": self.class_name, "value": attr} + # Each enum value must succeed; otherwise the type's + # tp_dict is partially populated and the module loads + # with broken constants. Bail out cleanly so PyType_Ready + # never sees a half-initialized enum. out.write(( " integer_object = PyLong_FromLong({value:s});\n" - "\n" - " PyDict_SetItemString(type_object->tp_dict, \"{value:s}\", integer_object);\n" - "\n" + " if(integer_object == NULL) {{\n" + " Py_DecRef(type_object->tp_dict);\n" + " type_object->tp_dict = NULL;\n" + " return 0;\n" + " }}\n" + " if(PyDict_SetItemString(type_object->tp_dict, \"{value:s}\", integer_object) < 0) {{\n" + " Py_DecRef(integer_object);\n" + " Py_DecRef(type_object->tp_dict);\n" + " type_object->tp_dict = NULL;\n" + " return 0;\n" + " }}\n" " Py_DecRef(integer_object);\n" "\n").format(**values_dict)) @@ -4076,11 +4108,8 @@ def to_python_object(self, name=None, result="Py_result", **kwargs): return ( " PyErr_Clear();\n" - "#if PY_MAJOR_VERSION >= 3\n" - " {result:s} = PyLong_FromLong({name:s});\n" - "#else\n" - " {result:s} = PyInt_FromLong({name:s});\n" - "#endif\n").format(**values_dict) + " {result:s} = PyLong_FromLong({name:s});\n" + ).format(**values_dict) def pre_call(self, method, **kwargs): method.error_set = True diff --git a/setup.py b/setup.py index 8fcac07..f92b814 100755 --- a/setup.py +++ b/setup.py @@ -26,8 +26,6 @@ """ -from __future__ import print_function - import copy import glob import re @@ -53,9 +51,9 @@ version_tuple = (sys.version_info[0], sys.version_info[1]) -if version_tuple < (3, 7): +if version_tuple < (3, 10): print(( - 'Unsupported Python version: {0:s}, version 3.7 or higher ' + 'Unsupported Python version: {0:s}, version 3.10 or higher ' 'required.').format(sys.version)) sys.exit(1) @@ -157,19 +155,26 @@ def configure_source(self, compiler): define_macros = [("HAVE_TSK_LIBTSK_H", "")] if compiler.compiler_type == "msvc": + # TSK_MULTITHREAD_LIB makes libtsk's tsk_error_* state per-thread + # and turns the cache_lock primitives into real CRITICAL_SECTIONs. + # Required for the module's Py_MOD_GIL_NOT_USED declaration. define_macros.extend([ ("WIN32", "1"), ("UNICODE", "1"), ("NOMINMAX", "1"), + ("TSK_MULTITHREAD_LIB", "1"), ("_CRT_SECURE_NO_WARNINGS", "1")]) # TODO: ("GUID_WINDOWS", "1"), else: # We want to build as much as possible self contained Python - # binding. + # binding. Multithreading is left enabled (configure default) so + # libtsk's error reporting is per-thread and cache_lock is a real + # mutex; otherwise concurrent calls from different threads would + # scramble tsk_error_get() and race on libtsk's internal caches. command = [ - "sh", "configure", "--disable-java", "--disable-multithreading", + "sh", "configure", "--disable-java", "--without-afflib", "--without-libbfio", "--without-libewf", "--without-libvhdi", "--without-libvmdk", "--without-libvslvm", "--without-zlib"] diff --git a/tests/security.py b/tests/security.py new file mode 100644 index 0000000..9a56331 --- /dev/null +++ b/tests/security.py @@ -0,0 +1,196 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +"""Security regression tests for pytsk3. + +Each test pins a previously-fixed vulnerability so that a future +refactor cannot silently reintroduce it. +""" + +import io +import os +import unittest + +import pytsk3 + + +_TEST_IMAGE = os.path.join('test_data', 'image.raw') + + +class HeapOverflowOnProxiedReadTest(unittest.TestCase): + """Pin the bound on Python-supplied bytes from a subclass read(). + + ProxiedImg_Info_read used to memcpy(buf, tmp_buff, tmp_len) with no + upper bound on tmp_len, where tmp_len comes from the bytes object + the Python override returned. A subclass returning more bytes than + libtsk requested overflowed the libtsk-allocated heap buffer. + + The codegen now clamps tmp_len to the requested length before the + copy. Returning oversized bytes must therefore be truncated rather + than corrupting heap memory. + """ + + def testOversizedSubclassReadIsClamped(self): + # The proxied callback path is only invoked when libtsk itself + # calls back into the Python subclass's read() (e.g. during + # FS_Info auto-detection). A direct `img.read(...)` from Python + # uses normal MRO and does not exercise the C memcpy. So the + # test feeds the real test image through a subclass whose read() + # is instrumented to return way more bytes than libtsk asks for; + # if the memcpy bound were missing, libtsk's cache slot would be + # corrupted and FS_Info would either crash or read garbage. + file_path = _TEST_IMAGE + file_size = os.stat(file_path).st_size + file_object = open(file_path, 'rb') + self.addCleanup(file_object.close) + + class _OversizedSubclass(pytsk3.Img_Info): + + def __init__(self): + self._file_object = file_object + self._file_size = file_size + self.max_overflow = 0 + super().__init__(url='', type=pytsk3.TSK_IMG_TYPE_RAW) + + def get_size(self): + return self._file_size + + def read(self, offset, size): + self._file_object.seek(offset, os.SEEK_SET) + data = self._file_object.read(size) + # Return 4x what libtsk asked for. The C-side memcpy bound + # is what stops this from corrupting libtsk's cache slot. + # Track the largest overflow we attempted so the assertion + # below is meaningful even if libtsk asks for tiny sizes. + bloated = data + b'\xff' * len(data) * 3 + self.max_overflow = max(self.max_overflow, len(bloated) - size) + return bloated + + img = _OversizedSubclass() + try: + # FS_Info construction makes libtsk call img.read() multiple + # times to identify the filesystem. With the clamp in place + # this succeeds and returns valid file metadata. Without the + # clamp this either crashes or scrambles cache state, making + # the directory listing wrong / empty / corrupt. + fs = pytsk3.FS_Info(img, offset=0) + directory = fs.open_dir('/') + names = sorted( + entry.info.name.name + for entry in directory + if entry.info and entry.info.name) + # We deliberately returned oversized buffers from every read. + self.assertGreater(img.max_overflow, 0, + 'subclass never overflowed -- test inert') + # Listing must include the well-known fixture entries. + self.assertIn(b'passwords.txt', names) + finally: + img.close() + + +class ExitMethodDoesNotKillInterpreterTest(unittest.TestCase): + """Pin the FS_Info.exit() denial-of-service fix. + + FS_Info_exit used to call exit(0), letting any caller of + fs_info.exit() terminate the host Python interpreter. Now it must + raise a clean RuntimeError instead. + """ + + def testExitRaisesRuntimeError(self): + img = pytsk3.Img_Info(url=_TEST_IMAGE) + fs = pytsk3.FS_Info(img, offset=0) + with self.assertRaises(RuntimeError): + fs.exit() + # Process is still alive; reach into pytsk after exit() to prove + # we did not just survive in C-side limbo. + file_object = fs.open_meta(15) + self.assertEqual(file_object.info.meta.size, 116) + + +class StructWrapperUninitializedAccessTest(unittest.TestCase): + """Pin the property-getter NULL-base guard. + + Direct instantiation (e.g. `pytsk3.TSK_FS_BLOCK()`) leaves + self->base == NULL. Property access used to dereference NULL and + crash; it must now raise RuntimeError instead. + """ + + def _struct_classes(self): + candidates = [ + 'TSK_FS_BLOCK', 'TSK_FS_INFO', 'TSK_FS_NAME', 'TSK_FS_META', + 'TSK_FS_FILE', 'TSK_FS_ATTR', 'TSK_FS_ATTR_RUN', + 'TSK_VS_INFO', 'TSK_VS_PART_INFO', + ] + return [c for c in candidates if hasattr(pytsk3, c)] + + def _candidate_attrs(self, class_name): + """Static list of property names per struct, so the test does not + have to call dir() on an unbound instance (dir() routes through + the wrapper's __getattr__ which itself trips the NULL-base + guard, before we get a chance to test individual properties). + """ + return { + 'TSK_FS_BLOCK': ('tag', 'fs_info', 'addr', 'flags'), + 'TSK_FS_INFO': ('tag', 'block_count', 'block_size', 'inum_count'), + 'TSK_FS_NAME': ('name', 'meta_addr', 'flags'), + 'TSK_FS_META': ('addr', 'size', 'mode', 'type'), + 'TSK_FS_FILE': ('tag', 'fs_info'), + 'TSK_FS_ATTR': ('flags', 'name', 'type', 'id', 'size'), + 'TSK_FS_ATTR_RUN': ('addr', 'len', 'offset', 'flags'), + 'TSK_VS_INFO': ('tag', 'vstype', 'block_size', 'offset'), + 'TSK_VS_PART_INFO': ('tag', 'addr', 'start', 'len', 'flags'), + }.get(class_name, ()) + + def testEveryStructWrapperRaisesOnUnboundAccess(self): + classes = self._struct_classes() + self.assertGreater(len(classes), 0, 'no struct wrappers found') + guard_hits = 0 + for class_name in classes: + cls = getattr(pytsk3, class_name) + try: + instance = cls() + except TypeError: + # Some classes refuse direct construction; that's fine -- the + # vulnerability requires a NULL self->base reachable from + # Python, and refusing __init__ achieves the same end. + continue + for attr in self._candidate_attrs(class_name): + try: + getattr(instance, attr) + except RuntimeError: + # Raised by the NULL-base guard in the property getter + # prelude. This is the desired behavior. + guard_hits += 1 + except (AttributeError, TypeError, IOError): + # Acceptable -- attribute does not exist on this build / + # version, or the wrapper's __getattr__ refused before + # reaching the property, both of which still avoid the + # crash this test is pinning. + pass + self.assertGreater(guard_hits, 0, + 'NULL-base guard was never exercised; test is inert') + + +class ErrorMessageWithoutLibtskErrnoTest(unittest.TestCase): + """Pin the safe_tsk_error_get() helper. + + RaiseError(..., "%s", tsk_error_get()) used to pass a NULL pointer + to %s when libtsk recorded no t_errno -- undefined behavior on + glibc. The wrapper now substitutes a placeholder string. + """ + + def testInvalidImageProducesUsableMessage(self): + # Hitting an internal libtsk error path that may not set t_errno + # ultimately raises through RaiseError; the resulting Python + # exception message must be a non-empty string with no NUL byte + # and no literal "(null)" leak from a NULL-on-%s formatter. + img = pytsk3.Img_Info(url=_TEST_IMAGE) + with self.assertRaises(IOError) as ctx: + pytsk3.FS_Info(img, offset=999_999_999_999) + message = str(ctx.exception) + self.assertTrue(message) + self.assertNotIn('\x00', message) + self.assertNotIn('(null)', message) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_lib.py b/tests/test_lib.py index 5517375..4e78578 100644 --- a/tests/test_lib.py +++ b/tests/test_lib.py @@ -1,12 +1,22 @@ """Shared test case.""" import os +import threading import pytsk3 class FileObjectImageInfo(pytsk3.Img_Info): - """Img_Info that uses a file-like object.""" + """Img_Info that uses a file-like object. + + Thread-safety: pytsk3 may invoke read() concurrently from multiple + threads (true under free-threaded Python; possible even with the + GIL via cooperative thread switches across the seek+read pair). + Most file-like objects implement read positioning as a stateful + seek+read on a single fd, so concurrent calls would race. We + serialize the seek/read pair under a per-instance lock so that + pytsk3 callers can share one FileObjectImageInfo across threads. + """ def __init__( self, file_object, file_size, image_type=pytsk3.TSK_IMG_TYPE_RAW): @@ -27,6 +37,10 @@ def __init__( # pytsk3.Img_Info does not let you set attributes after initialization. self._file_object = file_object self._file_size = file_size + # Guards the seek+read pair on _file_object. Created before the + # parent __init__ because pytsk3 may begin proxying read() calls + # as soon as the constructor returns from Python's perspective. + self._read_lock = threading.Lock() # Using the old parent class invocation style otherwise some versions # of pylint complain also setting type to RAW to make sure Img_Info # does not do detection. @@ -37,7 +51,8 @@ def __init__( def close(self): """Closes the volume IO object.""" - self._file_object = None + with self._read_lock: + self._file_object = None def read(self, offset, size): """Reads a byte string from the image object at the specified offset. @@ -49,8 +64,12 @@ def read(self, offset, size): Returns: A byte string containing the data read. """ - self._file_object.seek(offset, os.SEEK_SET) - return self._file_object.read(size) + with self._read_lock: + file_object = self._file_object + if file_object is None: + return b'' + file_object.seek(offset, os.SEEK_SET) + return file_object.read(size) def get_size(self): """Retrieves the size.""" diff --git a/tests/thread_safety.py b/tests/thread_safety.py new file mode 100644 index 0000000..1368a48 --- /dev/null +++ b/tests/thread_safety.py @@ -0,0 +1,755 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +"""Thread-safety and free-threaded Python regression tests for pytsk3. + +These exercise patterns that can crash or scramble state when: + * libtsk is built without TSK_MULTITHREAD_LIB (cache_lock no-ops, + tsk_error_get backed by a single global), or + * Python wrapper objects yielded from iteration / property access do + not hold a strong reference to their parent (use-after-free when + another thread drops the parent's last visible reference). + +Most tests run on both GIL and free-threaded builds. They are +deliberately stress-shaped: a barrier synchronizes all threads to +release together so the contended window is wide enough to surface +races. +""" + +import gc +import os +import sys +import threading +import unittest + +import pytsk3 + +import test_lib + + +_TEST_IMAGE = os.path.join('test_data', 'image.raw') +_TEST_VOLUME = os.path.join('test_data', 'tsk_volume_system.raw') + + +def _is_free_threaded(): + """True when running on a free-threaded Python build.""" + return hasattr(sys, '_is_gil_enabled') and not sys._is_gil_enabled() + + +def _run_concurrently(target, count, *args, **kwargs): + """Run target in count threads released by a shared barrier. + + Returns the list of per-thread results (or the raised exception) in + thread-start order. Re-raises the first exception seen so the test + framework reports it. + """ + barrier = threading.Barrier(count) + results = [None] * count + errors = [None] * count + + def runner(index): + try: + barrier.wait() + results[index] = target(index, *args, **kwargs) + except BaseException as exc: # pylint: disable=broad-except + errors[index] = exc + + threads = [threading.Thread(target=runner, args=(i,)) for i in range(count)] + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + for exc in errors: + if exc is not None: + raise exc + return results + + +class ModuleFreeThreadingTest(unittest.TestCase): + """Verifies the module's free-threaded compatibility declaration.""" + + def testImportSucceeds(self): + """The module must import without forcing the GIL back on.""" + self.assertTrue(hasattr(pytsk3, 'Img_Info')) + + @unittest.skipUnless( + _is_free_threaded(), + 'requires a free-threaded Python build') + def testModuleDoesNotForceGil(self): + """Importing pytsk3 must not flip the interpreter back to GIL mode. + + PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED) is the + declaration that prevents this; if it is missing the runtime would + re-enable the GIL at import time and sys._is_gil_enabled() would + return True. + """ + self.assertFalse(sys._is_gil_enabled()) + + +class ConcurrentImgInfoTest(unittest.TestCase): + """Threads each operating on independent Img_Info instances.""" + + def setUp(self): + self._test_file = _TEST_IMAGE + self._file_size = os.stat(self._test_file).st_size + + def testIndependentImgInfoReads(self): + """Each thread builds its own Img_Info and reads concurrently. + + Validates that libtsk's per-thread error reporting and + cache_lock primitives behave correctly under independent use -- + if TSK_MULTITHREAD_LIB is missing this can scramble error state + between threads. + """ + expected = { + 0x5800: b'place,user,passw', + 0x7c00: b'This is another ', + } + + def worker(_index): + img = pytsk3.Img_Info(url=self._test_file) + try: + for _ in range(50): + for offset, value in expected.items(): + self.assertEqual(img.read(offset, len(value)), value) + finally: + img.close() + + _run_concurrently(worker, 8) + + def testSharedImgInfoConcurrentRead(self): + """Many threads share one Img_Info and call read() concurrently.""" + img = pytsk3.Img_Info(url=self._test_file) + try: + + def worker(_index): + for _ in range(100): + self.assertEqual( + img.read(0x5800, 16), b'place,user,passw') + + _run_concurrently(worker, 8) + finally: + img.close() + + +class ConcurrentFsInfoTest(unittest.TestCase): + """Threads operating concurrently against FS_Info objects.""" + + def setUp(self): + self._test_file = _TEST_IMAGE + + def testIndependentFsInfos(self): + """Each thread builds its own Img_Info+FS_Info and walks /. + + This is the supported "one libtsk handle per thread" pattern. + """ + + def worker(_index): + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + directory = fs.open_dir('/') + names = [] + for entry in directory: + if entry.info and entry.info.name: + names.append(entry.info.name.name) + return names + + results = _run_concurrently(worker, 8) + # Every thread must have observed the same root listing. + self.assertTrue(all(r == results[0] for r in results)) + self.assertIn(b'passwords.txt', results[0]) + + def testSharedFsInfoConcurrentOpenMeta(self): + """Threads share an FS_Info and open files by inode in parallel. + + libtsk's inode/block caches are accessed under cache_lock when + TSK_MULTITHREAD_LIB is enabled; this is the test that breaks when + that flag was disabled at build time. + """ + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + + def worker(_index): + for _ in range(50): + file_object = fs.open_meta(15) + self.assertEqual(file_object.info.meta.size, 116) + # Read the file content fully -- this hits the FS cache. + data = file_object.read_random(0, 116) + self.assertEqual(len(data), 116) + + _run_concurrently(worker, 8) + + +class ParentKeepaliveTest(unittest.TestCase): + """Children yielded from iteration / properties keep their parent alive. + + Without parent keepalive the underlying libtsk handle can be freed + while a yielded child is still in use, producing a use-after-free. + These tests drop every visible reference to the parent and force a + GC pass before touching the child. + """ + + def setUp(self): + self._test_file = _TEST_IMAGE + + def _yield_first_file(self): + """Return a File yielded from iteration, with no caller-visible parent.""" + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + directory = fs.open_dir('/') + iterator = iter(directory) + first = next(iterator) + # Caller does not receive img / fs / directory / iterator; only the + # File. The C-level keepalive must hold them alive. + return first + + def testIteratedFileSurvivesParentDrop(self): + """A File yielded by Directory iteration must outlive its FS_Info.""" + file_object = self._yield_first_file() + # Force any cyclic collection now -- if our keepalive is wrong the + # FS_Info / Img_Info would be reclaimed here. + gc.collect() + self.assertIsNotNone(file_object.info) + if file_object.info.name: + # Touching the borrowed name buffer reaches into FS-owned memory. + self.assertIsNotNone(file_object.info.name.name) + + def testOpenedFileSurvivesParentDrop(self): + """A File from FS_Info.open_meta must outlive its FS_Info.""" + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + file_object = fs.open_meta(15) + del fs + del img + gc.collect() + # passwords.txt is 116 bytes at inode 15 in the fixture image. + self.assertEqual(file_object.read_random(0, 16), b'place,user,passw') + + def testDirectoryFromOpenDirSurvivesParentDrop(self): + """A Directory from FS_Info.open_dir must outlive its FS_Info.""" + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + directory = fs.open_dir('/') + del fs + del img + gc.collect() + names = [entry.info.name.name + for entry in directory if entry.info and entry.info.name] + self.assertIn(b'passwords.txt', names) + + def testStructGetterSurvivesParentDrop(self): + """A borrowed struct from a property getter must outlive its parent. + + file.info is a borrowed pyTSK_FS_FILE pointer into FS-owned memory. + Releasing the File wrapper must not invalidate the borrowed view. + """ + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + file_object = fs.open_meta(15) + info = file_object.info + meta = info.meta + del file_object + del fs + del img + gc.collect() + self.assertEqual(meta.size, 116) + + +class ConcurrentParentDropTest(unittest.TestCase): + """One thread iterates while another drops the parent reference. + + This is the multi-threaded version of ParentKeepaliveTest. It only + meaningfully races on free-threaded builds, but is harmless on the + GIL build (just exercises the keepalive path under thread switches). + """ + + def setUp(self): + self._test_file = _TEST_IMAGE + + def testReadAfterParentDroppedOnOtherThread(self): + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + file_object = fs.open_meta(15) + + holder = {'fs': fs, 'img': img} + + def reader(_index): + for _ in range(100): + self.assertEqual( + file_object.read_random(0, 16), b'place,user,passw') + + def dropper(_index): + # Drop visible parents while the reader is mid-flight; the + # file_object's python_object1 keepalive must keep the libtsk + # handle alive until the file_object itself is released. + holder.clear() + gc.collect() + + barrier = threading.Barrier(2) + errors = [] + + def runner(target, index): + try: + barrier.wait() + target(index) + except BaseException as exc: # pylint: disable=broad-except + errors.append(exc) + + threads = [ + threading.Thread(target=runner, args=(reader, 0)), + threading.Thread(target=runner, args=(dropper, 1)), + ] + for thread in threads: + thread.start() + for thread in threads: + thread.join() + if errors: + raise errors[0] + + +class ConcurrentErrorPathTest(unittest.TestCase): + """tsk_error_get must be per-thread under concurrent failures. + + When TSK_MULTITHREAD_LIB is missing libtsk falls back to a single + global TSK_ERROR_INFO; concurrent failing calls then scramble each + other's errno + errstr buffers. We trigger a known-bogus open in + many threads at once and assert the resulting Python exception + message is well-formed every time. + """ + + def testConcurrentInvalidOpens(self): + img = pytsk3.Img_Info(url=_TEST_IMAGE) + + def worker(index): + seen_errors = 0 + for _ in range(50): + try: + # Inode 19 does not exist in the fixture; this consistently + # raises IOError and writes to libtsk's error buffer. + fs = pytsk3.FS_Info(img, offset=0) + fs.open_meta(19) + except IOError as exc: + seen_errors += 1 + # The string must be intact (no NUL truncation, no garbage) + # even when other threads are racing the same error path. + message = str(exc) + self.assertTrue(message) + self.assertNotIn('\x00', message) + self.assertEqual(seen_errors, 50, f'thread {index} lost errors') + + _run_concurrently(worker, 8) + + +class ConcurrentVolumeInfoTest(unittest.TestCase): + """Iterating Volume_Info from per-thread instances.""" + + def setUp(self): + self._test_file = _TEST_VOLUME + + def testIndependentVolumeInfos(self): + + def worker(_index): + img = pytsk3.Img_Info(url=self._test_file) + vs = pytsk3.Volume_Info(img) + addrs = [part.addr for part in vs] + return addrs + + results = _run_concurrently(worker, 8) + self.assertTrue(all(r == results[0] for r in results)) + self.assertGreater(len(results[0]), 0) + + def testVolumePartSurvivesParentDrop(self): + """Volume_Info.iternext yields TSK_VS_PART_INFO borrowed from the VS. + + With the parent keepalive, the part wrapper must remain valid + after Volume_Info and Img_Info go out of scope. + """ + img = pytsk3.Img_Info(url=self._test_file) + vs = pytsk3.Volume_Info(img) + parts = list(vs) + del vs + del img + gc.collect() + # Touching addr / start / len / desc reaches into VS-owned memory. + for part in parts: + _ = part.addr + _ = part.start + _ = part.len + + +class CloseDuringReadTest(unittest.TestCase): + """Img_Info.close() while another thread is mid-read. + + Img_Info_read takes the per-instance state_lock around the + img_is_open check and the actual tsk_img_read call; Img_Info_close + takes the same lock while flipping img_is_open. As a result a + close() call cannot tear state down mid-read, and any read started + after close has flipped the flag observes a clean IOError. + """ + + def setUp(self): + self._test_file = _TEST_IMAGE + + def testCloseConcurrentWithReader(self): + img = pytsk3.Img_Info(url=self._test_file) + stop = threading.Event() + errors = [] + + def reader(): + while not stop.is_set(): + try: + # Either succeeds with the right bytes, or raises a clean + # IOError once close() lands. Anything else (segfault, + # garbage bytes) is a bug. + data = img.read(0x5800, 16) + if data: + self.assertEqual(data, b'place,user,passw') + except IOError: + return # post-close path + except BaseException as exc: # pylint: disable=broad-except + errors.append(exc) + return + + threads = [threading.Thread(target=reader) for _ in range(4)] + for thread in threads: + thread.start() + # Let readers spin up briefly, then close from this thread. + threading.Event().wait(0.05) + img.close() + stop.set() + for thread in threads: + thread.join() + if errors: + raise errors[0] + + +class SharedFileConcurrentReadTest(unittest.TestCase): + """One File handle hammered from many threads at different offsets. + + pytsk3 does not synchronize File.read_random itself; libtsk's FS + cache_lock is what serializes the underlying read. This test + exercises that path -- if libtsk's cache_lock were a no-op (the + pre-fix state) the resulting bytes would scramble across threads. + """ + + def setUp(self): + self._test_file = _TEST_IMAGE + + def testSharedFileReadRandom(self): + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + file_object = fs.open_meta(15) + # Snapshot the full file once (uncontended) and use it as the + # ground truth for every concurrent read below. + expected = file_object.read_random(0, 116) + self.assertEqual(len(expected), 116) + + def worker(_index): + for _ in range(200): + for off in (0, 16, 32, 48, 64, 80): + chunk = file_object.read_random(off, 16) + self.assertEqual(chunk, expected[off:off + 16]) + + _run_concurrently(worker, 8) + + +class RecursiveWalkStressTest(unittest.TestCase): + """Recursive directory walks under many threads, each on its own FS. + + This is the soak test: long-running concurrent allocation churn + exercises every yield path through new_class_wrapper + + StructWrapper.assign + dealloc. A regression in the parent + keepalive (extra Py_IncRef without matching DecRef, or vice versa) + would surface as a steady leak; a refcount asymmetry surfaces as + an immediate crash. + """ + + def setUp(self): + self._test_file = _TEST_IMAGE + + def _walk(self, fs, directory, depth=0): + count = 0 + if depth > 8: + return count + for entry in directory: + count += 1 + if not entry.info or not entry.info.name: + continue + name = entry.info.name.name + if name in (b'.', b'..'): + continue + if not entry.info.meta: + continue + meta_type = entry.info.meta.type + if meta_type == pytsk3.TSK_FS_META_TYPE_DIR: + try: + sub = entry.as_directory() + except IOError: + continue + count += self._walk(fs, sub, depth + 1) + return count + + def testRecursiveWalkStress(self): + + def worker(_index): + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + total = 0 + for _ in range(20): + directory = fs.open_dir('/') + total += self._walk(fs, directory) + return total + + results = _run_concurrently(worker, 8) + # Every thread must have visited the same number of entries. + self.assertTrue(all(r == results[0] for r in results), results) + self.assertGreater(results[0], 0) + + +class GcUnderLoadTest(unittest.TestCase): + """Force GC in a hot loop while workers iterate. + + Cyclic GC visits all tracked objects and may run __del__ / + tp_dealloc on things that became unreachable since the last + collection. A bug in our parent keepalive (e.g. forgetting to + Py_IncRef in new_class_wrapper) would surface here as a UAF when + GC reaps a parent the worker is still using. + """ + + def setUp(self): + self._test_file = _TEST_IMAGE + + def testGcConcurrentWithIteration(self): + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + stop = threading.Event() + errors = [] + + def gc_worker(): + while not stop.is_set(): + gc.collect() + + def iter_worker(_index): + try: + for _ in range(100): + directory = fs.open_dir('/') + names = [] + for entry in directory: + if entry.info and entry.info.name: + names.append(entry.info.name.name) + self.assertIn(b'passwords.txt', names) + except BaseException as exc: # pylint: disable=broad-except + errors.append(exc) + + gc_thread = threading.Thread(target=gc_worker) + gc_thread.start() + try: + _run_concurrently(iter_worker, 4) + finally: + stop.set() + gc_thread.join() + if errors: + raise errors[0] + + +class GilStaysOffTest(unittest.TestCase): + """sys._is_gil_enabled() must remain False across pytsk3 operations. + + testModuleDoesNotForceGil only checks the post-import state. A + bug that re-enables the GIL on a specific code path (e.g. an + unguarded private API call) wouldn't be caught there. Hammer a + representative mix of pytsk3 operations and confirm the GIL + stays off the entire time. + """ + + @unittest.skipUnless( + _is_free_threaded(), + 'requires a free-threaded Python build') + def testGilStaysOffAcrossOperations(self): + self.assertFalse(sys._is_gil_enabled()) + img = pytsk3.Img_Info(url=_TEST_IMAGE) + self.assertFalse(sys._is_gil_enabled()) + fs = pytsk3.FS_Info(img, offset=0) + self.assertFalse(sys._is_gil_enabled()) + directory = fs.open_dir('/') + self.assertFalse(sys._is_gil_enabled()) + for entry in directory: + if entry.info and entry.info.meta: + _ = entry.info.meta.size + self.assertFalse(sys._is_gil_enabled()) + file_object = fs.open_meta(15) + self.assertEqual(file_object.read_random(0, 16), b'place,user,passw') + self.assertFalse(sys._is_gil_enabled()) + img.close() + self.assertFalse(sys._is_gil_enabled()) + + +class IteratorCursorThreadSafetyTest(unittest.TestCase): + """Sharing one Directory iterator across threads should be safe. + + Before the per-instance iter_lock was introduced, two threads + iterating the same Directory would race on self->current and + produce skipped/duplicated entries. With iter_lock each entry + index is consumed exactly once across all threads, so the union + of yields is the full directory listing. + """ + + def setUp(self): + self._test_file = _TEST_IMAGE + + def testSharedDirectoryIterator(self): + img = pytsk3.Img_Info(url=self._test_file) + fs = pytsk3.FS_Info(img, offset=0) + directory = fs.open_dir('/') + + # Build a baseline: how many entries are in /. Use a fresh + # Directory so the shared one below starts at cursor 0 (the C + # constructor sets current = 0; we deliberately do not call iter() + # here because that would also reset). + baseline = sum(1 for _ in fs.open_dir('/')) + + yielded = [] + yielded_lock = threading.Lock() + + def worker(_index): + # Note: do NOT call iter(directory) here. Directory is its own + # iterator and __iter__ resets the cursor under iter_lock -- + # which is correct behavior for "for x in d:" in a single + # thread, but would defeat the test that all workers consume + # from the same cursor sequence. + while True: + try: + entry = directory.__next__() + except StopIteration: + return + with yielded_lock: + yielded.append(entry) + + _run_concurrently(worker, 4) + # Cursor was advanced exactly baseline times across all workers. + # An off-by-one (missed lock release, double-advance) would change + # this count; a true race (skipping the cap-at-INT_MAX check) + # could overshoot indefinitely. + self.assertEqual(len(yielded), baseline) + + +def _have_subinterpreters(): + """True when the public test_support.interpreters API is available.""" + try: + import test.support.interpreters # noqa: F401 + return True + except ImportError: + pass + try: + import _interpreters # noqa: F401 + return True + except ImportError: + return False + + +_SUBINTERP_SCRIPT = ( + 'import pytsk3\n' + 'img = pytsk3.Img_Info(url=' + repr(_TEST_IMAGE) + ')\n' + 'assert img.get_size() == 102400\n' + 'fs = pytsk3.FS_Info(img, offset=0)\n' + 'f = fs.open_meta(15)\n' + "assert f.read_random(0, 16) == b'place,user,passw'\n") + + +def _is_subinterpreter_load_unsupported(exc): + """Detect 'module does not support loading in subinterpreters'. + + Python 3.12+ refuses to load single-phase-init C extension modules + into a subinterpreter unless they declare the + Py_mod_multiple_interpreters slot. pytsk3 still uses single-phase + init, so this ImportError is expected on 3.12 / 3.13. The message + bubbles up wrapped (e.g. as _xxsubinterpreters.RunFailedError) so + we have to match on the inner ImportError text. + """ + text = str(exc) + return ('does not support loading in subinterpreters' in text + or 'is not allowed in subinterpreters' in text) + + +class SubinterpreterImportTest(unittest.TestCase): + """pytsk3 must initialize cleanly inside a subinterpreter. + + tsk_init() is wrapped in std::call_once so the C class templates + are only initialized once across all interpreters. Importing + pytsk3 in a fresh subinterpreter exercises that path and surfaces + any cross-subinterpreter state leak. + + The subinterpreter API has shifted across Python versions: + * 3.11: only the private `_xxsubinterpreters` module, with a + `.run_string(id, script)` signature. + * 3.12-3.13: `test.support.interpreters` available but the + Interpreter object exposes `.run(script)`, not `.exec(...)`. + These versions also enforce PEP 489: single-phase-init C + modules (which pytsk3 still is) cannot load into a + subinterpreter, so this test is a no-op skip there. + * 3.14+: `interp.exec(script)` is the documented method, and the + default policy here permits the load. + This test probes each API in turn and skips when none works. + """ + + @unittest.skipUnless(_have_subinterpreters(), + 'subinterpreter API not available') + def testImportInSubinterpreter(self): + # Try the public API first. + try: + from test.support import interpreters # type: ignore + except ImportError: + interpreters = None # pylint: disable=invalid-name + + if interpreters is not None: + interp = interpreters.create() + try: + runner = getattr(interp, 'exec', None) or getattr(interp, 'run', None) + if runner is None: + self.skipTest( + 'test.support.interpreters has no exec/run on this build') + try: + runner(_SUBINTERP_SCRIPT) + except Exception as exc: # pylint: disable=broad-except + if _is_subinterpreter_load_unsupported(exc): + self.skipTest( + 'pytsk3 uses single-phase init; this Python version ' + 'forbids loading such modules in a subinterpreter') + raise + return + finally: + close = getattr(interp, 'close', None) + if close is not None: + close() + + # Fall back to the private API. The module name and run_string + # signature both vary across versions; tolerate either. + try: + import _interpreters # type: ignore # noqa: F401 + private = _interpreters + except ImportError: + try: + import _xxsubinterpreters # type: ignore # noqa: F401 + private = _xxsubinterpreters + except ImportError: + self.skipTest('no usable subinterpreter API') + + interp_id = private.create() + try: + run_string = getattr(private, 'run_string', None) + if run_string is None: + self.skipTest('private subinterpreter API has no run_string') + try: + run_string(interp_id, _SUBINTERP_SCRIPT) + except Exception as exc: # pylint: disable=broad-except + if _is_subinterpreter_load_unsupported(exc): + self.skipTest( + 'pytsk3 uses single-phase init; this Python version ' + 'forbids loading such modules in a subinterpreter') + raise + finally: + private.destroy(interp_id) + + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index 519c844..225c8f1 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py3{10,11,12,13,14} +envlist = py3{10,11,12,13,14,14t} [testenv] pip_pre = True diff --git a/tsk3.cpp b/tsk3.cpp index 5967bac..42f4772 100644 --- a/tsk3.cpp +++ b/tsk3.cpp @@ -26,6 +26,8 @@ extern "C" { extern void tsk_init_lock(tsk_lock_t * lock); extern void tsk_deinit_lock(tsk_lock_t * lock); +extern void tsk_take_lock(tsk_lock_t * lock); +extern void tsk_release_lock(tsk_lock_t * lock); } #endif /* defined( TSK_MULTITHREAD_LIB ) */ @@ -37,6 +39,16 @@ extern void tsk_deinit_lock(tsk_lock_t * lock); ssize_t IMG_INFO_read(TSK_IMG_INFO *self, TSK_OFF_T off, char *buf, size_t len); void IMG_INFO_close(TSK_IMG_INFO *self); +/* tsk_error_get() returns NULL when libtsk did not record a t_errno + * for the failure (some EOF / bounds paths return -1 without setting + * one). safe_tsk_error_get() guarantees a usable string. All call sites + * in pytsk pass through this wrapper. + */ +static const char *safe_tsk_error_get(void) { + const char *e = tsk_error_get(); + return (e != NULL) ? e : "unknown libtsk error"; +} + /* This macro is used to receive the object reference from a member of the type. */ #define GET_Object_from_member(type, object, member) \ @@ -48,15 +60,22 @@ static int Img_Info_dest(Img_Info self) { if(self == NULL) { return -1; } - tsk_img_close((TSK_IMG_INFO *) self->img); + if(self->img != NULL) { + tsk_img_close((TSK_IMG_INFO *) self->img); - if(self->img_is_internal != 0) { + if(self->img_is_internal != 0) { #if defined( TSK_MULTITHREAD_LIB ) - tsk_deinit_lock(&(self->img->base.cache_lock)); + tsk_deinit_lock(&(self->img->base.cache_lock)); #endif - // If img is internal talloc will free it. + // If img is internal talloc will free it. + } + self->img = NULL; + } + + if(self->state_lock_initialized != 0) { + tsk_deinit_lock(&self->state_lock); + self->state_lock_initialized = 0; } - self->img = NULL; return 0; } @@ -69,6 +88,14 @@ static Img_Info Img_Info_Con(Img_Info self, char *urn, TSK_IMG_TYPE_ENUM type) { RaiseError(EInvalidParameter, "Invalid parameter: self."); return NULL; } + /* Initialize the state_lock before any path that touches img_is_open. + * Destruction (Img_Info_dest) checks state_lock_initialized so this is + * safe even if a later step in this constructor fails. + */ + tsk_init_lock(&self->state_lock); + self->state_lock_initialized = 1; + talloc_set_destructor((void *) self, (int(*)(void *)) &Img_Info_dest); + if(urn != NULL && urn[0] != 0) { #ifdef TSK_VERSION_NUM self->img = (Extended_TSK_IMG_INFO *) tsk_img_open_utf8(1, (const char **) &urn, type, 0); @@ -111,14 +138,12 @@ static Img_Info Img_Info_Con(Img_Info self, char *urn, TSK_IMG_TYPE_ENUM type) { #endif } if(self->img == NULL) { - RaiseError(EIOError, "Unable to open image: %s", tsk_error_get()); + RaiseError(EIOError, "Unable to open image: %s", safe_tsk_error_get()); tsk_error_reset(); return NULL; } self->img_is_open = 1; - talloc_set_destructor((void *) self, (int(*)(void *)) &Img_Info_dest); - return self; } @@ -129,10 +154,6 @@ uint64_t Img_Info_read(Img_Info self, TSK_OFF_T off, OUT char *buf, size_t len) RaiseError(EInvalidParameter, "Invalid parameter: self."); return 0; } - if(self->img_is_open == 0) { - RaiseError(EIOError, "Invalid Img_Info not opened."); - return 0; - } if(off < 0) { RaiseError(EIOError, "Invalid offset value out of bounds."); return 0; @@ -141,10 +162,38 @@ uint64_t Img_Info_read(Img_Info self, TSK_OFF_T off, OUT char *buf, size_t len) RaiseError(EInvalidParameter, "Invalid parameter: buf."); return 0; } - read_count = CALL((TSK_IMG_INFO *) self->img, read, off, buf, len); + /* Take state_lock around the open-check and the read so a parallel + * Img_Info_close cannot flip img_is_open mid-read and tear down + * libtsk image state under us. tsk_img_read internally takes the + * libtsk cache_lock, so we acquire state_lock first and then + * cache_lock inside libtsk -- a fixed order that avoids deadlock + * since close() never touches cache_lock. + */ + if(self->state_lock_initialized != 0) { + tsk_take_lock(&self->state_lock); + } + if(self->img_is_open == 0) { + if(self->state_lock_initialized != 0) { + tsk_release_lock(&self->state_lock); + } + RaiseError(EIOError, "Invalid Img_Info not opened."); + return 0; + } + /* Go through tsk_img_read rather than the raw vtbl read directly: + * libtsk's raw_read uses lseek+read against a shared fd, whose + * file-position state races across threads. tsk_img_read holds + * cache_lock for the duration of the read, which serializes the + * positional access and is required by raw_read's documented + * "assumes we are under a lock" contract. + */ + read_count = tsk_img_read((TSK_IMG_INFO *) self->img, off, buf, len); + + if(self->state_lock_initialized != 0) { + tsk_release_lock(&self->state_lock); + } if(read_count < 0) { - RaiseError(EIOError, "Unable to read image: %s", tsk_error_get()); + RaiseError(EIOError, "Unable to read image: %s", safe_tsk_error_get()); tsk_error_reset(); return 0; } @@ -152,8 +201,21 @@ uint64_t Img_Info_read(Img_Info self, TSK_OFF_T off, OUT char *buf, size_t len) } void Img_Info_close(Img_Info self) { - if(self != NULL) { - self->img_is_open = 0; + if(self == NULL) { + return; + } + /* Synchronize with concurrent Img_Info_read: that path holds + * state_lock around the img_is_open check and the read itself, + * so once we acquire the lock here all in-flight reads have + * completed and any subsequent reader observes img_is_open == 0 + * and returns a clean error. + */ + if(self->state_lock_initialized != 0) { + tsk_take_lock(&self->state_lock); + } + self->img_is_open = 0; + if(self->state_lock_initialized != 0) { + tsk_release_lock(&self->state_lock); } } @@ -196,7 +258,14 @@ ssize_t IMG_INFO_read(TSK_IMG_INFO *img, TSK_OFF_T off, char *buf, size_t len) { if(len == 0) { return 0; } - return (ssize_t) CALL(self->container, read, (uint64_t) off, buf, len); + { + uint64_t n = CALL(self->container, read, (uint64_t) off, buf, len); + char *unused_buf; + if(*aff4_get_current_error(&unused_buf) != EZero) { + return -1; + } + return (ssize_t) n; + } } /* FS_Info destructor @@ -239,7 +308,7 @@ static FS_Info FS_Info_Con(FS_Info self, Img_Info img, TSK_OFF_T offset, if(!self->info) { RaiseError(EIOError, "Unable to open the image as a filesystem at offset: 0x%08" PRIxOFF " with error: %s", - offset, tsk_error_get()); + offset, safe_tsk_error_get()); tsk_error_reset(); return NULL; } @@ -289,21 +358,23 @@ static File FS_Info_open(FS_Info self, ZString path) { info = tsk_fs_file_open(self->info, NULL, path); if(info == NULL) { - RaiseError(EIOError, "Unable to open file: %s", tsk_error_get()); + RaiseError(EIOError, "Unable to open file: %s", safe_tsk_error_get()); tsk_error_reset(); goto on_error; } // CONSTRUCT_CREATE calls _talloc_memdup to allocate memory for the object. object = CONSTRUCT_CREATE(File, File, NULL); - if(object != NULL) { - // CONSTRUCT_INITIALIZE calls the constructor function on the object. - if(CONSTRUCT_INITIALIZE(File, File, Con, object, self, info) == NULL) { - goto on_error; - } - // Tell the File object to manage info. - object->info_is_internal = 1; + if(object == NULL) { + RaiseError(ENoMemory, "Unable to allocate File."); + goto on_error; } + // CONSTRUCT_INITIALIZE calls the constructor function on the object. + if(CONSTRUCT_INITIALIZE(File, File, Con, object, self, info) == NULL) { + goto on_error; + } + // Tell the File object to manage info. + object->info_is_internal = 1; return object; on_error: @@ -331,21 +402,23 @@ static File FS_Info_open_meta(FS_Info self, TSK_INUM_T inode) { info = tsk_fs_file_open_meta(self->info, NULL, inode); if(info == NULL) { - RaiseError(EIOError, "Unable to open file: %s", tsk_error_get()); + RaiseError(EIOError, "Unable to open file: %s", safe_tsk_error_get()); tsk_error_reset(); goto on_error; } // CONSTRUCT_CREATE calls _talloc_memdup to allocate memory for the object. object = CONSTRUCT_CREATE(File, File, NULL); - if(object != NULL) { - // CONSTRUCT_INITIALIZE calls the constructor function on the object. - if(CONSTRUCT_INITIALIZE(File, File, Con, object, self, info) == NULL) { - goto on_error; - } - // Tell the File object to manage info. - object->info_is_internal = 1; + if(object == NULL) { + RaiseError(ENoMemory, "Unable to allocate File."); + goto on_error; + } + // CONSTRUCT_INITIALIZE calls the constructor function on the object. + if(CONSTRUCT_INITIALIZE(File, File, Con, object, self, info) == NULL) { + goto on_error; } + // Tell the File object to manage info. + object->info_is_internal = 1; return object; on_error: @@ -360,7 +433,16 @@ static File FS_Info_open_meta(FS_Info self, TSK_INUM_T inode) { static void FS_Info_exit(FS_Info self PYTSK3_ATTRIBUTE_UNUSED) { PYTSK3_UNREFERENCED_PARAMETER(self) - exit(0); + /* Previously called exit(0), which lets any Python caller of + * FS_Info.exit() kill the host interpreter -- a trivial denial of + * service for any process that loads pytsk3 alongside untrusted + * user code (notebooks, plugins, batch runners). Raise instead so + * the caller sees a clean RuntimeError. + */ + RaiseError(ERuntimeError, + "FS_Info.exit is intentionally disabled; previously this " + "method called exit() from C and would terminate the host " + "interpreter."); }; VIRTUAL(FS_Info, Object) { @@ -380,6 +462,11 @@ static int Directory_dest(Directory self) { tsk_fs_dir_close(self->info); self->info = NULL; + if(self->iter_lock_initialized != 0) { + tsk_deinit_lock(&self->iter_lock); + self->iter_lock_initialized = 0; + } + return 0; } @@ -405,7 +492,7 @@ static Directory Directory_Con(Directory self, FS_Info fs, ZString path, TSK_INU self->info = tsk_fs_dir_open(fs->info, path); } if(self->info == NULL) { - RaiseError(EIOError, "Unable to open directory: %s", tsk_error_get()); + RaiseError(EIOError, "Unable to open directory: %s", safe_tsk_error_get()); tsk_error_reset(); return NULL; } @@ -413,6 +500,13 @@ static Directory Directory_Con(Directory self, FS_Info fs, ZString path, TSK_INU self->size = tsk_fs_dir_getsize(self->info); self->fs = fs; + /* Initialize iter_lock so concurrent iteration of this Directory + * from multiple threads is safe. The cursor is mutated under the + * lock in Directory_next and Directory_iter. + */ + tsk_init_lock(&self->iter_lock); + self->iter_lock_initialized = 1; + // TODO: is this still applicable? // Add a reference to them to ensure they dont get freed until we do. // talloc_reference(self, fs); @@ -425,44 +519,69 @@ static Directory Directory_Con(Directory self, FS_Info fs, ZString path, TSK_INU static File Directory_next(Directory self) { TSK_FS_FILE *info = NULL; File object = NULL; + int snapshot_current = 0; if(self == NULL) { RaiseError(EInvalidParameter, "Invalid parameter: self."); return NULL; } + /* Take the cursor snapshot and advance under iter_lock so concurrent + * threads each consume a distinct entry. tsk_fs_dir_get itself is + * thread-safe under libtsk's cache_lock (since TSK_MULTITHREAD_LIB + * is enabled), so we can release iter_lock before calling it. + */ + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } if((self->current < 0) || ((uint64_t) self->current > (uint64_t) self->size)) { + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } RaiseError(EInvalidParameter, "Invalid parameter: current."); return NULL; } if((uint64_t) self->current == (uint64_t) self->size) { + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } return NULL; } /* Cap at INT_MAX so the post-increment below can never overflow - * a signed int. While self->size is bounded by the filesystem, + * a signed int. While self->size is bounded by the filesystem, * directories on exotic inputs could in theory drive this past INT_MAX. */ if(self->current == INT_MAX) { + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } return NULL; } - info = tsk_fs_dir_get(self->info, self->current); + snapshot_current = self->current; + self->current++; + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } + + info = tsk_fs_dir_get(self->info, snapshot_current); if(info == NULL) { - RaiseError(EIOError, "Error opening File: %s", tsk_error_get()); + RaiseError(EIOError, "Error opening File: %s", safe_tsk_error_get()); tsk_error_reset(); goto on_error; } // CONSTRUCT_CREATE calls _talloc_memdup to allocate memory for the object. object = CONSTRUCT_CREATE(File, File, NULL); - if(object != NULL) { - // CONSTRUCT_INITIALIZE calls the constructor function on the object. - if(CONSTRUCT_INITIALIZE(File, File, Con, object, self->fs, info) == NULL) { - goto on_error; - } - // Tell the File object to manage info. - object->info_is_internal = 1; + if(object == NULL) { + RaiseError(ENoMemory, "Unable to allocate File."); + goto on_error; } - self->current++; + // CONSTRUCT_INITIALIZE calls the constructor function on the object. + if(CONSTRUCT_INITIALIZE(File, File, Con, object, self->fs, info) == NULL) { + goto on_error; + } + // Tell the File object to manage info. + object->info_is_internal = 1; return object; @@ -477,7 +596,16 @@ static File Directory_next(Directory self) { }; static void Directory_iter(Directory self) { + if(self == NULL) { + return; + } + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } self->current = 0; + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } }; VIRTUAL(Directory, Object) { @@ -499,6 +627,11 @@ static int File_dest(File self) { } self->info = NULL; + if(self->iter_lock_initialized != 0) { + tsk_deinit_lock(&self->iter_lock); + self->iter_lock_initialized = 0; + } + return 0; } @@ -523,6 +656,10 @@ static File File_Con(File self, FS_Info fs, TSK_FS_FILE *info) { // Get the total number of attributes. self->max_attr = tsk_fs_file_attr_getsize(info); + /* Initialize iter_lock so concurrent attribute iteration is safe. */ + tsk_init_lock(&self->iter_lock); + self->iter_lock_initialized = 1; + talloc_set_destructor((void *) self, (int(*)(void *)) &File_dest); return self; @@ -569,7 +706,7 @@ static uint64_t File_read_random(File self, TSK_OFF_T offset, }; if(result < 0) { - RaiseError(EIOError, "Read error: %s", tsk_error_get()); + RaiseError(EIOError, "Read error: %s", safe_tsk_error_get()); tsk_error_reset(); return 0; }; @@ -617,23 +754,43 @@ static Directory File_as_directory(File self) { static Attribute File_iternext(File self) { TSK_FS_ATTR *attribute = NULL; Attribute object = NULL; + int snapshot_attr = 0; if(self == NULL) { RaiseError(EInvalidParameter, "Invalid parameter: self."); return NULL; } + /* Snapshot current_attr and advance under iter_lock so concurrent + * iteration from multiple threads doesn't double-consume an + * attribute index or skip ahead. + */ + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } if(self->current_attr < 0 || self->current_attr > self->max_attr) { + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } RaiseError(EInvalidParameter, "Invalid parameter: self->current_attr."); return NULL; } if(self->current_attr == self->max_attr) { + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } return NULL; } + snapshot_attr = self->current_attr; + self->current_attr++; + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } + // It looks like attribute is managed by the SleuthKit. - attribute = (TSK_FS_ATTR *) tsk_fs_file_attr_get_idx(self->info, self->current_attr); + attribute = (TSK_FS_ATTR *) tsk_fs_file_attr_get_idx(self->info, snapshot_attr); if(!attribute) { - RaiseError(EIOError, "Error opening File: %s", tsk_error_get()); + RaiseError(EIOError, "Error opening File: %s", safe_tsk_error_get()); tsk_error_reset(); return NULL; } @@ -646,7 +803,6 @@ static Attribute File_iternext(File self) { goto on_error; } } - self->current_attr++; return object; @@ -658,7 +814,16 @@ static Attribute File_iternext(File self) { }; static void File_iter__(File self) { + if(self == NULL) { + return; + } + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } self->current_attr = 0; + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } }; VIRTUAL(File, Object) { @@ -669,6 +834,19 @@ VIRTUAL(File, Object) { VMETHOD(__iter__) = File_iter__; } END_VIRTUAL +/* Attribute destructor + */ +static int Attribute_dest(Attribute self) { + if(self == NULL) { + return -1; + } + if(self->iter_lock_initialized != 0) { + tsk_deinit_lock(&self->iter_lock); + self->iter_lock_initialized = 0; + } + return 0; +} + /* Attribute constructor */ static Attribute Attribute_Con(Attribute self, TSK_FS_ATTR *info) { @@ -682,17 +860,44 @@ static Attribute Attribute_Con(Attribute self, TSK_FS_ATTR *info) { } self->info = info; + /* Initialize iter_lock so concurrent run iteration is safe. */ + tsk_init_lock(&self->iter_lock); + self->iter_lock_initialized = 1; + + talloc_set_destructor((void *) self, (int(*)(void *)) &Attribute_dest); + return self; } static void Attribute_iter(Attribute self) { + if(self == NULL) { + return; + } + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } self->current = self->info->nrd.run; + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } }; static TSK_FS_ATTR_RUN *Attribute_iternext(Attribute self) { TSK_FS_ATTR_RUN *result = NULL; + if(self == NULL) { + return NULL; + } + /* Take iter_lock so the read-modify-write of self->current can't + * race with another thread iterating the same Attribute object. + */ + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } if(self->current == NULL) { + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } return NULL; } result = self->current; @@ -702,7 +907,22 @@ static TSK_FS_ATTR_RUN *Attribute_iternext(Attribute self) { if(self->current == self->info->nrd.run) { self->current = NULL; } - return (TSK_FS_ATTR_RUN *) talloc_memdup(NULL, result, sizeof(*result)); + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } + { + TSK_FS_ATTR_RUN *dup = (TSK_FS_ATTR_RUN *) talloc_memdup( + self, result, sizeof(*result)); + if(dup == NULL) { + /* Returning NULL from iternext without RaiseError would + * silently terminate iteration as if the run list ended. + * Set a clear error so the caller raises MemoryError. + */ + RaiseError(ENoMemory, + "Unable to duplicate attribute run."); + } + return dup; + } } VIRTUAL(Attribute, Object) { @@ -722,6 +942,11 @@ static int Volume_Info_dest(Volume_Info self) { tsk_vs_close(self->info); self->info = NULL; + if(self->iter_lock_initialized != 0) { + tsk_deinit_lock(&self->iter_lock); + self->iter_lock_initialized = 0; + } + return 0; } @@ -749,10 +974,15 @@ static Volume_Info Volume_Info_Con(Volume_Info self, Img_Info img, self->info = tsk_vs_open((TSK_IMG_INFO *) img->img, offset, type); if(self->info == NULL) { - RaiseError(EIOError, "Error opening Volume_Info: %s", tsk_error_get()); + RaiseError(EIOError, "Error opening Volume_Info: %s", safe_tsk_error_get()); tsk_error_reset(); return NULL; } + + /* Initialize iter_lock so concurrent partition iteration is safe. */ + tsk_init_lock(&self->iter_lock); + self->iter_lock_initialized = 1; + talloc_set_destructor((void *) self, (int(*)(void *)) &Volume_Info_dest); return self; @@ -762,19 +992,40 @@ static void Volume_Info_iter(Volume_Info self) { if(self == NULL) { return; } + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } self->current = 0; + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } }; static TSK_VS_PART_INFO *Volume_Info_iternext(Volume_Info self) { + int snapshot_current = 0; if(self == NULL || self->info == NULL) { return NULL; } + /* Snapshot and advance the cursor under iter_lock so concurrent + * iteration from multiple threads consumes distinct partitions. + */ + if(self->iter_lock_initialized != 0) { + tsk_take_lock(&self->iter_lock); + } /* Stop iteration at INT_MAX to avoid signed overflow or wrapping to a negative integer */ if(self->current < 0 || self->current == INT_MAX) { + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } return NULL; } - return (TSK_VS_PART_INFO *)tsk_vs_part_get(self->info, self->current++); + snapshot_current = self->current; + self->current++; + if(self->iter_lock_initialized != 0) { + tsk_release_lock(&self->iter_lock); + } + return (TSK_VS_PART_INFO *)tsk_vs_part_get(self->info, snapshot_current); }; VIRTUAL(Volume_Info, Object) { diff --git a/tsk3.h b/tsk3.h index 0b22500..c20c433 100644 --- a/tsk3.h +++ b/tsk3.h @@ -14,6 +14,40 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* Note on thread-safety: + * + * pytsk3 declares Py_MOD_GIL_NOT_USED on free-threaded Python builds + * and is built against libtsk with TSK_MULTITHREAD_LIB enabled. The + * intended contract for callers is: + * + * - Any pytsk3 object may be shared across threads. Methods on + * Img_Info, FS_Info, File, Directory, Volume_Info, and Attribute + * can be called concurrently from multiple threads on the same + * instance without external locking. + * + * - Per-instance state (iterator cursors, open/close transitions) + * is protected by a per-instance tsk_lock_t inside the C class + * struct. Iterators advance atomically, so two threads sharing a + * Directory or Volume_Info will each consume distinct entries + * without skipping or duplicating. + * + * - libtsk's image cache (cache_lock) and per-thread error TLS + * (tsk_error_get) provide the underlying primitives. Both depend + * on TSK_MULTITHREAD_LIB; see setup.py. + * + * - Python subclasses that override read() (e.g. an Img_Info that + * wraps a file-like object) must provide their own locking around + * any stateful resource they touch -- pytsk3 will dispatch into + * the override from any thread. tests/test_lib.py shows the + * pattern. + * + * - User code that mixes raw libtsk handles obtained from a pytsk3 + * wrapper (e.g. via .info on a borrowed struct) with concurrent + * parent destruction is unsafe; the parent-keepalive on yielded + * children prevents this for normal Python reference flow. + */ + #if !defined( TSK3_H_ ) #define TSK3_H_ @@ -63,6 +97,11 @@ BIND_STRUCT(TSK_VS_INFO); Then open an inode or path f = fs.open_dir(inode = 2) + + Thread-safety: methods on Img_Info are safe to call concurrently + from multiple threads. Img_Info_close serializes against in-flight + Img_Info_read via state_lock so a reader never observes a torn + img_is_open transition. */ CCLASS(Img_Info, Object) PRIVATE Extended_TSK_IMG_INFO *img; @@ -71,10 +110,19 @@ CCLASS(Img_Info, Object) */ PRIVATE int img_is_internal; - /* Value to indicate if img is open + /* Value to indicate if img is open. Read/written under state_lock + * so concurrent close() and read() see a consistent value. */ PRIVATE int img_is_open; + /* Per-instance mutex protecting open/close transitions. Init in + * Con, deinit in dest. Held while Img_Info_read consults + * img_is_open and dispatches to libtsk so a parallel close() + * cannot tear down state mid-read. Not exposed to Python. + */ + PRIVATE tsk_lock_t state_lock; + PRIVATE int state_lock_initialized; + /* Open an image using the Sleuthkit. * * DEFAULT(type) = TSK_IMG_TYPE_DETECT; @@ -93,11 +141,20 @@ CCLASS(Img_Info, Object) END_CCLASS /** This object handles volumes. + + Thread-safety: the iterator cursor (current) is mutated under + iter_lock during __iter__ and iternext, so two threads can share + a Volume_Info and iterate concurrently without skipping or + duplicating partitions or scribbling on the cursor. */ CCLASS(Volume_Info, Object) FOREIGN TSK_VS_INFO *info; int current; + /* Per-instance mutex serializing iterator cursor updates. */ + PRIVATE tsk_lock_t iter_lock; + PRIVATE int iter_lock_initialized; + /** Open a volume using the Sleuthkit. DEFAULT(offset) = 0; @@ -124,6 +181,12 @@ CCLASS(Attribute, Object) FOREIGN TSK_FS_ATTR *info; FOREIGN TSK_FS_ATTR_RUN *current; + /* Per-instance mutex serializing iterator cursor updates. See + * Volume_Info above for rationale. + */ + PRIVATE tsk_lock_t iter_lock; + PRIVATE int iter_lock_initialized; + Attribute METHOD(Attribute, Con, TSK_FS_ATTR *info); void METHOD(Attribute, __iter__); @@ -156,6 +219,12 @@ CCLASS(File, Object) int max_attr; int current_attr; + /* Per-instance mutex serializing iterator cursor updates so two + * threads can share a File and iterate its attributes safely. + */ + PRIVATE tsk_lock_t iter_lock; + PRIVATE int iter_lock_initialized; + File METHOD(File, Con, struct FS_Info_t *fs, TSK_FS_FILE *info); /** Read a buffer from a random location in the file. @@ -193,9 +262,15 @@ CCLASS(Directory, Object) /* Total number of files in this directory */ size_t size; - /* Current file returned in the next iteration */ + /* Current file returned in the next iteration. Mutated under + * iter_lock so two threads can share a Directory and iterate + * concurrently without missed or duplicated entries. + */ int current; + PRIVATE tsk_lock_t iter_lock; + PRIVATE int iter_lock_initialized; + /* We can open the directory using a path, its inode number. DEFAULT(path) = NULL;