Skip to content

Instances of subclasses of PyLong_Type created in C extensions use Java object handles, possibly causing segmentation faults #595

@actapia

Description

@actapia

With GraalPy 3.12.8 on Linux, built from source at f466f8d, subclasses of PyLong_Type created using the C API return Java handles from __new__ instead of real PyObject pointers. When an extension attempts to access an instance of the PyLong_Type subclass as a PyObject, it tries to dereference the Java handle, possibly causing a segmentation fault.

A real example of code that can expose this bug can be seen in boost::python's enum.cpp. In the enum_base::add_value method, boost::python needs to access the `name` field of the enum_object struct it uses for instances of enum_type_object, but this fails on GraalPy. Similarly, the example C extension below attempts to access the `member` field of the custom_object struct in its set_member function.

/* custom_pyobject.c */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stddef.h>

/* Instance layout for the custom type: a PyLongObject header followed by one
 * extra object pointer. The type object registers PyLong_Type as its base. */
typedef struct custom_object {
  PyLongObject base_object;  /* must come first so the struct can be used as a PyLongObject */
  PyObject* member;          /* exposed read-only to Python via custom_members; written by set_member */
} custom_object;

static PyMemberDef custom_members[] = {
  {"member", Py_T_OBJECT_EX, offsetof(custom_object, member), Py_READONLY, 0},
  {0, 0, 0, 0, 0}
};

/* tp_dealloc for custom_object: drops the reference held in `member` (if any)
 * and then releases the instance through its type's tp_free. */
static void custom_dealloc(custom_object* self) {
  PyObject* as_object = (PyObject*)self;
  Py_XDECREF(self->member);
  freefunc release = Py_TYPE(self)->tp_free;
  release(as_object);
}

/* tp_repr for custom_object.
 *
 * Returns a new reference to the string "custom_object(<repr of member>)",
 * or NULL with an exception set if computing the member's repr fails.
 *
 * The `%R` conversion lets PyUnicode_FromFormat compute and release the repr
 * itself; calling PyObject_Repr() by hand and passing the result as a format
 * argument would leak that new reference (and would pass NULL on failure).
 */
static PyObject* custom_repr(PyObject* self_) {
  custom_object* self = (custom_object*)self_;
  if (self->member == NULL) {
    /* `member` has never been set; keep the "<NULL>" text that
     * PyObject_Repr(NULL) produces instead of handing NULL to `%R`. */
    return PyUnicode_FromString("custom_object(<NULL>)");
  }
  return PyUnicode_FromFormat("custom_object(%R)", self->member);
}

static PyTypeObject custom_type_object = {
  PyObject_HEAD_INIT(NULL)
  "custom_object",                          /* tp_name */
  sizeof(custom_object),                    /* tp_basicsize */
  0,                                        /* tp_itemsize */
  (destructor) custom_dealloc,              /* tp_dealloc */
  0,                                        /* tp_print */
  0,                                        /* tp_getattr */
  0,                                        /* tp_setattr */
  0,                                        /* tp_compare */
  custom_repr,                              /* tp_repr */
  0,                                        /* tp_as_number */
  0,                                        /* tp_as_sequence */
  0,                                        /* tp_as_mapping */
  0,                                        /* tp_hash */
  0,                                        /* tp_call */
  0,                                        /* tp_str */
  0,                                        /* tp_getattro */
  0,                                        /* tp_setattro */
  0,                                        /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
  0,                                        /* tp_doc */
  0,                                        /* tp_traverse */
  0,                                        /* tp_clear */
  0,                                        /* tp_richcompare */
  0,                                        /* tp_weaklistoffset */
  0,                                        /* tp_iter */
  0,                                        /* tp_iternext */
  0,                                        /* tp_methods */
  custom_members,                           /* tp_members */
  0,                                        /* tp_getset */
  &PyLong_Type,                             /* tp_base */
  0,                                        /* tp_dict */
  0,                                        /* tp_descr_get */
  0,                                        /* tp_descr_set */
  0,                                        /* tp_dictoffset */
  0,                                        /* tp_init */
  0,                                        /* tp_alloc */
  0,                                        /* tp_new */
  0,                                        /* tp_free */
  0,                                        /* tp_is_gc */
  0,                                        /* tp_bases */
  0,                                        /* tp_mro */
  0,                                        /* tp_cache */
  0,                                        /* tp_subclasses */
  0,                                        /* tp_weaklist */
  0                                         /* tp_del */
};

/* Py_mod_exec slot: registers custom_type_object on the freshly created module.
 *
 * Returns 0 on success and -1 (with the exception set by PyModule_AddType
 * left in place) on failure, so a failed registration aborts the import
 * instead of being silently ignored.
 */
int custom_pyobject_mod_exec(PyObject* module) {
  if (PyModule_AddType(module, &custom_type_object) < 0) {
    return -1;
  }
  return 0;
}

static PyModuleDef_Slot custom_pyobject_slots[] = {
  {Py_mod_exec, custom_pyobject_mod_exec},
  {0, NULL}
};

/* set_member(obj, value): store `value` in obj's `member` field.
 *
 * args: a 2-tuple (obj, value); obj must be an instance of custom_object
 *       (or a subclass), otherwise TypeError is raised by the "O!" check
 *       before the pointer is reinterpreted as a custom_object.
 * Returns a new reference to None, or NULL with an exception set.
 *
 * The struct owns a strong reference to `member` (custom_dealloc releases it),
 * so the new value is INCREF'd before being stored, and the previous value is
 * released only after the field has been overwritten.
 */
PyObject* set_member(PyObject* self, PyObject* args) {
  PyObject* target;
  PyObject* value;
  if (!PyArg_ParseTuple(args, "O!O", &custom_type_object, &target, &value)) {
    return NULL;
  }
  custom_object* obj = (custom_object*)target;
  PyObject* previous = obj->member;
  Py_INCREF(value);
  obj->member = value;
  Py_XDECREF(previous);
  Py_RETURN_NONE;
}

/* Module-level function table; the all-NULL entry terminates it. The
 * docstring names the field set_member actually writes (`member`). */
static PyMethodDef custom_pyobject_methods[] = {
  {"set_member", set_member, METH_VARARGS, "Set member."},
  {NULL, NULL, 0, NULL}
};

/* Module definition for `custom_pyobject`; handed to PyModuleDef_Init by the
 * module's init function. */
static struct PyModuleDef custom_pyobject_module = {
  .m_base = PyModuleDef_HEAD_INIT,
  .m_name = "custom_pyobject",
  .m_size = 0,                           /* no per-module state is requested */
  .m_methods = custom_pyobject_methods,  /* module-level functions (set_member) */
  .m_slots = custom_pyobject_slots       /* exec slot that registers the custom type */
};

PyMODINIT_FUNC PyInit_custom_pyobject(void) {
  return PyModuleDef_Init(&custom_pyobject_module);
}

If the code above is compiled to custom_pyobject.so, then the Python code below fails with a segmentation fault on GraalPy:

import custom_pyobject
obj = custom_pyobject.custom_object(10)
custom_pyobject.set_member(obj, "foo")
print(obj)

With CPython, the code prints custom_object('foo').

The relevant discussion on Slack can be found at https://graalvm.slack.com/archives/CNA7PDH2N/p1768011927297709.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions