From 3f89994a570434fbf9bc69b5beb8075023828f6e Mon Sep 17 00:00:00 2001
From: Evert Lammerts <evert.lammerts@gmail.com>
Date: Fri, 26 Jun 2026 16:31:24 +0200
Subject: [PATCH] =?UTF-8?q?Introduce=20NumpyArray=20fa=C3=A7ade=20over=20p?=
 =?UTF-8?q?y::array?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a thin wrapper class `NumpyArray` (src/duckdb_py/include/duckdb_python/
numpy/numpy_array.hpp) whose single data member is a `py::array`. This is now
the only spot in the codebase that names `py::array` as the underlying
numpy-array representation, so a future migration to nanobind's `nb::ndarray`
starts from this one header. Call sites that still reach the raw object through
GetArray() (attribute access, iteration, resize) will need a follow-up pass.

The façade exposes Data()/MutableData() (data buffer pointers), an Allocate()
factory (dtype + count), a FromObject() factory, an `explicit
NumpyArray(py::array)` constructor (a py::object argument implicitly converts
via np.asarray semantics, matching prior behaviour), and GetArray() accessors
for .attr(...) calls, iteration, resize, and handing the array back to Python.
It is default-constructible, copyable, and movable.

Route every direct py::array use through the façade:
- numpy/raw_array_wrapper.{hpp,cpp}: member + Allocate/MutableData, resize via
        GetArray()
- pandas/pandas_bind.hpp (RegisteredArray) and pandas/column/
        pandas_numpy_column.hpp: members + constructors take NumpyArray
- numpy/numpy_scan.cpp: scan helpers take NumpyArray&, .data() -> .Data()
- numpy/numpy_bind.cpp, pandas/bind.cpp: construct NumpyArray instead of
        py::array; dtype attrs via GetArray()
- numpy/array_wrapper.cpp (ToArray): move out / bool-check via GetArray()
- pyconnection.cpp, python_replacement_scan.cpp: py::cast<py::array>(...) ->
        wrap the object in NumpyArray and use GetArray()
---
 .../duckdb_python/numpy/numpy_array.hpp       | 77 +++++++++++++++++++
 .../duckdb_python/numpy/raw_array_wrapper.hpp |  3 +-
 .../pandas/column/pandas_numpy_column.hpp     | 10 ++-
 .../duckdb_python/pandas/pandas_bind.hpp      |  5 +-
 src/duckdb_py/numpy/array_wrapper.cpp         |  8 +-
 src/duckdb_py/numpy/numpy_bind.cpp            |  7 +-
 src/duckdb_py/numpy/numpy_scan.cpp            | 23 +++---
 src/duckdb_py/numpy/raw_array_wrapper.cpp     |  8 +-
 src/duckdb_py/pandas/bind.cpp                 | 29 +++----
 src/duckdb_py/pyconnection.cpp                |  5 +-
 src/duckdb_py/python_replacement_scan.cpp     |  7 +-
 11 files changed, 135 insertions(+), 47 deletions(-)
 create mode 100644 src/duckdb_py/include/duckdb_python/numpy/numpy_array.hpp

diff --git a/src/duckdb_py/include/duckdb_python/numpy/numpy_array.hpp b/src/duckdb_py/include/duckdb_python/numpy/numpy_array.hpp
new file mode 100644
index 00000000..b9aae9f4
--- /dev/null
+++ b/src/duckdb_py/include/duckdb_python/numpy/numpy_array.hpp
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb_python/numpy/numpy_array.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb_python/pybind11/pybind_wrapper.hpp"
+#include "duckdb.hpp"
+
+namespace duckdb {
+
+//! Thin façade over pybind11's `py::array`.
+//!
+//! This class is the SINGLE place in the codebase that names `py::array` as the
+//! underlying numpy-array representation. A future migration to nanobind's
+//! `nb::ndarray` should only require changing the member type and the handful of
+//! small methods defined here -- every call site goes through this wrapper
+//! instead of touching `py::array` directly.
+//!
+//! For operations that don't (yet) have a first-class method on the façade
+//! (Python attribute access via `.attr(...)`, iteration, resizing, handing the
+//! array back to Python, ...) use `GetArray()` to reach the underlying object.
+class NumpyArray {
+public:
+	//! Default-constructs the wrapped `py::array`. NOTE(review): confirm what pybind11 creates here (empty array?).
+	NumpyArray() = default;
+	//! Wraps an existing array. A `py::object` argument converts implicitly to `py::array` (np.asarray semantics),
+	//! matching what the call sites relied on before this façade existed.
+	explicit NumpyArray(py::array arr) : array(std::move(arr)) {
+	}
+
+	NumpyArray(NumpyArray &&) = default;
+	NumpyArray &operator=(NumpyArray &&) = default;
+	NumpyArray(const NumpyArray &) = default;
+	NumpyArray &operator=(const NumpyArray &) = default;
+
+public:
+	//! Allocate a fresh 1-D numpy array holding `count` elements of the given dtype.
+	//! The dtype is passed straight through to the `py::array` constructor.
+	static NumpyArray Allocate(const py::dtype &dtype, idx_t count) {
+		return NumpyArray(py::array(dtype, count));
+	}
+
+	//! Build a NumpyArray from an arbitrary Python object by constructing a `py::array` from it.
+	static NumpyArray FromObject(py::object obj) {
+		return NumpyArray(py::array(std::move(obj)));
+	}
+
+	//! Const pointer to the data buffer, as returned by `py::array::data()`.
+	const void *Data() const {
+		return array.data();
+	}
+
+	//! Writable pointer to the data buffer, as returned by `py::array::mutable_data()`.
+	void *MutableData() {
+		return array.mutable_data();
+	}
+
+	//! Escape hatch to the wrapped object for anything the façade does not expose itself
+	//! (`.attr(...)`, iteration, resize, returning the array to Python).
+	py::array &GetArray() {
+		return array;
+	}
+	const py::array &GetArray() const {
+		return array;
+	}
+
+private:
+	//! The single data member -- the one spot that later becomes `nb::ndarray`.
+	py::array array;
+};
+
+} // namespace duckdb
diff --git a/src/duckdb_py/include/duckdb_python/numpy/raw_array_wrapper.hpp b/src/duckdb_py/include/duckdb_python/numpy/raw_array_wrapper.hpp
index 124f2112..d24e2612 100644
--- a/src/duckdb_py/include/duckdb_python/numpy/raw_array_wrapper.hpp
+++ b/src/duckdb_py/include/duckdb_python/numpy/raw_array_wrapper.hpp
@@ -9,6 +9,7 @@
 #pragma once
 
 #include "duckdb_python/pybind11/pybind_wrapper.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 #include "duckdb.hpp"
 
 namespace duckdb {
@@ -17,7 +18,7 @@ struct RawArrayWrapper {
 
 	explicit RawArrayWrapper(const LogicalType &type);
 
-	py::array array;
+	NumpyArray array;
 	data_ptr_t data;
 	LogicalType type;
 	idx_t type_width;
diff --git a/src/duckdb_py/include/duckdb_python/pandas/column/pandas_numpy_column.hpp b/src/duckdb_py/include/duckdb_python/pandas/column/pandas_numpy_column.hpp
index 9d8587ee..20b630d4 100644
--- a/src/duckdb_py/include/duckdb_python/pandas/column/pandas_numpy_column.hpp
+++ b/src/duckdb_py/include/duckdb_python/pandas/column/pandas_numpy_column.hpp
@@ -2,18 +2,20 @@
 
 #include "duckdb_python/pandas/pandas_column.hpp"
 #include "duckdb_python/pybind11/pybind_wrapper.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 
 namespace duckdb {
 
 class PandasNumpyColumn : public PandasColumn {
 public:
-	PandasNumpyColumn(py::array array_p) : PandasColumn(PandasColumnBackend::NUMPY), array(std::move(array_p)) {
-		D_ASSERT(py::hasattr(array, "strides"));
-		stride = array.attr("strides").attr("__getitem__")(0).cast<idx_t>();
+	PandasNumpyColumn(NumpyArray array_p) : PandasColumn(PandasColumnBackend::NUMPY), array(std::move(array_p)) {
+		auto &arr = array.GetArray(); // underlying handle, needed for the Python attribute access below
+		D_ASSERT(py::hasattr(arr, "strides"));
+		stride = arr.attr("strides").attr("__getitem__")(0).cast<idx_t>(); // strides[0] of the wrapped array
 	}
 
 public:
-	py::array array;
+	NumpyArray array;
 	idx_t stride;
 };
 
diff --git a/src/duckdb_py/include/duckdb_python/pandas/pandas_bind.hpp b/src/duckdb_py/include/duckdb_python/pandas/pandas_bind.hpp
index b6a70def..805f7cf7 100644
--- a/src/duckdb_py/include/duckdb_python/pandas/pandas_bind.hpp
+++ b/src/duckdb_py/include/duckdb_python/pandas/pandas_bind.hpp
@@ -3,6 +3,7 @@
 #include "duckdb_python/pybind11/pybind_wrapper.hpp"
 #include "duckdb_python/pybind11/python_object_container.hpp"
 #include "duckdb_python/numpy/numpy_type.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 #include "duckdb/common/helper.hpp"
 #include "duckdb_python/pandas/pandas_column.hpp"
 
@@ -11,9 +12,9 @@ namespace duckdb {
 class ClientContext;
 
 struct RegisteredArray {
-	explicit RegisteredArray(py::array numpy_array) : numpy_array(std::move(numpy_array)) {
+	explicit RegisteredArray(NumpyArray numpy_array) : numpy_array(std::move(numpy_array)) {
 	}
-	py::array numpy_array;
+	NumpyArray numpy_array; // wrapped array; the scan code reads its buffer through Data()
 };
 
 struct PandasColumnBindData {
diff --git a/src/duckdb_py/numpy/array_wrapper.cpp b/src/duckdb_py/numpy/array_wrapper.cpp
index 60e9d95d..7cf38f6d 100644
--- a/src/duckdb_py/numpy/array_wrapper.cpp
+++ b/src/duckdb_py/numpy/array_wrapper.cpp
@@ -739,15 +739,15 @@ void ArrayWrapper::Append(idx_t current_offset, Vector &input, idx_t source_size
 }
 
 py::object ArrayWrapper::ToArray() const {
-	D_ASSERT(data->array && mask->array);
+	D_ASSERT(data->array.GetArray() && mask->array.GetArray());
 	data->Resize(data->count);
 	if (!requires_mask) {
-		return std::move(data->array);
+		return std::move(data->array.GetArray());
 	}
 	mask->Resize(mask->count);
 	// construct numpy arrays from the data and the mask
-	auto values = std::move(data->array);
-	auto nullmask = std::move(mask->array);
+	auto values = std::move(data->array.GetArray());
+	auto nullmask = std::move(mask->array.GetArray());
 
 	// create masked array and return it
 	auto masked_array = py::module::import("numpy.ma").attr("masked_array")(values, nullmask);
diff --git a/src/duckdb_py/numpy/numpy_bind.cpp b/src/duckdb_py/numpy/numpy_bind.cpp
index 9ca819af..c197e4ba 100644
--- a/src/duckdb_py/numpy/numpy_bind.cpp
+++ b/src/duckdb_py/numpy/numpy_bind.cpp
@@ -1,5 +1,6 @@
 #include "duckdb_python/numpy/numpy_bind.hpp"
 #include "duckdb_python/numpy/array_wrapper.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 #include "duckdb_python/pandas/pandas_analyzer.hpp"
 #include "duckdb_python/pandas/column/pandas_numpy_column.hpp"
 #include "duckdb_python/pandas/pandas_bind.hpp"
@@ -34,7 +35,7 @@ void NumpyBind::Bind(ClientContext &context, py::handle df, vector<PandasColumnB
 		auto column = get_fun(df_columns[col_idx]);
 
 		if (bind_data.numpy_type.type == NumpyNullableType::FLOAT_16) {
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(py::array(column.attr("astype")("float32")));
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(NumpyArray(column.attr("astype")("float32")));
 			bind_data.numpy_type.type = NumpyNullableType::FLOAT_32;
 			duckdb_col_type = NumpyToLogicalType(bind_data.numpy_type);
 		} else if (bind_data.numpy_type.type == NumpyNullableType::STRING) {
@@ -53,9 +54,9 @@ void NumpyBind::Bind(ClientContext &context, py::handle df, vector<PandasColumnB
 			duckdb_col_type = LogicalType::ENUM(enum_entries_vec, size);
 			auto pandas_col = uniq.attr("__getitem__")(1);
 			bind_data.internal_categorical_type = string(py::str(pandas_col.attr("dtype")));
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(pandas_col);
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(NumpyArray(pandas_col));
 		} else {
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(column);
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(NumpyArray(column));
 			duckdb_col_type = NumpyToLogicalType(bind_data.numpy_type);
 		}
 
diff --git a/src/duckdb_py/numpy/numpy_scan.cpp b/src/duckdb_py/numpy/numpy_scan.cpp
index 4e1e61e4..9c965968 100644
--- a/src/duckdb_py/numpy/numpy_scan.cpp
+++ b/src/duckdb_py/numpy/numpy_scan.cpp
@@ -14,13 +14,14 @@
 #include "duckdb_python/numpy/numpy_type.hpp"
 #include "duckdb/function/scalar/nested_functions.hpp"
 #include "duckdb_python/numpy/numpy_scan.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 #include "duckdb_python/pandas/column/pandas_numpy_column.hpp"
 
 namespace duckdb {
 
 template <class T>
-void ScanNumpyColumn(py::array &numpy_col, idx_t stride, idx_t offset, Vector &out, idx_t count) {
-	auto src_ptr = (T *)numpy_col.data();
+void ScanNumpyColumn(NumpyArray &numpy_col, idx_t stride, idx_t offset, Vector &out, idx_t count) {
+	auto src_ptr = (T *)numpy_col.Data();
 	if (stride == sizeof(T)) {
 		FlatVector::SetData(out, data_ptr_cast(src_ptr + offset), count_t(count));
 	} else {
@@ -32,8 +33,8 @@ void ScanNumpyColumn(py::array &numpy_col, idx_t stride, idx_t offset, Vector &o
 }
 
 template <class T, class V>
-void ScanNumpyCategoryTemplated(py::array &column, idx_t offset, Vector &out, idx_t count) {
-	auto src_ptr = (T *)column.data();
+void ScanNumpyCategoryTemplated(NumpyArray &column, idx_t offset, Vector &out, idx_t count) {
+	auto src_ptr = (T *)column.Data();
 	auto tgt_ptr = (V *)FlatVector::GetData(out);
 	auto &tgt_mask = FlatVector::ValidityMutable(out);
 	for (idx_t i = 0; i < count; i++) {
@@ -47,7 +48,7 @@ void ScanNumpyCategoryTemplated(py::array &column, idx_t offset, Vector &out, id
 }
 
 template <class T>
-void ScanNumpyCategory(py::array &column, idx_t count, idx_t offset, Vector &out, string &src_type) {
+void ScanNumpyCategory(NumpyArray &column, idx_t count, idx_t offset, Vector &out, string &src_type) {
 	if (src_type == "int8") {
 		ScanNumpyCategoryTemplated<int8_t, T>(column, offset, out, count);
 	} else if (src_type == "int16") {
@@ -63,7 +64,7 @@ void ScanNumpyCategory(py::array &column, idx_t count, idx_t offset, Vector &out
 
 static void ApplyMask(PandasColumnBindData &bind_data, ValidityMask &validity, idx_t count, idx_t offset) {
 	D_ASSERT(bind_data.mask);
-	auto mask = reinterpret_cast<const bool *>(bind_data.mask->numpy_array.data());
+	auto mask = reinterpret_cast<const bool *>(bind_data.mask->numpy_array.Data());
 	for (idx_t i = 0; i < count; i++) {
 		auto is_null = mask[offset + i];
 		if (is_null) {
@@ -236,18 +237,18 @@ void NumpyScan::Scan(ClientContext &context, PandasColumnBindData &bind_data, id
 		ScanNumpyMasked<int64_t>(bind_data, count, offset, out);
 		break;
 	case NumpyNullableType::FLOAT_32:
-		ScanNumpyFpColumn<float>(bind_data, reinterpret_cast<const float *>(array.data()), numpy_col.stride, count,
+		ScanNumpyFpColumn<float>(bind_data, reinterpret_cast<const float *>(array.Data()), numpy_col.stride, count,
 		                         offset, out);
 		break;
 	case NumpyNullableType::FLOAT_64:
-		ScanNumpyFpColumn<double>(bind_data, reinterpret_cast<const double *>(array.data()), numpy_col.stride, count,
+		ScanNumpyFpColumn<double>(bind_data, reinterpret_cast<const double *>(array.Data()), numpy_col.stride, count,
 		                          offset, out);
 		break;
 	case NumpyNullableType::DATETIME_NS:
 	case NumpyNullableType::DATETIME_MS:
 	case NumpyNullableType::DATETIME_US:
 	case NumpyNullableType::DATETIME_S: {
-		auto src_ptr = reinterpret_cast<const int64_t *>(array.data());
+		auto src_ptr = reinterpret_cast<const int64_t *>(array.Data());
 		auto tgt_ptr = FlatVector::GetDataMutable<timestamp_t>(out);
 
 		using timestamp_convert_func = std::function<timestamp_t(int64_t)>;
@@ -307,7 +308,7 @@ void NumpyScan::Scan(ClientContext &context, PandasColumnBindData &bind_data, id
 	case NumpyNullableType::TIMEDELTA_US:
 	case NumpyNullableType::TIMEDELTA_MS:
 	case NumpyNullableType::TIMEDELTA_S: {
-		auto src_ptr = reinterpret_cast<const int64_t *>(array.data());
+		auto src_ptr = reinterpret_cast<const int64_t *>(array.Data());
 		auto tgt_ptr = FlatVector::GetDataMutable<interval_t>(out);
 		auto &mask = FlatVector::ValidityMutable(out);
 
@@ -352,7 +353,7 @@ void NumpyScan::Scan(ClientContext &context, PandasColumnBindData &bind_data, id
 	case NumpyNullableType::STRING:
 	case NumpyNullableType::OBJECT: {
 		// Get the source pointer of the numpy array
-		auto src_ptr = (PyObject **)array.data(); // NOLINT
+		auto src_ptr = (PyObject **)array.Data(); // NOLINT
 		const bool is_object_col = bind_data.numpy_type.type == NumpyNullableType::OBJECT;
 		if (is_object_col && out.GetType().id() != LogicalTypeId::VARCHAR) {
 			//! We have determined the underlying logical type of this object column
diff --git a/src/duckdb_py/numpy/raw_array_wrapper.cpp b/src/duckdb_py/numpy/raw_array_wrapper.cpp
index 178e02f6..df89a0f6 100644
--- a/src/duckdb_py/numpy/raw_array_wrapper.cpp
+++ b/src/duckdb_py/numpy/raw_array_wrapper.cpp
@@ -151,14 +151,14 @@ string RawArrayWrapper::DuckDBToNumpyDtype(const LogicalType &type) {
 void RawArrayWrapper::Initialize(idx_t capacity) {
 	string dtype = DuckDBToNumpyDtype(type);
 
-	array = py::array(py::dtype(dtype), capacity);
-	data = data_ptr_cast(array.mutable_data());
+	array = NumpyArray::Allocate(py::dtype(dtype), capacity); // 1-D array of `capacity` elements
+	data = data_ptr_cast(array.MutableData()); // cache the raw buffer pointer next to the array
 }
 
 void RawArrayWrapper::Resize(idx_t new_capacity) {
 	vector<py::ssize_t> new_shape {py::ssize_t(new_capacity)};
-	array.resize(new_shape, false);
-	data = data_ptr_cast(array.mutable_data());
+	array.GetArray().resize(new_shape, false); // the façade has no resize method; go through the py::array
+	data = data_ptr_cast(array.MutableData()); // refresh the cached buffer pointer after the resize
 }
 
 } // namespace duckdb
diff --git a/src/duckdb_py/pandas/bind.cpp b/src/duckdb_py/pandas/bind.cpp
index 02de2a75..edc85132 100644
--- a/src/duckdb_py/pandas/bind.cpp
+++ b/src/duckdb_py/pandas/bind.cpp
@@ -1,6 +1,7 @@
 #include "duckdb_python/pandas/pandas_bind.hpp"
 #include "duckdb_python/pandas/pandas_analyzer.hpp"
 #include "duckdb_python/pandas/column/pandas_numpy_column.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 #include "duckdb_python/pyconnection/pyconnection.hpp"
 
 namespace duckdb {
@@ -53,19 +54,19 @@ static LogicalType BindColumn(ClientContext &context, PandasBindColumn &column_p
 
 	if (column_has_mask) {
 		// masked object, fetch the internal data and mask array
-		bind_data.mask = std::make_unique<RegisteredArray>(column.attr("array").attr("_mask"));
+		bind_data.mask = std::make_unique<RegisteredArray>(NumpyArray(column.attr("array").attr("_mask")));
 	}
 
 	if (bind_data.numpy_type.type == NumpyNullableType::CATEGORY) {
 		// for category types, we create an ENUM type for string or use the converted numpy type for the rest
 		D_ASSERT(py::hasattr(column, "cat"));
 		D_ASSERT(py::hasattr(column.attr("cat"), "categories"));
-		auto categories = py::array(column.attr("cat").attr("categories"));
-		auto categories_pd_type = ConvertNumpyType(categories.attr("dtype"));
+		NumpyArray categories(column.attr("cat").attr("categories"));
+		auto categories_pd_type = ConvertNumpyType(categories.GetArray().attr("dtype"));
 		if (categories_pd_type.type == NumpyNullableType::OBJECT) {
 			// Let's hope the object type is a string.
 			bind_data.numpy_type.type = NumpyNullableType::CATEGORY;
-			vector<string> enum_entries = py::cast<vector<string>>(categories);
+			vector<string> enum_entries = py::cast<vector<string>>(categories.GetArray());
 			idx_t size = enum_entries.size();
 			Vector enum_entries_vec(LogicalType::VARCHAR, size);
 			auto enum_entries_ptr = FlatVector::GetDataMutable<string_t>(enum_entries_vec);
@@ -74,33 +75,33 @@ static LogicalType BindColumn(ClientContext &context, PandasBindColumn &column_p
 			}
 			D_ASSERT(py::hasattr(column.attr("cat"), "codes"));
 			column_type = LogicalType::ENUM(enum_entries_vec, size);
-			auto pandas_col = py::array(column.attr("cat").attr("codes"));
-			bind_data.internal_categorical_type = string(py::str(pandas_col.attr("dtype")));
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(pandas_col);
+			NumpyArray pandas_col(column.attr("cat").attr("codes"));
+			bind_data.internal_categorical_type = string(py::str(pandas_col.GetArray().attr("dtype")));
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(std::move(pandas_col));
 		} else {
-			auto pandas_col = py::array(column.attr("to_numpy")());
-			auto numpy_type = pandas_col.attr("dtype");
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(pandas_col);
+			NumpyArray pandas_col(column.attr("to_numpy")());
+			auto numpy_type = pandas_col.GetArray().attr("dtype");
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(std::move(pandas_col));
 			// for category types (non-strings), we use the converted numpy type
 			bind_data.numpy_type = ConvertNumpyType(numpy_type);
 			column_type = NumpyToLogicalType(bind_data.numpy_type);
 		}
 	} else if (bind_data.numpy_type.type == NumpyNullableType::FLOAT_16) {
 		auto pandas_array = column.attr("array");
-		bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(py::array(column.attr("to_numpy")("float32")));
+		bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(NumpyArray(column.attr("to_numpy")("float32")));
 		bind_data.numpy_type.type = NumpyNullableType::FLOAT_32;
 		column_type = NumpyToLogicalType(bind_data.numpy_type);
 	} else {
 		auto pandas_array = column.attr("array");
 		if (py::hasattr(pandas_array, "_data")) {
 			// This means we can access the numpy array directly
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(column.attr("array").attr("_data"));
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(NumpyArray(column.attr("array").attr("_data")));
 		} else if (py::hasattr(pandas_array, "asi8")) {
 			// This is a datetime object, has the option to get the array as int64_t's
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(py::array(pandas_array.attr("asi8")));
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(NumpyArray(pandas_array.attr("asi8")));
 		} else {
 			// Otherwise we have to get it through 'to_numpy()'
-			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(py::array(column.attr("to_numpy")()));
+			bind_data.pandas_col = std::make_unique<PandasNumpyColumn>(NumpyArray(column.attr("to_numpy")()));
 		}
 		column_type = NumpyToLogicalType(bind_data.numpy_type);
 	}
diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp
index 5146b38c..96b86639 100644
--- a/src/duckdb_py/pyconnection.cpp
+++ b/src/duckdb_py/pyconnection.cpp
@@ -26,6 +26,7 @@
 #include "duckdb_python/pyresult.hpp"
 #include "duckdb_python/python_conversion.hpp"
 #include "duckdb_python/numpy/numpy_type.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 #include "duckdb_python/jupyter_progress_bar_display.hpp"
 #include "duckdb_python/pyfilesystem.hpp"
 #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
@@ -2352,7 +2353,7 @@ bool IsValidNumpyDimensions(const py::handle &object, int &dim) {
 	if (!py::isinstance(object, import_cache.numpy.ndarray())) {
 		return false;
 	}
-	auto shape = (py::cast<py::array>(object)).attr("shape");
+	auto shape = NumpyArray(py::reinterpret_borrow<py::object>(object)).GetArray().attr("shape");
 	if (py::len(shape) != 1) {
 		return false;
 	}
@@ -2366,7 +2367,7 @@ NumpyObjectType DuckDBPyConnection::IsAcceptedNumpyObject(const py::object &obje
 	}
 	auto import_cache_ = ImportCache();
 	if (py::isinstance(object, import_cache_->numpy.ndarray())) {
-		auto len = py::len((py::cast<py::array>(object)).attr("shape"));
+		auto len = py::len(NumpyArray(object).GetArray().attr("shape"));
 		switch (len) {
 		case 1:
 			return NumpyObjectType::NDARRAY1D;
diff --git a/src/duckdb_py/python_replacement_scan.cpp b/src/duckdb_py/python_replacement_scan.cpp
index 8bff9e8f..cef37cd1 100644
--- a/src/duckdb_py/python_replacement_scan.cpp
+++ b/src/duckdb_py/python_replacement_scan.cpp
@@ -3,6 +3,7 @@
 #include "duckdb_python/pybind11/pybind_wrapper.hpp"
 #include "duckdb/main/client_properties.hpp"
 #include "duckdb_python/numpy/numpy_type.hpp"
+#include "duckdb_python/numpy/numpy_array.hpp"
 #include "duckdb/parser/tableref/table_function_ref.hpp"
 #include "duckdb_python/pyconnection/pyconnection.hpp"
 #include "duckdb_python/pybind11/dataframe.hpp"
@@ -166,13 +167,15 @@ unique_ptr<TableRef> PythonReplacementScan::TryReplacementObject(const py::objec
 		case NumpyObjectType::NDARRAY1D:
 			data["column0"] = entry;
 			break;
-		case NumpyObjectType::NDARRAY2D:
+		case NumpyObjectType::NDARRAY2D: {
 			idx = 0;
-			for (auto item : py::cast<py::array>(entry)) {
+			NumpyArray ndarray(entry);
+			for (auto item : ndarray.GetArray()) {
 				data[("column" + std::to_string(idx)).c_str()] = item;
 				idx++;
 			}
 			break;
+		}
 		case NumpyObjectType::LIST:
 			idx = 0;
 			for (auto item : py::cast<py::list>(entry)) {