From d93bb96bd509c8989c2283ee3a57860c372a33f3 Mon Sep 17 00:00:00 2001
From: Daniel Mesejo <mesejoleon@gmail.com>
Date: Sun, 5 Apr 2026 18:08:29 +0200
Subject: [PATCH 1/2] refactor(context): deduplicate register/read
 option-building logic

Extract shared helpers (convert_partition_cols, convert_file_sort_order,
build_parquet/json/avro_options, convert_csv_options), standardize path
types to &str, and remove redundant intermediate variables.
---
 crates/core/src/context.rs | 281 +++++++++++++++++--------------------
 1 file changed, 131 insertions(+), 150 deletions(-)
diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs
index e46d359d6..d13a6a91c 100644
--- a/crates/core/src/context.rs
+++ b/crates/core/src/context.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 use std::collections::{HashMap, HashSet};
-use std::path::PathBuf;
 use std::ptr::NonNull;
 use std::str::FromStr;
 use std::sync::Arc;
@@ -472,19 +471,8 @@ impl PySessionContext {
     ) -> PyDataFusionResult<()> {
         let options = ListingOptions::new(Arc::new(ParquetFormat::new()))
             .with_file_extension(file_extension)
-            .with_table_partition_cols(
-                table_partition_cols
-                    .into_iter()
-                    .map(|(name, ty)| (name, ty.0))
-                    .collect::<Vec<(String, DataType)>>(),
-            )
-            .with_file_sort_order(
-                file_sort_order
-                    .unwrap_or_default()
-                    .into_iter()
-                    .map(|e| e.into_iter().map(|f| f.into()).collect())
-                    .collect(),
-            );
+            .with_table_partition_cols(convert_partition_cols(table_partition_cols))
+            .with_file_sort_order(convert_file_sort_order(file_sort_order));
         let table_path = ListingTableUrl::parse(path)?;
         let resolved_schema: SchemaRef = match schema {
             Some(s) => Arc::new(s.0),
@@ -851,25 +839,15 @@ impl PySessionContext {
         file_sort_order: Option<Vec<Vec<PySortExpr>>>,
         py: Python,
     ) -> PyDataFusionResult<()> {
-        let mut options = ParquetReadOptions::default()
-            .table_partition_cols(
-                table_partition_cols
-                    .into_iter()
-                    .map(|(name, ty)| (name, ty.0))
-                    .collect::<Vec<(String, DataType)>>(),
-            )
-            .parquet_pruning(parquet_pruning)
-            .skip_metadata(skip_metadata);
-        options.file_extension = file_extension;
-        options.schema = schema.as_ref().map(|x| &x.0);
-        options.file_sort_order = file_sort_order
-            .unwrap_or_default()
-            .into_iter()
-            .map(|e| e.into_iter().map(|f| f.into()).collect())
-            .collect();
-
-        let result = self.ctx.register_parquet(name, path, options);
-        wait_for_future(py, result)??;
+        let options = build_parquet_options(
+            table_partition_cols,
+            parquet_pruning,
+            file_extension,
+            skip_metadata,
+            &schema,
+            file_sort_order,
+        );
+        wait_for_future(py, self.ctx.register_parquet(name, path, options))??;
         Ok(())
     }
 
@@ -883,19 +861,17 @@ impl PySessionContext {
         options: Option<&PyCsvReadOptions>,
         py: Python,
     ) -> PyDataFusionResult<()> {
-        let options = options
-            .map(|opts| opts.try_into())
-            .transpose()?
-            .unwrap_or_default();
+        let options = convert_csv_options(options)?;
 
         if path.is_instance_of::<PyList>() {
             let paths = path.extract::<Vec<String>>()?;
-            let result = self.register_csv_from_multiple_paths(name, paths, options);
-            wait_for_future(py, result)??;
+            wait_for_future(
+                py,
+                self.register_csv_from_multiple_paths(name, paths, options),
+            )??;
         } else {
             let path = path.extract::<String>()?;
-            let result = self.ctx.register_csv(name, &path, options);
-            wait_for_future(py, result)??;
+            wait_for_future(py, self.ctx.register_csv(name, &path, options))??;
         }
 
         Ok(())
@@ -912,7 +888,7 @@ impl PySessionContext {
     pub fn register_json(
         &self,
         name: &str,
-        path: PathBuf,
+        path: &str,
         schema: Option<PyArrowType<Schema>>,
         schema_infer_max_records: usize,
         file_extension: &str,
@@ -920,25 +896,14 @@ impl PySessionContext {
         file_compression_type: Option<String>,
         py: Python,
     ) -> PyDataFusionResult<()> {
-        let path = path
-            .to_str()
-            .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?;
-
-        let mut options = JsonReadOptions::default()
-            .file_compression_type(parse_file_compression_type(file_compression_type)?)
-            .table_partition_cols(
-                table_partition_cols
-                    .into_iter()
-                    .map(|(name, ty)| (name, ty.0))
-                    .collect::<Vec<(String, DataType)>>(),
-            );
-        options.schema_infer_max_records = schema_infer_max_records;
-        options.file_extension = file_extension;
-        options.schema = schema.as_ref().map(|x| &x.0);
-
-        let result = self.ctx.register_json(name, path, options);
-        wait_for_future(py, result)??;
-
+        let options = build_json_options(
+            table_partition_cols,
+            file_compression_type,
+            schema_infer_max_records,
+            file_extension,
+            &schema,
+        )?;
+        wait_for_future(py, self.ctx.register_json(name, path, options))??;
         Ok(())
     }
 
@@ -951,28 +916,14 @@ impl PySessionContext {
     pub fn register_avro(
         &self,
         name: &str,
-        path: PathBuf,
+        path: &str,
         schema: Option<PyArrowType<Schema>>,
         file_extension: &str,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         py: Python,
     ) -> PyDataFusionResult<()> {
-        let path = path
-            .to_str()
-            .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?;
-
-        let mut options = AvroReadOptions::default().table_partition_cols(
-            table_partition_cols
-                .into_iter()
-                .map(|(name, ty)| (name, ty.0))
-                .collect::<Vec<(String, DataType)>>(),
-        );
-        options.file_extension = file_extension;
-        options.schema = schema.as_ref().map(|x| &x.0);
-
-        let result = self.ctx.register_avro(name, path, options);
-        wait_for_future(py, result)??;
-
+        let options = build_avro_options(table_partition_cols, file_extension, &schema);
+        wait_for_future(py, self.ctx.register_avro(name, path, options))??;
         Ok(())
     }
 
@@ -1147,7 +1098,7 @@ impl PySessionContext {
     #[pyo3(signature = (path, schema=None, schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![], file_compression_type=None))]
     pub fn read_json(
         &self,
-        path: PathBuf,
+        path: &str,
         schema: Option<PyArrowType<Schema>>,
         schema_infer_max_records: usize,
         file_extension: &str,
@@ -1155,27 +1106,14 @@ impl PySessionContext {
         file_compression_type: Option<String>,
         py: Python,
     ) -> PyDataFusionResult<PyDataFrame> {
-        let path = path
-            .to_str()
-            .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?;
-        let mut options = JsonReadOptions::default()
-            .table_partition_cols(
-                table_partition_cols
-                    .into_iter()
-                    .map(|(name, ty)| (name, ty.0))
-                    .collect::<Vec<(String, DataType)>>(),
-            )
-            .file_compression_type(parse_file_compression_type(file_compression_type)?);
-        options.schema_infer_max_records = schema_infer_max_records;
-        options.file_extension = file_extension;
-        let df = if let Some(schema) = schema {
-            options.schema = Some(&schema.0);
-            let result = self.ctx.read_json(path, options);
-            wait_for_future(py, result)??
-        } else {
-            let result = self.ctx.read_json(path, options);
-            wait_for_future(py, result)??
-        };
+        let options = build_json_options(
+            table_partition_cols,
+            file_compression_type,
+            schema_infer_max_records,
+            file_extension,
+            &schema,
+        )?;
+        let df = wait_for_future(py, self.ctx.read_json(path, options))??;
         Ok(PyDataFrame::new(df))
     }
 
@@ -1188,23 +1126,15 @@ impl PySessionContext {
         options: Option<&PyCsvReadOptions>,
         py: Python,
     ) -> PyDataFusionResult<PyDataFrame> {
-        let options = options
-            .map(|opts| opts.try_into())
-            .transpose()?
-            .unwrap_or_default();
+        let options = convert_csv_options(options)?;
 
-        if path.is_instance_of::<PyList>() {
-            let paths = path.extract::<Vec<String>>()?;
-            let paths = paths.iter().map(|p| p as &str).collect::<Vec<&str>>();
-            let result = self.ctx.read_csv(paths, options);
-            let df = PyDataFrame::new(wait_for_future(py, result)??);
-            Ok(df)
+        let paths: Vec<String> = if path.is_instance_of::<PyList>() {
+            path.extract()?
         } else {
-            let path = path.extract::<String>()?;
-            let result = self.ctx.read_csv(path, options);
-            let df = PyDataFrame::new(wait_for_future(py, result)??);
-            Ok(df)
-        }
+            vec![path.extract()?]
+        };
+        let df = wait_for_future(py, self.ctx.read_csv(paths, options))??;
+        Ok(PyDataFrame::new(df))
     }
 
     #[allow(clippy::too_many_arguments)]
@@ -1227,25 +1157,15 @@ impl PySessionContext {
         file_sort_order: Option<Vec<Vec<PySortExpr>>>,
         py: Python,
     ) -> PyDataFusionResult<PyDataFrame> {
-        let mut options = ParquetReadOptions::default()
-            .table_partition_cols(
-                table_partition_cols
-                    .into_iter()
-                    .map(|(name, ty)| (name, ty.0))
-                    .collect::<Vec<(String, DataType)>>(),
-            )
-            .parquet_pruning(parquet_pruning)
-            .skip_metadata(skip_metadata);
-        options.file_extension = file_extension;
-        options.schema = schema.as_ref().map(|x| &x.0);
-        options.file_sort_order = file_sort_order
-            .unwrap_or_default()
-            .into_iter()
-            .map(|e| e.into_iter().map(|f| f.into()).collect())
-            .collect();
-
-        let result = self.ctx.read_parquet(path, options);
-        let df = PyDataFrame::new(wait_for_future(py, result)??);
+        let options = build_parquet_options(
+            table_partition_cols,
+            parquet_pruning,
+            file_extension,
+            skip_metadata,
+            &schema,
+            file_sort_order,
+        );
+        let df = PyDataFrame::new(wait_for_future(py, self.ctx.read_parquet(path, options))??);
         Ok(df)
     }
 
@@ -1259,21 +1179,8 @@ impl PySessionContext {
         file_extension: &str,
         py: Python,
     ) -> PyDataFusionResult<PyDataFrame> {
-        let mut options = AvroReadOptions::default().table_partition_cols(
-            table_partition_cols
-                .into_iter()
-                .map(|(name, ty)| (name, ty.0))
-                .collect::<Vec<(String, DataType)>>(),
-        );
-        options.file_extension = file_extension;
-        let df = if let Some(schema) = schema {
-            options.schema = Some(&schema.0);
-            let read_future = self.ctx.read_avro(path, options);
-            wait_for_future(py, read_future)??
-        } else {
-            let read_future = self.ctx.read_avro(path, options);
-            wait_for_future(py, read_future)??
-        };
+        let options = build_avro_options(table_partition_cols, file_extension, &schema);
+        let df = wait_for_future(py, self.ctx.read_avro(path, options))??;
         Ok(PyDataFrame::new(df))
     }
 
@@ -1396,7 +1303,7 @@ impl PySessionContext {
         // check if the file extension matches the expected extension
         for path in &table_paths {
             let file_path = path.as_str();
-            if !file_path.ends_with(option_extension.clone().as_str()) && !path.is_collection() {
+            if !file_path.ends_with(option_extension.as_str()) && !path.is_collection() {
                 return exec_err!(
                     "File path '{file_path}' does not match the expected extension '{option_extension}'"
                 );
@@ -1437,6 +1344,80 @@ pub fn parse_file_compression_type(
         })
 }
 
+fn convert_csv_options(
+    options: Option<&PyCsvReadOptions>,
+) -> PyDataFusionResult<CsvReadOptions<'_>> {
+    Ok(options
+        .map(|opts| opts.try_into())
+        .transpose()?
+        .unwrap_or_default())
+}
+
+fn convert_partition_cols(
+    table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
+) -> Vec<(String, DataType)> {
+    table_partition_cols
+        .into_iter()
+        .map(|(name, ty)| (name, ty.0))
+        .collect()
+}
+
+fn convert_file_sort_order(
+    file_sort_order: Option<Vec<Vec<PySortExpr>>>,
+) -> Vec<Vec<datafusion::logical_expr::SortExpr>> {
+    file_sort_order
+        .unwrap_or_default()
+        .into_iter()
+        .map(|e| e.into_iter().map(|f| f.into()).collect())
+        .collect()
+}
+
+fn build_parquet_options<'a>(
+    table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
+    parquet_pruning: bool,
+    file_extension: &'a str,
+    skip_metadata: bool,
+    schema: &'a Option<PyArrowType<Schema>>,
+    file_sort_order: Option<Vec<Vec<PySortExpr>>>,
+) -> ParquetReadOptions<'a> {
+    let mut options = ParquetReadOptions::default()
+        .table_partition_cols(convert_partition_cols(table_partition_cols))
+        .parquet_pruning(parquet_pruning)
+        .skip_metadata(skip_metadata);
+    options.file_extension = file_extension;
+    options.schema = schema.as_ref().map(|x| &x.0);
+    options.file_sort_order = convert_file_sort_order(file_sort_order);
+    options
+}
+
+fn build_json_options<'a>(
+    table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
+    file_compression_type: Option<String>,
+    schema_infer_max_records: usize,
+    file_extension: &'a str,
+    schema: &'a Option<PyArrowType<Schema>>,
+) -> Result<JsonReadOptions<'a>, PyErr> {
+    let mut options = JsonReadOptions::default()
+        .table_partition_cols(convert_partition_cols(table_partition_cols))
+        .file_compression_type(parse_file_compression_type(file_compression_type)?);
+    options.schema_infer_max_records = schema_infer_max_records;
+    options.file_extension = file_extension;
+    options.schema = schema.as_ref().map(|x| &x.0);
+    Ok(options)
+}
+
+fn build_avro_options<'a>(
+    table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
+    file_extension: &'a str,
+    schema: &'a Option<PyArrowType<Schema>>,
+) -> AvroReadOptions<'a> {
+    let mut options = AvroReadOptions::default()
+        .table_partition_cols(convert_partition_cols(table_partition_cols));
+    options.file_extension = file_extension;
+    options.schema = schema.as_ref().map(|x| &x.0);
+    options
+}
+
 impl From<PySessionContext> for SessionContext {
     fn from(ctx: PySessionContext) -> SessionContext {
         ctx.ctx.as_ref().clone()

From f536873337b3b18a31462e5ccccc2dabd91f2cca Mon Sep 17 00:00:00 2001
From: Daniel Mesejo <mesejoleon@gmail.com>
Date: Sat, 11 Apr 2026 00:40:20 +0200
Subject: [PATCH 2/2] refactor(context): accept PathBuf for path arguments in
 register/read methods

Change path parameters from &str to PathBuf in all register/read methods
(register_listing_table, register_parquet, register_json, register_avro,
register_arrow, read_json, read_parquet, read_avro, read_arrow) so callers
can pass either a Python str or a pathlib.Path object. For register_csv and
read_csv, which take &Bound<PyAny> to handle lists, extract path elements as
PathBuf rather than String for the same reason.

Add a path_to_str helper that converts PathBuf to &str, returning an explicit
error for non-UTF-8 paths rather than silently corrupting them.

Add build_arrow_options helper to deduplicate register_arrow/read_arrow
option-building logic, consistent with the existing parquet/json/avro helpers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crates/core/src/context.rs   | 112 +++++++++++++++++++++--------------
 python/datafusion/context.py |  28 ++++-----
 2 files changed, 80 insertions(+), 60 deletions(-)

diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs
index d13a6a91c..aed3a7472 100644
--- a/crates/core/src/context.rs
+++ b/crates/core/src/context.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 use std::collections::{HashMap, HashSet};
+use std::path::{Path, PathBuf};
 use std::ptr::NonNull;
 use std::str::FromStr;
 use std::sync::Arc;
@@ -462,7 +463,7 @@ impl PySessionContext {
     pub fn register_listing_table(
         &self,
         name: &str,
-        path: &str,
+        path: PathBuf,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         file_extension: &str,
         schema: Option<PyArrowType<Schema>>,
@@ -473,7 +474,7 @@ impl PySessionContext {
             .with_file_extension(file_extension)
             .with_table_partition_cols(convert_partition_cols(table_partition_cols))
             .with_file_sort_order(convert_file_sort_order(file_sort_order));
-        let table_path = ListingTableUrl::parse(path)?;
+        let table_path = ListingTableUrl::parse(path_to_str(&path)?)?;
         let resolved_schema: SchemaRef = match schema {
             Some(s) => Arc::new(s.0),
             None => {
@@ -830,7 +831,7 @@ impl PySessionContext {
     pub fn register_parquet(
         &self,
         name: &str,
-        path: &str,
+        path: PathBuf,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         parquet_pruning: bool,
         file_extension: &str,
@@ -847,7 +848,11 @@ impl PySessionContext {
             &schema,
             file_sort_order,
         );
-        wait_for_future(py, self.ctx.register_parquet(name, path, options))??;
+        wait_for_future(
+            py,
+            self.ctx
+                .register_parquet(name, path_to_str(&path)?, options),
+        )??;
         Ok(())
     }
 
@@ -864,14 +869,21 @@ impl PySessionContext {
         let options = convert_csv_options(options)?;
 
         if path.is_instance_of::<PyList>() {
-            let paths = path.extract::<Vec<String>>()?;
+            let paths = path
+                .extract::<Vec<PathBuf>>()?
+                .iter()
+                .map(|p| path_to_str(p).map(str::to_owned))
+                .collect::<PyDataFusionResult<Vec<_>>>()?;
             wait_for_future(
                 py,
                 self.register_csv_from_multiple_paths(name, paths, options),
             )??;
         } else {
-            let path = path.extract::<String>()?;
-            wait_for_future(py, self.ctx.register_csv(name, &path, options))??;
+            let path = path.extract::<PathBuf>()?;
+            wait_for_future(
+                py,
+                self.ctx.register_csv(name, path_to_str(&path)?, options),
+            )??;
         }
 
         Ok(())
@@ -888,7 +900,7 @@ impl PySessionContext {
     pub fn register_json(
         &self,
         name: &str,
-        path: &str,
+        path: PathBuf,
         schema: Option<PyArrowType<Schema>>,
         schema_infer_max_records: usize,
         file_extension: &str,
@@ -903,7 +915,10 @@ impl PySessionContext {
             file_extension,
             &schema,
         )?;
-        wait_for_future(py, self.ctx.register_json(name, path, options))??;
+        wait_for_future(
+            py,
+            self.ctx.register_json(name, path_to_str(&path)?, options),
+        )??;
         Ok(())
     }
 
@@ -916,14 +931,17 @@ impl PySessionContext {
     pub fn register_avro(
         &self,
         name: &str,
-        path: &str,
+        path: PathBuf,
         schema: Option<PyArrowType<Schema>>,
         file_extension: &str,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         py: Python,
     ) -> PyDataFusionResult<()> {
         let options = build_avro_options(table_partition_cols, file_extension, &schema);
-        wait_for_future(py, self.ctx.register_avro(name, path, options))??;
+        wait_for_future(
+            py,
+            self.ctx.register_avro(name, path_to_str(&path)?, options),
+        )??;
         Ok(())
     }
 
@@ -931,23 +949,17 @@ impl PySessionContext {
     pub fn register_arrow(
         &self,
         name: &str,
-        path: &str,
+        path: PathBuf,
         schema: Option<PyArrowType<Schema>>,
         file_extension: &str,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         py: Python,
     ) -> PyDataFusionResult<()> {
-        let mut options = ArrowReadOptions::default().table_partition_cols(
-            table_partition_cols
-                .into_iter()
-                .map(|(name, ty)| (name, ty.0))
-                .collect::<Vec<(String, DataType)>>(),
-        );
-        options.file_extension = file_extension;
-        options.schema = schema.as_ref().map(|x| &x.0);
-
-        let result = self.ctx.register_arrow(name, path, options);
-        wait_for_future(py, result)??;
+        let options = build_arrow_options(table_partition_cols, file_extension, &schema);
+        wait_for_future(
+            py,
+            self.ctx.register_arrow(name, path_to_str(&path)?, options),
+        )??;
         Ok(())
     }
 
@@ -1098,7 +1110,7 @@ impl PySessionContext {
     #[pyo3(signature = (path, schema=None, schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![], file_compression_type=None))]
     pub fn read_json(
         &self,
-        path: &str,
+        path: PathBuf,
         schema: Option<PyArrowType<Schema>>,
         schema_infer_max_records: usize,
         file_extension: &str,
@@ -1113,7 +1125,7 @@ impl PySessionContext {
             file_extension,
             &schema,
         )?;
-        let df = wait_for_future(py, self.ctx.read_json(path, options))??;
+        let df = wait_for_future(py, self.ctx.read_json(path_to_str(&path)?, options))??;
         Ok(PyDataFrame::new(df))
     }
 
@@ -1129,9 +1141,12 @@ impl PySessionContext {
         let options = convert_csv_options(options)?;
 
         let paths: Vec<String> = if path.is_instance_of::<PyList>() {
-            path.extract()?
+            path.extract::<Vec<PathBuf>>()?
+                .iter()
+                .map(|p| path_to_str(p).map(str::to_owned))
+                .collect::<PyDataFusionResult<_>>()?
         } else {
-            vec![path.extract()?]
+            vec![path_to_str(&path.extract::<PathBuf>()?)?.to_owned()]
         };
         let df = wait_for_future(py, self.ctx.read_csv(paths, options))??;
         Ok(PyDataFrame::new(df))
@@ -1148,7 +1163,7 @@ impl PySessionContext {
         file_sort_order=None))]
     pub fn read_parquet(
         &self,
-        path: &str,
+        path: PathBuf,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         parquet_pruning: bool,
         file_extension: &str,
@@ -1165,7 +1180,10 @@ impl PySessionContext {
             &schema,
             file_sort_order,
         );
-        let df = PyDataFrame::new(wait_for_future(py, self.ctx.read_parquet(path, options))??);
+        let df = PyDataFrame::new(wait_for_future(
+            py,
+            self.ctx.read_parquet(path_to_str(&path)?, options),
+        )??);
         Ok(df)
     }
 
@@ -1173,37 +1191,28 @@ impl PySessionContext {
     #[pyo3(signature = (path, schema=None, table_partition_cols=vec![], file_extension=".avro"))]
     pub fn read_avro(
         &self,
-        path: &str,
+        path: PathBuf,
         schema: Option<PyArrowType<Schema>>,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         file_extension: &str,
         py: Python,
     ) -> PyDataFusionResult<PyDataFrame> {
         let options = build_avro_options(table_partition_cols, file_extension, &schema);
-        let df = wait_for_future(py, self.ctx.read_avro(path, options))??;
+        let df = wait_for_future(py, self.ctx.read_avro(path_to_str(&path)?, options))??;
         Ok(PyDataFrame::new(df))
     }
 
     #[pyo3(signature = (path, schema=None, file_extension=".arrow", table_partition_cols=vec![]))]
     pub fn read_arrow(
         &self,
-        path: &str,
+        path: PathBuf,
         schema: Option<PyArrowType<Schema>>,
         file_extension: &str,
         table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
         py: Python,
     ) -> PyDataFusionResult<PyDataFrame> {
-        let mut options = ArrowReadOptions::default().table_partition_cols(
-            table_partition_cols
-                .into_iter()
-                .map(|(name, ty)| (name, ty.0))
-                .collect::<Vec<(String, DataType)>>(),
-        );
-        options.file_extension = file_extension;
-        options.schema = schema.as_ref().map(|x| &x.0);
-
-        let result = self.ctx.read_arrow(path, options);
-        let df = wait_for_future(py, result)??;
+        let options = build_arrow_options(table_partition_cols, file_extension, &schema);
+        let df = wait_for_future(py, self.ctx.read_arrow(path_to_str(&path)?, options))??;
         Ok(PyDataFrame::new(df))
     }
 
@@ -1344,6 +1353,11 @@ pub fn parse_file_compression_type(
         })
 }
 
+fn path_to_str(path: &Path) -> PyDataFusionResult<&str> {
+    path.to_str()
+        .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string").into())
+}
+
 fn convert_csv_options(
     options: Option<&PyCsvReadOptions>,
 ) -> PyDataFusionResult<CsvReadOptions<'_>> {
@@ -1406,6 +1420,18 @@ fn build_json_options<'a>(
     Ok(options)
 }
 
+fn build_arrow_options<'a>(
+    table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
+    file_extension: &'a str,
+    schema: &'a Option<PyArrowType<Schema>>,
+) -> ArrowReadOptions<'a> {
+    let mut options = ArrowReadOptions::default()
+        .table_partition_cols(convert_partition_cols(table_partition_cols));
+    options.file_extension = file_extension;
+    options.schema = schema.as_ref().map(|x| &x.0);
+    options
+}
+
 fn build_avro_options<'a>(
     table_partition_cols: Vec<(String, PyArrowType<DataType>)>,
     file_extension: &'a str,
diff --git a/python/datafusion/context.py b/python/datafusion/context.py
index c3f94cc16..827acbd50 100644
--- a/python/datafusion/context.py
+++ b/python/datafusion/context.py
@@ -603,7 +603,7 @@ def register_listing_table(
         table_partition_cols = _convert_table_partition_cols(table_partition_cols)
         self.ctx.register_listing_table(
             name,
-            str(path),
+            path,
             table_partition_cols,
             file_extension,
             schema,
@@ -971,7 +971,7 @@ def register_parquet(
         table_partition_cols = _convert_table_partition_cols(table_partition_cols)
         self.ctx.register_parquet(
             name,
-            str(path),
+            path,
             table_partition_cols,
             parquet_pruning,
             file_extension,
@@ -1013,8 +1013,6 @@ def register_csv(
             options: Set advanced options for CSV reading. This cannot be
                 combined with any of the other options in this method.
         """
-        path_arg = [str(p) for p in path] if isinstance(path, list) else str(path)
-
         if options is not None and (
             schema is not None
             or not has_header
@@ -1048,7 +1046,7 @@ def register_csv(
 
         self.ctx.register_csv(
             name,
-            path_arg,
+            path,
             options.to_inner(),
         )
 
@@ -1083,7 +1081,7 @@ def register_json(
         table_partition_cols = _convert_table_partition_cols(table_partition_cols)
         self.ctx.register_json(
             name,
-            str(path),
+            path,
             schema,
             schema_infer_max_records,
             file_extension,
@@ -1114,9 +1112,7 @@ def register_avro(
         if table_partition_cols is None:
             table_partition_cols = []
         table_partition_cols = _convert_table_partition_cols(table_partition_cols)
-        self.ctx.register_avro(
-            name, str(path), schema, file_extension, table_partition_cols
-        )
+        self.ctx.register_avro(name, path, schema, file_extension, table_partition_cols)
 
     def register_arrow(
         self,
@@ -1195,7 +1191,7 @@ def register_arrow(
             table_partition_cols = []
         table_partition_cols = _convert_table_partition_cols(table_partition_cols)
         self.ctx.register_arrow(
-            name, str(path), schema, file_extension, table_partition_cols
+            name, path, schema, file_extension, table_partition_cols
         )
 
     def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None:
@@ -1407,7 +1403,7 @@ def read_json(
         table_partition_cols = _convert_table_partition_cols(table_partition_cols)
         return DataFrame(
             self.ctx.read_json(
-                str(path),
+                path,
                 schema,
                 schema_infer_max_records,
                 file_extension,
@@ -1450,8 +1446,6 @@ def read_csv(
         Returns:
             DataFrame representation of the read CSV files
         """
-        path_arg = [str(p) for p in path] if isinstance(path, list) else str(path)
-
         if options is not None and (
             schema is not None
             or not has_header
@@ -1487,7 +1481,7 @@ def read_csv(
 
         return DataFrame(
             self.ctx.read_csv(
-                path_arg,
+                path,
                 options.to_inner(),
             )
         )
@@ -1530,7 +1524,7 @@ def read_parquet(
         file_sort_order = self._convert_file_sort_order(file_sort_order)
         return DataFrame(
             self.ctx.read_parquet(
-                str(path),
+                path,
                 table_partition_cols,
                 parquet_pruning,
                 file_extension,
@@ -1562,7 +1556,7 @@ def read_avro(
             file_partition_cols = []
         file_partition_cols = _convert_table_partition_cols(file_partition_cols)
         return DataFrame(
-            self.ctx.read_avro(str(path), schema, file_partition_cols, file_extension)
+            self.ctx.read_avro(path, schema, file_partition_cols, file_extension)
         )
 
     def read_arrow(
@@ -1634,7 +1628,7 @@ def read_arrow(
             file_partition_cols = []
         file_partition_cols = _convert_table_partition_cols(file_partition_cols)
         return DataFrame(
-            self.ctx.read_arrow(str(path), schema, file_extension, file_partition_cols)
+            self.ctx.read_arrow(path, schema, file_extension, file_partition_cols)
         )
 
     def read_empty(self) -> DataFrame: