From d93bb96bd509c8989c2283ee3a57860c372a33f3 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sun, 5 Apr 2026 18:08:29 +0200 Subject: [PATCH 1/2] refactor(context): deduplicate register/read option-building logic Extract shared helpers (convert_partition_cols, convert_file_sort_order, build_parquet/json/avro_options, convert_csv_options), standardize path types to &str, and remove redundant intermediate variables. --- crates/core/src/context.rs | 281 +++++++++++++++++-------------------- 1 file changed, 131 insertions(+), 150 deletions(-) diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index e46d359d6..d13a6a91c 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -16,7 +16,6 @@ // under the License. use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; use std::ptr::NonNull; use std::str::FromStr; use std::sync::Arc; @@ -472,19 +471,8 @@ impl PySessionContext { ) -> PyDataFusionResult<()> { let options = ListingOptions::new(Arc::new(ParquetFormat::new())) .with_file_extension(file_extension) - .with_table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .with_file_sort_order( - file_sort_order - .unwrap_or_default() - .into_iter() - .map(|e| e.into_iter().map(|f| f.into()).collect()) - .collect(), - ); + .with_table_partition_cols(convert_partition_cols(table_partition_cols)) + .with_file_sort_order(convert_file_sort_order(file_sort_order)); let table_path = ListingTableUrl::parse(path)?; let resolved_schema: SchemaRef = match schema { Some(s) => Arc::new(s.0), @@ -851,25 +839,15 @@ impl PySessionContext { file_sort_order: Option>>, py: Python, ) -> PyDataFusionResult<()> { - let mut options = ParquetReadOptions::default() - .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .parquet_pruning(parquet_pruning) - .skip_metadata(skip_metadata); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - options.file_sort_order = file_sort_order - .unwrap_or_default() - .into_iter() - .map(|e| e.into_iter().map(|f| f.into()).collect()) - .collect(); - - let result = self.ctx.register_parquet(name, path, options); - wait_for_future(py, result)??; + let options = build_parquet_options( + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + &schema, + file_sort_order, + ); + wait_for_future(py, self.ctx.register_parquet(name, path, options))??; Ok(()) } @@ -883,19 +861,17 @@ impl PySessionContext { options: Option<&PyCsvReadOptions>, py: Python, ) -> PyDataFusionResult<()> { - let options = options - .map(|opts| opts.try_into()) - .transpose()? - .unwrap_or_default(); + let options = convert_csv_options(options)?; if path.is_instance_of::() { let paths = path.extract::>()?; - let result = self.register_csv_from_multiple_paths(name, paths, options); - wait_for_future(py, result)??; + wait_for_future( + py, + self.register_csv_from_multiple_paths(name, paths, options), + )??; } else { let path = path.extract::()?; - let result = self.ctx.register_csv(name, &path, options); - wait_for_future(py, result)??; + wait_for_future(py, self.ctx.register_csv(name, &path, options))??; } Ok(()) @@ -912,7 +888,7 @@ impl PySessionContext { pub fn register_json( &self, name: &str, - path: PathBuf, + path: &str, schema: Option>, schema_infer_max_records: usize, file_extension: &str, @@ -920,25 +896,14 @@ impl PySessionContext { file_compression_type: Option, py: Python, ) -> PyDataFusionResult<()> { - let path = path - .to_str() - .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; - - let mut options = JsonReadOptions::default() - .file_compression_type(parse_file_compression_type(file_compression_type)?) - .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ); - options.schema_infer_max_records = schema_infer_max_records; - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - - let result = self.ctx.register_json(name, path, options); - wait_for_future(py, result)??; - + let options = build_json_options( + table_partition_cols, + file_compression_type, + schema_infer_max_records, + file_extension, + &schema, + )?; + wait_for_future(py, self.ctx.register_json(name, path, options))??; Ok(()) } @@ -951,28 +916,14 @@ impl PySessionContext { pub fn register_avro( &self, name: &str, - path: PathBuf, + path: &str, schema: Option>, file_extension: &str, table_partition_cols: Vec<(String, PyArrowType)>, py: Python, ) -> PyDataFusionResult<()> { - let path = path - .to_str() - .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; - - let mut options = AvroReadOptions::default().table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - - let result = self.ctx.register_avro(name, path, options); - wait_for_future(py, result)??; - + let options = build_avro_options(table_partition_cols, file_extension, &schema); + wait_for_future(py, self.ctx.register_avro(name, path, options))??; Ok(()) } @@ -1147,7 +1098,7 @@ impl PySessionContext { #[pyo3(signature = (path, schema=None, schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![], file_compression_type=None))] pub fn read_json( &self, - path: PathBuf, + path: &str, schema: Option>, schema_infer_max_records: usize, file_extension: &str, @@ -1155,27 +1106,14 @@ impl PySessionContext { file_compression_type: Option, py: Python, ) -> PyDataFusionResult { - let path = path - .to_str() - .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; - let mut options = JsonReadOptions::default() - .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .file_compression_type(parse_file_compression_type(file_compression_type)?); - options.schema_infer_max_records = schema_infer_max_records; - options.file_extension = file_extension; - let df = if let Some(schema) = schema { - options.schema = Some(&schema.0); - let result = self.ctx.read_json(path, options); - wait_for_future(py, result)?? - } else { - let result = self.ctx.read_json(path, options); - wait_for_future(py, result)?? - }; + let options = build_json_options( + table_partition_cols, + file_compression_type, + schema_infer_max_records, + file_extension, + &schema, + )?; + let df = wait_for_future(py, self.ctx.read_json(path, options))??; Ok(PyDataFrame::new(df)) } @@ -1188,23 +1126,15 @@ impl PySessionContext { options: Option<&PyCsvReadOptions>, py: Python, ) -> PyDataFusionResult { - let options = options - .map(|opts| opts.try_into()) - .transpose()? - .unwrap_or_default(); + let options = convert_csv_options(options)?; - if path.is_instance_of::() { - let paths = path.extract::>()?; - let paths = paths.iter().map(|p| p as &str).collect::>(); - let result = self.ctx.read_csv(paths, options); - let df = PyDataFrame::new(wait_for_future(py, result)??); - Ok(df) + let paths: Vec = if path.is_instance_of::() { + path.extract()? } else { - let path = path.extract::()?; - let result = self.ctx.read_csv(path, options); - let df = PyDataFrame::new(wait_for_future(py, result)??); - Ok(df) - } + vec![path.extract()?] + }; + let df = wait_for_future(py, self.ctx.read_csv(paths, options))??; + Ok(PyDataFrame::new(df)) } #[allow(clippy::too_many_arguments)] @@ -1227,25 +1157,15 @@ impl PySessionContext { file_sort_order: Option>>, py: Python, ) -> PyDataFusionResult { - let mut options = ParquetReadOptions::default() - .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .parquet_pruning(parquet_pruning) - .skip_metadata(skip_metadata); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - options.file_sort_order = file_sort_order - .unwrap_or_default() - .into_iter() - .map(|e| e.into_iter().map(|f| f.into()).collect()) - .collect(); - - let result = self.ctx.read_parquet(path, options); - let df = PyDataFrame::new(wait_for_future(py, result)??); + let options = build_parquet_options( + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + &schema, + file_sort_order, + ); + let df = PyDataFrame::new(wait_for_future(py, self.ctx.read_parquet(path, options))??); Ok(df) } @@ -1259,21 +1179,8 @@ impl PySessionContext { file_extension: &str, py: Python, ) -> PyDataFusionResult { - let mut options = AvroReadOptions::default().table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ); - options.file_extension = file_extension; - let df = if let Some(schema) = schema { - options.schema = Some(&schema.0); - let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future)?? - } else { - let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future)?? - }; + let options = build_avro_options(table_partition_cols, file_extension, &schema); + let df = wait_for_future(py, self.ctx.read_avro(path, options))??; Ok(PyDataFrame::new(df)) } @@ -1396,7 +1303,7 @@ impl PySessionContext { // check if the file extension matches the expected extension for path in &table_paths { let file_path = path.as_str(); - if !file_path.ends_with(option_extension.clone().as_str()) && !path.is_collection() { + if !file_path.ends_with(option_extension.as_str()) && !path.is_collection() { return exec_err!( "File path '{file_path}' does not match the expected extension '{option_extension}'" ); @@ -1437,6 +1344,80 @@ pub fn parse_file_compression_type( }) } +fn convert_csv_options( + options: Option<&PyCsvReadOptions>, +) -> PyDataFusionResult> { + Ok(options + .map(|opts| opts.try_into()) + .transpose()? + .unwrap_or_default()) +} + +fn convert_partition_cols( + table_partition_cols: Vec<(String, PyArrowType)>, +) -> Vec<(String, DataType)> { + table_partition_cols + .into_iter() + .map(|(name, ty)| (name, ty.0)) + .collect() +} + +fn convert_file_sort_order( + file_sort_order: Option>>, +) -> Vec> { + file_sort_order + .unwrap_or_default() + .into_iter() + .map(|e| e.into_iter().map(|f| f.into()).collect()) + .collect() +} + +fn build_parquet_options<'a>( + table_partition_cols: Vec<(String, PyArrowType)>, + parquet_pruning: bool, + file_extension: &'a str, + skip_metadata: bool, + schema: &'a Option>, + file_sort_order: Option>>, +) -> ParquetReadOptions<'a> { + let mut options = ParquetReadOptions::default() + .table_partition_cols(convert_partition_cols(table_partition_cols)) + .parquet_pruning(parquet_pruning) + .skip_metadata(skip_metadata); + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + options.file_sort_order = convert_file_sort_order(file_sort_order); + options +} + +fn build_json_options<'a>( + table_partition_cols: Vec<(String, PyArrowType)>, + file_compression_type: Option, + schema_infer_max_records: usize, + file_extension: &'a str, + schema: &'a Option>, +) -> Result, PyErr> { + let mut options = JsonReadOptions::default() + .table_partition_cols(convert_partition_cols(table_partition_cols)) + .file_compression_type(parse_file_compression_type(file_compression_type)?); + options.schema_infer_max_records = schema_infer_max_records; + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + Ok(options) +} + +fn build_avro_options<'a>( + table_partition_cols: Vec<(String, PyArrowType)>, + file_extension: &'a str, + schema: &'a Option>, +) -> AvroReadOptions<'a> { + let mut options = AvroReadOptions::default() + .table_partition_cols(convert_partition_cols(table_partition_cols)); + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + options +} + impl From for SessionContext { fn from(ctx: PySessionContext) -> SessionContext { ctx.ctx.as_ref().clone() From f536873337b3b18a31462e5ccccc2dabd91f2cca Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sat, 11 Apr 2026 00:40:20 +0200 Subject: [PATCH 2/2] refactor(context): accept PathBuf for path arguments in register/read methods Change path parameters from &str to PathBuf in all register/read methods (register_listing_table, register_parquet, register_json, register_avro, register_arrow, read_json, read_parquet, read_avro, read_arrow) so callers can pass either a Python str or a pathlib.Path object. For register_csv and read_csv, which take &Bound to handle lists, extract path elements as PathBuf rather than String for the same reason. Add a path_to_str helper that converts PathBuf to &str, returning an explicit error for non-UTF-8 paths rather than silently corrupting them. Add build_arrow_options helper to deduplicate register_arrow/read_arrow option-building logic, consistent with the existing parquet/json/avro helpers. Co-Authored-By: Claude Sonnet 4.6 --- crates/core/src/context.rs | 112 +++++++++++++++++++++-------------- python/datafusion/context.py | 28 ++++----- 2 files changed, 80 insertions(+), 60 deletions(-) diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index d13a6a91c..aed3a7472 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -16,6 +16,7 @@ // under the License. use std::collections::{HashMap, HashSet}; +use std::path::{Path, PathBuf}; use std::ptr::NonNull; use std::str::FromStr; use std::sync::Arc; @@ -462,7 +463,7 @@ impl PySessionContext { pub fn register_listing_table( &self, name: &str, - path: &str, + path: PathBuf, table_partition_cols: Vec<(String, PyArrowType)>, file_extension: &str, schema: Option>, @@ -473,7 +474,7 @@ impl PySessionContext { .with_file_extension(file_extension) .with_table_partition_cols(convert_partition_cols(table_partition_cols)) .with_file_sort_order(convert_file_sort_order(file_sort_order)); - let table_path = ListingTableUrl::parse(path)?; + let table_path = ListingTableUrl::parse(path_to_str(&path)?)?; let resolved_schema: SchemaRef = match schema { Some(s) => Arc::new(s.0), None => { @@ -830,7 +831,7 @@ impl PySessionContext { pub fn register_parquet( &self, name: &str, - path: &str, + path: PathBuf, table_partition_cols: Vec<(String, PyArrowType)>, parquet_pruning: bool, file_extension: &str, @@ -847,7 +848,11 @@ impl PySessionContext { &schema, file_sort_order, ); - wait_for_future(py, self.ctx.register_parquet(name, path, options))??; + wait_for_future( + py, + self.ctx + .register_parquet(name, path_to_str(&path)?, options), + )??; Ok(()) } @@ -864,14 +869,21 @@ impl PySessionContext { let options = convert_csv_options(options)?; if path.is_instance_of::() { - let paths = path.extract::>()?; + let paths = path + .extract::>()? + .iter() + .map(|p| path_to_str(p).map(str::to_owned)) + .collect::>>()?; wait_for_future( py, self.register_csv_from_multiple_paths(name, paths, options), )??; } else { - let path = path.extract::()?; - wait_for_future(py, self.ctx.register_csv(name, &path, options))??; + let path = path.extract::()?; + wait_for_future( + py, + self.ctx.register_csv(name, path_to_str(&path)?, options), + )??; } Ok(()) @@ -888,7 +900,7 @@ impl PySessionContext { pub fn register_json( &self, name: &str, - path: &str, + path: PathBuf, schema: Option>, schema_infer_max_records: usize, file_extension: &str, @@ -903,7 +915,10 @@ impl PySessionContext { file_extension, &schema, )?; - wait_for_future(py, self.ctx.register_json(name, path, options))??; + wait_for_future( + py, + self.ctx.register_json(name, path_to_str(&path)?, options), + )??; Ok(()) } @@ -916,14 +931,17 @@ impl PySessionContext { pub fn register_avro( &self, name: &str, - path: &str, + path: PathBuf, schema: Option>, file_extension: &str, table_partition_cols: Vec<(String, PyArrowType)>, py: Python, ) -> PyDataFusionResult<()> { let options = build_avro_options(table_partition_cols, file_extension, &schema); - wait_for_future(py, self.ctx.register_avro(name, path, options))??; + wait_for_future( + py, + self.ctx.register_avro(name, path_to_str(&path)?, options), + )??; Ok(()) } @@ -931,23 +949,17 @@ impl PySessionContext { pub fn register_arrow( &self, name: &str, - path: &str, + path: PathBuf, schema: Option>, file_extension: &str, table_partition_cols: Vec<(String, PyArrowType)>, py: Python, ) -> PyDataFusionResult<()> { - let mut options = ArrowReadOptions::default().table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - - let result = self.ctx.register_arrow(name, path, options); - wait_for_future(py, result)??; + let options = build_arrow_options(table_partition_cols, file_extension, &schema); + wait_for_future( + py, + self.ctx.register_arrow(name, path_to_str(&path)?, options), + )??; Ok(()) } @@ -1098,7 +1110,7 @@ impl PySessionContext { #[pyo3(signature = (path, schema=None, schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![], file_compression_type=None))] pub fn read_json( &self, - path: &str, + path: PathBuf, schema: Option>, schema_infer_max_records: usize, file_extension: &str, @@ -1113,7 +1125,7 @@ impl PySessionContext { file_extension, &schema, )?; - let df = wait_for_future(py, self.ctx.read_json(path, options))??; + let df = wait_for_future(py, self.ctx.read_json(path_to_str(&path)?, options))??; Ok(PyDataFrame::new(df)) } @@ -1129,9 +1141,12 @@ impl PySessionContext { let options = convert_csv_options(options)?; let paths: Vec = if path.is_instance_of::() { - path.extract()? + path.extract::>()? + .iter() + .map(|p| path_to_str(p).map(str::to_owned)) + .collect::>()? } else { - vec![path.extract()?] + vec![path_to_str(&path.extract::()?)?.to_owned()] }; let df = wait_for_future(py, self.ctx.read_csv(paths, options))??; Ok(PyDataFrame::new(df)) @@ -1148,7 +1163,7 @@ impl PySessionContext { file_sort_order=None))] pub fn read_parquet( &self, - path: &str, + path: PathBuf, table_partition_cols: Vec<(String, PyArrowType)>, parquet_pruning: bool, file_extension: &str, @@ -1165,7 +1180,10 @@ impl PySessionContext { &schema, file_sort_order, ); - let df = PyDataFrame::new(wait_for_future(py, self.ctx.read_parquet(path, options))??); + let df = PyDataFrame::new(wait_for_future( + py, + self.ctx.read_parquet(path_to_str(&path)?, options), + )??); Ok(df) } @@ -1173,37 +1191,28 @@ impl PySessionContext { #[pyo3(signature = (path, schema=None, table_partition_cols=vec![], file_extension=".avro"))] pub fn read_avro( &self, - path: &str, + path: PathBuf, schema: Option>, table_partition_cols: Vec<(String, PyArrowType)>, file_extension: &str, py: Python, ) -> PyDataFusionResult { let options = build_avro_options(table_partition_cols, file_extension, &schema); - let df = wait_for_future(py, self.ctx.read_avro(path, options))??; + let df = wait_for_future(py, self.ctx.read_avro(path_to_str(&path)?, options))??; Ok(PyDataFrame::new(df)) } #[pyo3(signature = (path, schema=None, file_extension=".arrow", table_partition_cols=vec![]))] pub fn read_arrow( &self, - path: &str, + path: PathBuf, schema: Option>, file_extension: &str, table_partition_cols: Vec<(String, PyArrowType)>, py: Python, ) -> PyDataFusionResult { - let mut options = ArrowReadOptions::default().table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - - let result = self.ctx.read_arrow(path, options); - let df = wait_for_future(py, result)??; + let options = build_arrow_options(table_partition_cols, file_extension, &schema); + let df = wait_for_future(py, self.ctx.read_arrow(path_to_str(&path)?, options))??; Ok(PyDataFrame::new(df)) } @@ -1344,6 +1353,11 @@ pub fn parse_file_compression_type( }) } +fn path_to_str(path: &Path) -> PyDataFusionResult<&str> { + path.to_str() + .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string").into()) +} + fn convert_csv_options( options: Option<&PyCsvReadOptions>, ) -> PyDataFusionResult> { @@ -1406,6 +1420,18 @@ fn build_json_options<'a>( Ok(options) } +fn build_arrow_options<'a>( + table_partition_cols: Vec<(String, PyArrowType)>, + file_extension: &'a str, + schema: &'a Option>, +) -> ArrowReadOptions<'a> { + let mut options = ArrowReadOptions::default() + .table_partition_cols(convert_partition_cols(table_partition_cols)); + options.file_extension = file_extension; + options.schema = schema.as_ref().map(|x| &x.0); + options +} + fn build_avro_options<'a>( table_partition_cols: Vec<(String, PyArrowType)>, file_extension: &'a str, diff --git a/python/datafusion/context.py b/python/datafusion/context.py index c3f94cc16..827acbd50 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -603,7 +603,7 @@ def register_listing_table( table_partition_cols = _convert_table_partition_cols(table_partition_cols) self.ctx.register_listing_table( name, - str(path), + path, table_partition_cols, file_extension, schema, @@ -971,7 +971,7 @@ def register_parquet( table_partition_cols = _convert_table_partition_cols(table_partition_cols) self.ctx.register_parquet( name, - str(path), + path, table_partition_cols, parquet_pruning, file_extension, @@ -1013,8 +1013,6 @@ def register_csv( options: Set advanced options for CSV reading. This cannot be combined with any of the other options in this method. """ - path_arg = [str(p) for p in path] if isinstance(path, list) else str(path) - if options is not None and ( schema is not None or not has_header @@ -1048,7 +1046,7 @@ def register_csv( self.ctx.register_csv( name, - path_arg, + path, options.to_inner(), ) @@ -1083,7 +1081,7 @@ def register_json( table_partition_cols = _convert_table_partition_cols(table_partition_cols) self.ctx.register_json( name, - str(path), + path, schema, schema_infer_max_records, file_extension, @@ -1114,9 +1112,7 @@ def register_avro( if table_partition_cols is None: table_partition_cols = [] table_partition_cols = _convert_table_partition_cols(table_partition_cols) - self.ctx.register_avro( - name, str(path), schema, file_extension, table_partition_cols - ) + self.ctx.register_avro(name, path, schema, file_extension, table_partition_cols) def register_arrow( self, @@ -1195,7 +1191,7 @@ def register_arrow( table_partition_cols = [] table_partition_cols = _convert_table_partition_cols(table_partition_cols) self.ctx.register_arrow( - name, str(path), schema, file_extension, table_partition_cols + name, path, schema, file_extension, table_partition_cols ) def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None: @@ -1407,7 +1403,7 @@ def read_json( table_partition_cols = _convert_table_partition_cols(table_partition_cols) return DataFrame( self.ctx.read_json( - str(path), + path, schema, schema_infer_max_records, file_extension, @@ -1450,8 +1446,6 @@ def read_csv( Returns: DataFrame representation of the read CSV files """ - path_arg = [str(p) for p in path] if isinstance(path, list) else str(path) - if options is not None and ( schema is not None or not has_header @@ -1487,7 +1481,7 @@ def read_csv( return DataFrame( self.ctx.read_csv( - path_arg, + path, options.to_inner(), ) ) @@ -1530,7 +1524,7 @@ def read_parquet( file_sort_order = self._convert_file_sort_order(file_sort_order) return DataFrame( self.ctx.read_parquet( - str(path), + path, table_partition_cols, parquet_pruning, file_extension, @@ -1562,7 +1556,7 @@ def read_avro( file_partition_cols = [] file_partition_cols = _convert_table_partition_cols(file_partition_cols) return DataFrame( - self.ctx.read_avro(str(path), schema, file_partition_cols, file_extension) + self.ctx.read_avro(path, schema, file_partition_cols, file_extension) ) def read_arrow( @@ -1634,7 +1628,7 @@ def read_arrow( file_partition_cols = [] file_partition_cols = _convert_table_partition_cols(file_partition_cols) return DataFrame( - self.ctx.read_arrow(str(path), schema, file_extension, file_partition_cols) + self.ctx.read_arrow(path, schema, file_extension, file_partition_cols) ) def read_empty(self) -> DataFrame: