Skip to content

SEDF from FeatureLayer query to parquet raises TypeError: Object of type _Metadata is not JSON serializable #2499

@L-Koren

Description

@L-Koren

Describe the bug
When attempting to export a Spatially Enabled DataFrame (SEDF) resulting from a FeatureLayer.query() to parquet, a TypeError is raised.

To Reproduce
Steps to reproduce the behavior:

from arcgis.gis import GIS
 
gis = GIS()
 
item = gis.content.search(
    query="USA Major Cities",
    item_type="Feature Layer",
    outside_org=True,
)[0]
layer = item.layers[0]
 
sdf = layer.query(
    where="1=1",
    out_fields="*",
    return_geometry=False,
    result_record_count=1,
    as_df=True
)
 
## Will raise error
try:
    sdf.to_parquet("test.parquet")
except TypeError as e:
    print(f"Error found for sdf {e}.")
 
## Will not
sdf.attrs.pop("metadata", None)
sdf.to_parquet("test.parquet")

error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[34], line 32
     29     except TypeError as e:
     30         print(f"Error found for sdf {name}: {e}.")
---> 32 sdf_error.to_parquet("test.parquet")
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\site-packages\pandas\util\_decorators.py:333, in deprecate_nonkeyword_arguments..decorate..wrapper(*args, **kwargs)
    327 if len(args) > num_allow_args:
    328     warnings.warn(
    329         msg.format(arguments=_format_argument_list(allow_args)),
    330         FutureWarning,
    331         stacklevel=find_stack_level(),
    332     )
--> 333 return func(*args, **kwargs)
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\site-packages\pandas\core\frame.py:3113, in DataFrame.to_parquet(self, path, engine, compression, index, partition_cols, storage_options, **kwargs)
   3032 """
   3033 Write a DataFrame to the binary parquet format.
   3034 
   (...)
   3109 >>> content = f.read()
   3110 """
   3111 from pandas.io.parquet import to_parquet
-> 3113 return to_parquet(
   3114     self,
   3115     path,
   3116     engine,
   3117     compression=compression,
   3118     index=index,
   3119     partition_cols=partition_cols,
   3120     storage_options=storage_options,
   3121     **kwargs,
   3122 )
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\site-packages\pandas\io\parquet.py:480, in to_parquet(df, path, engine, compression, index, storage_options, partition_cols, filesystem, **kwargs)
    476 impl = get_engine(engine)
    478 path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path
--> 480 impl.write(
    481     df,
    482     path_or_buf,
    483     compression=compression,
    484     index=index,
    485     partition_cols=partition_cols,
    486     storage_options=storage_options,
    487     filesystem=filesystem,
    488     **kwargs,
    489 )
    491 if path is None:
    492     assert isinstance(path_or_buf, io.BytesIO)
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\site-packages\pandas\io\parquet.py:193, in PyArrowImpl.write(self, df, path, compression, index, storage_options, partition_cols, filesystem, **kwargs)
    190 table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
    192 if df.attrs:
--> 193     df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
    194     existing_metadata = table.schema.metadata
    195     merged_metadata = {**existing_metadata, **df_metadata}
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\json\__init__.py:231, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    226 # cached encoder
    227 if (not skipkeys and ensure_ascii and
    228     check_circular and allow_nan and
    229     cls is None and indent is None and separators is None and
    230     default is None and not sort_keys and not kw):
--> 231     return _default_encoder.encode(obj)
    232 if cls is None:
    233     cls = JSONEncoder
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\json\encoder.py:200, in JSONEncoder.encode(self, o)
    196         return encode_basestring(o)
    197 # This doesn't pass the iterator directly to ''.join() because the
    198 # exceptions aren't as detailed.  The list call should be roughly
    199 # equivalent to the PySequence_Fast that ''.join() would do.
--> 200 chunks = self.iterencode(o, _one_shot=True)
    201 if not isinstance(chunks, (list, tuple)):
    202     chunks = list(chunks)
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\json\encoder.py:258, in JSONEncoder.iterencode(self, o, _one_shot)
    253 else:
    254     _iterencode = _make_iterencode(
    255         markers, self.default, _encoder, self.indent, floatstr,
    256         self.key_separator, self.item_separator, self.sort_keys,
    257         self.skipkeys, _one_shot)
--> 258 return _iterencode(o, 0)
 
File c:\Users\USERNAME\AppData\Local\ESRI\conda\envs\arcgispro-py3-clone\Lib\json\encoder.py:180, in JSONEncoder.default(self, o)
    161 def default(self, o):
    162     """Implement this method in a subclass such that it returns
    163     a serializable object for ``o``, or calls the base implementation
    164     (to raise a ``TypeError``).
   (...)
    178 
    179     """
--> 180     raise TypeError(f'Object of type {o.__class__.__name__} '
    181                     f'is not JSON serializable')
 
TypeError: Object of type _Metadata is not JSON serializable

Expected behavior
The SEDF should be exportable to a parquet file without the user having to manually strip the non-serializable `metadata` entry from `DataFrame.attrs`.

Platform:

  • OS: Windows 11
  • Python API Version: 2.4.1.1 & 2.4.1.3

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions