deepnote · m1so · Jun 25, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
@@ -411,7 +411,7 @@ jobs:
           coverage report --format=markdown >> $GITHUB_STEP_SUMMARY
 
       - name: Upload combined coverage to Codecov
-        uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7 # v5
+        uses: codecov/codecov-action@5975040f7f7d40edaff8d784b576fd65ae95c073 # v5.5.5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           slug: ${{ github.repository }}

@@ -17,6 +17,7 @@
 from deepnote_toolkit.chart.types import CHART_ROW_LIMIT, VEGA_5_MIME_TYPE, ChartError
 from deepnote_toolkit.chart.utils import (
     sanitize_dataframe_for_chart,
+    sanitize_polars_dataframe_for_chart,
     serialize_values_list_for_json,
 )
 from deepnote_toolkit.logging import LoggerManager
@@ -141,11 +142,18 @@ def __init__(
             if filtered_df.native_type == "pandas":
                 sanitized_pandas = sanitize_dataframe_for_chart(filtered_df.to_native())
                 oc_sanitized_df = oc.DataFrame.from_native(sanitized_pandas)
-            elif filtered_df.native_type in ("pyspark", "polars-eager"):
-                # We don't need to sanitize Spark DFs because they will processed by Spark itself and it can handle
-                # all data types by itself
-                # Polars is powered by Arrow, which is same format used internally by VegaFusion so there is no need
-                # to do any additional sanitization for it either
+            elif filtered_df.native_type == "polars-eager":
+                # Polars is Arrow-backed, so most types pass through to VegaFusion
+                # untouched. The exception is Object columns (e.g. uuid.UUID values),
+                # which convert to an Arrow type VegaFusion can't serialize, so they
+                # still need sanitizing.
+                sanitized_polars = sanitize_polars_dataframe_for_chart(
+                    filtered_df.to_native()
+                )
+                oc_sanitized_df = oc.DataFrame.from_native(sanitized_polars)
+            elif filtered_df.native_type == "pyspark":
+                # Spark processes the data itself and handles all data types, so no
+                # sanitization is needed here.
                 oc_sanitized_df = filtered_df
             else:
                 raise TypeError(

@@ -1,20 +1,50 @@
-from typing import Any, List, Optional
+import uuid
+from typing import TYPE_CHECKING, Any, List, Optional
 
 import pandas as pd
 
 import deepnote_toolkit.ocelots as oc
 
+if TYPE_CHECKING:
+    import polars as pl
+
 
 def sanitize_dataframe_for_chart(pd_df: pd.DataFrame):
+    """
+    Return a sanitized copy of ``pd_df`` for charting.
+
+    The input DataFrame is copied first and only the copy is handed to the
+    helper steps below: column deduplication, timedelta-to-seconds conversion,
+    stringification of ``uuid.UUID`` columns, and conversion of the column
+    names to strings.
+    """
     sanitized_dataframe = pd_df.copy()
 
+    # Each helper is called on the copy and its return value is ignored.
     oc.pandas.utils.deduplicate_columns(sanitized_dataframe)
     _convert_timedelta_columns_to_seconds(sanitized_dataframe)
+    _convert_uuid_columns_to_string(sanitized_dataframe)
     _convert_column_names_to_string(sanitized_dataframe)
 
     return sanitized_dataframe
 
 
+def sanitize_polars_dataframe_for_chart(pl_df: "pl.DataFrame") -> "pl.DataFrame":
+    """
+    Stringify the ``Object`` columns of a polars DataFrame for charting.
+
+    polars falls back to an ``Object`` column for values it has no native type
+    for (e.g. ``uuid.UUID`` objects). Such a column converts to an opaque Arrow
+    ``FixedSizeBinary`` that VegaFusion cannot serialize to JSON, and it is not
+    meaningfully chartable as-is, so each one is mapped through ``str`` into a
+    ``String`` column. This mirrors the UUID handling that
+    :func:`sanitize_dataframe_for_chart` performs on the pandas path.
+
+    When the frame has no ``Object`` columns the input frame itself is
+    returned; otherwise the result of ``with_columns`` is returned.
+    """
+    import polars as pl
+
+    # One replacement expression per Object column, in column order.
+    replacements = []
+    for column_name, column_dtype in zip(pl_df.columns, pl_df.dtypes):
+        if column_dtype == pl.Object:
+            replacements.append(
+                pl.col(column_name).map_elements(str, return_dtype=pl.String)
+            )
+
+    return pl_df.with_columns(replacements) if replacements else pl_df
+
+
 def _convert_column_names_to_string(pd_df: pd.DataFrame):
     """
     Converts dataframe column names to strings.
@@ -24,6 +54,32 @@ def _convert_column_names_to_string(pd_df: pd.DataFrame):
     pd_df.columns = pd_df.columns.astype(str)
 
 
+def _convert_uuid_columns_to_string(pd_df: pd.DataFrame):
+    """
+    Stringify object columns that hold ``uuid.UUID`` values.
+
+    From pyarrow 24.0.0 onwards, Arrow conversion infers the canonical
+    ``arrow.uuid`` extension type (stored as ``FixedSizeBinary(16)``) for
+    object columns of ``uuid.UUID`` values, and VegaFusion's Arrow runtime
+    fails to serialize that type to JSON (``Unsupported datatype for JSON
+    serialization: FixedSizeBinary(16)``). pyarrow <= 23 produced a
+    serializable result for the same data, so converting to strings keeps
+    charting working on both sides of that change.
+
+    A column is selected when it has object dtype and its first non-null value
+    is a ``uuid.UUID``; only that one value is inspected. Within a selected
+    column, UUID values become ``str(value)`` and anything else is kept as-is.
+
+    WARNING: This function modifies the DataFrame in-place.
+    """
+
+    def _uuid_to_str(value):
+        # Nulls and any non-UUID values in the column pass through unchanged.
+        if isinstance(value, uuid.UUID):
+            return str(value)
+        return value
+
+    for column_name in pd_df.columns:
+        series = pd_df[column_name]
+        if pd.api.types.is_object_dtype(series.dtype):
+            present_values = series.dropna()
+            starts_with_uuid = not present_values.empty and isinstance(
+                present_values.iloc[0], uuid.UUID
+            )
+            if starts_with_uuid:
+                pd_df[column_name] = series.map(_uuid_to_str)
+
+
 def _convert_timedelta_columns_to_seconds(pd_sanitized_df: pd.DataFrame):
     """
     Converts timedelta columns to seconds.