From 572ae7589a04cf10b94e43176138aaa9eb042d1b Mon Sep 17 00:00:00 2001 From: Nic Cope Date: Wed, 3 Jun 2026 17:32:12 -0700 Subject: [PATCH 1/2] Serialize models with exclude_unset, not exclude_defaults resource.update and update_status serialized Pydantic models with model_dump(exclude_defaults=True), which asks "is this field different from its default?". The correct question for server-side apply is "did the caller set this field?", which exclude_unset answers. exclude_defaults also regressed with newer datamodel-code-generator. It emits object defaults as a raw dict with validate_default=True instead of a default_factory. The default is validated into a model instance at construction, which doesn't compare equal to the declared dict default, so exclude_defaults fails to exclude it. Unset fields like spec.providerConfigRef then leaked into every composed resource. exclude_unset is immune to how a default is represented: a field the caller didn't touch is absent from model_fields_set. It also keeps fields the caller explicitly set to their default value, which is more correct under server-side apply, where setting a field claims ownership of it. The apiVersion and kind workaround stays. Functions build models with kwargs and rarely pass these, so they're unset and excluded either way. See https://github.com/crossplane/function-sdk-python/issues/207 for more detail. Signed-off-by: Nic Cope --- crossplane/function/resource.py | 19 +- tests/test_resource.py | 82 ++++++++- .../testdata/models/io/upbound/m/__init__.py | 0 .../models/io/upbound/m/aws/__init__.py | 0 .../models/io/upbound/m/aws/iam/__init__.py | 0 .../m/aws/iam/accountalias/__init__.py | 0 .../upbound/m/aws/iam/accountalias/v1beta1.py | 166 ++++++++++++++++++ 7 files changed, 252 insertions(+), 15 deletions(-) create mode 100644 tests/testdata/models/io/upbound/m/__init__.py create mode 100644 tests/testdata/models/io/upbound/m/aws/__init__.py create mode 100644 tests/testdata/models/io/upbound/m/aws/iam/__init__.py create mode 100644 tests/testdata/models/io/upbound/m/aws/iam/accountalias/__init__.py create mode 100644 tests/testdata/models/io/upbound/m/aws/iam/accountalias/v1beta1.py diff --git a/crossplane/function/resource.py b/crossplane/function/resource.py index 6b240d3..7f1298c 100644 --- a/crossplane/function/resource.py +++ b/crossplane/function/resource.py @@ -40,12 +40,13 @@ def update(r: fnv1.Resource, source: dict | structpb.Struct | pydantic.BaseModel """ match source: case pydantic.BaseModel(): - data = source.model_dump(exclude_defaults=True, warnings=False) - # In Pydantic, exclude_defaults=True in model_dump excludes fields - # that have their value equal to the default. If a field like - # apiVersion is set to its default value 's3.aws.upbound.io/v1beta2' - # (and not explicitly provided during initialization), it will be - # excluded from the serialized output. + # exclude_unset emits only the fields the caller explicitly set. + # Crossplane treats desired resources as server-side apply intent, + # so a function should own exactly the fields it has an opinion + # about and leave the rest to the API server. + data = source.model_dump(exclude_unset=True, warnings=False) + # apiVersion and kind identify the resource but are rarely passed + # as kwargs, so they're usually unset. Add them back explicitly. data["apiVersion"] = source.apiVersion data["kind"] = source.kind r.resource.update(data) @@ -71,11 +72,11 @@ def update_status( status: The status to set, as a dictionary or Pydantic model. Sets ``r.resource.status`` from the supplied status. When the status - is a Pydantic model, fields set to their default value are excluded, - matching the behavior of :func:`update`. + is a Pydantic model, fields the caller didn't explicitly set are + excluded, matching the behavior of :func:`update`. """ if isinstance(status, pydantic.BaseModel): - status = status.model_dump(exclude_defaults=True, warnings=False) + status = status.model_dump(exclude_unset=True, warnings=False) update(r, {"status": status}) diff --git a/tests/test_resource.py b/tests/test_resource.py index 88bb37a..8124a5b 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -22,7 +22,10 @@ import crossplane.function.proto.v1.run_function_pb2 as fnv1 from crossplane.function import logging, resource -from tests.testdata.models.io.upbound.aws.s3 import v1beta2 +from tests.testdata.models.io.upbound.aws.s3 import v1beta2 as s3v1beta2 +from tests.testdata.models.io.upbound.m.aws.iam.accountalias import ( + v1beta1 as accountaliasv1beta1, +) class TestResource(unittest.TestCase): @@ -59,13 +62,28 @@ class TestCase: {"apiVersion": "example.org", "kind": "XR"} ), ), - status=v1beta2.ForProvider(region="us-west-2"), + status=s3v1beta2.ForProvider(region="us-west-2"), want={ "apiVersion": "example.org", "kind": "XR", "status": {"region": "us-west-2"}, }, ), + TestCase( + reason="Fields the caller set should be kept, while unset " + "fields are omitted.", + r=fnv1.Resource( + resource=resource.dict_to_struct( + {"apiVersion": "example.org", "kind": "XR"} + ), + ), + status=s3v1beta2.ForProvider(region="us-west-2", forceDestroy=False), + want={ + "apiVersion": "example.org", + "kind": "XR", + "status": {"region": "us-west-2", "forceDestroy": False}, + }, + ), TestCase( reason="Setting status on an empty resource should work.", r=fnv1.Resource(), @@ -131,11 +149,16 @@ class TestCase: ), ), TestCase( - reason="Updating from a Pydantic model should work.", + # This model uses the default_factory form that older + # datamodel-code-generator emits for fields with an object + # default. providerConfigRef has such a default but isn't set + # here, so it must not be emitted. + reason="Updating from a Pydantic model with default_factory " + "object defaults should omit unset fields.", r=fnv1.Resource(), - source=v1beta2.Bucket( - spec=v1beta2.Spec( - forProvider=v1beta2.ForProvider(region="us-west-2"), + source=s3v1beta2.Bucket( + spec=s3v1beta2.Spec( + forProvider=s3v1beta2.ForProvider(region="us-west-2"), ), ), want=fnv1.Resource( @@ -148,6 +171,53 @@ class TestCase: ), ), ), + TestCase( + # This model uses the validate_default=True form that newer + # datamodel-code-generator emits for fields with an object + # default. providerConfigRef has such a default but isn't set + # here, so it must not be emitted. + reason="Updating from a Pydantic model with validate_default " + "object defaults should omit unset fields.", + r=fnv1.Resource(), + source=accountaliasv1beta1.AccountAlias( + spec=accountaliasv1beta1.Spec(forProvider={"x": "y"}), + ), + want=fnv1.Resource( + resource=resource.dict_to_struct( + { + "apiVersion": "iam.aws.m.upbound.io/v1beta1", + "kind": "AccountAlias", + "spec": {"forProvider": {"x": "y"}}, + } + ), + ), + ), + TestCase( + # managementPolicies defaults to ["*"] and is set to ["*"] + # here. A field the caller sets is one it has an opinion about + # and should own, even when the value equals the default. + reason="A field the caller explicitly set to its default value " + "should be emitted.", + r=fnv1.Resource(), + source=accountaliasv1beta1.AccountAlias( + spec=accountaliasv1beta1.Spec( + forProvider={"x": "y"}, + managementPolicies=["*"], + ), + ), + want=fnv1.Resource( + resource=resource.dict_to_struct( + { + "apiVersion": "iam.aws.m.upbound.io/v1beta1", + "kind": "AccountAlias", + "spec": { + "forProvider": {"x": "y"}, + "managementPolicies": ["*"], + }, + } + ), + ), + ), ] for case in cases: diff --git a/tests/testdata/models/io/upbound/m/__init__.py b/tests/testdata/models/io/upbound/m/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/models/io/upbound/m/aws/__init__.py b/tests/testdata/models/io/upbound/m/aws/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/models/io/upbound/m/aws/iam/__init__.py b/tests/testdata/models/io/upbound/m/aws/iam/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/models/io/upbound/m/aws/iam/accountalias/__init__.py b/tests/testdata/models/io/upbound/m/aws/iam/accountalias/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/models/io/upbound/m/aws/iam/accountalias/v1beta1.py b/tests/testdata/models/io/upbound/m/aws/iam/accountalias/v1beta1.py new file mode 100644 index 0000000..3d1183a --- /dev/null +++ b/tests/testdata/models/io/upbound/m/aws/iam/accountalias/v1beta1.py @@ -0,0 +1,166 @@ +# generated by datamodel-codegen: +# filename: workdir/iam_aws_m_upbound_io_v1beta1_accountalias.yaml + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import AwareDatetime, BaseModel, Field + +from ......k8s.apimachinery.pkg.apis.meta import v1 + + +class ProviderConfigRef(BaseModel): + kind: str + """ + Kind of the referenced object. + """ + name: str + """ + Name of the referenced object. + """ + + +class WriteConnectionSecretToRef(BaseModel): + name: str + """ + Name of the secret. + """ + + +class Spec(BaseModel): + forProvider: dict[str, Any] + initProvider: dict[str, Any] | None = None + """ + THIS IS A BETA FIELD. It will be honored + unless the Management Policies feature flag is disabled. + InitProvider holds the same fields as ForProvider, with the exception + of Identifier and other resource reference fields. The fields that are + in InitProvider are merged into ForProvider when the resource is created. + The same fields are also added to the terraform ignore_changes hook, to + avoid updating them after creation. This is useful for fields that are + required on creation, but we do not desire to update them after creation, + for example because of an external controller is managing them, like an + autoscaler. + """ + managementPolicies: ( + list[Literal['Observe', 'Create', 'Update', 'Delete', 'LateInitialize', '*']] + | None + ) = ['*'] + """ + THIS IS A BETA FIELD. It is on by default but can be opted out + through a Crossplane feature flag. + ManagementPolicies specify the array of actions Crossplane is allowed to + take on the managed and external resources. + See the design doc for more information: https://github.com/crossplane/crossplane/blob/499895a25d1a1a0ba1604944ef98ac7a1a71f197/design/design-doc-observe-only-resources.md?plain=1#L223 + and this one: https://github.com/crossplane/crossplane/blob/444267e84783136daa93568b364a5f01228cacbe/design/one-pager-ignore-changes.md + """ + providerConfigRef: ProviderConfigRef | None = Field( + {'kind': 'ClusterProviderConfig', 'name': 'default'}, validate_default=True + ) + """ + ProviderConfigReference specifies how the provider that will be used to + create, observe, update, and delete this managed resource should be + configured. + """ + writeConnectionSecretToRef: WriteConnectionSecretToRef | None = None + """ + WriteConnectionSecretToReference specifies the namespace and name of a + Secret to which any connection details for this managed resource should + be written. Connection details frequently include the endpoint, username, + and password required to connect to the managed resource. + """ + + +class AtProvider(BaseModel): + id: str | None = None + + +class Condition(BaseModel): + lastTransitionTime: AwareDatetime + """ + LastTransitionTime is the last time this condition transitioned from one + status to another. + """ + message: str | None = None + """ + A Message containing details about this condition's last transition from + one status to another, if any. + """ + observedGeneration: int | None = None + """ + ObservedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + """ + reason: str + """ + A Reason for this condition's last transition from one status to another. + """ + status: str + """ + Status of this condition; is it currently True, False, or Unknown? + """ + type: str + """ + Type of this condition. At most one of each condition type may apply to + a resource at any point in time. + """ + + +class Status(BaseModel): + atProvider: AtProvider | None = None + conditions: list[Condition] | None = None + """ + Conditions of the resource. + """ + observedGeneration: int | None = None + """ + ObservedGeneration is the latest metadata.generation + which resulted in either a ready state, or stalled due to error + it can not recover from without human intervention. + """ + + +class AccountAlias(BaseModel): + apiVersion: Literal['iam.aws.m.upbound.io/v1beta1'] | None = ( + 'iam.aws.m.upbound.io/v1beta1' + ) + """ + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + """ + kind: Literal['AccountAlias'] | None = 'AccountAlias' + """ + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + """ + metadata: v1.ObjectMeta | None = None + """ + Standard object's metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + """ + spec: Spec + """ + AccountAliasSpec defines the desired state of AccountAlias + """ + status: Status | None = None + """ + AccountAliasStatus defines the observed state of AccountAlias. + """ + + +class AccountAliasList(BaseModel): + apiVersion: str | None = None + """ + APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + """ + items: list[AccountAlias] + """ + List of accountaliases. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md + """ + kind: str | None = None + """ + Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + """ + metadata: v1.ListMeta | None = None + """ + Standard list metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + """ \ No newline at end of file From d18cbbb481bb9859b2f58625050d6482201cc021 Mon Sep 17 00:00:00 2001 From: Nic Cope Date: Mon, 8 Jun 2026 13:37:38 -0700 Subject: [PATCH 2/2] Pass by_alias when serializing models in update and update_status resource.update and resource.update_status serialize Pydantic models with model_dump(exclude_unset=True), which doesn't pass by_alias=True. A model field that carries a Pydantic alias is then serialized under its Python attribute name rather than its alias, which is the resource's real wire name. This bites fields whose KRM name is a Python keyword or builtin. datamodel-code-generator can't name a field bool, int, from, continue, or schema, so it emits a bool_ attribute aliased to bool, int_ aliased to int, and so on. When a function composes a resource that sets such a field, update wrote the Python name into the desired resource: data = source.model_dump(exclude_unset=True) # -> {"bool_": True}, but the resource's field is "bool" The composed resource then carried bool_: true instead of bool: true, which doesn't match the resource's schema. The API server rejects it or silently drops the unknown field, and the field the function set never takes effect. This surfaced with Kubernetes Dynamic Resource Allocation, whose device attribute value is a one-of over string, version, bool, and int. This change passes by_alias=True alongside exclude_unset=True in both functions. It's a no-op for ordinary fields, which have no alias, and corrects only the keyword-collision cases. It's also symmetric with how pydantic deserializes these models: by default an aliased field is populated only by its alias, so reads and writes now both speak wire names. Fixes #210. Signed-off-by: Nic Cope --- crossplane/function/resource.py | 15 ++- tests/test_resource.py | 121 ++++++++++++++++++ tests/testdata/models/io/k8s/api/__init__.py | 2 + .../models/io/k8s/api/resource/__init__.py | 2 + .../testdata/models/io/k8s/api/resource/v1.py | 66 ++++++++++ 5 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 tests/testdata/models/io/k8s/api/__init__.py create mode 100644 tests/testdata/models/io/k8s/api/resource/__init__.py create mode 100644 tests/testdata/models/io/k8s/api/resource/v1.py diff --git a/crossplane/function/resource.py b/crossplane/function/resource.py index 7f1298c..d597fa8 100644 --- a/crossplane/function/resource.py +++ b/crossplane/function/resource.py @@ -44,7 +44,15 @@ def update(r: fnv1.Resource, source: dict | structpb.Struct | pydantic.BaseModel # Crossplane treats desired resources as server-side apply intent, # so a function should own exactly the fields it has an opinion # about and leave the rest to the API server. - data = source.model_dump(exclude_unset=True, warnings=False) + # + # by_alias emits each field under its alias, which is its real + # wire name. datamodel-code-generator aliases fields whose KRM + # name collides with a Python keyword or builtin (e.g. it emits a + # bool_ attribute aliased to bool, continue_ aliased to continue). + # Without by_alias those fields serialize under the Python name and + # don't match the resource's schema. It's a no-op for ordinary + # fields, which have no alias. + data = source.model_dump(exclude_unset=True, by_alias=True, warnings=False) # apiVersion and kind identify the resource but are rarely passed # as kwargs, so they're usually unset. Add them back explicitly. data["apiVersion"] = source.apiVersion @@ -73,10 +81,11 @@ def update_status( Sets ``r.resource.status`` from the supplied status. When the status is a Pydantic model, fields the caller didn't explicitly set are - excluded, matching the behavior of :func:`update`. + excluded and aliased fields are emitted under their wire names, + matching the behavior of :func:`update`. """ if isinstance(status, pydantic.BaseModel): - status = status.model_dump(exclude_unset=True, warnings=False) + status = status.model_dump(exclude_unset=True, by_alias=True, warnings=False) update(r, {"status": status}) diff --git a/tests/test_resource.py b/tests/test_resource.py index 8124a5b..ffbbc2c 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -22,6 +22,7 @@ import crossplane.function.proto.v1.run_function_pb2 as fnv1 from crossplane.function import logging, resource +from tests.testdata.models.io.k8s.api.resource import v1 as resourcev1 from tests.testdata.models.io.upbound.aws.s3 import v1beta2 as s3v1beta2 from tests.testdata.models.io.upbound.m.aws.iam.accountalias import ( v1beta1 as accountaliasv1beta1, @@ -84,6 +85,21 @@ class TestCase: "status": {"region": "us-west-2", "forceDestroy": False}, }, ), + TestCase( + reason="Setting status from a Pydantic model with keyword-" + "aliased fields should emit the fields under their aliases.", + r=fnv1.Resource( + resource=resource.dict_to_struct( + {"apiVersion": "example.org", "kind": "XR"} + ), + ), + status=resourcev1.DeviceAttribute(**{"bool": True}), + want={ + "apiVersion": "example.org", + "kind": "XR", + "status": {"bool": True}, + }, + ), TestCase( reason="Setting status on an empty resource should work.", r=fnv1.Resource(), @@ -192,6 +208,51 @@ class TestCase: ), ), ), + TestCase( + # datamodel-code-generator can't name a field bool or int, so + # it emits bool_ aliased to bool and int_ aliased to int. The + # alias is the resource's real wire name, so update must emit + # fields under their aliases. + reason="Updating from a Pydantic model with keyword-aliased " + "fields should emit the fields under their aliases.", + r=fnv1.Resource(), + source=resourcev1.ResourceSlice( + spec=resourcev1.Spec( + devices=[ + resourcev1.Device( + name="gpu", + attributes={ + "powered": resourcev1.DeviceAttribute( + **{"bool": True}, + ), + "lanes": resourcev1.DeviceAttribute( + **{"int": 16}, + ), + }, + ), + ], + ), + ), + want=fnv1.Resource( + resource=resource.dict_to_struct( + { + "apiVersion": "resource.k8s.io/v1", + "kind": "ResourceSlice", + "spec": { + "devices": [ + { + "name": "gpu", + "attributes": { + "powered": {"bool": True}, + "lanes": {"int": 16}, + }, + }, + ], + }, + } + ), + ), + ), TestCase( # managementPolicies defaults to ["*"] and is set to ["*"] # here. A field the caller sets is one it has an opinion about @@ -228,6 +289,66 @@ class TestCase: "-want, +got", ) + def test_model_round_trip(self) -> None: + # A function reads an observed resource (wire names), validates it into + # a model, then writes it back via update. A field that goes in under + # its wire name must come back out under the same wire name. This pins + # the property the by_alias fix exists to guarantee: validation accepts + # the alias, and serialization must emit the alias, not the Python + # attribute name. It does not assert anything about fields the model + # doesn't define (pydantic drops them) or value types (Struct coerces + # numbers to float). + @dataclasses.dataclass + class TestCase: + reason: str + # The resource as it arrives from Crossplane, using wire names. + observed: dict + # The model type to validate the observed resource into. + model: type[pydantic.BaseModel] + + cases = [ + TestCase( + reason="A model with keyword-aliased fields should round-trip " + "through validation and update with its fields under the same " + "wire names (bool, int) they arrived under.", + observed={ + "apiVersion": "resource.k8s.io/v1", + "kind": "ResourceSlice", + "spec": { + "devices": [ + { + "name": "gpu", + "attributes": { + "powered": {"bool": True}, + "lanes": {"int": 16}, + "model": {"string": "h100"}, + }, + }, + ], + }, + }, + model=resourcev1.ResourceSlice, + ), + TestCase( + reason="A model with only ordinary fields should round-trip unchanged.", + observed={ + "apiVersion": "s3.aws.upbound.io/v1beta2", + "kind": "Bucket", + "spec": {"forProvider": {"region": "us-west-2"}}, + }, + model=s3v1beta2.Bucket, + ), + ] + + for case in cases: + # Mimic the SDK flow: a function reads an observed resource (wire + # names), validates it into a model, then writes it back out. + m = case.model.model_validate(case.observed) + r = fnv1.Resource() + resource.update(r, m) + got = resource.struct_to_dict(r.resource) + self.assertEqual(case.observed, got, case.reason) + def test_get_condition(self) -> None: @dataclasses.dataclass class TestCase: diff --git a/tests/testdata/models/io/k8s/api/__init__.py b/tests/testdata/models/io/k8s/api/__init__.py new file mode 100644 index 0000000..ba28d01 --- /dev/null +++ b/tests/testdata/models/io/k8s/api/__init__.py @@ -0,0 +1,2 @@ +# generated by datamodel-codegen: +# filename: diff --git a/tests/testdata/models/io/k8s/api/resource/__init__.py b/tests/testdata/models/io/k8s/api/resource/__init__.py new file mode 100644 index 0000000..ba28d01 --- /dev/null +++ b/tests/testdata/models/io/k8s/api/resource/__init__.py @@ -0,0 +1,2 @@ +# generated by datamodel-codegen: +# filename: diff --git a/tests/testdata/models/io/k8s/api/resource/v1.py b/tests/testdata/models/io/k8s/api/resource/v1.py new file mode 100644 index 0000000..a538e2b --- /dev/null +++ b/tests/testdata/models/io/k8s/api/resource/v1.py @@ -0,0 +1,66 @@ +# generated by datamodel-codegen: +# filename: + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, Field + +from ...apimachinery.pkg.apis.meta import v1 + + +class DeviceAttribute(BaseModel): + bool_: bool | None = Field(None, alias='bool') + """ + BoolValue is a true/false value. + """ + int_: int | None = Field(None, alias='int') + """ + IntValue is a number. + """ + string: str | None = None + """ + StringValue is a string. + """ + version: str | None = None + """ + VersionValue is a semantic version according to semver.org spec 2.0.0. + """ + + +class Device(BaseModel): + name: str + """ + Name is a unique identifier among all devices managed by the driver. + """ + attributes: dict[str, DeviceAttribute] | None = None + """ + Attributes defines the set of attributes for this device. + """ + + +class Spec(BaseModel): + devices: list[Device] | None = None + """ + Devices lists the devices in this resource slice. + """ + + +class ResourceSlice(BaseModel): + apiVersion: Literal['resource.k8s.io/v1'] | None = 'resource.k8s.io/v1' + """ + APIVersion defines the versioned schema of this representation of an object. + """ + kind: Literal['ResourceSlice'] | None = 'ResourceSlice' + """ + Kind is a string value representing the REST resource this object represents. + """ + metadata: v1.ObjectMeta | None = None + """ + Standard object's metadata. + """ + spec: Spec + """ + Contents of the ResourceSlice. + """