diff --git a/docs/concepts/cabling.md b/docs/concepts/cabling.md index 9aed1b6cd..fbdd17e25 100644 --- a/docs/concepts/cabling.md +++ b/docs/concepts/cabling.md @@ -50,6 +50,29 @@ The controller resolves the referenced `Interface`, looks up the owning `Device` compares it against the LLDP `systemName`. It then compares the referenced interface name against the LLDP `portID`, accounting for vendor-specific naming conventions (e.g. NX-OS `eth1/1` vs. `Ethernet1/1`). +#### Hostname validation with DNS + +When validating neighbor adjacencies via labels, the controller compares the remote device's hostname +against the LLDP `systemName` field. The comparison behavior depends on whether a `DNS` Kubernetes resource is +configured for the remote device: + +**Without DNS configuration:** +- The controller compares the device's `.status.hostname` directly against `systemName`. +- Example: If device hostname is `router1` and LLDP reports `systemName: router1`, validation succeeds. + +**With DNS configuration:** +- Devices, e.g., Cisco NX-OS, report the Fully Qualified Domain Name (FQDN) in LLDP when DNS is properly configured. +- The controller automatically constructs the expected FQDN by combining the device's `.status.hostname` +with the domain from the associated `DNS` Kubernetes resource. +- Example: If device hostname is `router1` and a `DNS` resource specifies domain `example.com`, the controller +validates the label against the value `systemName: router1.example.com`. + +This means that if you enable DNS on your devices, you should ensure that: +1. A `DNS` resource exists for each device that will report FQDN in LLDP. +2. The domain in the `DNS` resource matches the DNS configuration on the device. +3. LLDP neighbors will be validated using FQDN (hostname + domain) instead of just the hostname. + + ### Annotation — the operator manages only one of the interfaces When the remote end is **not** managed by the operator (e.g. a compute node that does not diff --git a/internal/controller/core/interface_controller.go b/internal/controller/core/interface_controller.go index 1e00226fb..b62e2a20e 100644 --- a/internal/controller/core/interface_controller.go +++ b/internal/controller/core/interface_controller.go @@ -17,7 +17,9 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/events" @@ -421,6 +423,23 @@ func (r *InterfaceReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Man }, }), ). + // Watches enqueues Interfaces that have neighbor labels pointing to interfaces + // on a device when the DNS resource associated with that device changes. This ensures LLDP + // validation is re-evaluated when DNS domain changes affect FQDN matching. + Watches( + &v1alpha1.DNS{}, + handler.EnqueueRequestsFromMapFunc(r.dnsToNeighborInterfaces), + builder.WithPredicates(predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + oldDNS := e.ObjectOld.(*v1alpha1.DNS) + newDNS := e.ObjectNew.(*v1alpha1.DNS) + return oldDNS.Spec.Domain != newDNS.Spec.Domain + }, + GenericFunc: func(e event.GenericEvent) bool { + return false + }, + }), + ). Complete(r) } @@ -444,12 +463,16 @@ func (interfaceUpdatePredicate) Update(e event.UpdateEvent) bool { if !ok { return true } - // Always reconcile if conditions haven't been fully initialized. - // InitializeConditions adds Ready/Configured/Operational, and paused.EnsureCondition - // adds Paused. Until all 4 are present, we must allow reconciles to complete setup. - if len(newIntf.Status.Conditions) < 4 { + + // Allow lifecycle metadata transitions to pass through so the controller can + // continue after adding/removing finalizers and when deletion starts. + if !equality.Semantic.DeepEqual(oldIntf.GetFinalizers(), newIntf.GetFinalizers()) { return true } + if !equality.Semantic.DeepEqual(oldIntf.GetDeletionTimestamp(), newIntf.GetDeletionTimestamp()) { + return true + } + oldStatus := oldIntf.Status.DeepCopy() newStatus := newIntf.Status.DeepCopy() for i := range oldStatus.Neighbors { @@ -623,13 +646,6 @@ func (r *InterfaceReconciler) reconcileInterfaceStatus(ctx context.Context, s *s // If that fails, it attempts to validate through annotation (neighbor is not managed by the operator). // Only returns an error if there is an issue during the validation process, but does not return an error if the validation fails (i.e. the adjacency is marked as invalid). func (r *InterfaceReconciler) updateNeighborAdjacenciesStatus(ctx context.Context, s *scope, status *provider.InterfaceStatus) error { - type neighborKey struct{ ChassisID, PortID string } - - existingNeighbors := make(map[neighborKey]v1alpha1.Neighbor) - for _, n := range s.Interface.Status.Neighbors { - existingNeighbors[neighborKey{n.ChassisID, n.PortID}] = n - } - log := ctrl.LoggerFrom(ctx) if len(status.LLDPAdjacencies) > 1 { log.V(1).Info("Multiple LLDP adjacencies found for a single interface, will validate each adjacency against one single label/annotation", "interface", klog.KObj(s.Interface), "adjacencyCount", len(status.LLDPAdjacencies)) @@ -709,8 +725,26 @@ func (r *InterfaceReconciler) validateLLDPAdjacencyThroughLabel(ctx context.Cont return "", fmt.Errorf("the neighbor device does not have a hostname yet, cannot validate adjacency: neighborInterface=%q", remoteIntf.Name) } - if remoteDevice.Status.Hostname != n.SystemName { - log.V(1).Info("the neighbor device hostname does not match", "expected", n.SystemName, "actual", remoteDevice.Status.Hostname) + dns := new(v1alpha1.DNSList) + if err := r.List( + ctx, dns, + client.InNamespace(remoteDevice.Namespace), + client.MatchingFields{v1alpha1.DeviceRefIndexKey: remoteDevice.Name}, + ); err != nil { + return "", fmt.Errorf("failed to list DNSes for device %q: %w", remoteDevice.Name, err) + } + + if len(dns.Items) > 1 { + return "", fmt.Errorf("multiple DNS resources found for device %q, expected at most one", remoteDevice.Name) + } + + var domain string + if len(dns.Items) == 1 { + domain = "." + dns.Items[0].Spec.Domain + } + + if !strings.EqualFold(remoteDevice.Status.Hostname+domain, n.SystemName) { + log.V(1).Info("neighbor device name does not match label reference", "actual", n.SystemName, "expected", remoteDevice.Status.Hostname+domain) return v1alpha1.NeighborDeviceMismatch, nil } @@ -1344,7 +1378,7 @@ func (r *InterfaceReconciler) interfacesForProviderConfig(ctx context.Context, o list := &v1alpha1.InterfaceList{} if err := r.List(ctx, list, client.InNamespace(obj.GetNamespace())); err != nil { - log.Error(err, "Failed to list Interfacees") + log.Error(err, "Failed to list Interfaces") return nil } @@ -1402,3 +1436,65 @@ func (r *InterfaceReconciler) deviceToInterfaces(ctx context.Context, obj client return requests } + +// dnsToNeighborInterfaces is a [handler.MapFunc] to be used to enqueue requests for reconciliation +// for Interfaces when the DNS resources associated with any neighboring device gets updated, created or deleted. +// This is relevant for LLDP adjacency validation through neighbor labels, where the neighbor's system name as +// seen in LLDP is a fully qualified domain name (FQDN). +func (r *InterfaceReconciler) dnsToNeighborInterfaces(ctx context.Context, obj client.Object) []ctrl.Request { + dns, ok := obj.(*v1alpha1.DNS) + if !ok { + panic(fmt.Sprintf("Expected a DNS but got a %T", obj)) + } + + log := ctrl.LoggerFrom(ctx, "DNS", klog.KObj(dns)) + + // remoteInterfaces is a list of all interfaces that are on a neighboring device with change in the DNS resource. + remoteInterfaces := new(v1alpha1.InterfaceList) + if err := r.List( + ctx, remoteInterfaces, + client.InNamespace(dns.Namespace), + client.MatchingFields{v1alpha1.DeviceRefIndexKey: dns.Spec.DeviceRef.Name}, + ); err != nil { + log.Error(err, "Failed to list Interfaces for device", "device", dns.Spec.DeviceRef.Name) + return nil + } + + names := make([]string, len(remoteInterfaces.Items)) + for i, intf := range remoteInterfaces.Items { + names[i] = intf.Name + } + + req, err := labels.NewRequirement( + v1alpha1.PhysicalInterfaceNeighborLabel, + selection.In, + names, + ) + if err != nil { + return nil + } + + // localInterfaces is a list of interfaces that reference the remote interfaces as neighbors through their labels. + localInterfaces := new(v1alpha1.InterfaceList) + if err := r.List( + ctx, localInterfaces, + client.InNamespace(dns.Namespace), + client.MatchingLabelsSelector{Selector: labels.NewSelector().Add(*req)}, + ); err != nil { + log.Error(err, "Failed to list local Interfaces with neighbor labels referencing remote interfaces", "remoteInterfaces", names) + return nil + } + + requests := make([]ctrl.Request, 0, len(localInterfaces.Items)) + for _, intf := range localInterfaces.Items { + log.V(2).Info("Enqueuing local Interface for reconciliation due to DNS change in neighbor device", "Interface", klog.KObj(&intf), "DNS", klog.KObj(dns)) + requests = append(requests, ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: intf.Name, + Namespace: intf.Namespace, + }, + }) + } + + return requests +} diff --git a/internal/controller/core/interface_controller_test.go b/internal/controller/core/interface_controller_test.go index 0107f8690..a250cb2af 100644 --- a/internal/controller/core/interface_controller_test.go +++ b/internal/controller/core/interface_controller_test.go @@ -1256,6 +1256,161 @@ var _ = Describe("Interface Controller", func() { }) }) + Context("When DNS domain changes on a neighboring device", func() { + var ( + localDevice *v1alpha1.Device + remoteDevice *v1alpha1.Device + localIntf *v1alpha1.Interface + remoteIntf *v1alpha1.Interface + dns *v1alpha1.DNS + ) + + BeforeEach(func() { + By("Creating local and remote Device resources") + localDevice = &v1alpha1.Device{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "local-device-", + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.DeviceSpec{ + Endpoint: v1alpha1.Endpoint{ + Address: "192.168.10.10:9339", + }, + }, + } + Expect(k8sClient.Create(ctx, localDevice)).To(Succeed()) + + remoteDevice = &v1alpha1.Device{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "remote-device-", + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.DeviceSpec{ + Endpoint: v1alpha1.Endpoint{ + Address: "192.168.10.11:9339", + }, + }, + } + Expect(k8sClient.Create(ctx, remoteDevice)).To(Succeed()) + + By("Waiting for the device controller to set hostname on remote device and patching it") + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(remoteDevice), remoteDevice)).To(Succeed()) + g.Expect(remoteDevice.Status.LastRebootTime.IsZero()).To(BeFalse()) + }).Should(Succeed()) + remoteDevice.Status.Hostname = "remote-switch" + Expect(k8sClient.Status().Update(ctx, remoteDevice)).To(Succeed()) + + By("Creating a remote Interface on the remote device") + remoteIntf = &v1alpha1.Interface{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "remote-intf-", + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.InterfaceSpec{ + DeviceRef: v1alpha1.LocalObjectReference{Name: remoteDevice.Name}, + Name: "Ethernet1/1", + AdminState: v1alpha1.AdminStateUp, + Type: v1alpha1.InterfaceTypePhysical, + }, + } + Expect(k8sClient.Create(ctx, remoteIntf)).To(Succeed()) + + By("Creating a DNS resource for the remote device") + dns = &v1alpha1.DNS{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "remote-dns-", + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.DNSSpec{ + DeviceRef: v1alpha1.LocalObjectReference{Name: remoteDevice.Name}, + Domain: "example.com", + }, + } + Expect(k8sClient.Create(ctx, dns)).To(Succeed()) + + By("Waiting for the local device controller to initialize") + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(localDevice), localDevice)).To(Succeed()) + g.Expect(localDevice.Status.LastRebootTime.IsZero()).To(BeFalse()) + }).Should(Succeed()) + + By("Configuring LLDP neighbor on the provider for the local interface") + testProvider.SetLLDPNeighbor("Ethernet1/2", "remote-switch.example.com", "aa:bb:cc:dd:ee:ff", "Ethernet1/1", 120) + + By("Creating a local Physical Interface with neighbor label pointing to the remote interface") + localIntf = &v1alpha1.Interface{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "local-intf-", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + v1alpha1.PhysicalInterfaceNeighborLabel: remoteIntf.Name, + }, + }, + Spec: v1alpha1.InterfaceSpec{ + DeviceRef: v1alpha1.LocalObjectReference{Name: localDevice.Name}, + Name: "Ethernet1/2", + AdminState: v1alpha1.AdminStateUp, + Type: v1alpha1.InterfaceTypePhysical, + }, + } + Expect(k8sClient.Create(ctx, localIntf)).To(Succeed()) + }) + + AfterEach(func() { + By("Cleaning up LLDP neighbor configuration") + testProvider.Lock() + delete(testProvider.LLDPNeighbors, "Ethernet1/2") + testProvider.Unlock() + + By("Cleaning up all Interface resources") + Expect(k8sClient.DeleteAllOf(ctx, &v1alpha1.Interface{}, client.InNamespace(metav1.NamespaceDefault))).To(Succeed()) + Eventually(func(g Gomega) { + intfList := &v1alpha1.InterfaceList{} + g.Expect(k8sClient.List(ctx, intfList, client.InNamespace(metav1.NamespaceDefault))).To(Succeed()) + g.Expect(intfList.Items).To(BeEmpty()) + }).Should(Succeed()) + + By("Cleaning up DNS resource") + if dns != nil { + Expect(k8sClient.Delete(ctx, dns)).To(Succeed()) + } + + By("Cleaning up Device resources") + if localDevice != nil { + Expect(k8sClient.Delete(ctx, localDevice, client.PropagationPolicy(metav1.DeletePropagationForeground))).To(Succeed()) + } + if remoteDevice != nil { + Expect(k8sClient.Delete(ctx, remoteDevice, client.PropagationPolicy(metav1.DeletePropagationForeground))).To(Succeed()) + } + }) + + It("Should verify LLDP neighbor and re-validate when DNS domain changes", func() { + By("Verifying the neighbor validation is Verified") + Eventually(func(g Gomega) { + resource := &v1alpha1.Interface{} + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(localIntf), resource)).To(Succeed()) + g.Expect(resource.Status.Neighbors).To(HaveLen(1)) + g.Expect(resource.Status.Neighbors[0].Validation).To(Equal(v1alpha1.NeighborVerified)) + }).Should(Succeed()) + + By("Updating the DNS domain on the remote device") + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(dns), dns)).To(Succeed()) + dns.Spec.Domain = "new.example.com" + g.Expect(k8sClient.Update(ctx, dns)).To(Succeed()) + }).Should(Succeed()) + + By("Verifying the neighbor validation changes to DeviceMismatch") + Eventually(func(g Gomega) { + resource := &v1alpha1.Interface{} + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(localIntf), resource)).To(Succeed()) + g.Expect(resource.Status.Neighbors).To(HaveLen(1)) + g.Expect(resource.Status.Neighbors[0].Validation).To(Equal(v1alpha1.NeighborDeviceMismatch)) + }).Should(Succeed()) + }) + }) + Context("Interface Update Predicate", func() { var p interfaceUpdatePredicate @@ -1341,7 +1496,7 @@ var _ = Describe("Interface Controller", func() { Expect(p.Update(e)).To(BeTrue()) }) - It("Should allow update when generation changes", func() { + It("Should not rely on this predicate for generation changes", func() { oldIntf := &v1alpha1.Interface{ ObjectMeta: metav1.ObjectMeta{Generation: 1}, Status: v1alpha1.InterfaceStatus{ @@ -1355,10 +1510,10 @@ var _ = Describe("Interface Controller", func() { newIntf.Status.Neighbors[0].ExpirationTime = metav1.NewTime(time.Now().Add(120 * time.Second)) e := event.UpdateEvent{ObjectOld: oldIntf, ObjectNew: newIntf} - Expect(p.Update(e)).To(BeTrue()) + Expect(p.Update(e)).To(BeFalse()) }) - It("Should allow update when conditions are not fully initialized", func() { + It("Should block no-op update when conditions are not fully initialized", func() { oldIntf := &v1alpha1.Interface{ Status: v1alpha1.InterfaceStatus{ Conditions: []metav1.Condition{ @@ -1369,6 +1524,25 @@ var _ = Describe("Interface Controller", func() { } newIntf := oldIntf.DeepCopy() + e := event.UpdateEvent{ObjectOld: oldIntf, ObjectNew: newIntf} + Expect(p.Update(e)).To(BeFalse()) + }) + + It("Should allow update when finalizers change", func() { + oldIntf := &v1alpha1.Interface{} + newIntf := oldIntf.DeepCopy() + newIntf.Finalizers = []string{v1alpha1.FinalizerName} + + e := event.UpdateEvent{ObjectOld: oldIntf, ObjectNew: newIntf} + Expect(p.Update(e)).To(BeTrue()) + }) + + It("Should allow update when deletion timestamp changes", func() { + oldIntf := &v1alpha1.Interface{} + newIntf := oldIntf.DeepCopy() + now := metav1.NewTime(time.Now()) + newIntf.DeletionTimestamp = &now + e := event.UpdateEvent{ObjectOld: oldIntf, ObjectNew: newIntf} Expect(p.Update(e)).To(BeTrue()) }) diff --git a/internal/controller/core/suite_test.go b/internal/controller/core/suite_test.go index 0244c354c..d7cf389fd 100644 --- a/internal/controller/core/suite_test.go +++ b/internal/controller/core/suite_test.go @@ -958,9 +958,11 @@ func (p *Provider) SetLLDPNeighbor(interfaceName, sysName, chassisID, portID str p.Lock() defer p.Unlock() p.LLDPNeighbors[interfaceName] = &provider.LLDPAdjacency{ - SysName: sysName, - ChassisID: chassisID, - PortID: portID, - TTL: time.Duration(ttl) * time.Second, + SysName: sysName, + ChassisID: chassisID, + ChassisIDType: 4, // MACAddress + PortID: portID, + PortIDType: 7, // Local + TTL: time.Duration(ttl) * time.Second, } }