Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion api/v1alpha1/cachedimage_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// CachedImageSpec defines the desired state of CachedImage.
Expand Down Expand Up @@ -79,6 +80,9 @@ type CachedImageStatus struct {
NodesTargeted int32 `json:"nodesTargeted,omitempty"`
// NodesReady is the number of nodes that have successfully pulled the image.
NodesReady int32 `json:"nodesReady,omitempty"`
// NodesPulling is the number of nodes currently pulling the image.
// +optional
NodesPulling int32 `json:"nodesPulling,omitempty"`
// CachedNodes is the list of node names that have successfully cached the image.
// +optional
CachedNodes []string `json:"cachedNodes,omitempty"`
Expand All @@ -92,6 +96,7 @@ type CachedImageStatus struct {
// +optional
LastAttemptedAt *metav1.Time `json:"lastAttemptedAt,omitempty"`
// Conditions represent the latest available observations.
// Condition types: Ready, PullProgress.
// +optional
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
Expand Down Expand Up @@ -128,5 +133,8 @@ type CachedImageList struct {
}

// init registers the CachedImage and CachedImageList types with the package
// SchemeBuilder when the package is loaded.
// NOTE(review): two registration forms appear below (a direct Register of the
// objects, and a Register of a func that calls AddKnownTypes); presumably the
// first is the line this change removes — confirm only one remains in the file.
func init() {
SchemeBuilder.Register(&CachedImage{}, &CachedImageList{})
SchemeBuilder.Register(func(s *runtime.Scheme) error {
s.AddKnownTypes(GroupVersion, &CachedImage{}, &CachedImageList{})
return nil
})
}
6 changes: 5 additions & 1 deletion api/v1alpha1/cachedimageset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// CachedImageSetSpec defines the desired state of CachedImageSet.
Expand Down Expand Up @@ -113,5 +114,8 @@ type CachedImageSetList struct {
}

// init registers the CachedImageSet and CachedImageSetList types with the
// package SchemeBuilder when the package is loaded.
// NOTE(review): two registration forms appear below (a direct Register of the
// objects, and a Register of a func that calls AddKnownTypes); presumably the
// first is the line this change removes — confirm only one remains in the file.
func init() {
SchemeBuilder.Register(&CachedImageSet{}, &CachedImageSetList{})
SchemeBuilder.Register(func(s *runtime.Scheme) error {
s.AddKnownTypes(GroupVersion, &CachedImageSet{}, &CachedImageSetList{})
return nil
})
}
6 changes: 5 additions & 1 deletion api/v1alpha1/discoverypolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// DiscoveryPolicySpec defines the desired state of DiscoveryPolicy.
Expand Down Expand Up @@ -152,5 +153,8 @@ type DiscoveryPolicyList struct {
}

// init registers the DiscoveryPolicy and DiscoveryPolicyList types with the
// package SchemeBuilder when the package is loaded.
// NOTE(review): two registration forms appear below (a direct Register of the
// objects, and a Register of a func that calls AddKnownTypes); presumably the
// first is the line this change removes — confirm only one remains in the file.
func init() {
SchemeBuilder.Register(&DiscoveryPolicy{}, &DiscoveryPolicyList{})
SchemeBuilder.Register(func(s *runtime.Scheme) error {
s.AddKnownTypes(GroupVersion, &DiscoveryPolicy{}, &DiscoveryPolicyList{})
return nil
})
}
10 changes: 8 additions & 2 deletions api/v1alpha1/groupversion_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,23 @@ limitations under the License.
package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
// GroupVersion is the group version used to register these objects.
GroupVersion = schema.GroupVersion{Group: "drop.corewire.io", Version: "v1alpha1"}

// SchemeBuilder is used to add go types to the GroupVersionKind scheme.
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)

// AddToScheme adds the types in this group-version to the given scheme.
AddToScheme = SchemeBuilder.AddToScheme
)

// addKnownTypes calls metav1.AddToGroupVersion on the given scheme for this
// package's GroupVersion. It always returns nil; the error result exists so the
// function can be passed to runtime.NewSchemeBuilder.
func addKnownTypes(s *runtime.Scheme) error {
	metav1.AddToGroupVersion(s, GroupVersion)
	return nil
}
6 changes: 5 additions & 1 deletion api/v1alpha1/pullpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// PullPolicySpec defines pacing and behavior configuration for image pulls.
Expand Down Expand Up @@ -80,5 +81,8 @@ type PullPolicyList struct {
}

// init registers the PullPolicy and PullPolicyList types with the package
// SchemeBuilder when the package is loaded.
// NOTE(review): two registration forms appear below (a direct Register of the
// objects, and a Register of a func that calls AddKnownTypes); presumably the
// first is the line this change removes — confirm only one remains in the file.
func init() {
SchemeBuilder.Register(&PullPolicy{}, &PullPolicyList{})
SchemeBuilder.Register(func(s *runtime.Scheme) error {
s.AddKnownTypes(GroupVersion, &PullPolicy{}, &PullPolicyList{})
return nil
})
}
50 changes: 36 additions & 14 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,21 @@ import (
"flag"
"os"
"path/filepath"
"time"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
_ "k8s.io/client-go/plugin/pkg/client/auth"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/certwatcher"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
Expand Down Expand Up @@ -140,7 +145,7 @@ func main() {

// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
// More info:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.4/pkg/metrics/server
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.24.1/pkg/metrics/server
// - https://book.kubebuilder.io/reference/metrics.html
metricsServerOptions := metricsserver.Options{
BindAddress: metricsAddr,
Expand All @@ -152,7 +157,7 @@ func main() {
// FilterProvider is used to protect the metrics endpoint with authn/authz.
// These configurations ensure that only authorized users and service accounts
// can access the metrics endpoint. The RBAC rules are configured in 'config/rbac/kustomization.yaml'. More info:
// https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.4/pkg/metrics/filters#WithAuthenticationAndAuthorization
// https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.24.1/pkg/metrics/filters#WithAuthenticationAndAuthorization
metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization
}

Expand Down Expand Up @@ -190,17 +195,34 @@ func main() {
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "b889acf8.corewire.io",
// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
// when the Manager ends. This requires the binary to immediately end when the
// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
// speeds up voluntary leader transitions as the new leader don't have to wait
// LeaseDuration time first.
//
// In the default scaffold provided, the program ends immediately after
// the manager stops, so would be fine to enable this option. However,
// if you are doing or is intended to do any operation such as perform cleanups
// after the manager stops then its usage might be unsafe.
// LeaderElectionReleaseOnCancel: true,
// LeaderElectionReleaseOnCancel enables voluntary leader step-down when the
// Manager ends. This significantly speeds up leader transitions since the new
// leader doesn't have to wait the full LeaseDuration. Safe here because the
// binary exits immediately after mgr.Start returns.
LeaderElectionReleaseOnCancel: true,
// Fine-grained cache sync periods (controller-runtime v0.24+): use longer
// resync intervals for stable resources (Nodes change infrequently) and shorter
// intervals for our own CRDs where timely reconciliation matters.
Cache: cache.Options{
SyncPeriod: ptr.To(10 * time.Minute), // default for all types
ByObject: map[client.Object]cache.ByObject{
&corev1.Node{}: {
SyncPeriod: ptr.To(30 * time.Minute), // nodes rarely change
},
&dropv1alpha1.CachedImage{}: {
SyncPeriod: ptr.To(5 * time.Minute), // CRDs: tighter resync
},
&dropv1alpha1.CachedImageSet{}: {
SyncPeriod: ptr.To(5 * time.Minute),
},
},
},
// Set a default field owner for SSA (Server-Side Apply) operations
// (controller-runtime v0.24+). This ensures consistent field management
// across all controllers without per-call configuration.
Client: client.Options{
FieldOwner: "drop-controller",
},
})
if err != nil {
setupLog.Error(err, "unable to start manager")
Expand All @@ -211,7 +233,7 @@ func main() {
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
PacingEngine: pacing.NewEngine(mgr.GetClient(), podNamespace),
Recorder: mgr.GetEventRecorderFor("cachedimage-controller"),
Recorder: mgr.GetEventRecorder("cachedimage-controller"),
PodNamespace: podNamespace,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CachedImage")
Expand Down
3 changes: 2 additions & 1 deletion docs/content/docs/reference/_generated_crds.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,12 @@ CachedImage is the Schema for the cachedimages API.
| `resolvedDigest` | `string` | ResolvedDigest is the sha256 digest of the image as reported by the container runtime after pull. |
| `nodesTargeted` | `int32` | NodesTargeted is the number of nodes that should have this image. |
| `nodesReady` | `int32` | NodesReady is the number of nodes that have successfully pulled the image. |
| `nodesPulling` | `int32` | NodesPulling is the number of nodes currently pulling the image. |
| `cachedNodes` | `[]string` | CachedNodes is the list of node names that have successfully cached the image. |
| `consecutiveFailures` | `int32` | ConsecutiveFailures counts sequential reconcile failures for backoff calculation. |
| `lastPulledAt` | `*metav1.Time` | LastPulledAt is the timestamp of the most recent successful pull. |
| `lastAttemptedAt` | `*metav1.Time` | LastAttemptedAt is the timestamp of the most recent pull attempt (success or failure). |
| `conditions` | `[]metav1.Condition` | Conditions represent the latest available observations. |
| `conditions` | `[]metav1.Condition` | Conditions represent the latest available observations. Condition types: Ready, PullProgress. |

---

Expand Down
3 changes: 3 additions & 0 deletions docs/content/docs/reference/_generated_metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ The drop operator exposes the following metrics:
| `drop_reconcile_total` | counter | Total number of reconciliation attempts. |
| `drop_discovery_source_health` | gauge | Whether a discovery source is reachable and queryable (1=healthy, 0=unhealthy). |
| `drop_discovery_source_latency_seconds` | histogram | Latency of discovery source queries in seconds. |
| `drop_nodes_targeted` | gauge | Number of nodes targeted by each CachedImage resource. |
| `drop_nodes_cached` | gauge | Number of nodes where the image is successfully cached. |
| `drop_consecutive_failures` | gauge | Current number of consecutive pull failures for a CachedImage. |

## Useful Queries

Expand Down
6 changes: 5 additions & 1 deletion docs/static/llms-full.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@ Controller: internal/controller/cachedimage_controller.go | Test: internal/contr
| ResolvedDigest | `resolvedDigest` | `string` | ResolvedDigest is the sha256 digest of the image as reported by the container runtime after pull. |
| NodesTargeted | `nodesTargeted` | `int32` | NodesTargeted is the number of nodes that should have this image. |
| NodesReady | `nodesReady` | `int32` | NodesReady is the number of nodes that have successfully pulled the image. |
| NodesPulling | `nodesPulling` | `int32` | NodesPulling is the number of nodes currently pulling the image. |
| CachedNodes | `cachedNodes` | `[]string` | CachedNodes is the list of node names that have successfully cached the image. |
| ConsecutiveFailures | `consecutiveFailures` | `int32` | ConsecutiveFailures counts sequential reconcile failures for backoff calculation. |
| LastPulledAt | `lastPulledAt` | `*metav1.Time` | LastPulledAt is the timestamp of the most recent successful pull. |
| LastAttemptedAt | `lastAttemptedAt` | `*metav1.Time` | LastAttemptedAt is the timestamp of the most recent pull attempt (success or failure). |
| Conditions | `conditions` | `[]metav1.Condition` | Conditions represent the latest available observations. |
| Conditions | `conditions` | `[]metav1.Condition` | Conditions represent the latest available observations. Condition types: Ready, PullProgress. |


### CachedImageSet
Expand Down Expand Up @@ -240,6 +241,9 @@ graph LR
| `drop_reconcile_total` | counter | Total number of reconciliation attempts. |
| `drop_discovery_source_health` | gauge | Whether a discovery source is reachable and queryable (1=healthy, 0=unhealthy). |
| `drop_discovery_source_latency_seconds` | histogram | Latency of discovery source queries in seconds. |
| `drop_nodes_targeted` | gauge | Number of nodes targeted by each CachedImage resource. |
| `drop_nodes_cached` | gauge | Number of nodes where the image is successfully cached. |
| `drop_consecutive_failures` | gauge | Current number of consecutive pull failures for a CachedImage. |

## Sample CRs

Expand Down
Loading
Loading