diff --git a/pkg/datagatherer/k8sdynamic/dynamic.go b/pkg/datagatherer/k8sdynamic/dynamic.go
index da805b07..0f2532f2 100644
--- a/pkg/datagatherer/k8sdynamic/dynamic.go
+++ b/pkg/datagatherer/k8sdynamic/dynamic.go
@@ -82,6 +82,10 @@ type ConfigDynamic struct {
 	FieldSelectors []string `yaml:"field-selectors"`
 	// LabelSelectors is a list of label selectors to use when listing this resource
 	LabelSelectors []string `yaml:"label-selectors"`
+	// ExcludeAnnotationKeysRegex lists regular expressions; a resource with any matching annotation key is left out of the gathered data.
+	ExcludeAnnotationKeysRegex []string `yaml:"excludeAnnotationKeysRegex"`
+	// ExcludeLabelKeysRegex lists regular expressions; a resource with any matching label key is left out of the gathered data.
+	ExcludeLabelKeysRegex []string `yaml:"excludeLabelKeysRegex"`
 }
 
 // UnmarshalYAML unmarshals the ConfigDynamic resolving GroupVersionResource.
@@ -93,10 +97,12 @@ func (c *ConfigDynamic) UnmarshalYAML(unmarshal func(any) error) error {
 			Version  string `yaml:"version"`
 			Resource string `yaml:"resource"`
 		} `yaml:"resource-type"`
-		ExcludeNamespaces []string `yaml:"exclude-namespaces"`
-		IncludeNamespaces []string `yaml:"include-namespaces"`
-		FieldSelectors    []string `yaml:"field-selectors"`
-		LabelSelectors    []string `yaml:"label-selectors"`
+		ExcludeNamespaces          []string `yaml:"exclude-namespaces"`
+		IncludeNamespaces          []string `yaml:"include-namespaces"`
+		FieldSelectors             []string `yaml:"field-selectors"`
+		LabelSelectors             []string `yaml:"label-selectors"`
+		ExcludeAnnotationKeysRegex []string `yaml:"excludeAnnotationKeysRegex"`
+		ExcludeLabelKeysRegex      []string `yaml:"excludeLabelKeysRegex"`
 	}{}
 	err := unmarshal(&aux)
 	if err != nil {
@@ -111,6 +117,8 @@ func (c *ConfigDynamic) UnmarshalYAML(unmarshal func(any) error) error {
 	c.IncludeNamespaces = aux.IncludeNamespaces
 	c.FieldSelectors = aux.FieldSelectors
 	c.LabelSelectors = aux.LabelSelectors
+	c.ExcludeAnnotationKeysRegex = aux.ExcludeAnnotationKeysRegex
+	c.ExcludeLabelKeysRegex = aux.ExcludeLabelKeysRegex
 
 	return nil
 }
@@ -146,6 +154,18 @@ func (c *ConfigDynamic) 
validate() error {
 		}
 	}
 
+	for i, r := range c.ExcludeAnnotationKeysRegex {
+		if _, err := regexp.Compile(r); err != nil {
+			errs = append(errs, fmt.Sprintf("invalid excludeAnnotationKeysRegex[%d]: %s", i, err))
+		}
+	}
+
+	for i, r := range c.ExcludeLabelKeysRegex {
+		if _, err := regexp.Compile(r); err != nil {
+			errs = append(errs, fmt.Sprintf("invalid excludeLabelKeysRegex[%d]: %s", i, err))
+		}
+	}
+
 	if len(errs) > 0 {
 		return errors.New(strings.Join(errs, ", "))
 	}
@@ -309,6 +329,21 @@ func (c *ConfigDynamic) newDataGathererWithClient(ctx context.Context, cl dynami
 	}
 	newDataGatherer.registration = registration
 
+	for _, r := range c.ExcludeAnnotationKeysRegex {
+		compiled, err := regexp.Compile(r)
+		if err != nil {
+			return nil, fmt.Errorf("invalid excludeAnnotationKeysRegex %q: %w", r, err)
+		}
+		newDataGatherer.ExcludeAnnotKeys = append(newDataGatherer.ExcludeAnnotKeys, compiled)
+	}
+	for _, r := range c.ExcludeLabelKeysRegex {
+		compiled, err := regexp.Compile(r)
+		if err != nil {
+			return nil, fmt.Errorf("invalid excludeLabelKeysRegex %q: %w", r, err)
+		}
+		newDataGatherer.ExcludeLabelKeys = append(newDataGatherer.ExcludeLabelKeys, compiled)
+	}
+
 	return newDataGatherer, nil
 }
 
@@ -423,6 +458,8 @@ func (g *DataGathererDynamic) Fetch(ctx context.Context) (any, int, error) {
 		return nil, -1, fmt.Errorf("failed to parse cached resource")
 	}
 
+	items = g.excludeResources(items)
+
 	// Redact Secret data (which may include encrypting it if enabled)
 	err := g.redactList(ctx, items)
 	if err != nil {
@@ -434,6 +471,57 @@ func (g *DataGathererDynamic) Fetch(ctx context.Context) (any, int, error) {
 	}, len(items), nil
 }
 
+// excludeResources drops any resource whose annotation or label keys match the
+// configured exclusion patterns. This is distinct from redactList, which strips
+// matching keys from kept resources.
+//
+// The filtering happens in place: list's backing array is reused, so callers
+// must continue with the returned slice rather than the one they passed in.
+//
+// Note: the work done here scales with the number of resources, annotations,
+// labels, and exclusion rules, as well as the complexity of each regex.
+// A user configuring many complex patterns against a large cluster may see
+// a meaningful CPU cost.
+func (g *DataGathererDynamic) excludeResources(list []*api.GatheredResource) []*api.GatheredResource {
+	if len(g.ExcludeAnnotKeys) == 0 && len(g.ExcludeLabelKeys) == 0 {
+		return list
+	}
+	return slices.DeleteFunc(list, g.resourceMatchesExclusionKeys)
+}
+
+// resourceMatchesExclusionKeys reports whether item carries an annotation or
+// label key matching any configured exclusion pattern. Every resource that
+// exposes GetAnnotations and GetLabels is inspected, so the check is not
+// limited to *unstructured.Unstructured; any other resource is kept.
+func (g *DataGathererDynamic) resourceMatchesExclusionKeys(item *api.GatheredResource) bool {
+	res, ok := item.Resource.(interface {
+		GetAnnotations() map[string]string
+		GetLabels() map[string]string
+	})
+	if !ok {
+		return false
+	}
+
+	return anyKeyMatches(res.GetAnnotations(), g.ExcludeAnnotKeys) ||
+		anyKeyMatches(res.GetLabels(), g.ExcludeLabelKeys)
+}
+
+// anyKeyMatches reports whether at least one key of m matches at least one of
+// patterns. It returns false when m or patterns is empty.
+func anyKeyMatches(m map[string]string, patterns []*regexp.Regexp) bool {
+	if len(patterns) == 0 {
+		return false
+	}
+	for key := range m {
+		for _, p := range patterns {
+			if p.MatchString(key) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
 // redactList removes sensitive and superfluous data from the supplied resource list.
 // All resources have superfluous managed-data fields removed.
 // All resources have sensitive labels and annotations removed.
diff --git a/pkg/datagatherer/k8sdynamic/dynamic_test.go b/pkg/datagatherer/k8sdynamic/dynamic_test.go
index 9b4651eb..b94d1d5b 100644
--- a/pkg/datagatherer/k8sdynamic/dynamic_test.go
+++ b/pkg/datagatherer/k8sdynamic/dynamic_test.go
@@ -300,6 +300,34 @@ label-selectors:
 		t.Errorf("LabelSelectors does not match: got=%+v want=%+v", got, want)
 	}
 }
+func TestUnmarshalDynamicConfig_ExclusionRegex(t *testing.T) {
+	// Verify that the per-gatherer excludeAnnotationKeysRegex and
+	// excludeLabelKeysRegex fields are parsed from YAML.
+	textCfg := `
+resource-type:
+  version: v1
+  resource: secrets
+excludeAnnotationKeysRegex:
+  - '^openshift\.io.*$'
+  - '^kapp\.k14s\.io/.*$'
+excludeLabelKeysRegex:
+  - '^company\.com/employee-id$'
+`
+	cfg := ConfigDynamic{}
+	if err := yaml.Unmarshal([]byte(textCfg), &cfg); err != nil {
+		t.Fatalf("unexpected error: %+v", err)
+	}
+
+	expectedAnnot := []string{`^openshift\.io.*$`, `^kapp\.k14s\.io/.*$`}
+	expectedLabel := []string{`^company\.com/employee-id$`}
+
+	if got, expected := cfg.ExcludeAnnotationKeysRegex, expectedAnnot; !reflect.DeepEqual(got, expected) {
+		t.Errorf("ExcludeAnnotationKeysRegex: got=%v want=%v", got, expected)
+	}
+	if got, expected := cfg.ExcludeLabelKeysRegex, expectedLabel; !reflect.DeepEqual(got, expected) {
+		t.Errorf("ExcludeLabelKeysRegex: got=%v want=%v", got, expected)
+	}
+}
 
 func TestConfigDynamicValidate(t *testing.T) {
 	tests := []struct {
@@ -345,6 +373,20 @@ func TestConfigDynamicValidate(t *testing.T) {
 			},
 			ExpectedError: "invalid field selector 0: invalid selector: 'foo'; can't understand 'foo'",
 		},
+		{
+			Config: ConfigDynamic{
+				GroupVersionResource:       schema.GroupVersionResource{Version: "v1", Resource: "secrets"},
+				ExcludeAnnotationKeysRegex: []string{`^[0-9$`},
+			},
+			ExpectedError: "invalid excludeAnnotationKeysRegex[0]",
+		},
+		{
+			Config: ConfigDynamic{
+				GroupVersionResource:  schema.GroupVersionResource{Version: "v1", Resource: "secrets"},
+				ExcludeLabelKeysRegex: []string{`^[0-9$`},
+			},
+			ExpectedError: "invalid excludeLabelKeysRegex[0]",
+		},
 	}
 
 	for _, test := range tests {
@@ -763,6 +805,48 @@ func TestDynamicGatherer_Fetch(t *testing.T) {
 				map[string]any{"prod": "true"},
 			)}},
 		},
+		"per-gatherer excludeAnnotationKeysRegex excludes matching resources entirely": {
+			// Resources annotated with openshift.io/* should not appear in the
+			// output at all, not just have those keys stripped.
+			config: ConfigDynamic{
+				GroupVersionResource:       schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"},
+				ExcludeAnnotationKeysRegex: []string{`^openshift\.io.*$`},
+			},
+			addObjects: []*unstructured.Unstructured{
+				getObjectAnnot("v1", "Secret", "excluded", "ns",
+					map[string]any{"openshift.io/discovery": "ignore", "other": "kept"},
+					map[string]any{},
+				),
+				getObjectAnnot("v1", "Secret", "included", "ns",
+					map[string]any{"other": "kept"},
+					map[string]any{},
+				),
+			},
+			expected: []*api.GatheredResource{{Resource: getObjectAnnot("v1", "Secret", "included", "ns",
+				map[string]any{"other": "kept"},
+				map[string]any{},
+			)}},
+		},
+		"per-gatherer excludeLabelKeysRegex excludes matching resources entirely": {
+			config: ConfigDynamic{
+				GroupVersionResource:  schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"},
+				ExcludeLabelKeysRegex: []string{`^discovery\.venafi\.com/exclude$`},
+			},
+			addObjects: []*unstructured.Unstructured{
+				getObjectAnnot("v1", "Secret", "excluded", "ns",
+					map[string]any{},
+					map[string]any{"discovery.venafi.com/exclude": "true", "other": "kept"},
+				),
+				getObjectAnnot("v1", "Secret", "included", "ns",
+					map[string]any{},
+					map[string]any{"other": "kept"},
+				),
+			},
+			expected: []*api.GatheredResource{{Resource: getObjectAnnot("v1", "Secret", "included", "ns",
+				map[string]any{},
+				map[string]any{"other": "kept"},
+			)}},
+		},
 	}
 
 	for name, tc := range tests {
@@ -967,6 +1051,61 @@ func compareEncryptedData(t *testing.T, privKey *stdrsa.PrivateKey, got *unstruc
 	unstructured.RemoveNestedField(got.Object, encryptedDataFieldName)
 }
 
+// TestExcludeAnnotKeys_ExcludesResourcesFromUpload verifies that resources
+// whose annotation keys match ExcludeAnnotKeys are dropped entirely from
+// Fetch() results, not just have those keys stripped.
+func TestExcludeAnnotKeys_ExcludesResourcesFromUpload(t *testing.T) {
+	ctx := t.Context()
+
+	gvrToListKind := map[schema.GroupVersionResource]string{
+		{Group: "", Version: "v1", Resource: "secrets"}: "UnstructuredList",
+	}
+
+	// "excluded" has a matching annotation key; "included" does not.
+	excluded := getObjectAnnot("v1", "Secret", "excluded", "ns",
+		map[string]any{"openshift.io/discovery": "ignore"},
+		map[string]any{},
+	)
+	included := getObjectAnnot("v1", "Secret", "included", "ns",
+		map[string]any{"other": "kept"},
+		map[string]any{},
+	)
+
+	cl := fake.NewSimpleDynamicClientWithCustomListKinds(
+		runtime.NewScheme(), gvrToListKind, excluded, included,
+	)
+
+	cfg := ConfigDynamic{
+		GroupVersionResource: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"},
+	}
+	dg, err := cfg.newDataGathererWithClient(ctx, cl, nil)
+	require.NoError(t, err)
+
+	// Checked assertion: an unexpected concrete type fails this test with a
+	// clear message instead of panicking the whole test binary.
+	dgd, ok := dg.(*DataGathererDynamic)
+	require.True(t, ok, "expected *DataGathererDynamic, got %T", dg)
+	dgd.ExcludeAnnotKeys = []*regexp.Regexp{regexp.MustCompile(`^openshift\.io/.*$`)}
+
+	// ctx comes from t.Context(), so it is cancelled when the test finishes;
+	// the error returned by Run is deliberately ignored.
+	go func() { _ = dg.Run(ctx) }()
+	require.NoError(t, dgd.WaitForCacheSync(ctx))
+
+	res, count, err := dg.Fetch(ctx)
+	require.NoError(t, err)
+
+	data, ok := res.(*api.DynamicData)
+	require.True(t, ok)
+
+	assert.Equal(t, 1, count, "only the non-matching resource should be returned")
+	if assert.Len(t, data.Items, 1) {
+		got, ok := data.Items[0].Resource.(*unstructured.Unstructured)
+		require.True(t, ok, "expected *unstructured.Unstructured, got %T", data.Items[0].Resource)
+		assert.Equal(t, "included", got.GetName(), "the resource with matching annotation key should be excluded")
+	}
+}
+
 func TestDynamicGathererNativeResources_Fetch(t *testing.T) {
 	// start a k8s client
 	// init the datagatherer's informer with the client