diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 84d5222..68d6912 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,6 +10,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 + with: + go-version-file: go.mod - name: Test run: go test ./... diff --git a/.gitignore b/.gitignore index 0063ff9..a7b4b12 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ +plugin-github-repositories .idea/ dist/ .DS_Store plugin .env -.envrc \ No newline at end of file +.envrc diff --git a/README.md b/README.md index 7fce516..523da99 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ Fetches information regarding the repository, including - Repository metadata and settings - Configured workflows - Recent workflow runs +- Optional direct Go dependency health and supply-chain visibility facts This plugin is intended to be run as part of an aggregate agent, and will execute the policy suite for each repository. @@ -14,11 +15,14 @@ To authenticate this plugin, you must provide a token which has at minimum the f - Actions (read-only) - Used to pull workflow jobs and success - Administration (read-only) - Used to check configuration and rulesets for a repository +- Contents (read-only) - Used to read repository files such as `go.mod` when dependency health collection is enabled - Metadata (read-only) - Required by GitHub - Pull Requests (read-only) - Used to pull PRs and status - Secret scanning alerts (read-only) - Used to check if secrets have been found - Secret scanning push protection bypass requests (read-only) - Used to check the process of any bypass requests +When dependency health collection is enabled, the token also uses repository contents, Actions, pull requests, license, and dependency graph/SBOM APIs against resolved public GitHub dependency repositories. 
Missing permissions or unavailable upstream data for resolved dependency repositories are recorded as dependency-level collection gaps and do not fail the repository evaluation. + ## Configuration ```yaml @@ -32,8 +36,20 @@ plugins: # Alternatively, these can be limited via the PAT configuration included_repositories: foo,bar,baz excluded_repositories: quix,quiz + # Optional dependency health collection. Disabled by default to avoid extra GitHub API usage. + dependency_health_enabled: "false" + dependency_health_max_dependencies: "50" + dependency_health_closed_pr_lookback_days: "180" + dependency_health_include_unresolved: "true" + dependency_health_collect_sbom: "true" + dependency_health_pr_interaction_sample_size: "20" ``` +Dependency health collection currently parses direct `go.mod` dependencies only. It resolves module paths that start with `github.com/{owner}/{repo}` and collects public upstream repository health signals. +Dependency policies are now evaluated using policy behavior metadata from the request (`dependency` behavior), and dependency inputs expose repository/dependency context under `input.dependency` and `input.repository` with request policy data available at `input.policy_data`. This can add several GitHub API calls per direct dependency, so enable it only for policy collections that need dependency evidence. + +Policy input migration: use request `policy_data` for new policy-specific inputs. The legacy plugin config key `policy_input` is still accepted as a JSON string fallback when request `policy_data` is not provided, and repository policy evaluation exposes the same data under both `input.policy_data` and the legacy `input.policy_input` key for compatibility. If both `policy_data` and `policy_input` are provided, `policy_data` is used. + ## Integration testing This plugin contains unit tests as well as integration tests. 
diff --git a/dependencies.go b/dependencies.go new file mode 100644 index 0000000..f812238 --- /dev/null +++ b/dependencies.go @@ -0,0 +1,727 @@ +package main + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "github.com/google/go-github/v71/github" + "golang.org/x/mod/modfile" +) + +const ( + dependencyEcosystemGo = "go" + dependencySourceGoMod = "go.mod" + dependencyPRPageSize = 100 + dependencyPRMaxPages = 1 +) + +type goModuleDependency struct { + Name string + Version string + Direct bool +} + +// GatherRepositoryDependencies returns collected direct dependency facts for a repository. +func (l *GithubReposPlugin) GatherRepositoryDependencies(ctx context.Context, repo *github.Repository) []*RepositoryDependency { + dependencies, err := l.gatherRepositoryDependencies(ctx, repo, nil) + if err != nil && l.Logger != nil { + l.Logger.Warn("dependency collection failed", "repo", repo.GetFullName(), "error", err) + } + return dependencies +} + +func (l *GithubReposPlugin) gatherRepositoryDependencies(ctx context.Context, repo *github.Repository, onDependency func(*RepositoryDependency) error) ([]*RepositoryDependency, error) { + if repo == nil { + return nil, nil + } + if l.config == nil { + return nil, fmt.Errorf("github repositories plugin is not configured") + } + if l.githubClient == nil { + return nil, fmt.Errorf("github client is not configured") + } + + l.Logger.Debug("Fetching go.mod for dependency collection", "repo", repo.GetFullName(), "ref", repo.GetDefaultBranch()) + content, err := l.fetchGoMod(ctx, repo) + if err != nil { + l.Logger.Warn("failed to fetch go.mod for dependency collection", "repo", repo.GetFullName(), "error", err) + if onDependency != nil { + dep := newDependencyCollectionGap("go_mod_fetch", err) + if emitErr := emitDependency(dep, onDependency); emitErr != nil { + return []*RepositoryDependency{dep}, emitErr + } + return []*RepositoryDependency{dep}, nil + } + return nil, nil + } + if content == "" { + l.Logger.Debug("No 
go.mod content found for dependency collection", "repo", repo.GetFullName()) + if onDependency != nil { + dep := newDependencyCollectionGap("go_mod_fetch", fmt.Errorf("go.mod content unavailable")) + if emitErr := emitDependency(dep, onDependency); emitErr != nil { + return []*RepositoryDependency{dep}, emitErr + } + return []*RepositoryDependency{dep}, nil + } + return nil, nil + } + + modDeps, err := parseGoModDirectDependencies([]byte(content)) + if err != nil { + l.Logger.Warn("failed to parse go.mod for dependency collection", "repo", repo.GetFullName(), "error", err) + if onDependency != nil { + dep := newDependencyCollectionGap("go_mod_parse", err) + if emitErr := emitDependency(dep, onDependency); emitErr != nil { + return []*RepositoryDependency{dep}, emitErr + } + return []*RepositoryDependency{dep}, nil + } + return nil, nil + } + l.Logger.Debug("Parsed direct go.mod dependencies", "repo", repo.GetFullName(), "dependencies", len(modDeps)) + + if len(modDeps) > l.config.dependencyHealthMaxDependencies { + l.Logger.Debug( + "Truncating dependency collection to configured maximum", + "repo", repo.GetFullName(), + "parsed_dependencies", len(modDeps), + "max_dependencies", l.config.dependencyHealthMaxDependencies, + ) + modDeps = modDeps[:l.config.dependencyHealthMaxDependencies] + } + + dependencies := make([]*RepositoryDependency, 0, len(modDeps)) + repositoryFacts := make(map[string]*RepositoryDependency) + resolved := 0 + unresolved := 0 + for _, modDep := range modDeps { + dep := newRepositoryDependency(modDep) + resolveDependencyRepository(dep) + if dep.Repository.Resolved { + resolved++ + cacheKey := dependencyRepositoryCacheKey(dep) + if cached, ok := repositoryFacts[cacheKey]; ok { + l.Logger.Debug( + "Reusing cached dependency repository facts", + "repo", repo.GetFullName(), + "dependency", dep.Name, + "dependency_repo", dep.Repository.URL, + ) + copyDependencyRepositoryFacts(dep, cached) + dependencies = append(dependencies, dep) + if err := 
emitDependency(dep, onDependency); err != nil { + return dependencies, err + } + continue + } + l.Logger.Debug( + "Collecting dependency repository facts", + "repo", repo.GetFullName(), + "dependency", dep.Name, + "dependency_repo", dep.Repository.URL, + ) + l.collectDependencyRepositoryFacts(ctx, dep) + repositoryFacts[cacheKey] = cloneRepositoryDependency(dep) + } else if !l.config.dependencyHealthIncludeUnresolved { + unresolved++ + l.Logger.Debug("Skipping unresolved dependency", "repo", repo.GetFullName(), "dependency", dep.Name) + continue + } else { + unresolved++ + l.Logger.Debug("Including unresolved dependency", "repo", repo.GetFullName(), "dependency", dep.Name) + } + dependencies = append(dependencies, dep) + if err := emitDependency(dep, onDependency); err != nil { + return dependencies, err + } + } + l.Logger.Debug( + "Dependency collection finished", + "repo", repo.GetFullName(), + "dependencies", len(dependencies), + "resolved", resolved, + "unresolved", unresolved, + ) + + return dependencies, nil +} + +func emitDependency(dep *RepositoryDependency, onDependency func(*RepositoryDependency) error) error { + if onDependency == nil { + return nil + } + return onDependency(dep) +} + +func dependencyRepositoryCacheKey(dep *RepositoryDependency) string { + if dep == nil || dep.Repository == nil { + return "" + } + return strings.ToLower(fmt.Sprintf("%s/%s/%s", dep.Repository.Provider, dep.Repository.Owner, dep.Repository.Name)) +} + +func copyDependencyRepositoryFacts(target, source *RepositoryDependency) { + if target == nil || source == nil { + return + } + target.Health = cloneDependencyHealth(source.Health) + target.SupplyChain = cloneDependencySupplyChain(source.SupplyChain) + target.CollectionStatus = cloneDependencyCollectionStatus(source.CollectionStatus) + if target.CollectionStatus == nil { + target.CollectionStatus = &DependencyCollectionStatus{} + } + target.CollectionStatus.DependencyParsed = true + target.CollectionStatus.RepositoryResolved 
= target.Repository != nil && target.Repository.Resolved +} + +func cloneRepositoryDependency(dep *RepositoryDependency) *RepositoryDependency { + if dep == nil { + return nil + } + cloned := *dep + cloned.Repository = cloneDependencyRepository(dep.Repository) + cloned.Health = cloneDependencyHealth(dep.Health) + cloned.SupplyChain = cloneDependencySupplyChain(dep.SupplyChain) + cloned.CollectionStatus = cloneDependencyCollectionStatus(dep.CollectionStatus) + return &cloned +} + +func cloneDependencyRepository(repo *DependencyRepository) *DependencyRepository { + if repo == nil { + return nil + } + cloned := *repo + return &cloned +} + +func cloneDependencyHealth(health *DependencyHealth) *DependencyHealth { + if health == nil { + return nil + } + cloned := *health + if health.LatestRelease != nil { + release := *health.LatestRelease + release.PublishedAt = cloneTimePtr(health.LatestRelease.PublishedAt) + cloned.LatestRelease = &release + } + if health.LatestCommit != nil { + commit := *health.LatestCommit + commit.CommittedAt = cloneTimePtr(health.LatestCommit.CommittedAt) + cloned.LatestCommit = &commit + } + if health.Workflows != nil { + workflows := *health.Workflows + if health.Workflows.LatestDefaultBranchRun != nil { + run := *health.Workflows.LatestDefaultBranchRun + run.CreatedAt = cloneTimePtr(health.Workflows.LatestDefaultBranchRun.CreatedAt) + workflows.LatestDefaultBranchRun = &run + } + cloned.Workflows = &workflows + } + if health.PullRequests != nil { + pullRequests := *health.PullRequests + pullRequests.OldestOpenCreatedAt = cloneTimePtr(health.PullRequests.OldestOpenCreatedAt) + pullRequests.MedianDaysToClose = cloneFloat64Ptr(health.PullRequests.MedianDaysToClose) + pullRequests.MedianHoursToFirstInteraction = cloneFloat64Ptr(health.PullRequests.MedianHoursToFirstInteraction) + cloned.PullRequests = &pullRequests + } + return &cloned +} + +func cloneDependencySupplyChain(supplyChain *DependencySupplyChain) *DependencySupplyChain { + if 
// cloneTimePtr returns a pointer to a fresh copy of *value, or nil when value
// is nil, so the result can be modified without affecting the original.
func cloneTimePtr(value *time.Time) *time.Time {
	if value != nil {
		dup := new(time.Time)
		*dup = *value
		return dup
	}
	return nil
}
// resolveGitHubModulePath extracts the owner and repository name from a Go
// module path of the form "github.com/{owner}/{repo}[/...]".
//
// Anything after the repository segment (sub-packages, major version suffixes
// such as "/v71") is ignored. The boolean result is false, with empty strings,
// when the path does not start with "github.com/" or when the owner or
// repository segment is missing or empty.
func resolveGitHubModulePath(modulePath string) (string, string, bool) {
	host, rest, found := strings.Cut(modulePath, "/")
	if !found || host != "github.com" {
		return "", "", false
	}

	owner, rest, found := strings.Cut(rest, "/")
	if !found || owner == "" {
		return "", "", false
	}

	// Only the first segment after the owner names the repository.
	name, _, _ := strings.Cut(rest, "/")
	if name == "" {
		return "", "", false
	}
	return owner, name, true
}
l.githubClient.Repositories.Get(ctx, owner, name) + if err != nil { + l.recordDependencyCollectionError(dep, "repository", err) + return + } + + dep.Health.RepositoryArchived = repo.GetArchived() + + healthErrorCount := len(dep.CollectionStatus.Errors) + l.collectDependencyRelease(ctx, dep) + l.collectDependencyCommit(ctx, dep, repo.GetDefaultBranch()) + l.collectDependencyWorkflows(ctx, dep, repo.GetDefaultBranch()) + l.collectDependencyPullRequests(ctx, dep) + dep.CollectionStatus.HealthCollected = len(dep.CollectionStatus.Errors) == healthErrorCount + + l.collectDependencyLicense(ctx, dep) + if l.config.dependencyHealthCollectSBOM { + l.collectDependencySBOM(ctx, dep) + } +} + +func (l *GithubReposPlugin) collectDependencyRelease(ctx context.Context, dep *RepositoryDependency) { + release, resp, err := l.githubClient.Repositories.GetLatestRelease(ctx, dep.Repository.Owner, dep.Repository.Name) + if err != nil { + if resp != nil && resp.Response != nil && resp.StatusCode == 404 { + return + } + l.recordDependencyCollectionError(dep, "release", err) + return + } + if release == nil { + return + } + dep.Health.LatestRelease = &DependencyRelease{ + Tag: release.GetTagName(), + PublishedAt: githubTimestampTime(release.PublishedAt), + } +} + +func (l *GithubReposPlugin) collectDependencyCommit(ctx context.Context, dep *RepositoryDependency, defaultBranch string) { + commits, _, err := l.githubClient.Repositories.ListCommits(ctx, dep.Repository.Owner, dep.Repository.Name, &github.CommitsListOptions{ + SHA: defaultBranch, + ListOptions: github.ListOptions{PerPage: 1}, + }) + if err != nil { + l.recordDependencyCollectionError(dep, "commit", err) + return + } + if len(commits) == 0 || commits[0] == nil { + return + } + dep.Health.LatestCommit = &DependencyCommit{ + SHA: commits[0].GetSHA(), + } + if commits[0].Commit != nil && commits[0].Commit.Committer != nil { + dep.Health.LatestCommit.CommittedAt = githubTimestampTime(commits[0].Commit.Committer.Date) + } +} + +func 
(l *GithubReposPlugin) collectDependencyWorkflows(ctx context.Context, dep *RepositoryDependency, defaultBranch string) { + workflows, _, err := l.githubClient.Actions.ListWorkflows(ctx, dep.Repository.Owner, dep.Repository.Name, nil) + if err != nil { + l.recordDependencyCollectionError(dep, "workflows", err) + return + } + summary := &DependencyWorkflowSummary{} + if workflows != nil { + summary.Count = workflows.GetTotalCount() + } + + runs, _, err := l.githubClient.Actions.ListRepositoryWorkflowRuns(ctx, dep.Repository.Owner, dep.Repository.Name, &github.ListWorkflowRunsOptions{ + Branch: defaultBranch, + ListOptions: github.ListOptions{PerPage: 1}, + }) + if err != nil { + l.recordDependencyCollectionError(dep, "workflow_runs", err) + dep.Health.Workflows = summary + return + } + if runs != nil && len(runs.WorkflowRuns) > 0 && runs.WorkflowRuns[0] != nil { + run := runs.WorkflowRuns[0] + summary.LatestDefaultBranchRun = &DependencyWorkflowRun{ + Status: run.GetStatus(), + Conclusion: run.GetConclusion(), + CreatedAt: githubTimestampTime(run.CreatedAt), + } + } + dep.Health.Workflows = summary +} + +func (l *GithubReposPlugin) collectDependencyLicense(ctx context.Context, dep *RepositoryDependency) { + dep.SupplyChain.License = &DependencyLicenseSummary{} + license, resp, err := l.githubClient.Repositories.License(ctx, dep.Repository.Owner, dep.Repository.Name) + if err != nil { + if resp != nil && resp.Response != nil && resp.StatusCode == 404 { + dep.SupplyChain.License.Collected = true + dep.CollectionStatus.LicenseCollected = true + return + } + l.recordDependencyCollectionError(dep, "license", err) + return + } + dep.SupplyChain.License.Collected = true + dep.CollectionStatus.LicenseCollected = true + if license == nil || license.License == nil { + return + } + dep.SupplyChain.License.SPDXID = license.License.GetSPDXID() + dep.SupplyChain.License.Name = license.License.GetName() + dep.SupplyChain.License.URL = license.License.GetURL() +} + +func (l 
// collectDependencyPullRequests gathers open and recently closed pull request
// statistics for the dependency's repository and stores them in
// dep.Health.PullRequests.
//
// If either listing fails, the error is recorded on dep (scope
// "pull_requests_open" or "pull_requests_closed") and whatever statistics were
// gathered up to that point are still attached to dep.
func (l *GithubReposPlugin) collectDependencyPullRequests(ctx context.Context, dep *RepositoryDependency) {
	stats := &DependencyPullRequestStats{}

	// A zero time means "no since filter" for the open pull request listing.
	openPRs, openCapped, err := l.listPullRequestIssues(ctx, dep.Repository.Owner, dep.Repository.Name, "open", time.Time{})
	if err != nil {
		l.recordDependencyCollectionError(dep, "pull_requests_open", err)
		dep.Health.PullRequests = stats
		return
	}
	stats.OpenCount = len(openPRs)
	stats.OpenCountCapped = openCapped
	// Track the earliest creation time among the open pull requests returned.
	for _, pr := range openPRs {
		created := githubTimestampTime(pr.CreatedAt)
		if created == nil {
			continue
		}
		if stats.OldestOpenCreatedAt == nil || created.Before(*stats.OldestOpenCreatedAt) {
			stats.OldestOpenCreatedAt = created
		}
	}

	// Closed pull requests are only considered inside the configured lookback window.
	since := time.Now().AddDate(0, 0, -l.config.dependencyHealthClosedPRLookbackDays)
	closedPRs, closedCapped, err := l.listPullRequestIssues(ctx, dep.Repository.Owner, dep.Repository.Name, "closed", since)
	if err != nil {
		l.recordDependencyCollectionError(dep, "pull_requests_closed", err)
		dep.Health.PullRequests = stats
		return
	}
	// The listing is narrowed again by close time, so items that were merely
	// returned for the window but closed before it are dropped.
	closedPRs = filterPullRequestsClosedSince(closedPRs, since)
	stats.RecentClosedCount = len(closedPRs)
	stats.RecentClosedCountCapped = closedCapped
	stats.MedianDaysToClose = medianDaysToClose(closedPRs)
	// Attach stats before sampling interactions: medianHoursToFirstInteraction
	// writes FirstInteractionSampledPullRequests through dep.Health.PullRequests
	// and skips that write when the field is still nil.
	dep.Health.PullRequests = stats
	stats.MedianHoursToFirstInteraction = l.medianHoursToFirstInteraction(ctx, dep, closedPRs)
}
created == nil || closed == nil || closed.Before(*created) { + continue + } + values = append(values, closed.Sub(*created).Hours()/24) + } + return medianFloat64(values) +} + +func (l *GithubReposPlugin) medianHoursToFirstInteraction(ctx context.Context, dep *RepositoryDependency, prs []*github.Issue) *float64 { + limit := l.config.dependencyHealthPRInteractionSampleSize + if limit > len(prs) { + limit = len(prs) + } + values := make([]float64, 0, limit) + sampled := 0 + for i := 0; i < limit; i++ { + pr := prs[i] + if pr == nil { + continue + } + created := githubTimestampTime(pr.CreatedAt) + if created == nil { + continue + } + sampled++ + first, err := l.firstPullRequestInteraction(ctx, dep.Repository.Owner, dep.Repository.Name, pr.GetNumber(), *created) + if err != nil { + l.recordDependencyCollectionError(dep, "pull_request_interactions", err) + break + } + if first == nil { + continue + } + values = append(values, first.Sub(*created).Hours()) + } + if dep.Health.PullRequests != nil { + dep.Health.PullRequests.FirstInteractionSampledPullRequests = sampled + } + return medianFloat64(values) +} + +func (l *GithubReposPlugin) firstPullRequestInteraction(ctx context.Context, owner, repo string, number int, created time.Time) (*time.Time, error) { + var first *time.Time + comments, _, err := l.githubClient.Issues.ListComments(ctx, owner, repo, number, &github.IssueListCommentsOptions{ + Sort: github.Ptr("created"), + Direction: github.Ptr("asc"), + ListOptions: github.ListOptions{PerPage: 100}, + }) + if err != nil { + return nil, err + } + for _, comment := range comments { + ts := githubTimestampTime(comment.CreatedAt) + if ts == nil || !ts.After(created) { + continue + } + if first == nil || ts.Before(*first) { + first = ts + } + } + + reviews, _, err := l.githubClient.PullRequests.ListReviews(ctx, owner, repo, number, &github.ListOptions{PerPage: 100}) + if err != nil { + return nil, err + } + for _, review := range reviews { + ts := 
// medianFloat64 returns a pointer to the median of values, or nil when values
// is empty. For an even number of values the median is the mean of the two
// middle elements.
//
// The input slice is left untouched and the returned pointer never refers to
// the caller's backing array.
func medianFloat64(values []float64) *float64 {
	if len(values) == 0 {
		return nil
	}

	// Sort a private copy: sorting in place would silently reorder the
	// caller's data, and returning &values[mid] would hand out a pointer into
	// the caller's slice.
	sorted := append([]float64(nil), values...)
	sort.Float64s(sorted)

	mid := len(sorted) / 2
	median := sorted[mid]
	if len(sorted)%2 == 0 {
		median = (sorted[mid-1] + sorted[mid]) / 2
	}
	return &median
}
+func TestParseGoModDirectDependencies(t *testing.T) { + content := []byte(`module example.com/app + +require github.com/single/line v1.0.0 + +require ( + github.com/direct/lib v1.2.3 + github.com/indirect/lib v0.4.0 // indirect +) +`) + + deps, err := parseGoModDirectDependencies(content) + if err != nil { + t.Fatalf("parseGoModDirectDependencies returned error: %v", err) + } + if len(deps) != 2 { + t.Fatalf("expected 2 direct dependencies, got %d", len(deps)) + } + if deps[0].Name != "github.com/single/line" || deps[0].Version != "v1.0.0" { + t.Fatalf("unexpected single-line dependency: %#v", deps[0]) + } + if deps[1].Name != "github.com/direct/lib" || deps[1].Version != "v1.2.3" { + t.Fatalf("unexpected block dependency: %#v", deps[1]) + } +} + +// TestResolveGitHubModulePath verifies GitHub module paths resolve to owner and repository names. +func TestResolveGitHubModulePath(t *testing.T) { + owner, repo, ok := resolveGitHubModulePath("github.com/google/go-github/v71") + if !ok { + t.Fatal("expected GitHub module path to resolve") + } + if owner != "google" || repo != "go-github" { + t.Fatalf("unexpected resolution: %s/%s", owner, repo) + } + + _, _, ok = resolveGitHubModulePath("golang.org/x/mod") + if ok { + t.Fatal("expected non-GitHub module path not to resolve") + } +} + +// TestDependencyHealthConfigDefaultsAndInvalidValues verifies dependency health config parsing defaults and validation. 
+func TestDependencyHealthConfigDefaultsAndInvalidValues(t *testing.T) { + cfg := &PluginConfig{} + if err := cfg.parseDependencyHealthConfig(); err != nil { + t.Fatalf("parseDependencyHealthConfig returned error: %v", err) + } + if cfg.dependencyHealthEnabled { + t.Fatal("dependency health should default to disabled") + } + if cfg.dependencyHealthMaxDependencies != 50 { + t.Fatalf("expected max dependencies default 50, got %d", cfg.dependencyHealthMaxDependencies) + } + if !cfg.dependencyHealthIncludeUnresolved { + t.Fatal("include unresolved should default to true") + } + if !cfg.dependencyHealthCollectSBOM { + t.Fatal("collect SBOM should default to true") + } + + cfg = &PluginConfig{DependencyHealthMaxDependencies: "0"} + if err := cfg.parseDependencyHealthConfig(); err == nil { + t.Fatal("expected invalid max dependencies to fail") + } + + cfg = &PluginConfig{DependencyHealthEnabled: "not-bool"} + if err := cfg.parseDependencyHealthConfig(); err == nil { + t.Fatal("expected invalid bool to fail") + } +} + +// TestConfigureDefaultsPolicyData verifies Configure initializes policy data to an empty map. +func TestConfigureDefaultsPolicyData(t *testing.T) { + plugin := &GithubReposPlugin{Logger: hclog.NewNullLogger()} + _, err := plugin.Configure(&proto.ConfigureRequest{ + Config: map[string]string{ + "token": "test-token", + "organization": "test-org", + }, + }) + if err != nil { + t.Fatalf("Configure returned error: %v", err) + } + if plugin.config.policyData == nil { + t.Fatal("expected policy data to default to an empty map") + } + if len(plugin.config.policyData) != 0 { + t.Fatalf("expected empty policy data, got %#v", plugin.config.policyData) + } +} + +// TestConfigureLegacyPolicyInputFallback verifies legacy policy_input config still populates policy data. 
+func TestConfigureLegacyPolicyInputFallback(t *testing.T) { + plugin := &GithubReposPlugin{Logger: hclog.NewNullLogger()} + _, err := plugin.Configure(&proto.ConfigureRequest{ + Config: map[string]string{ + "token": "test-token", + "organization": "test-org", + "policy_input": `{"workflow_names":["ci.yml"],"enabled":true}`, + }, + }) + if err != nil { + t.Fatalf("Configure returned error: %v", err) + } + if got := plugin.config.policyData["enabled"]; got != true { + t.Fatalf("expected legacy policy_input to populate policy data, got %#v", got) + } + workflowNames, ok := plugin.config.policyData["workflow_names"].([]interface{}) + if !ok || len(workflowNames) != 1 || workflowNames[0] != "ci.yml" { + t.Fatalf("unexpected workflow_names from legacy policy_input: %#v", plugin.config.policyData["workflow_names"]) + } +} + +// TestConfigurePolicyDataOverridesLegacyPolicyInput verifies request policy_data wins over legacy config. +func TestConfigurePolicyDataOverridesLegacyPolicyInput(t *testing.T) { + plugin := &GithubReposPlugin{Logger: hclog.NewNullLogger()} + policyData, err := structpb.NewStruct(map[string]interface{}{ + "source": "request", + }) + if err != nil { + t.Fatalf("NewStruct returned error: %v", err) + } + _, err = plugin.Configure(&proto.ConfigureRequest{ + Config: map[string]string{ + "token": "test-token", + "organization": "test-org", + "policy_input": `{"source":"legacy"}`, + }, + PolicyData: policyData, + }) + if err != nil { + t.Fatalf("Configure returned error: %v", err) + } + if got := plugin.config.policyData["source"]; got != "request" { + t.Fatalf("expected request policy_data to win, got %#v", got) + } +} + +// TestConfigureInvalidLegacyPolicyInputFails verifies invalid legacy policy_input JSON fails configuration. 
+func TestConfigureInvalidLegacyPolicyInputFails(t *testing.T) { + plugin := &GithubReposPlugin{Logger: hclog.NewNullLogger()} + _, err := plugin.Configure(&proto.ConfigureRequest{ + Config: map[string]string{ + "token": "test-token", + "organization": "test-org", + "policy_input": `not-json`, + }, + }) + if err == nil { + t.Fatal("expected invalid legacy policy_input to fail") + } +} + +// TestSaturatedRepositoryPolicyInputAlias verifies policy_input mirrors policy_data for repository policy compatibility. +func TestSaturatedRepositoryPolicyInputAlias(t *testing.T) { + repo := &SaturatedRepository{ + PolicyData: map[string]interface{}{"source": "policy-data"}, + } + plugin := &GithubReposPlugin{Logger: hclog.NewNullLogger()} + _, err := plugin.EvaluatePolicies(t.Context(), repo, nil, nil, nil) + if err != nil { + t.Fatalf("EvaluatePolicies returned error: %v", err) + } + if repo.PolicyInput["source"] != "policy-data" { + t.Fatalf("expected policy_input alias to match policy_data, got %#v", repo.PolicyInput) + } +} + +// TestSaturatedRepositoryPolicyInputAliasMarshalsWhenEmpty verifies empty policy_input aliases are preserved in JSON. +func TestSaturatedRepositoryPolicyInputAliasMarshalsWhenEmpty(t *testing.T) { + repo := &SaturatedRepository{ + PolicyData: map[string]interface{}{}, + PolicyInput: map[string]interface{}{}, + } + payload, err := json.Marshal(repo) + if err != nil { + t.Fatalf("failed to marshal repository: %v", err) + } + if !strings.Contains(string(payload), `"policy_input":{}`) { + t.Fatalf("expected empty policy_input to be present, got %s", payload) + } +} + +// TestMedianHelpers verifies median helper functions for dependency pull request metrics. 
+func TestMedianHelpers(t *testing.T) { + prs := []*github.Issue{ + { + CreatedAt: githubTimestamp("2026-01-01T00:00:00Z"), + ClosedAt: githubTimestamp("2026-01-03T00:00:00Z"), + }, + { + CreatedAt: githubTimestamp("2026-01-01T00:00:00Z"), + ClosedAt: githubTimestamp("2026-01-05T00:00:00Z"), + }, + } + median := medianDaysToClose(prs) + if median == nil || *median != 3 { + t.Fatalf("expected median close time 3 days, got %v", median) + } + + values := []float64{10, 2, 4} + got := medianFloat64(values) + if got == nil || *got != 4 { + t.Fatalf("expected median 4, got %v", got) + } +} + +// TestRequestWithDefaultPolicyBehaviorClassifiesAllPolicyPaths verifies policy paths are routed to the expected behavior. +func TestRequestWithDefaultPolicyBehaviorClassifiesAllPolicyPaths(t *testing.T) { + req := &proto.EvalRequest{ + PolicyPaths: []string{ + "ghcr.io/compliance-framework/plugin-github-repositories-policies:v0.6.1", + "/policies/plugin-github-repositories-dependency-policies.tar.gz", + "/policies/custom-github-repository-policies.tar.gz", + }, + } + + policyRequest := requestWithDefaultPolicyBehavior(req) + + assertStringSlicesEqual(t, policyRequest.PolicyPathsForBehavior(policyBehaviorRepository), []string{ + "ghcr.io/compliance-framework/plugin-github-repositories-policies:v0.6.1", + "/policies/custom-github-repository-policies.tar.gz", + }) + assertStringSlicesEqual(t, policyRequest.PolicyPathsForBehavior(policyBehaviorDependency), []string{ + "/policies/plugin-github-repositories-dependency-policies.tar.gz", + }) +} + +// TestGatherRepositoryDependenciesEndToEnd verifies dependency parsing, resolution, and health collection against a fake API. 
+func TestGatherRepositoryDependenciesEndToEnd(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + + goMod := `module github.com/source/target + +require ( + github.com/good/lib v1.2.3 + github.com/good/lib/submodule v1.2.4 + github.com/quiet/lib v0.9.0 + example.com/unresolved/lib v0.1.0 + github.com/indirect/lib v0.4.0 // indirect +) +` + goodRepoGetCount := 0 + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/repos/source/target/contents/go.mod": + writeJSON(t, w, map[string]any{ + "type": "file", + "name": "go.mod", + "path": "go.mod", + "encoding": "base64", + "content": base64.StdEncoding.EncodeToString([]byte(goMod)), + }) + case r.URL.Path == "/repos/good/lib": + goodRepoGetCount++ + writeJSON(t, w, map[string]any{ + "name": "lib", + "full_name": "good/lib", + "default_branch": "main", + "archived": false, + }) + case r.URL.Path == "/repos/quiet/lib": + writeJSON(t, w, map[string]any{ + "name": "lib", + "full_name": "quiet/lib", + "default_branch": "main", + "archived": true, + }) + case r.URL.Path == "/repos/good/lib/releases/latest": + writeJSON(t, w, map[string]any{ + "tag_name": "v1.3.0", + "published_at": "2026-01-10T00:00:00Z", + }) + case r.URL.Path == "/repos/quiet/lib/releases/latest": + http.NotFound(w, r) + case r.URL.Path == "/repos/good/lib/commits": + writeJSON(t, w, []map[string]any{{ + "sha": "abc123", + "commit": map[string]any{ + "committer": map[string]any{"date": "2026-02-01T00:00:00Z"}, + }, + }}) + case r.URL.Path == "/repos/quiet/lib/commits": + writeJSON(t, w, []map[string]any{}) + case r.URL.Path == "/repos/good/lib/actions/workflows": + writeJSON(t, w, map[string]any{ + "total_count": 3, + "workflows": []map[string]any{ + {"id": 1, "name": "ci"}, + {"id": 2, "name": "release"}, + }, + }) + case r.URL.Path == "/repos/quiet/lib/actions/workflows": + writeJSON(t, w, map[string]any{"total_count": 0, "workflows": []any{}}) + case 
r.URL.Path == "/repos/good/lib/actions/runs": + writeJSON(t, w, map[string]any{ + "total_count": 1, + "workflow_runs": []map[string]any{{ + "id": 1, + "status": "completed", + "conclusion": "success", + "created_at": "2026-02-02T00:00:00Z", + }}, + }) + case r.URL.Path == "/repos/quiet/lib/actions/runs": + writeJSON(t, w, map[string]any{"total_count": 0, "workflow_runs": []any{}}) + case r.URL.Path == "/repos/good/lib/license": + writeJSON(t, w, map[string]any{ + "license": map[string]any{ + "spdx_id": "MIT", + "name": "MIT License", + "url": "https://api.github.com/licenses/mit", + }, + }) + case r.URL.Path == "/repos/quiet/lib/license": + http.NotFound(w, r) + case r.URL.Path == "/repos/good/lib/dependency-graph/sbom": + writeJSON(t, w, map[string]any{ + "sbom": map[string]any{ + "SPDXID": "SPDXRef-DOCUMENT", + "spdxVersion": "SPDX-2.3", + "creationInfo": map[string]any{ + "created": "2026-02-01T00:00:00Z", + }, + "packages": []map[string]any{ + {"name": "a"}, + {"name": "b"}, + }, + }, + }) + case r.URL.Path == "/repos/quiet/lib/dependency-graph/sbom": + http.Error(w, "forbidden", http.StatusForbidden) + case r.URL.Path == "/repos/good/lib/issues" && r.URL.Query().Get("state") == "open": + writeJSON(t, w, []map[string]any{{ + "number": 3, + "created_at": "2026-01-01T00:00:00Z", + "pull_request": map[string]any{"url": "https://api.github.test/repos/good/lib/pulls/3"}, + }}) + case r.URL.Path == "/repos/good/lib/issues" && r.URL.Query().Get("state") == "closed": + writeJSON(t, w, []map[string]any{ + { + "number": 7, + "created_at": "2026-01-01T00:00:00Z", + "closed_at": "2026-01-05T00:00:00Z", + "pull_request": map[string]any{"url": "https://api.github.test/repos/good/lib/pulls/7"}, + }, + { + "number": 8, + "created_at": "2026-01-01T00:00:00Z", + "closed_at": "2026-01-10T00:00:00Z", + "pull_request": map[string]any{"url": "https://api.github.test/repos/good/lib/pulls/8"}, + }, + }) + case r.URL.Path == "/repos/quiet/lib/issues": + writeJSON(t, w, []any{}) + case 
r.URL.Path == "/repos/good/lib/issues/7/comments": + writeJSON(t, w, []map[string]any{{ + "id": 10, + "created_at": "2026-01-02T00:00:00Z", + }}) + case r.URL.Path == "/repos/good/lib/pulls/7/reviews": + writeJSON(t, w, []map[string]any{{ + "id": 11, + "submitted_at": "2026-01-03T00:00:00Z", + }}) + case r.URL.Path == "/repos/good/lib/issues/8/comments": + writeJSON(t, w, []any{}) + case r.URL.Path == "/repos/good/lib/pulls/8/reviews": + writeJSON(t, w, []any{}) + default: + t.Fatalf("unexpected GitHub API request: %s?%s", r.URL.Path, r.URL.RawQuery) + } + }) + + plugin := newTestPlugin(t, server.URL) + repo := &github.Repository{ + Name: github.Ptr("target"), + DefaultBranch: github.Ptr("main"), + Owner: &github.User{Login: github.Ptr("source")}, + } + + deps := plugin.GatherRepositoryDependencies(t.Context(), repo) + if len(deps) != 4 { + t.Fatalf("expected 4 dependencies, got %d", len(deps)) + } + + good := findDependency(t, deps, "github.com/good/lib") + if !good.Repository.Resolved || good.Repository.Owner != "good" || good.Repository.Name != "lib" { + t.Fatalf("good dependency did not resolve: %#v", good.Repository) + } + if good.Health.LatestRelease == nil || good.Health.LatestRelease.Tag != "v1.3.0" { + t.Fatalf("latest release not collected: %#v", good.Health.LatestRelease) + } + if good.Health.LatestCommit == nil || good.Health.LatestCommit.SHA != "abc123" { + t.Fatalf("latest commit not collected: %#v", good.Health.LatestCommit) + } + if good.Health.Workflows == nil || good.Health.Workflows.Count != 3 || good.Health.Workflows.LatestDefaultBranchRun.Conclusion != "success" { + t.Fatalf("workflow summary not collected: %#v", good.Health.Workflows) + } + if good.SupplyChain.License == nil || good.SupplyChain.License.SPDXID != "MIT" { + t.Fatalf("license not collected: %#v", good.SupplyChain.License) + } + if good.SupplyChain.SBOM == nil || !good.SupplyChain.SBOM.Available || good.SupplyChain.SBOM.PackageCount != 2 { + t.Fatalf("SBOM not collected: %#v", 
good.SupplyChain.SBOM) + } + if good.Health.PullRequests == nil || good.Health.PullRequests.OpenCount != 1 || good.Health.PullRequests.OpenCountCapped || good.Health.PullRequests.RecentClosedCount != 2 || good.Health.PullRequests.RecentClosedCountCapped { + t.Fatalf("PR stats not collected: %#v", good.Health.PullRequests) + } + if good.Health.PullRequests.MedianDaysToClose == nil || *good.Health.PullRequests.MedianDaysToClose != 6.5 { + t.Fatalf("expected median days to close 6.5, got %#v", good.Health.PullRequests.MedianDaysToClose) + } + if good.Health.PullRequests.MedianHoursToFirstInteraction == nil || *good.Health.PullRequests.MedianHoursToFirstInteraction != 24 { + t.Fatalf("expected median hours to first interaction 24, got %#v", good.Health.PullRequests.MedianHoursToFirstInteraction) + } + if good.Health.PullRequests.FirstInteractionSampledPullRequests != 2 { + t.Fatalf("expected two first-interaction samples, got %d", good.Health.PullRequests.FirstInteractionSampledPullRequests) + } + if !good.CollectionStatus.HealthCollected { + t.Fatal("expected complete health collection to be marked collected") + } + goodSubmodule := findDependency(t, deps, "github.com/good/lib/submodule") + if goodSubmodule.Health.LatestRelease == nil || goodSubmodule.Health.LatestRelease.Tag != "v1.3.0" { + t.Fatalf("expected cached latest release for submodule dependency, got %#v", goodSubmodule.Health.LatestRelease) + } + if goodRepoGetCount != 1 { + t.Fatalf("expected good/lib repository facts to be fetched once, got %d", goodRepoGetCount) + } + + quiet := findDependency(t, deps, "github.com/quiet/lib") + if !quiet.Health.RepositoryArchived { + t.Fatal("expected quiet dependency to be archived") + } + if quiet.Health.LatestRelease != nil { + t.Fatalf("expected no latest release, got %#v", quiet.Health.LatestRelease) + } + if quiet.SupplyChain.License == nil || !quiet.SupplyChain.License.Collected || quiet.SupplyChain.License.SPDXID != "" { + t.Fatalf("expected unknown collected 
license, got %#v", quiet.SupplyChain.License) + } + if quiet.CollectionStatus.SBOMCollected { + t.Fatal("expected inaccessible SBOM not to be marked collected") + } + if len(quiet.CollectionStatus.Errors) == 0 { + t.Fatal("expected inaccessible SBOM to record a collection error") + } + if !quiet.CollectionStatus.HealthCollected { + t.Fatal("expected SBOM-only failure not to mark health collection incomplete") + } + + unresolved := findDependency(t, deps, "example.com/unresolved/lib") + if unresolved.Repository.Resolved { + t.Fatalf("expected unresolved dependency, got %#v", unresolved.Repository) + } +} + +// TestGatherRepositoryDependenciesRequiresConfiguration verifies dependency collection fails before plugin configuration. +func TestGatherRepositoryDependenciesRequiresConfiguration(t *testing.T) { + plugin := &GithubReposPlugin{} + repo := &github.Repository{ + Name: github.Ptr("target"), + DefaultBranch: github.Ptr("main"), + Owner: &github.User{Login: github.Ptr("source")}, + } + + if deps := plugin.GatherRepositoryDependencies(t.Context(), repo); deps != nil { + t.Fatalf("expected no dependencies from unconfigured plugin, got %#v", deps) + } + + if _, err := plugin.gatherRepositoryDependencies(t.Context(), repo, nil); err == nil { + t.Fatal("expected unconfigured plugin to return an error") + } +} + +// TestCollectDependencySBOMTreatsNotFoundAsCollectedUnavailable verifies missing SBOMs are recorded as collected but unavailable. 
+func TestCollectDependencySBOMTreatsNotFoundAsCollectedUnavailable(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/repos/missing/lib/dependency-graph/sbom" { + t.Fatalf("unexpected GitHub API request: %s", r.URL.Path) + } + http.NotFound(w, r) + }) + + plugin := newTestPlugin(t, server.URL) + dep := newRepositoryDependency(goModuleDependency{ + Name: "github.com/missing/lib", + Version: "v1.0.0", + Direct: true, + }) + dep.Repository = &DependencyRepository{ + Provider: "github", + Owner: "missing", + Name: "lib", + URL: "https://github.com/missing/lib", + Resolved: true, + } + + plugin.collectDependencySBOM(t.Context(), dep) + + if dep.SupplyChain.SBOM == nil || !dep.SupplyChain.SBOM.Collected || dep.SupplyChain.SBOM.Available { + t.Fatalf("expected unavailable SBOM to be collected without availability, got %#v", dep.SupplyChain.SBOM) + } + if !dep.CollectionStatus.SBOMCollected { + t.Fatal("expected SBOM collection status to be marked collected") + } + if len(dep.CollectionStatus.Errors) != 0 { + t.Fatalf("expected no collection errors for missing SBOM, got %#v", dep.CollectionStatus.Errors) + } +} + +// TestCollectDependencyRepositoryFactsMarksHealthIncompleteOnHealthError verifies health collection remains incomplete after a health API error. 
+func TestCollectDependencyRepositoryFactsMarksHealthIncompleteOnHealthError(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/repos/good/lib": + writeJSON(t, w, map[string]any{ + "name": "lib", + "full_name": "good/lib", + "default_branch": "main", + "archived": false, + }) + case r.URL.Path == "/repos/good/lib/releases/latest": + http.NotFound(w, r) + case r.URL.Path == "/repos/good/lib/commits": + writeJSON(t, w, []map[string]any{{ + "sha": "abc123", + "commit": map[string]any{ + "committer": map[string]any{"date": "2026-02-01T00:00:00Z"}, + }, + }}) + case r.URL.Path == "/repos/good/lib/actions/workflows": + http.Error(w, "forbidden", http.StatusForbidden) + case r.URL.Path == "/repos/good/lib/issues": + writeJSON(t, w, []any{}) + case r.URL.Path == "/repos/good/lib/license": + http.NotFound(w, r) + case r.URL.Path == "/repos/good/lib/dependency-graph/sbom": + writeJSON(t, w, map[string]any{"sbom": map[string]any{}}) + default: + t.Fatalf("unexpected GitHub API request: %s?%s", r.URL.Path, r.URL.RawQuery) + } + }) + + plugin := newTestPlugin(t, server.URL) + dep := newRepositoryDependency(goModuleDependency{Name: "github.com/good/lib", Version: "v1.0.0", Direct: true}) + resolveDependencyRepository(dep) + + plugin.collectDependencyRepositoryFacts(t.Context(), dep) + + if dep.CollectionStatus.HealthCollected { + t.Fatal("expected health collection to remain incomplete after workflows error") + } + if len(dep.CollectionStatus.Errors) != 1 || dep.CollectionStatus.Errors[0].Scope != "workflows" { + t.Fatalf("expected one workflows collection error, got %#v", dep.CollectionStatus.Errors) + } +} + +// TestGatherRepositoryDependenciesMissingGoMod verifies repositories without go.mod return no dependencies. 
+func TestGatherRepositoryDependenciesMissingGoMod(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/repos/source/target/contents/go.mod" { + http.NotFound(w, r) + return + } + t.Fatalf("unexpected GitHub API request: %s", r.URL.Path) + }) + + plugin := newTestPlugin(t, server.URL) + repo := &github.Repository{ + Name: github.Ptr("target"), + DefaultBranch: github.Ptr("main"), + Owner: &github.User{Login: github.Ptr("source")}, + } + deps := plugin.GatherRepositoryDependencies(t.Context(), repo) + if len(deps) != 0 { + t.Fatalf("expected no dependencies for missing go.mod, got %d", len(deps)) + } +} + +// TestGatherRepositoryDependenciesMissingGoModEmitsCollectionGapForPolicies verifies callback mode emits a dependency collection gap. +func TestGatherRepositoryDependenciesMissingGoModEmitsCollectionGapForPolicies(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/repos/source/target/contents/go.mod" { + http.NotFound(w, r) + return + } + t.Fatalf("unexpected GitHub API request: %s", r.URL.Path) + }) + + plugin := newTestPlugin(t, server.URL) + repo := &github.Repository{ + Name: github.Ptr("target"), + DefaultBranch: github.Ptr("main"), + Owner: &github.User{Login: github.Ptr("source")}, + } + emitted := []*RepositoryDependency{} + deps, err := plugin.gatherRepositoryDependencies(t.Context(), repo, func(dep *RepositoryDependency) error { + emitted = append(emitted, dep) + return nil + }) + if err != nil { + t.Fatalf("gatherRepositoryDependencies returned error: %v", err) + } + if len(deps) != 1 || len(emitted) != 1 { + t.Fatalf("expected one collection gap dependency, got deps=%d emitted=%d", len(deps), len(emitted)) + } + dep := emitted[0] + if dep.Name != 
"dependency-collection-unavailable" { + t.Fatalf("expected collection gap dependency name, got %q", dep.Name) + } + if dep.CollectionStatus == nil || dep.CollectionStatus.DependencyParsed { + t.Fatalf("expected dependency parsing to be unavailable, got %#v", dep.CollectionStatus) + } + if len(dep.CollectionStatus.Errors) != 1 || dep.CollectionStatus.Errors[0].Scope != "go_mod_fetch" { + t.Fatalf("expected one go_mod_fetch collection error, got %#v", dep.CollectionStatus.Errors) + } + if !strings.Contains(dep.CollectionStatus.Errors[0].Message, "404") { + t.Fatalf("expected go_mod_fetch collection error to preserve GitHub status, got %q", dep.CollectionStatus.Errors[0].Message) + } +} + +// TestNewDependencyCollectionGapSerializesEmptyErrors verifies collection-gap dependencies marshal empty error lists. +func TestNewDependencyCollectionGapSerializesEmptyErrors(t *testing.T) { + dep := newDependencyCollectionGap("go_mod_fetch", nil) + payload, err := json.Marshal(dep) + if err != nil { + t.Fatalf("failed to marshal dependency collection gap: %v", err) + } + if !strings.Contains(string(payload), `"errors":[]`) { + t.Fatalf("expected empty errors array, got %s", payload) + } +} + +// TestEvaluatePoliciesRunsDependencyPoliciesPerDependency verifies dependency policies emit evidence per dependency. +func TestEvaluatePoliciesRunsDependencyPoliciesPerDependency(t *testing.T) { + policyDir := filepath.Join(t.TempDir(), "plugin-github-repositories-dependency-policies") + if err := os.MkdirAll(policyDir, 0o755); err != nil { + t.Fatalf("failed to create policy dir: %v", err) + } + rego := []byte(`package compliance_framework.dependency_archived + +title := "Dependency is not archived" +description := "Dependency repositories should not be archived." 
+ +violation[{"id": "dependency_repository_archived"}] if { + object.get(object.get(input.policy_data, "dependency_health", {}), "fail_archived", false) + input.dependency.health.repository_archived == true +} +`) + if err := os.WriteFile(filepath.Join(policyDir, "dependency_archived.rego"), rego, 0o644); err != nil { + t.Fatalf("failed to write policy: %v", err) + } + + plugin := &GithubReposPlugin{ + Logger: hclog.NewNullLogger(), + } + repo := &github.Repository{ + Name: github.Ptr("api"), + FullName: github.Ptr("ccf/api"), + HTMLURL: github.Ptr("https://github.com/ccf/api"), + Owner: &github.User{Login: github.Ptr("ccf"), Name: github.Ptr("Continuous Compliance Framework")}, + } + data := &SaturatedRepository{ + Settings: repo, + PolicyData: map[string]interface{}{"dependency_health": map[string]interface{}{"fail_archived": true}}, + } + deps := []*RepositoryDependency{ + { + Name: "internally-maintained-open-source/foo", + Ecosystem: "go", + DeclaredVersion: "v1.0.0", + Repository: &DependencyRepository{URL: "https://github.com/internally-maintained-open-source/foo"}, + Health: &DependencyHealth{RepositoryArchived: false}, + }, + { + Name: "competitor-maintained-open-source/bar", + Ecosystem: "go", + DeclaredVersion: "v2.0.0", + Repository: &DependencyRepository{URL: "https://github.com/competitor-maintained-open-source/bar"}, + Health: &DependencyHealth{RepositoryArchived: true}, + }, + } + + evidence, err := plugin.EvaluatePolicies(t.Context(), data, deps, []string{policyDir}, data.PolicyData) + if err != nil { + t.Fatalf("EvaluatePolicies returned error: %v", err) + } + if len(evidence) != 2 { + t.Fatalf("expected one evidence per dependency, got %d", len(evidence)) + } + + byDependency := map[string]*proto.Evidence{} + for _, ev := range evidence { + labels := ev.GetLabels() + byDependency[labels["dependency"]] = ev + if labels["type"] != "repository-dependency" { + t.Fatalf("expected dependency evidence type label, got %q", labels["type"]) + } + } + + foo 
:= byDependency["internally-maintained-open-source/foo"] + if foo == nil { + t.Fatal("missing evidence for foo dependency") + } + if foo.GetStatus().GetState() != proto.EvidenceStatusState_EVIDENCE_STATUS_STATE_SATISFIED { + t.Fatalf("expected foo evidence to pass, got %s", foo.GetStatus().GetState()) + } + if len(foo.GetSubjects()) == 0 || !strings.Contains(foo.GetSubjects()[0].GetIdentifier(), "internally-maintained-open-source/foo@v1.0.0") { + t.Fatalf("expected foo dependency subject, got %#v", foo.GetSubjects()) + } + if !evidenceHasHref(foo, "https://github.com/internally-maintained-open-source/foo") { + t.Fatalf("expected foo evidence to link to dependency repository, got %#v", foo.GetLinks()) + } + + bar := byDependency["competitor-maintained-open-source/bar"] + if bar == nil { + t.Fatal("missing evidence for bar dependency") + } + if bar.GetStatus().GetState() != proto.EvidenceStatusState_EVIDENCE_STATUS_STATE_NOT_SATISFIED { + t.Fatalf("expected bar evidence to fail, got %s", bar.GetStatus().GetState()) + } + if len(bar.GetProps()) != 1 || bar.GetProps()[0].GetValue() != "dependency_repository_archived" { + t.Fatalf("expected archived violation prop, got %#v", bar.GetProps()) + } + if !evidenceHasHref(bar, "https://github.com/competitor-maintained-open-source/bar") { + t.Fatalf("expected bar evidence to link to dependency repository, got %#v", bar.GetLinks()) + } +} + +func evidenceHasHref(evidence *proto.Evidence, href string) bool { + for _, link := range evidence.GetLinks() { + if link.GetHref() == href { + return true + } + } + return false +} + +// TestDependencyPolicyInputDefaultsPolicyData verifies dependency policy inputs default policy data to an empty map. 
+func TestDependencyPolicyInputDefaultsPolicyData(t *testing.T) { + repo := &github.Repository{ + Name: github.Ptr("api"), + FullName: github.Ptr("ccf/api"), + HTMLURL: github.Ptr("https://github.com/ccf/api"), + Owner: &github.User{Login: github.Ptr("ccf")}, + } + input := dependencyPolicyInput(repo, &RepositoryDependency{Name: "github.com/example/lib"}, nil) + if input.PolicyData == nil { + t.Fatal("expected dependency policy data to default to an empty map") + } + if len(input.PolicyData) != 0 { + t.Fatalf("expected empty dependency policy data, got %#v", input.PolicyData) + } +} + +// TestMedianHoursToFirstInteractionStopsAfterFirstCollectionError verifies first-interaction collection stops after an API error. +func TestMedianHoursToFirstInteractionStopsAfterFirstCollectionError(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + requests := 0 + mux.HandleFunc("/repos/good/lib/issues/1/comments", func(w http.ResponseWriter, r *http.Request) { + requests++ + http.Error(w, "forbidden", http.StatusForbidden) + }) + + plugin := newTestPlugin(t, server.URL) + dep := newRepositoryDependency(goModuleDependency{Name: "github.com/good/lib", Version: "v1.0.0", Direct: true}) + resolveDependencyRepository(dep) + dep.Health.PullRequests = &DependencyPullRequestStats{} + prs := []*github.Issue{ + {Number: github.Ptr(1), CreatedAt: githubTimestamp("2026-01-01T00:00:00Z")}, + {Number: github.Ptr(2), CreatedAt: githubTimestamp("2026-01-02T00:00:00Z")}, + } + + median := plugin.medianHoursToFirstInteraction(t.Context(), dep, prs) + + if median != nil { + t.Fatalf("expected no median after collection error, got %v", *median) + } + if requests != 1 { + t.Fatalf("expected first interaction collection to stop after one error, got %d requests", requests) + } + if len(dep.CollectionStatus.Errors) != 1 || dep.CollectionStatus.Errors[0].Scope != "pull_request_interactions" { + t.Fatalf("expected one pull_request_interactions error, 
got %#v", dep.CollectionStatus.Errors) + } +} + +// TestMedianHoursToFirstInteractionSamplesOnlyPullRequestsWithCreatedAt verifies sampling skips PRs without creation times. +func TestMedianHoursToFirstInteractionSamplesOnlyPullRequestsWithCreatedAt(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + requests := 0 + mux.HandleFunc("/repos/good/lib/issues/2/comments", func(w http.ResponseWriter, r *http.Request) { + requests++ + writeJSON(t, w, []map[string]any{{ + "created_at": "2026-01-03T00:00:00Z", + }}) + }) + mux.HandleFunc("/repos/good/lib/pulls/2/reviews", func(w http.ResponseWriter, r *http.Request) { + writeJSON(t, w, []map[string]any{}) + }) + + plugin := newTestPlugin(t, server.URL) + dep := newRepositoryDependency(goModuleDependency{Name: "github.com/good/lib", Version: "v1.0.0", Direct: true}) + resolveDependencyRepository(dep) + dep.Health.PullRequests = &DependencyPullRequestStats{} + prs := []*github.Issue{ + {Number: github.Ptr(1)}, + {Number: github.Ptr(2), CreatedAt: githubTimestamp("2026-01-02T00:00:00Z")}, + } + + median := plugin.medianHoursToFirstInteraction(t.Context(), dep, prs) + + if median == nil || *median != 24 { + t.Fatalf("expected median first interaction to use valid PR, got %#v", median) + } + if requests != 1 { + t.Fatalf("expected one first interaction request, got %d", requests) + } + if dep.Health.PullRequests.FirstInteractionSampledPullRequests != 1 { + t.Fatalf("expected one sampled pull request, got %d", dep.Health.PullRequests.FirstInteractionSampledPullRequests) + } +} + +// TestListPullRequestIssuesFiltersPullRequests verifies issue results are filtered to pull requests. 
+func TestListPullRequestIssuesFiltersPullRequests(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + mux.HandleFunc("/repos/good/lib/issues", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Query().Get("state") != "closed" { + t.Fatalf("unexpected state: %s", r.URL.Query().Get("state")) + } + writeJSON(t, w, []map[string]any{ + {"number": 1, "pull_request": map[string]any{"url": "https://api.github.test/repos/good/lib/pulls/1"}}, + {"number": 2}, + }) + }) + + plugin := newTestPlugin(t, server.URL) + prs, capped, err := plugin.listPullRequestIssues(t.Context(), "good", "lib", "closed", time.Time{}) + if err != nil { + t.Fatalf("listPullRequestIssues returned error: %v", err) + } + if capped { + t.Fatal("expected uncapped pull request issue result") + } + if len(prs) != 1 { + t.Fatalf("expected 1 pull request issue, got %d", len(prs)) + } + if prs[0].GetNumber() != 1 { + t.Fatalf("unexpected pull request issue number: %d", prs[0].GetNumber()) + } +} + +// TestListPullRequestIssuesSortsOpenPullRequestsOldestFirst verifies open PR issue collection requests oldest-first ordering. 
+func TestListPullRequestIssuesSortsOpenPullRequestsOldestFirst(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + mux.HandleFunc("/repos/good/lib/issues", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Query().Get("state") != "open" { + t.Fatalf("unexpected state: %s", r.URL.Query().Get("state")) + } + if r.URL.Query().Get("sort") != "created" { + t.Fatalf("unexpected sort: %s", r.URL.Query().Get("sort")) + } + if r.URL.Query().Get("direction") != "asc" { + t.Fatalf("unexpected direction: %s", r.URL.Query().Get("direction")) + } + writeJSON(t, w, []map[string]any{{ + "number": 1, + "pull_request": map[string]any{"url": "https://api.github.test/repos/good/lib/pulls/1"}, + }}) + }) + + plugin := newTestPlugin(t, server.URL) + prs, capped, err := plugin.listPullRequestIssues(t.Context(), "good", "lib", "open", time.Time{}) + if err != nil { + t.Fatalf("listPullRequestIssues returned error: %v", err) + } + if capped { + t.Fatal("expected uncapped pull request issue result") + } + if len(prs) != 1 { + t.Fatalf("expected 1 pull request issue, got %d", len(prs)) + } +} + +// TestListPullRequestIssuesSortsClosedPullRequestsByRecentUpdate verifies closed PR issue collection requests recent updates. 
+func TestListPullRequestIssuesSortsClosedPullRequestsByRecentUpdate(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + since := time.Date(2026, 1, 10, 0, 0, 0, 0, time.UTC) + mux.HandleFunc("/repos/good/lib/issues", func(w http.ResponseWriter, r *http.Request) { + if r.URL.Query().Get("state") != "closed" { + t.Fatalf("unexpected state: %s", r.URL.Query().Get("state")) + } + if r.URL.Query().Get("sort") != "updated" { + t.Fatalf("unexpected sort: %s", r.URL.Query().Get("sort")) + } + if r.URL.Query().Get("direction") != "desc" { + t.Fatalf("unexpected direction: %s", r.URL.Query().Get("direction")) + } + if r.URL.Query().Get("since") == "" { + t.Fatal("expected since query parameter") + } + writeJSON(t, w, []map[string]any{{ + "number": 1, + "pull_request": map[string]any{"url": "https://api.github.test/repos/good/lib/pulls/1"}, + }}) + }) + + plugin := newTestPlugin(t, server.URL) + prs, capped, err := plugin.listPullRequestIssues(t.Context(), "good", "lib", "closed", since) + if err != nil { + t.Fatalf("listPullRequestIssues returned error: %v", err) + } + if capped { + t.Fatal("expected uncapped pull request issue result") + } + if len(prs) != 1 { + t.Fatalf("expected 1 pull request issue, got %d", len(prs)) + } +} + +// TestListPullRequestIssuesStopsAtMaxPages verifies pull request issue collection reports capped pagination. 
+func TestListPullRequestIssuesStopsAtMaxPages(t *testing.T) { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + requests := 0 + mux.HandleFunc("/repos/good/lib/issues", func(w http.ResponseWriter, r *http.Request) { + requests++ + if requests > dependencyPRMaxPages { + t.Fatalf("requested more than %d pages", dependencyPRMaxPages) + } + nextPage := requests + 1 + w.Header().Set("Link", fmt.Sprintf(`<%s/repos/good/lib/issues?page=%d>; rel="next"`, server.URL, nextPage)) + writeJSON(t, w, []map[string]any{{ + "number": requests, + "pull_request": map[string]any{"url": fmt.Sprintf("https://api.github.test/repos/good/lib/pulls/%d", requests)}, + }}) + }) + + plugin := newTestPlugin(t, server.URL) + prs, capped, err := plugin.listPullRequestIssues(t.Context(), "good", "lib", "closed", time.Time{}) + if err != nil { + t.Fatalf("listPullRequestIssues returned error: %v", err) + } + if !capped { + t.Fatal("expected capped pull request issue result") + } + if requests != dependencyPRMaxPages { + t.Fatalf("expected %d requests, got %d", dependencyPRMaxPages, requests) + } + if len(prs) != dependencyPRMaxPages { + t.Fatalf("expected %d pull requests, got %d", dependencyPRMaxPages, len(prs)) + } +} + +// TestFilterPullRequestsClosedSinceUsesClosedAt verifies closed PR filtering uses ClosedAt rather than update time. 
+func TestFilterPullRequestsClosedSinceUsesClosedAt(t *testing.T) { + since := time.Date(2026, 1, 10, 0, 0, 0, 0, time.UTC) + prs := []*github.Issue{ + { + Number: github.Ptr(1), + ClosedAt: githubTimestamp("2026-01-09T23:59:59Z"), + }, + { + Number: github.Ptr(2), + ClosedAt: githubTimestamp("2026-01-10T00:00:00Z"), + }, + { + Number: github.Ptr(3), + }, + } + + filtered := filterPullRequestsClosedSince(prs, since) + if len(filtered) != 1 || filtered[0].GetNumber() != 2 { + t.Fatalf("expected only PR 2, got %#v", filtered) + } +} + +func assertStringSlicesEqual(t *testing.T, got []string, want []string) { + t.Helper() + if !slices.Equal(got, want) { + t.Fatalf("got %#v, want %#v", got, want) + } +} + +func newTestPlugin(t *testing.T, serverURL string) *GithubReposPlugin { + t.Helper() + client := github.NewClient(http.DefaultClient) + baseURL, err := url.Parse(serverURL + "/") + if err != nil { + t.Fatalf("failed to parse test server URL: %v", err) + } + client.BaseURL = baseURL + return &GithubReposPlugin{ + Logger: hclog.NewNullLogger(), + githubClient: client, + config: &PluginConfig{ + dependencyHealthEnabled: true, + dependencyHealthMaxDependencies: 50, + dependencyHealthClosedPRLookbackDays: 3650, + dependencyHealthIncludeUnresolved: true, + dependencyHealthCollectSBOM: true, + dependencyHealthPRInteractionSampleSize: 20, + }, + } +} + +func writeJSON(t *testing.T, w http.ResponseWriter, value any) { + t.Helper() + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(value); err != nil { + t.Fatalf("failed to write JSON: %v", err) + } +} + +func findDependency(t *testing.T, deps []*RepositoryDependency, name string) *RepositoryDependency { + t.Helper() + for _, dep := range deps { + if dep.Name == name { + return dep + } + } + t.Fatalf("dependency %q not found; got %s", name, dependencyNames(deps)) + return nil +} + +func dependencyNames(deps []*RepositoryDependency) string { + names := make([]string, 0, len(deps)) + for 
_, dep := range deps { + names = append(names, dep.Name) + } + return strings.Join(names, ", ") +} + +func githubTimestamp(value string) *github.Timestamp { + parsed, err := time.Parse(time.RFC3339, value) + if err != nil { + panic(err) + } + return &github.Timestamp{Time: parsed} +} diff --git a/dependency-health-design.md b/dependency-health-design.md new file mode 100644 index 0000000..683f987 --- /dev/null +++ b/dependency-health-design.md @@ -0,0 +1,1140 @@ +# Repository Dependency Health Evidence Design + +## Purpose + +This design defines how `plugin-github-repositories` can collect direct dependency health facts for a monitored GitHub repository, and how CCF policies can turn those facts into evidence. + +The first target use case is a CRA-oriented demo: + +> For repository `ccf/api`, CCF can show that direct dependencies are being tracked, that some dependencies show maintenance risk, and that the organization has evidence to support review, remediation, or replacement planning. + +The feature is intended to demonstrate third-party component due diligence. It should not claim that upstream dependency repositories are controlled by the assessed organization. + +## Scope + +### In Scope + +- Parse direct dependencies from `go.mod`. +- Ignore transitive and indirect dependencies. +- Resolve obvious GitHub-hosted module paths, such as `github.com/org/repo`. +- Collect public/free GitHub metadata for each resolved dependency repository. +- Collect dependency license and SBOM visibility facts where public GitHub APIs expose them. +- Expose dependency facts to policies as part of the repository evaluation data. +- Add a small number of policy examples that demonstrate how dependency health evidence can be produced. + +### Out of Scope For The First Version + +- Transitive dependency parsing. +- Vulnerability lookup. +- Go-specific health scoring. +- Go module version correctness checks. +- Private upstream repository access. 
+- Non-GitHub repository health collection. +- Hardcoded compliance scoring in the plugin. +- Treating every unresolved dependency as unhealthy. + +## Design Principle + +The plugin collects facts. Policies decide what those facts mean. + +The plugin should answer: + +> What direct dependencies does this repository declare, and what public maintenance signals are visible for each dependency? + +Policies should answer: + +> Given configured thresholds and organizational expectations, which dependencies are healthy, stale, unknown, or require review? + +## Data Ownership + +### Plugin-Owned Responsibilities + +The plugin is responsible for collection and normalization. + +It should: + +- Fetch `go.mod` from the monitored repository's default branch. +- Parse only direct `require` dependencies. +- Ignore dependencies marked with `// indirect`. +- Normalize each dependency into a generic dependency data model. +- Resolve dependencies hosted at `github.com/org/repo`. +- Collect public GitHub repository health facts for resolved dependencies. +- Collect license metadata for resolved dependency repositories. +- Attempt to collect dependency repository SBOM metadata for resolved dependency repositories. +- Preserve collection errors as data where possible. +- Avoid failing the entire repository evaluation when one dependency cannot be resolved or inspected. + +The plugin should not: + +- Decide whether a dependency is compliant. +- Hardcode age thresholds. +- Hardcode PR staleness thresholds. +- Treat lack of releases, workflows, or permissions as automatic failure. +- Create CRA-specific conclusions directly in Go code. + +### Policy-Owned Responsibilities + +Policies are responsible for interpretation. + +Policies should: + +- Apply thresholds from policy input. +- Decide which signals are warnings, failures, or informational findings. +- Generate CCF evidence for the assessed repository. 
+- Include the dependency name, version, upstream URL, observed value, and threshold in evidence output. +- Distinguish unhealthy dependencies from dependencies with unknown health. +- Distinguish missing dependency SBOM evidence from collection failures. +- Evaluate dependency licenses using policy-owned allowed or banned license lists. + +Policies should not: + +- Depend on Go-specific parser details when a generic dependency field is available. +- Assume every dependency has a GitHub repository. +- Assume every healthy project has frequent releases. +- Assume every project uses GitHub Actions. +- Assume every public dependency repository exposes an SBOM. + +## Dependency Policy Input Model + +Dependency facts are collected as a repository-scoped list internally, but dependency policy bundles should be evaluated once per dependency. Policies should not iterate over `input.dependencies`; they should evaluate the single dependency in `input.dependency`. + +Conceptual shape: + +```json +{ + "repository": { + "organization": "ccf", + "name": "api", + "full_name": "ccf/api", + "url": "https://github.com/ccf/api" + }, + "dependency": { + "name": "github.com/example/lib", + "ecosystem": "go", + "source_file": "go.mod", + "direct": true, + "declared_version": "v1.2.3", + "repository": { + "provider": "github", + "owner": "example", + "name": "lib", + "url": "https://github.com/example/lib", + "resolved": true + }, + "health": { + "repository_archived": false, + "latest_release": { + "tag": "v1.3.0", + "published_at": "2026-01-10T00:00:00Z" + }, + "latest_commit": { + "sha": "abc123", + "committed_at": "2026-04-15T00:00:00Z" + }, + "workflows": { + "count": 3, + "latest_default_branch_run": { + "status": "completed", + "conclusion": "success", + "created_at": "2026-04-16T00:00:00Z" + } + }, + "pull_requests": { + "open_count": 12, + "oldest_open_created_at": "2025-09-01T00:00:00Z", + "recent_closed_count": 20, + "median_days_to_close": 8, + 
"median_hours_to_first_interaction": 14 + } + }, + "supply_chain": { + "license": { + "spdx_id": "MIT", + "name": "MIT License", + "url": "https://api.github.com/licenses/mit", + "collected": true + }, + "sbom": { + "available": true, + "package_count": 42, + "spdx_id": "SPDXRef-DOCUMENT", + "spdx_version": "SPDX-2.3", + "creation_info_created": "2026-01-10T00:00:00Z", + "collected": true + } + }, + "collection_status": { + "dependency_parsed": true, + "repository_resolved": true, + "health_collected": true, + "license_collected": true, + "sbom_collected": true, + "errors": [] + } + }, + "policy_data": {} +} +``` + +This structure is intentionally generic. Go is only the first source parser. + +## First Version Dependency Parsing + +The first version should only parse direct dependencies from `go.mod`. + +Example: + +```go +require ( + github.com/example/lib v1.2.3 + github.com/example/indirect v0.4.0 // indirect +) +``` + +Expected result: + +- Include `github.com/example/lib`. +- Exclude `github.com/example/indirect`. + +Single-line direct requirements should also be supported: + +```go +require github.com/example/lib v1.2.3 +``` + +The parser should record: + +- module path +- declared version +- source file +- ecosystem +- whether it is direct + +The parser should not perform Go module health checks in the first version. 
+ +## Repository Resolution + +The first resolver should handle obvious GitHub module paths: + +```text +github.com/{owner}/{repo} +``` + +For example: + +```text +github.com/google/go-github/v71 +``` + +Should resolve to: + +```text +owner: google +repo: go-github +url: https://github.com/google/go-github +``` + +If a dependency cannot be resolved to GitHub, the plugin should still emit the dependency with: + +```json +{ + "repository": { + "resolved": false + }, + "collection_status": { + "repository_resolved": false + } +} +``` + +Unresolved dependencies are useful evidence for visibility gaps, but they should not be treated as failures by the plugin. + +## Health Signals Collected By The Plugin + +For each resolved GitHub dependency repository, the plugin should collect public/free signals where available. + +### Repository Metadata + +- repository exists +- repository URL +- default branch +- archived status +- disabled status, if available + +### Release Activity + +- latest release tag +- latest release published date +- no release found, if applicable + +No release should be represented as data, not as a collection failure. + +### Commit Activity + +- latest commit on default branch +- latest commit date + +### Workflow Activity + +- workflow count +- latest workflow run on default branch +- latest workflow run status +- latest workflow run conclusion +- latest workflow run creation date + +Missing or inaccessible Actions data should be represented as unknown or unavailable. + +### License Metadata + +The plugin should collect dependency repository license metadata from the resolved GitHub repository when available. + +Suggested facts: + +- SPDX license ID +- license name +- license URL +- whether license collection succeeded + +An absent or unknown license should be represented as data, not as a collection failure. + +The plugin should not decide which licenses are allowed or banned. 
+ +### SBOM Metadata + +The plugin should attempt to collect the dependency repository SBOM using GitHub's dependency graph SBOM endpoint for each resolved dependency repository. + +Suggested facts: + +- whether an SBOM was available +- package count +- SPDX document ID +- SPDX version +- creation timestamp, when present +- collection status and any permission or availability error + +The first version should not store the full dependency SBOM for every dependency unless needed. A summary is enough for policy checks and keeps evidence payloads smaller. + +Missing or inaccessible SBOM data should be represented as unknown or unavailable. It should not fail the parent repository evaluation. + +### Pull Request Staleness + +The first version should collect enough data for policies to reason about PR staleness without requiring deep analysis. + +Suggested facts: + +- open PR count +- oldest open PR creation date +- recent closed PR count within a bounded lookback window +- median days to close recent closed PRs +- median hours to first interaction on recently closed PRs + +The plugin should keep the collection bounded to reduce GitHub API usage. + +Suggested initial bounds: + +- first 100 open PRs +- first 100 recently closed PRs +- closed PR lookback controlled by plugin config or a conservative default + +## Collection Status + +Each dependency should carry collection status. + +This is important because unresolved or inaccessible dependencies are different from unhealthy dependencies. + +Example statuses: + +```json +{ + "dependency_parsed": true, + "repository_resolved": true, + "health_collected": false, + "errors": [ + { + "scope": "workflows", + "message": "actions metadata unavailable" + } + ] +} +``` + +The plugin should continue evaluating the parent repository when dependency health collection is partial. + +## Policy Design + +The initial policy set should be small and demo-focused. 
+ +The goal is to show that dependency facts can produce meaningful repository evidence, not to define a complete CRA compliance profile. + +These policies should be shipped as a separate opt-in policy collection, not as part of the default `plugin-github-repositories-policies` bundle. The default policy bundle should remain focused on broadly applicable repository controls such as branch protection, workflows, releases, SBOM presence, secret scanning, and access control. + +Recommended repository name: + +```text +plugin-github-repositories-dependency-policies +``` + +Alternative CRA-focused name: + +```text +plugin-github-repositories-cra-dependency-policies +``` + +The preferred name is `plugin-github-repositories-dependency-policies` because the expected input data is produced by `plugin-github-repositories`, while the policy domain is dependency health and supply-chain visibility. CRA-specific behavior can be expressed through policy input, profiles, or later policy packages without coupling the whole repository name to CRA. + +The existing `plugin-github-repositories-policies` repository uses one Rego file per policy, paired with a `_test.rego` file. Policies expose: + +- `package compliance_framework.<policy_name>` +- `title` +- `description` +- optional `remarks` +- optional `skip_reason` +- `risk_templates` +- `violation[{"id": "...", "remarks": "..."}]` + +Dependency policies should follow the same pattern. Risk templates should use `violation_ids` when one policy has multiple violation IDs and the risks need to bind to specific outcomes. + +### Policy Input + +Thresholds should come from policy input. 
+ +Example: + +```json +{ + "dependency_health": { + "max_days_since_release": 365, + "max_days_since_commit": 180, + "max_open_prs": 50, + "max_oldest_open_pr_age_days": 180, + "max_median_days_to_close_pr": 30, + "require_dependency_sbom": false, + "banned_licenses": ["BUSL-1.1", "SSPL-1.0", "PolyForm-Noncommercial-1.0.0"], + "allowed_licenses": [], + "unknown_health_is_violation": false + } +} +``` + +Policies should read this via: + +```rego +dependency_health_input := object.get(object.get(input, "policy_data", {}), "dependency_health", {}) +``` + +Default thresholds should be conservative and visible in each policy file. + +### Policy Files To Add + +The first policy set should include six files and six matching test files: + +```text +policies/gh_repo_dependency_repository_archived.rego +policies/gh_repo_dependency_repository_archived_test.rego +policies/gh_repo_dependency_activity_stale.rego +policies/gh_repo_dependency_activity_stale_test.rego +policies/gh_repo_dependency_pr_staleness.rego +policies/gh_repo_dependency_pr_staleness_test.rego +policies/gh_repo_dependency_sbom_available.rego +policies/gh_repo_dependency_sbom_available_test.rego +policies/gh_repo_dependency_license_allowed.rego +policies/gh_repo_dependency_license_allowed_test.rego +policies/gh_repo_dependency_health_unknown.rego +policies/gh_repo_dependency_health_unknown_test.rego +``` + +All six policies should evaluate `input.dependency`. The plugin routes policies with dependency policy behavior metadata through dependency-granular evaluation, producing one evidence result per dependency per policy. Requests default the `plugin-github-repositories-dependency-policies` source to dependency behavior; other dependency policy bundles should opt in by setting dependency behavior metadata. + +Each violation should include dependency-specific `remarks`. Evidence also carries dependency-specific labels so risk acceptance can distinguish one dependency from another. 
+ +Recommended remark shape: + +```rego +sprintf("Direct dependency %q at version %q has archived upstream repository %q.", [input.dependency.name, input.dependency.declared_version, input.dependency.repository.url]) +``` + +### Policy 1: Archived Dependency Repository + +File: + +```text +policies/gh_repo_dependency_repository_archived.rego +``` + +Package: + +```rego +package compliance_framework.dependency_repository_archived +``` + +Flag a direct dependency when its resolved GitHub repository is archived. + +This is a strong and easy-to-explain signal. + +Suggested violation: + +```rego +violation[{"id": "dependency_repository_archived", "remarks": remarks}] if { + input.dependency.direct == true + input.dependency.repository.resolved == true + input.dependency.health.repository_archived == true + remarks := sprintf("Direct dependency %q at version %q uses archived upstream repository %q.", [input.dependency.name, input.dependency.declared_version, input.dependency.repository.url]) +} +``` + +Suggested title: + +```rego +title := "Direct dependency repository is archived" +``` + +Suggested description: + +```rego +description := "Direct dependencies should not rely on archived upstream repositories unless the dependency has been reviewed and accepted. Archived repositories no longer receive normal maintenance signals and may stop receiving security fixes." +``` + +Suggested risk template: + +```json +{ + "name": "Direct dependency uses archived upstream repository", + "title": "Archived Third Party Component May Stop Receiving Security Maintenance", + "statement": "A direct dependency whose upstream repository is archived may no longer receive bug fixes, security patches, or maintainer review. 
Continued use increases the risk that known or future vulnerabilities remain unresolved in the product's dependency chain.", + "likelihood_hint": "moderate", + "impact_hint": "high", + "violation_ids": ["dependency_repository_archived"], + "threat_refs": [ + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1104", + "title": "Use of Unmaintained Third Party Components", + "url": "https://cwe.mitre.org/data/definitions/1104.html" + } + ], + "remediation": { + "title": "Review or replace the archived dependency", + "description": "Assess whether the archived dependency is still safe to use. Replace it with a maintained alternative, fork and maintain it internally, or document a time-bound risk acceptance.", + "tasks": [ + { "title": "Identify code paths that use the archived dependency" }, + { "title": "Check whether a maintained replacement exists" }, + { "title": "Plan dependency replacement or internal maintenance ownership" }, + { "title": "Document any temporary risk acceptance with an expiry date" } + ] + } +} +``` + +Evidence should include: + +- assessed repository +- dependency module path +- declared version +- upstream repository URL +- archived status + +Example evidence statement: + +> Repository `ccf/api` declares direct dependency `github.com/example/lib@v1.2.3`. The dependency repository is archived, so this dependency requires review or replacement planning. + +### Policy 2: Stale Dependency Activity + +File: + +```text +policies/gh_repo_dependency_activity_stale.rego +``` + +Package: + +```rego +package compliance_framework.dependency_activity_stale +``` + +Flag a direct dependency when both release activity and commit activity are stale based on configured thresholds. + +Recommended behavior: + +- Old release alone should not automatically fail. +- No release should be treated separately from old release. +- Recent commits can offset old release activity for libraries that do not publish frequent GitHub releases. 
+ +Suggested violation IDs: + +- `dependency_activity_stale` +- `dependency_has_no_activity_signal` + +Suggested behavior: + +- `dependency_activity_stale`: latest release is older than `max_days_since_release` and latest commit is older than `max_days_since_commit`. +- `dependency_has_no_activity_signal`: no latest release and no latest commit were collected for a resolved dependency repository. +- If release is old but commit is recent, do not violate. +- If release is absent but commit is recent, do not violate. + +Suggested title: + +```rego +title := "Direct dependency shows no recent upstream activity" +``` + +Suggested description: + +```rego +description := "Direct dependency repositories should show recent release or commit activity. A dependency with no recent upstream activity may require review to confirm it is still maintained and safe to rely on." +``` + +Suggested risk templates: + +```json +[ + { + "name": "Direct dependency has stale upstream activity", + "title": "Stale Third Party Component Maintenance May Delay Security Fixes", + "statement": "A direct dependency with no recent release and no recent default-branch commit may indicate reduced upstream maintenance. Reduced maintenance can delay security fixes, compatibility updates, and review of reported defects.", + "likelihood_hint": "moderate", + "impact_hint": "moderate", + "violation_ids": ["dependency_activity_stale"], + "threat_refs": [ + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1104", + "title": "Use of Unmaintained Third Party Components", + "url": "https://cwe.mitre.org/data/definitions/1104.html" + } + ], + "remediation": { + "title": "Review stale dependency maintenance status", + "description": "Confirm whether the dependency remains maintained and appropriate for production use. 
If not, plan replacement or internal maintenance ownership.", + "tasks": [ + { "title": "Review upstream repository activity and maintainer communications" }, + { "title": "Check whether newer maintained alternatives exist" }, + { "title": "Create a remediation issue for replacement or upgrade" }, + { "title": "Document acceptance rationale if the dependency is intentionally stable" } + ] + } + }, + { + "name": "Direct dependency has no collected activity signal", + "title": "Missing Upstream Activity Evidence Limits Dependency Due Diligence", + "statement": "When no release or commit activity can be collected for a direct dependency, the organization lacks evidence to determine whether the upstream component is actively maintained. This creates a visibility gap in third-party component risk management.", + "likelihood_hint": "moderate", + "impact_hint": "moderate", + "violation_ids": ["dependency_has_no_activity_signal"], + "threat_refs": [ + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1059", + "title": "Incomplete Documentation", + "url": "https://cwe.mitre.org/data/definitions/1059.html" + } + ], + "remediation": { + "title": "Record dependency maintenance evidence", + "description": "Investigate why upstream activity could not be collected and record an alternative maintenance signal or replacement plan.", + "tasks": [ + { "title": "Verify the dependency source repository" }, + { "title": "Collect an alternative maintenance signal if GitHub data is unavailable" }, + { "title": "Document the dependency review result" } + ] + } + } +] +``` + +Evidence should include: + +- latest release date, if any +- latest commit date, if any +- configured thresholds +- dependency repository URL + +### Policy 3: PR Maintenance Staleness + +File: + +```text +policies/gh_repo_dependency_pr_staleness.rego +``` + +Package: + +```rego +package compliance_framework.dependency_pr_staleness +``` + +Flag a direct dependency when PR maintenance signals exceed configured 
thresholds. + +Suggested violation IDs: + +- `dependency_open_pr_backlog_stale` +- `dependency_pr_close_time_stale` + +Suggested behavior: + +- `dependency_open_pr_backlog_stale`: open PR count exceeds `max_open_prs` and oldest open PR age exceeds `max_oldest_open_pr_age_days`. +- `dependency_pr_close_time_stale`: median days to close recent closed PRs exceeds `max_median_days_to_close_pr`. +- If PR data is unavailable, this policy should skip or stay silent. Unknown collection belongs to `dependency_health_unknown`. + +Suggested title: + +```rego +title := "Direct dependency pull requests are maintained within expected thresholds" +``` + +Suggested description: + +```rego +description := "Direct dependency repositories should not show excessive stale pull request backlog or slow pull request closure because those signals may indicate reduced maintainer responsiveness." +``` + +Suggested risk template: + +```json +{ + "name": "Direct dependency has stale pull request maintenance", + "title": "Low Upstream Maintainer Responsiveness May Delay Dependency Fixes", + "statement": "A dependency repository with a large stale pull request backlog or slow pull request closure may have reduced maintainer responsiveness. 
This can delay bug fixes, security patches, and compatibility updates needed by downstream products.", + "likelihood_hint": "moderate", + "impact_hint": "moderate", + "violation_ids": ["dependency_open_pr_backlog_stale", "dependency_pr_close_time_stale"], + "threat_refs": [ + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1104", + "title": "Use of Unmaintained Third Party Components", + "url": "https://cwe.mitre.org/data/definitions/1104.html" + } + ], + "remediation": { + "title": "Review dependency maintainer responsiveness", + "description": "Assess whether the dependency is still suitable based on upstream maintainer responsiveness and create a tracked remediation or replacement plan where needed.", + "tasks": [ + { "title": "Review open pull request backlog and maintainer responses" }, + { "title": "Check whether critical fixes are delayed upstream" }, + { "title": "Evaluate maintained alternatives or internal fork ownership" }, + { "title": "Create a remediation issue for dependencies with unacceptable responsiveness" } + ] + } +} +``` + +Evidence should include: + +- open PR count +- oldest open PR age +- median days to close, if available +- configured thresholds + +Example evidence statement: + +> Repository `ccf/api` declares direct dependency `github.com/example/lib@v1.2.3`. The upstream project has 84 open pull requests and the oldest open pull request is 290 days old, exceeding the configured maintenance threshold. + +### Policy 4: Dependency SBOM Available + +File: + +```text +policies/gh_repo_dependency_sbom_available.rego +``` + +Package: + +```rego +package compliance_framework.dependency_sbom_available +``` + +Flag a direct dependency when dependency SBOM evidence is required and the resolved upstream repository does not expose an SBOM summary. 
+ +This policy should be disabled by default for the demo unless `require_dependency_sbom` is set to `true`, because many public repositories will not expose SBOM data through GitHub's dependency graph endpoint. + +Suggested violation IDs: + +- `dependency_sbom_absent` +- `dependency_sbom_empty` + +Suggested behavior: + +- If `require_dependency_sbom` is `false`, do not emit violations. +- `dependency_sbom_absent`: dependency is direct and resolved, but `input.dependency.supply_chain.sbom.available` is not true. +- `dependency_sbom_empty`: dependency SBOM is available but package count is zero. +- If SBOM collection failed due to permissions or API availability, leave that to `dependency_health_unknown` unless `require_dependency_sbom` is true. + +Suggested title: + +```rego +title := "Direct dependency repository exposes SBOM evidence" +``` + +Suggested description: + +```rego +description := "Direct dependencies should expose SBOM evidence when dependency SBOM visibility is required by policy. Dependency SBOMs improve third-party component visibility and support downstream vulnerability response." +``` + +Suggested risk template: + +```json +{ + "name": "Direct dependency SBOM is absent", + "title": "Missing Dependency SBOM Limits Third Party Component Visibility", + "statement": "When a direct dependency does not expose SBOM evidence, downstream users have less visibility into that component's own dependency chain. 
This can slow vulnerability impact analysis and weaken supply chain due diligence for products that rely on the component.", + "likelihood_hint": "moderate", + "impact_hint": "moderate", + "violation_ids": ["dependency_sbom_absent", "dependency_sbom_empty"], + "threat_refs": [ + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1059", + "title": "Incomplete Documentation", + "url": "https://cwe.mitre.org/data/definitions/1059.html" + }, + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1104", + "title": "Use of Unmaintained Third Party Components", + "url": "https://cwe.mitre.org/data/definitions/1104.html" + } + ], + "remediation": { + "title": "Review dependency SBOM visibility", + "description": "Record whether the dependency provides SBOM information or whether alternative component visibility is available through package metadata, internal scanning, or manual review.", + "tasks": [ + { "title": "Check whether the upstream dependency publishes an SBOM in releases or repository artifacts" }, + { "title": "Scan the consumed dependency version with an internal SBOM generation tool where feasible" }, + { "title": "Document the dependency SBOM visibility result" }, + { "title": "Prefer dependencies with stronger component transparency for high-risk use cases" } + ] + } +} +``` + +Evidence should include: + +- dependency module path +- declared version +- upstream repository URL +- SBOM availability +- package count, if available +- `require_dependency_sbom` policy input value + +### Policy 5: Dependency License Allowed + +File: + +```text +policies/gh_repo_dependency_license_allowed.rego +``` + +Package: + +```rego +package compliance_framework.dependency_license_allowed +``` + +Flag a direct dependency when its collected upstream repository license is banned or, when an allow-list is configured, not present in the allow-list. 
+ +This policy should mirror the existing repository/SBOM license policy style, but it should evaluate `input.dependency.supply_chain.license`. + +Suggested violation IDs: + +- `dependency_banned_license` +- `dependency_license_not_allowed` +- `dependency_license_unknown` + +Suggested behavior: + +- `dependency_banned_license`: dependency license SPDX ID matches `banned_licenses`. +- `dependency_license_not_allowed`: `allowed_licenses` is non-empty and dependency license SPDX ID is not in it. +- `dependency_license_unknown`: license collection succeeded but SPDX ID is empty or missing. +- If license collection failed due to repository access or API availability, leave that to `dependency_health_unknown`. + +Suggested title: + +```rego +title := "Direct dependency license is allowed" +``` + +Suggested description: + +```rego +description := "Direct dependencies should use licenses that are acceptable for the repository's distribution and compliance requirements. Banned, unknown, or non-allow-listed licenses require review." +``` + +Suggested risk template: + +```json +{ + "name": "Direct dependency has unacceptable or unknown license", + "title": "Dependency License Creates Legal or Distribution Risk", + "statement": "A direct dependency with a banned, unknown, or non-allow-listed license can create legal, commercial, or redistribution risk for the product. 
License uncertainty also weakens the evidence needed to demonstrate third-party component due diligence.", + "likelihood_hint": "moderate", + "impact_hint": "high", + "violation_ids": ["dependency_banned_license", "dependency_license_not_allowed", "dependency_license_unknown"], + "threat_refs": [ + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1059", + "title": "Incomplete Documentation", + "url": "https://cwe.mitre.org/data/definitions/1059.html" + } + ], + "remediation": { + "title": "Review or replace dependency with unacceptable license", + "description": "Confirm whether the dependency license is compatible with the repository's use case. Replace the dependency, obtain legal approval, or document an accepted exception where appropriate.", + "tasks": [ + { "title": "Review the dependency's upstream license file and package metadata" }, + { "title": "Confirm compatibility with product distribution and customer obligations" }, + { "title": "Replace dependencies with unacceptable licenses" }, + { "title": "Document legal approval or risk acceptance for exceptions" }, + { "title": "Add license scanning to prevent recurrence" } + ] + } +} +``` + +Evidence should include: + +- dependency module path +- declared version +- upstream repository URL +- collected SPDX ID +- banned or allowed license list used by policy + +### Policy 6: Unknown Dependency Health + +File: + +```text +policies/gh_repo_dependency_health_unknown.rego +``` + +Package: + +```rego +package compliance_framework.dependency_health_unknown +``` + +Warn when dependency health cannot be collected. + +This should support due-diligence evidence without overclaiming risk. 
+ +Examples: + +- dependency repository cannot be resolved +- dependency repository is not hosted on GitHub +- dependency repository is inaccessible +- workflows are unavailable +- license metadata is unavailable +- SBOM metadata is unavailable + +Unknown health should be a warning or informational finding unless the organization's policy says otherwise. + +Because the current policy engine models policy outcomes through violations, this policy should be configurable: + +- If `unknown_health_is_violation` is `false`, expose a `skip_reason` or no violation. +- If `unknown_health_is_violation` is `true`, emit violations for unresolved or uncollected dependency health. + +Suggested violation IDs: + +- `dependency_repository_unresolved` +- `dependency_health_not_collected` + +Suggested title: + +```rego +title := "Direct dependency health is observable" +``` + +Suggested description: + +```rego +description := "Direct dependency health should be observable enough to support third-party component due diligence. Unresolved or inaccessible dependencies create visibility gaps that may require manual review." +``` + +Suggested risk template: + +```json +{ + "name": "Direct dependency health could not be determined", + "title": "Unknown Third Party Component Health Creates Supply Chain Visibility Gap", + "statement": "When the source repository or maintenance health of a direct dependency cannot be determined, the organization cannot demonstrate complete due diligence over that third-party component. 
This may delay vulnerability response and complicate replacement planning when the component becomes risky.", + "likelihood_hint": "moderate", + "impact_hint": "moderate", + "violation_ids": ["dependency_repository_unresolved", "dependency_health_not_collected"], + "threat_refs": [ + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1059", + "title": "Insufficient Technical Documentation", + "url": "https://cwe.mitre.org/data/definitions/1059.html" + }, + { + "system": "https://cwe.mitre.org", + "external_id": "CWE-1104", + "title": "Use of Unmaintained Third Party Components", + "url": "https://cwe.mitre.org/data/definitions/1104.html" + } + ], + "remediation": { + "title": "Resolve dependency source and maintenance evidence", + "description": "Determine the upstream source and maintenance status for the dependency, or document why the dependency cannot be evaluated automatically.", + "tasks": [ + { "title": "Confirm the dependency source repository or package registry metadata" }, + { "title": "Record a manual maintenance review when automatic collection is unavailable" }, + { "title": "Replace dependencies whose source cannot be trusted or verified" }, + { "title": "Document any accepted visibility gap with a review date" } + ] + } +} +``` + +## Policy Work Required + +The new policy repository should be bootstrapped with the same conventions as `plugin-github-repositories-policies`. + +Recommended repository: + +```text +plugin-github-repositories-dependency-policies +``` + +The policy workspace changes should include: + +- Add the six policy files listed above. +- Add one `_test.rego` file per policy. +- Add `README.md` explaining that the policy collection expects `plugin-github-repositories` input with dependency collection enabled. +- Add `Makefile` or equivalent bundle/test workflow matching `plugin-github-repositories-policies`. +- Use `input.dependency` as the policy input surface. +- Use `input.policy_data.dependency_health` for thresholds. 
+- Include `title`, `description`, and `risk_templates` in every policy. +- Include dependency-specific `remarks` in every violation. +- Add skip tests where data is absent or collection is unavailable. +- Run `opa test policies`. + +Minimum test coverage per policy: + +- passing case with healthy dependency data +- violation case with one unhealthy dependency +- non-direct dependency ignored +- unresolved or partial collection handled as intended +- policy input threshold override where applicable + +## Evidence Relationship Model + +The primary assessed subject remains the monitored repository. + +Example: + +```text +github-repository/ccf/api +``` + +Each dependency should be referenced in evidence details. + +Recommended dependency identifier: + +```text +github-repository-dependency/ccf/api/github.com/example/lib@v1.2.3 +``` + +The first implementation can keep dependencies nested under repository evaluation data. These stable identifiers can be promoted to first-class inventory items later. + +Future inventory item example: + +```text +github-repository-dependency/ccf/api/github.com/example/lib@v1.2.3 +``` + +This allows future evidence relationships such as: + +```text +github-repository/ccf/api uses dependency/go/github.com/example/lib@v1.2.3 +``` + +## Implementation Shape + +When implementation starts, split the plugin code into three conceptual layers. + +### 1. Dependency Parser + +First implementation: + +- `go.mod` parser +- direct dependencies only +- generic output + +Future implementations can add parsers for other ecosystems without changing policy shape. + +### 2. Repository Resolver + +First implementation: + +- resolve `github.com/{owner}/{repo}` module paths + +Future implementations can add: + +- vanity Go import resolution +- npm repository metadata +- PyPI project URLs +- Maven SCM URLs + +### 3. 
Dependency Health Collector + +First implementation: + +- collect GitHub repository metadata +- latest release +- latest commit +- workflow summary +- PR staleness summary +- license summary +- SBOM summary + +The collector should use the existing GitHub client and should treat dependency-level failures as partial dependency collection, not as parent repository evaluation failure. + +## Configuration Considerations + +The first version can use conservative defaults, but the following plugin-level settings may be useful: + +```yaml +dependency_health_enabled: true +dependency_health_max_dependencies: 50 +dependency_health_closed_pr_lookback_days: 180 +dependency_health_include_unresolved: true +dependency_health_collect_sbom: true +dependency_health_pr_interaction_sample_size: 20 +``` + +Policy thresholds should remain in policy input, not plugin config. + +Plugin config should control collection cost and feature enablement. + +Policy input should control interpretation. + +## CRA Demo Framing + +The recommended CRA-oriented framing is: + +> The organization maintains visibility into direct third-party software dependencies and monitors public maintenance signals for those dependencies. Dependencies with weak or unknown maintenance signals are tracked for review, remediation, or replacement planning. + +Avoid this framing: + +> This dependency is CRA-compliant or non-compliant. + +The plugin can provide evidence supporting third-party component due diligence, but it should not certify upstream projects. + +## First Demo Outcome + +The first demo should be able to say: + +> For repository `ccf/api`, CCF identified direct dependencies from `go.mod`, resolved the GitHub-hosted upstream repositories, collected public maintenance and supply-chain visibility signals, and produced evidence that selected dependencies require review. 
+ +Concrete demo findings could include: + +- direct dependency repository is archived +- direct dependency has no recent release and no recent commits +- direct dependency has stale PR maintenance indicators +- direct dependency has unacceptable or unknown license metadata +- direct dependency does not expose SBOM evidence when dependency SBOM visibility is required +- direct dependency health could not be determined + +This is enough to prove the capability while keeping the implementation small and the policy behavior explainable. diff --git a/go.mod b/go.mod index 63cbe2b..e19c203 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,13 @@ module github.com/compliance-framework/plugin-github-repositories go 1.26.1 require ( - github.com/compliance-framework/agent v0.6.2 + github.com/compliance-framework/agent v0.7.0 github.com/google/go-github/v71 v71.0.0 github.com/hashicorp/go-hclog v1.6.3 github.com/hashicorp/go-plugin v1.7.0 github.com/mitchellh/mapstructure v1.5.0 github.com/shurcooL/githubv4 v0.0.0-20240727222349-48295856cce7 + golang.org/x/mod v0.34.0 golang.org/x/oauth2 v0.35.0 ) diff --git a/go.sum b/go.sum index c4f536f..37df4c3 100644 --- a/go.sum +++ b/go.sum @@ -56,8 +56,8 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/compliance-framework/agent v0.6.2 h1:4Ha3kTDpoAXDsGOnczeVXdf56dl7h2XNxIfawWJc+LI= -github.com/compliance-framework/agent v0.6.2/go.mod h1:k6sNhVQXviFHbz/Fe/jOkfBZ+AFLnRPIuOH2aaaCTNo= +github.com/compliance-framework/agent v0.7.0 h1:ZNuztQKLNvazIqe9QVV9OjERCPOt3GlZ1/wv9iLOwtU= +github.com/compliance-framework/agent v0.7.0/go.mod h1:k6sNhVQXviFHbz/Fe/jOkfBZ+AFLnRPIuOH2aaaCTNo= github.com/compliance-framework/api v0.16.0 
h1:0HO5a5N80ktJLeLD5GVeTk7cK7PO9Xj5WN4SR+KGBH0= github.com/compliance-framework/api v0.16.0/go.mod h1:BupcN8mQFgB0/2+YShU/r4BUYoGwzSjbz2esdOUaX/4= github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= diff --git a/main.go b/main.go index 500308f..5851ad2 100644 --- a/main.go +++ b/main.go @@ -21,27 +21,42 @@ import ( "golang.org/x/oauth2" ) +// Validator is implemented by configuration values that can validate themselves. type Validator interface { Validate() error } +// PluginConfig contains user-provided and parsed runtime configuration for the plugin. type PluginConfig struct { - Token string `mapstructure:"token"` - Organization string `mapstructure:"organization"` - IncludedRepositories string `mapstructure:"included_repositories"` - ExcludedRepositories string `mapstructure:"excluded_repositories"` - DeploymentLookbackDays string `mapstructure:"deployment_lookback_days"` // Number of days to look back for deployments (default: 90) - OnlyActiveDeployments string `mapstructure:"only_active_deployments"` // Only fetch deployments that are still active (not superseded) (default: false) - IncludeFailedDeployments string `mapstructure:"include_failed_deployments"` // Include deployments with failure/error states (default: false) - PolicyInput string `mapstructure:"policy_input"` // Policy-specific input as JSON string (e.g., {"workflow_names": ["ci.yml", "build.yml"]}) + Token string `mapstructure:"token"` + Organization string `mapstructure:"organization"` + IncludedRepositories string `mapstructure:"included_repositories"` + ExcludedRepositories string `mapstructure:"excluded_repositories"` + DeploymentLookbackDays string `mapstructure:"deployment_lookback_days"` // Number of days to look back for deployments (default: 90) + OnlyActiveDeployments string `mapstructure:"only_active_deployments"` // Only fetch deployments that are still active (not superseded) (default: false) + IncludeFailedDeployments string 
`mapstructure:"include_failed_deployments"` // Include deployments with failure/error states (default: false) + DependencyHealthEnabled string `mapstructure:"dependency_health_enabled"` + DependencyHealthMaxDependencies string `mapstructure:"dependency_health_max_dependencies"` + DependencyHealthClosedPRLookbackDays string `mapstructure:"dependency_health_closed_pr_lookback_days"` + DependencyHealthIncludeUnresolved string `mapstructure:"dependency_health_include_unresolved"` + DependencyHealthCollectSBOM string `mapstructure:"dependency_health_collect_sbom"` + DependencyHealthPRInteractionSampleSize string `mapstructure:"dependency_health_pr_interaction_sample_size"` + PolicyInput string `mapstructure:"policy_input"` // Parsed values (set during Configure) - deploymentLookbackDays int - onlyActiveDeployments bool - includeFailedDeployments bool - policyInputMap map[string]interface{} + policyData map[string]interface{} + deploymentLookbackDays int + onlyActiveDeployments bool + includeFailedDeployments bool + dependencyHealthEnabled bool + dependencyHealthMaxDependencies int + dependencyHealthClosedPRLookbackDays int + dependencyHealthIncludeUnresolved bool + dependencyHealthCollectSBOM bool + dependencyHealthPRInteractionSampleSize int } +// Validate checks required configuration fields and mutually exclusive repository filters. 
func (c *PluginConfig) Validate() error { if c.Token == "" { return fmt.Errorf("token is required") @@ -95,26 +110,79 @@ func (c *PluginConfig) parseDeploymentConfig() error { return nil } -func (c *PluginConfig) parsePolicyInput() error { - // Parse policy input JSON string (default: empty map) +func (c *PluginConfig) parseDependencyHealthConfig() error { + var err error + c.dependencyHealthEnabled, err = parseBoolConfig(c.DependencyHealthEnabled, false, "dependency_health_enabled") + if err != nil { + return err + } + c.dependencyHealthMaxDependencies, err = parsePositiveIntConfig(c.DependencyHealthMaxDependencies, 50, "dependency_health_max_dependencies") + if err != nil { + return err + } + c.dependencyHealthClosedPRLookbackDays, err = parsePositiveIntConfig(c.DependencyHealthClosedPRLookbackDays, 180, "dependency_health_closed_pr_lookback_days") + if err != nil { + return err + } + c.dependencyHealthIncludeUnresolved, err = parseBoolConfig(c.DependencyHealthIncludeUnresolved, true, "dependency_health_include_unresolved") + if err != nil { + return err + } + c.dependencyHealthCollectSBOM, err = parseBoolConfig(c.DependencyHealthCollectSBOM, true, "dependency_health_collect_sbom") + if err != nil { + return err + } + c.dependencyHealthPRInteractionSampleSize, err = parsePositiveIntConfig(c.DependencyHealthPRInteractionSampleSize, 20, "dependency_health_pr_interaction_sample_size") + if err != nil { + return err + } + return nil +} + +func (c *PluginConfig) parseLegacyPolicyInput() (map[string]interface{}, error) { if c.PolicyInput == "" { - c.policyInputMap = make(map[string]interface{}) - return nil + return map[string]interface{}{}, nil } var result map[string]interface{} if err := json.Unmarshal([]byte(c.PolicyInput), &result); err != nil { - return fmt.Errorf("invalid policy_input JSON: %w", err) + return nil, fmt.Errorf("invalid policy_input JSON: %w", err) } - c.policyInputMap = result - return nil + return result, nil } +func parseBoolConfig(value string, 
defaultValue bool, name string) (bool, error) { + if value == "" { + return defaultValue, nil + } + parsed, err := strconv.ParseBool(value) + if err != nil { + return false, fmt.Errorf("invalid %s: %w", name, err) + } + return parsed, nil +} + +func parsePositiveIntConfig(value string, defaultValue int, name string) (int, error) { + if value == "" { + return defaultValue, nil + } + parsed, err := strconv.Atoi(value) + if err != nil { + return 0, fmt.Errorf("invalid %s: %w", name, err) + } + if parsed <= 0 { + return 0, fmt.Errorf("invalid %s: must be greater than 0", name) + } + return parsed, nil +} + +// DeploymentWithStatuses pairs a deployment with its observed deployment statuses. type DeploymentWithStatuses struct { Deployment *github.Deployment `json:"deployment"` Statuses []*github.DeploymentStatus `json:"statuses"` } +// SaturatedRepository contains all repository facts passed to repository policies. type SaturatedRepository struct { Settings *github.Repository `json:"settings"` Workflows []*github.Workflow `json:"workflows"` @@ -136,9 +204,11 @@ type SaturatedRepository struct { RepositoryTeams []*RepositoryTeam `json:"repository_teams"` Environments []*RepositoryEnvironment `json:"environments"` EffectiveBranchRules map[string]*BranchRuleEvidence `json:"effective_branch_rules"` + PolicyData map[string]interface{} `json:"policy_data"` PolicyInput map[string]interface{} `json:"policy_input"` } +// GithubReposPlugin implements the CCF runner interface for GitHub repository evidence. type GithubReposPlugin struct { Logger hclog.Logger @@ -147,9 +217,10 @@ type GithubReposPlugin struct { graphqlClient *githubv4.Client } +// Configure validates configuration and initializes GitHub REST and GraphQL clients. 
func (l *GithubReposPlugin) Configure(req *proto.ConfigureRequest) (*proto.ConfigureResponse, error) { l.Logger.Info("Configuring GitHub Repositories Plugin") - config := &PluginConfig{} + config := &PluginConfig{policyData: map[string]interface{}{}} if err := mapstructure.Decode(req.Config, config); err != nil { l.Logger.Error("Error decoding config", "error", err) @@ -166,12 +237,34 @@ func (l *GithubReposPlugin) Configure(req *proto.ConfigureRequest) (*proto.Confi l.Logger.Error("Error parsing deployment config", "error", err) return nil, err } - - // Parse policy input JSON string - if err := config.parsePolicyInput(); err != nil { - l.Logger.Error("Error parsing policy input", "error", err) + if err := config.parseDependencyHealthConfig(); err != nil { + l.Logger.Error("Error parsing dependency health config", "error", err) return nil, err } + if req.GetPolicyData() != nil { + config.policyData = req.GetPolicyData().AsMap() + } else { + legacyPolicyInput, err := config.parseLegacyPolicyInput() + if err != nil { + l.Logger.Error("Error parsing legacy policy input", "error", err) + return nil, err + } + config.policyData = legacyPolicyInput + } + l.Logger.Debug( + "Policy data parsed", + "policy_data_keys", mapKeys(config.policyData), + "policy_data_count", len(config.policyData), + ) + l.Logger.Debug( + "Dependency health config parsed", + "enabled", config.dependencyHealthEnabled, + "max_dependencies", config.dependencyHealthMaxDependencies, + "closed_pr_lookback_days", config.dependencyHealthClosedPRLookbackDays, + "include_unresolved", config.dependencyHealthIncludeUnresolved, + "collect_sbom", config.dependencyHealthCollectSBOM, + "pr_interaction_sample_size", config.dependencyHealthPRInteractionSampleSize, + ) l.config = config httpClient := oauth2.NewClient(context.Background(), oauth2.StaticTokenSource(&oauth2.Token{ @@ -183,6 +276,7 @@ func (l *GithubReposPlugin) Configure(req *proto.ConfigureRequest) (*proto.Confi return &proto.ConfigureResponse{}, nil 
} +// Init registers subject templates and policy-derived risks for this plugin. func (l *GithubReposPlugin) Init(req *proto.InitRequest, apiHelper runner.ApiHelper) (*proto.InitResponse, error) { ctx := context.Background() @@ -206,8 +300,18 @@ func (l *GithubReposPlugin) Init(req *proto.InitRequest, apiHelper runner.ApiHel return runner.InitWithSubjectsAndRisksFromPolicies(ctx, l.Logger, req, apiHelper, subjectTemplates) } +// Eval gathers repository evidence, evaluates repository policies, and evaluates dependency policies when configured. func (l *GithubReposPlugin) Eval(req *proto.EvalRequest, apiHelper runner.ApiHelper) (*proto.EvalResponse, error) { ctx := context.TODO() + policyRequest := requestWithDefaultPolicyBehavior(req) + repositoryPolicyPaths := policyRequest.PolicyPathsForBehavior(policyBehaviorRepository) + dependencyPolicyPaths := policyRequest.PolicyPathsForBehavior(policyBehaviorDependency) + l.Logger.Debug( + "Resolved policy paths by behavior", + "policy_paths", policyRequest.GetPolicyPaths(), + "repository_policy_paths", repositoryPolicyPaths, + "dependency_policy_paths", dependencyPolicyPaths, + ) repochan, errchan := l.FetchRepositories(ctx, req) done := false @@ -387,26 +491,73 @@ func (l *GithubReposPlugin) Eval(req *proto.EvalRequest, apiHelper runner.ApiHel RepositoryTeams: repositoryTeams, Environments: environments, EffectiveBranchRules: effectiveBranchRules, - PolicyInput: l.config.policyInputMap, + PolicyData: l.config.policyData, + PolicyInput: l.config.policyData, + } + + if len(repositoryPolicyPaths) > 0 { + evidences, err := l.EvaluatePolicies(ctx, data, nil, repositoryPolicyPaths, nil) + if err != nil { + l.Logger.Error("Error evaluating repository policies", "error", err) + return &proto.EvalResponse{ + Status: proto.ExecutionStatus_FAILURE, + }, err + } + if err := apiHelper.CreateEvidence(ctx, evidences); err != nil { + l.Logger.Error("Error creating repository evidence", "error", err) + return &proto.EvalResponse{ + Status: 
proto.ExecutionStatus_FAILURE, + }, err + } + } + + if len(dependencyPolicyPaths) == 0 { + continue + } + if !l.config.dependencyHealthEnabled { + l.Logger.Warn( + "Dependency policy paths were provided, but dependency health collection is disabled", + "repo", repo.GetFullName(), + "dependency_policy_paths", dependencyPolicyPaths, + ) + continue } - // Uncomment to check the data that is being passed through from - // the client, as data formats are often slightly different than - // the raw API endpoints - evidences, err := l.EvaluatePolicies(ctx, data, req) + + l.Logger.Debug("Collecting repository dependencies", "repo", repo.GetFullName()) + dependencies, err := l.gatherRepositoryDependencies(ctx, repo, func(*RepositoryDependency) error { + return nil + }) if err != nil { - l.Logger.Error("Error evaluating policies", "error", err) + l.Logger.Error("Error collecting repository dependencies", "error", err) return &proto.EvalResponse{ Status: proto.ExecutionStatus_FAILURE, }, err } - - if err := apiHelper.CreateEvidence(ctx, evidences); err != nil { - l.Logger.Error("Error creating evidence", "error", err) + dependencyEvidences, err := l.EvaluatePolicies(ctx, data, dependencies, dependencyPolicyPaths, l.config.policyData) + if err != nil { + l.Logger.Error("Error evaluating dependency policies", "error", err) return &proto.EvalResponse{ Status: proto.ExecutionStatus_FAILURE, }, err } - + if len(dependencyEvidences) > 0 { + if err := apiHelper.CreateEvidence(ctx, dependencyEvidences); err != nil { + l.Logger.Error("Error creating dependency evidence", "error", err) + return &proto.EvalResponse{ + Status: proto.ExecutionStatus_FAILURE, + }, err + } + } + l.Logger.Debug("Submitted dependency evidence", "repo", repo.GetFullName(), "evidence_count", len(dependencyEvidences)) + l.Logger.Debug("Repository dependency collection complete", "repo", repo.GetFullName(), "dependencies", len(dependencies)) + if len(dependencies) == 0 { + l.Logger.Warn( + "Dependency policy paths 
were provided, but no dependencies are available for evaluation", + "repo", repo.GetFullName(), + "dependency_policy_paths", dependencyPolicyPaths, + "dependency_health_enabled", l.config.dependencyHealthEnabled, + ) + } } } @@ -415,6 +566,7 @@ func (l *GithubReposPlugin) Eval(req *proto.EvalRequest, apiHelper runner.ApiHel }, nil } +// FetchRepositories streams repositories selected by the plugin include/exclude configuration. func (l *GithubReposPlugin) FetchRepositories(ctx context.Context, req *proto.EvalRequest) (chan *github.Repository, chan error) { repochan := make(chan *github.Repository) errchan := make(chan error) @@ -477,6 +629,7 @@ func (l *GithubReposPlugin) FetchRepositories(ctx context.Context, req *proto.Ev return repochan, errchan } +// FecthLatestRelease retrieves the latest GitHub release, returning nil when none exists. func (l *GithubReposPlugin) FecthLatestRelease(ctx context.Context, repo *github.Repository) (*github.RepositoryRelease, error) { owner := repo.GetOwner().GetLogin() name := repo.GetName() @@ -494,6 +647,7 @@ func (l *GithubReposPlugin) FecthLatestRelease(ctx context.Context, repo *github return release, nil } +// GatherConfiguredWorkflows returns workflows configured in the repository. func (l *GithubReposPlugin) GatherConfiguredWorkflows(ctx context.Context, repo *github.Repository) ([]*github.Workflow, error) { workflows, _, err := l.githubClient.Actions.ListWorkflows(ctx, repo.GetOwner().GetLogin(), repo.GetName(), nil) if err != nil { @@ -502,6 +656,7 @@ func (l *GithubReposPlugin) GatherConfiguredWorkflows(ctx context.Context, repo return workflows.Workflows, nil } +// GatherWorkflowRuns returns recent workflow runs for the repository. 
func (l *GithubReposPlugin) GatherWorkflowRuns(ctx context.Context, repo *github.Repository) ([]*github.WorkflowRun, error) { opts := &github.ListOptions{ PerPage: 100, @@ -515,6 +670,7 @@ func (l *GithubReposPlugin) GatherWorkflowRuns(ctx context.Context, repo *github return workflowRuns.WorkflowRuns, nil } +// FetchDeploymentsWithStatuses returns filtered deployment evidence with statuses attached. func (l *GithubReposPlugin) FetchDeploymentsWithStatuses(ctx context.Context, repo *github.Repository) ([]*DeploymentWithStatuses, error) { deployments, err := l.fetchDeploymentsWithStatuses(ctx, repo) if err != nil { @@ -523,6 +679,7 @@ func (l *GithubReposPlugin) FetchDeploymentsWithStatuses(ctx context.Context, re return l.filterDeployments(deployments), nil } +// FetchFailedDeploymentsWithStatuses returns deployments whose latest collected statuses include failures. func (l *GithubReposPlugin) FetchFailedDeploymentsWithStatuses(ctx context.Context, repo *github.Repository) ([]*DeploymentWithStatuses, error) { deployments, err := l.fetchDeploymentsWithStatuses(ctx, repo) if err != nil { @@ -652,6 +809,7 @@ func (l *GithubReposPlugin) shouldSkipDeployment(deployment *github.Deployment, return false } +// ListProtectedBranches returns all protected branches visible through the GitHub API. func (l *GithubReposPlugin) ListProtectedBranches(ctx context.Context, repo *github.Repository) ([]*github.Branch, error) { owner := repo.GetOwner().GetLogin() name := repo.GetName() @@ -674,6 +832,7 @@ func (l *GithubReposPlugin) ListProtectedBranches(ctx context.Context, repo *git return out, nil } +// GetBranchProtectionAndRequiredStatusCheck merges branch protection and ruleset status-check evidence. 
func (l *GithubReposPlugin) GetBranchProtectionAndRequiredStatusCheck(ctx context.Context, repo *github.Repository, branch string) (*github.Protection, *github.RequiredStatusChecks, error) { owner := repo.GetOwner().GetLogin() name := repo.GetName() @@ -774,6 +933,7 @@ func (l *GithubReposPlugin) GetBranchProtectionAndRequiredStatusCheck(ctx contex return branchProtection, result, nil } +// GatherSBOM returns the repository SBOM when GitHub dependency graph access permits it. func (l *GithubReposPlugin) GatherSBOM(ctx context.Context, repo *github.Repository) (*github.SBOM, error) { sbom, _, err := l.githubClient.DependencyGraph.GetSBOM(ctx, repo.GetOwner().GetLogin(), repo.GetName()) if err != nil { @@ -787,6 +947,7 @@ func (l *GithubReposPlugin) GatherSBOM(ctx context.Context, repo *github.Reposit return sbom, nil } +// GatherOpenPullRequests returns open pull requests for the repository. func (l *GithubReposPlugin) GatherOpenPullRequests(ctx context.Context, repo *github.Repository) ([]*github.PullRequest, error) { opts := &github.ListOptions{ PerPage: 100, @@ -801,7 +962,21 @@ func (l *GithubReposPlugin) GatherOpenPullRequests(ctx context.Context, repo *gi return pullRequests, nil } -func (l *GithubReposPlugin) EvaluatePolicies(ctx context.Context, data *SaturatedRepository, req *proto.EvalRequest) ([]*proto.Evidence, error) { +// EvaluatePolicies evaluates repository or dependency policy paths and returns generated evidence. 
+func (l *GithubReposPlugin) EvaluatePolicies(ctx context.Context, data *SaturatedRepository, dependencies []*RepositoryDependency, policyPaths []string, dependencyPolicyData map[string]interface{}) ([]*proto.Evidence, error) { + if data == nil { + return nil, errors.New("cannot evaluate policies without repository data") + } + if data.PolicyInput == nil { + data.PolicyInput = data.PolicyData + } + if data.Settings == nil { + if len(policyPaths) == 0 && len(dependencies) == 0 { + return nil, nil + } + return nil, errors.New("cannot evaluate policies without repository settings") + } + var accumulatedErrors error activities := make([]*proto.Activity, 0) @@ -919,31 +1094,248 @@ func (l *GithubReposPlugin) EvaluatePolicies(ctx context.Context, data *Saturate }, } - for _, policyPath := range req.GetPolicyPaths() { - processor := policyManager.NewPolicyProcessor( - l.Logger, - map[string]string{ - "provider": "github", - "type": "repository", - "repository": data.Settings.GetName(), - "organization": data.Settings.GetOwner().GetLogin(), - }, - subjects, - components, - inventory, - actors, - activities, - ) - evidence, err := processor.GenerateResults(ctx, policyPath, data) - evidences = slices.Concat(evidences, evidence) - if err != nil { - accumulatedErrors = errors.Join(accumulatedErrors, err) + l.Logger.Debug( + "Evaluating policies", + "repo", data.Settings.GetFullName(), + "policy_paths", policyPaths, + "dependencies", len(dependencies), + ) + + if len(dependencies) == 0 { + for _, policyPath := range policyPaths { + l.Logger.Debug("Evaluating repository policy path", "repo", data.Settings.GetFullName(), "policy_path", policyPath) + l.Logger.Debug( + "Repository policy data prepared for evaluation", + "repo", data.Settings.GetFullName(), + "policy_path", policyPath, + "policy_data_keys", mapKeys(data.PolicyData), + "policy_data_count", len(data.PolicyData), + ) + processor := policyManager.NewPolicyProcessor( + l.Logger, + map[string]string{ + "provider": 
"github", + "type": "repository", + "repository": data.Settings.GetName(), + "organization": data.Settings.GetOwner().GetLogin(), + }, + subjects, + components, + inventory, + actors, + activities, + data.PolicyData, + ) + evidence, err := processor.GenerateResults(ctx, policyPath, data) + l.Logger.Debug("Repository policy evaluation complete", "repo", data.Settings.GetFullName(), "policy_path", policyPath, "evidence_count", len(evidence)) + evidences = slices.Concat(evidences, evidence) + if err != nil { + accumulatedErrors = errors.Join(accumulatedErrors, err) + } + } + } + + for _, dependency := range dependencies { + if dependency == nil { + continue + } + dependencyInput := dependencyPolicyInput(data.Settings, dependency, dependencyPolicyData) + dependencyComponents := slices.Concat(components, []*proto.Component{dependencyComponent()}) + dependencyInventory := slices.Concat(inventory, []*proto.InventoryItem{dependencyInventoryItem(data.Settings, dependency)}) + dependencySubjects := dependencySubjects(data.Settings, dependency) + dependencyLabels := map[string]string{ + "provider": "github", + "type": "repository-dependency", + "repository": data.Settings.GetName(), + "organization": data.Settings.GetOwner().GetLogin(), + "dependency": dependency.Name, + "ecosystem": dependency.Ecosystem, + } + if dependency.DeclaredVersion != "" { + dependencyLabels["dependency_version"] = dependency.DeclaredVersion + } + + for _, policyPath := range policyPaths { + l.Logger.Debug( + "Evaluating dependency policy path", + "repo", data.Settings.GetFullName(), + "dependency", dependency.Name, + "declared_version", dependency.DeclaredVersion, + "resolved", dependency.Repository != nil && dependency.Repository.Resolved, + "policy_path", policyPath, + ) + l.Logger.Debug( + "Dependency policy data prepared for evaluation", + "repo", data.Settings.GetFullName(), + "dependency", dependency.Name, + "policy_path", policyPath, + "policy_data_keys", mapKeys(dependencyPolicyData), + 
"policy_data_count", len(dependencyPolicyData), + ) + processor := policyManager.NewPolicyProcessor( + l.Logger, + dependencyLabels, + dependencySubjects, + dependencyComponents, + dependencyInventory, + actors, + activities, + dependencyPolicyData, + ) + evidence, err := processor.GenerateResults(ctx, policyPath, dependencyInput) + appendDependencyEvidenceLinks(evidence, dependency) + l.Logger.Debug( + "Dependency policy evaluation complete", + "repo", data.Settings.GetFullName(), + "dependency", dependency.Name, + "policy_path", policyPath, + "evidence_count", len(evidence), + ) + evidences = slices.Concat(evidences, evidence) + if err != nil { + accumulatedErrors = errors.Join(accumulatedErrors, err) + } } } return evidences, accumulatedErrors } +func mapKeys(value map[string]interface{}) []string { + keys := make([]string, 0, len(value)) + for key := range value { + keys = append(keys, key) + } + slices.Sort(keys) + return keys +} + +const ( + policyBehaviorRepository = "repository" + policyBehaviorDependency = "dependency" + + defaultDependencyPolicySource = "plugin-github-repositories-dependency-policies" +) + +var defaultPolicyBehaviors = map[string][]string{ + defaultDependencyPolicySource: {policyBehaviorDependency}, +} + +func requestWithDefaultPolicyBehavior(req *proto.EvalRequest) *proto.EvalRequest { + if req == nil { + return nil + } + return req. + WithDefaultPolicyBehavior(defaultPolicyBehaviors). 
+ WithUndefinedMappedTo([]string{policyBehaviorRepository}) +} + +func appendDependencyEvidenceLinks(evidences []*proto.Evidence, dependency *RepositoryDependency) { + if dependency == nil || dependency.Repository == nil || dependency.Repository.URL == "" { + return + } + link := &proto.Link{ + Href: dependency.Repository.URL, + Rel: policyManager.Pointer("evidence"), + Text: policyManager.Pointer("Dependency Repository"), + } + for _, evidence := range evidences { + if evidence == nil || evidenceHasLink(evidence, link.Href) { + continue + } + evidence.Links = append(evidence.Links, link) + } +} + +func evidenceHasLink(evidence *proto.Evidence, href string) bool { + for _, link := range evidence.GetLinks() { + if link.GetHref() == href { + return true + } + } + return false +} + +func dependencyPolicyInput(repo *github.Repository, dependency *RepositoryDependency, policyData map[string]interface{}) *DependencyPolicyInput { + if policyData == nil { + policyData = map[string]interface{}{} + } + return &DependencyPolicyInput{ + Repository: &DependencyParentRepository{ + Organization: repo.GetOwner().GetLogin(), + Name: repo.GetName(), + FullName: repo.GetFullName(), + URL: repo.GetHTMLURL(), + }, + Dependency: dependency, + PolicyData: policyData, + } +} + +func dependencyComponent() *proto.Component { + return &proto.Component{ + Identifier: "common-components/repository-dependency", + Type: "software", + Title: "Repository Dependency", + Description: "A software dependency declared by a monitored source repository.", + Purpose: "To represent third-party or internally maintained software components that the repository relies on.", + } +} + +func dependencyInventoryItem(repo *github.Repository, dependency *RepositoryDependency) *proto.InventoryItem { + props := []*proto.Property{ + {Name: "name", Value: dependency.Name}, + {Name: "ecosystem", Value: dependency.Ecosystem}, + {Name: "repository", Value: repo.GetFullName()}, + } + if dependency.DeclaredVersion != "" { + 
props = append(props, &proto.Property{Name: "declared_version", Value: dependency.DeclaredVersion}) + } + + links := []*proto.Link{} + if dependency.Repository != nil && dependency.Repository.URL != "" { + links = append(links, &proto.Link{ + Href: dependency.Repository.URL, + Text: policyManager.Pointer("Dependency Repository URL"), + }) + } + + return &proto.InventoryItem{ + Identifier: dependencyIdentifier(repo, dependency), + Type: "repository-dependency", + Title: fmt.Sprintf("Repository Dependency [%s]", dependency.Name), + Props: props, + Links: links, + ImplementedComponents: []*proto.InventoryItemImplementedComponent{ + {Identifier: "common-components/repository-dependency"}, + }, + } +} + +func dependencySubjects(repo *github.Repository, dependency *RepositoryDependency) []*proto.Subject { + return []*proto.Subject{ + { + Type: proto.SubjectType_SUBJECT_TYPE_INVENTORY_ITEM, + Identifier: dependencyIdentifier(repo, dependency), + }, + { + Type: proto.SubjectType_SUBJECT_TYPE_INVENTORY_ITEM, + Identifier: fmt.Sprintf("github-repository/%s", repo.GetFullName()), + }, + { + Type: proto.SubjectType_SUBJECT_TYPE_COMPONENT, + Identifier: "common-components/repository-dependency", + }, + } +} + +func dependencyIdentifier(repo *github.Repository, dependency *RepositoryDependency) string { + if dependency.DeclaredVersion == "" { + return fmt.Sprintf("github-repository-dependency/%s/%s", repo.GetFullName(), dependency.Name) + } + return fmt.Sprintf("github-repository-dependency/%s/%s@%s", repo.GetFullName(), dependency.Name, dependency.DeclaredVersion) +} + // isPermissionError returns true if the error from the GitHub client indicates // a permissions or visibility issue (e.g., 401/403/404). 
func isPermissionError(err error) bool { diff --git a/org_teams.go b/org_teams.go index 318cca0..613432f 100644 --- a/org_teams.go +++ b/org_teams.go @@ -6,6 +6,7 @@ import ( "github.com/google/go-github/v71/github" ) +// GatherOrgTeams returns organization teams and their member logins. func (l *GithubReposPlugin) GatherOrgTeams(ctx context.Context) ([]*OrgTeam, error) { opts := &github.ListOptions{ PerPage: 100, diff --git a/pull_requests.go b/pull_requests.go index 47f9452..36adfe4 100644 --- a/pull_requests.go +++ b/pull_requests.go @@ -114,6 +114,7 @@ func pullRequestKey(pr *github.PullRequest) int64 { return int64(pr.GetNumber()) } +// FetchReviewThreads returns review-thread discussions keyed by pull request review ID. func (l *GithubReposPlugin) FetchReviewThreads(ctx context.Context, prs []*github.PullRequest, reviewsByPR map[int64][]*github.PullRequestReview) (map[int64][]*PullRequestReviewThread, error) { threadsByReview := make(map[int64][]*PullRequestReviewThread) if l.graphqlClient == nil { diff --git a/repository_controls.go b/repository_controls.go index efb8f32..e0d690a 100644 --- a/repository_controls.go +++ b/repository_controls.go @@ -9,6 +9,7 @@ import ( const maxEnvironmentDetailConcurrency = 5 +// GatherRepositoryCollaborators returns direct collaborators and their repository permissions. func (l *GithubReposPlugin) GatherRepositoryCollaborators(ctx context.Context, repo *github.Repository) ([]*RepositoryCollaborator, error) { owner := repo.GetOwner().GetLogin() name := repo.GetName() @@ -48,6 +49,7 @@ func (l *GithubReposPlugin) GatherRepositoryCollaborators(ctx context.Context, r return collaborators, nil } +// GatherRepositoryTeams returns teams with repository access and resolves known team members. 
func (l *GithubReposPlugin) GatherRepositoryTeams(ctx context.Context, repo *github.Repository, orgTeams []*OrgTeam) ([]*RepositoryTeam, error) { owner := repo.GetOwner().GetLogin() name := repo.GetName() @@ -88,6 +90,7 @@ func (l *GithubReposPlugin) GatherRepositoryTeams(ctx context.Context, repo *git return teams, nil } +// GatherRepositoryEnvironments returns GitHub environments enriched with protection details. func (l *GithubReposPlugin) GatherRepositoryEnvironments(ctx context.Context, repo *github.Repository) ([]*RepositoryEnvironment, error) { owner := repo.GetOwner().GetLogin() name := repo.GetName() @@ -121,6 +124,7 @@ func (l *GithubReposPlugin) GatherRepositoryEnvironments(ctx context.Context, re return environments, nil } +// GatherEffectiveBranchRules returns effective ruleset evidence for protected and default branches. func (l *GithubReposPlugin) GatherEffectiveBranchRules(ctx context.Context, repo *github.Repository, branches []string) (map[string]*BranchRuleEvidence, error) { owner := repo.GetOwner().GetLogin() name := repo.GetName() diff --git a/types.go b/types.go index f235cae..8df3fff 100644 --- a/types.go +++ b/types.go @@ -50,12 +50,14 @@ type OrgTeam struct { Members []string `json:"members"` } +// RepositoryCollaborator captures a direct repository collaborator and their permissions. type RepositoryCollaborator struct { Login string `json:"login"` RoleName string `json:"role_name"` Permissions map[string]bool `json:"permissions"` } +// RepositoryTeam captures a GitHub team with repository access and known members. type RepositoryTeam struct { ID int64 `json:"id"` Name string `json:"name"` @@ -65,6 +67,7 @@ type RepositoryTeam struct { Members []string `json:"members"` } +// EnvironmentReviewer represents a user or team configured as an environment reviewer. 
type EnvironmentReviewer struct { Type string `json:"type"` ID int64 `json:"id,omitempty"` @@ -73,6 +76,7 @@ type EnvironmentReviewer struct { Name string `json:"name,omitempty"` } +// EnvironmentProtectionRule captures a GitHub environment protection rule. type EnvironmentProtectionRule struct { ID int64 `json:"id"` Type string `json:"type"` @@ -81,11 +85,13 @@ type EnvironmentProtectionRule struct { Reviewers []*EnvironmentReviewer `json:"reviewers,omitempty"` } +// EnvironmentBranchPolicy captures branch deployment restrictions for an environment. type EnvironmentBranchPolicy struct { ProtectedBranches bool `json:"protected_branches"` CustomBranchPolicies bool `json:"custom_branch_policies"` } +// RepositoryEnvironment captures GitHub environment settings relevant to deployment policy checks. type RepositoryEnvironment struct { ID int64 `json:"id"` Name string `json:"name"` @@ -98,6 +104,7 @@ type RepositoryEnvironment struct { DeploymentBranchPolicy *EnvironmentBranchPolicy `json:"deployment_branch_policy,omitempty"` } +// BranchRuleEvidence captures effective branch rules for repository policy evaluation. type BranchRuleEvidence struct { RequiredSignatures bool `json:"required_signatures"` RequiredDeployments []string `json:"required_deployments,omitempty"` @@ -106,3 +113,126 @@ type BranchRuleEvidence struct { DismissStaleReviewsOnPush bool `json:"dismiss_stale_reviews_on_push,omitempty"` RequireCodeOwnerReview bool `json:"require_code_owner_review,omitempty"` } + +// RepositoryDependency captures a direct dependency and any collected upstream health facts. 
type RepositoryDependency struct {
	Name             string                      `json:"name"`
	Ecosystem        string                      `json:"ecosystem"`
	SourceFile       string                      `json:"source_file"`
	Direct           bool                        `json:"direct"`
	DeclaredVersion  string                      `json:"declared_version"`
	Repository       *DependencyRepository       `json:"repository"`
	Health           *DependencyHealth           `json:"health"`
	SupplyChain      *DependencySupplyChain      `json:"supply_chain"`
	CollectionStatus *DependencyCollectionStatus `json:"collection_status"`
}

// DependencyPolicyInput is the policy input shape for dependency-granular evaluation.
//
// It serializes under the JSON keys "repository", "dependency" and
// "policy_data". PolicyData is declared with any, which is an alias for
// interface{}, so existing map[string]interface{} values can be assigned to it
// without conversion.
type DependencyPolicyInput struct {
	Repository *DependencyParentRepository `json:"repository"`
	Dependency *RepositoryDependency       `json:"dependency"`
	PolicyData map[string]any              `json:"policy_data"`
}

// DependencyParentRepository identifies the repository that declared a dependency.
//
// URL is omitted from the JSON encoding when it is empty.
type DependencyParentRepository struct {
	Organization string `json:"organization"`
	Name         string `json:"name"`
	FullName     string `json:"full_name"`
	URL          string `json:"url,omitempty"`
}

// DependencyRepository identifies the upstream repository resolved for a dependency.
//
// Resolved is always encoded; the string fields are omitted from the JSON
// encoding when they are empty.
type DependencyRepository struct {
	Provider string `json:"provider,omitempty"`
	Owner    string `json:"owner,omitempty"`
	Name     string `json:"name,omitempty"`
	URL      string `json:"url,omitempty"`
	Resolved bool   `json:"resolved"`
}

// DependencyHealth captures maintenance and activity signals for a resolved dependency repository.
//
// Each pointer field is omitted from the JSON encoding when it is nil.
type DependencyHealth struct {
	RepositoryArchived bool                        `json:"repository_archived"`
	LatestRelease      *DependencyRelease          `json:"latest_release,omitempty"`
	LatestCommit       *DependencyCommit           `json:"latest_commit,omitempty"`
	Workflows          *DependencyWorkflowSummary  `json:"workflows,omitempty"`
	PullRequests       *DependencyPullRequestStats `json:"pull_requests,omitempty"`
}

// DependencyRelease captures the latest release observed for a dependency.
type DependencyRelease struct {
	Tag         string     `json:"tag,omitempty"`
	PublishedAt *time.Time `json:"published_at,omitempty"`
}

// DependencyCommit captures the latest default-branch commit observed for a dependency.
type DependencyCommit struct {
	SHA         string     `json:"sha,omitempty"`
	CommittedAt *time.Time `json:"committed_at,omitempty"`
}

// DependencyWorkflowSummary summarizes workflow availability for a dependency repository.
type DependencyWorkflowSummary struct {
	Count                  int                    `json:"count"`
	LatestDefaultBranchRun *DependencyWorkflowRun `json:"latest_default_branch_run,omitempty"`
}

// DependencyWorkflowRun captures the latest default-branch workflow run state.
type DependencyWorkflowRun struct {
	Status     string     `json:"status,omitempty"`
	Conclusion string     `json:"conclusion,omitempty"`
	CreatedAt  *time.Time `json:"created_at,omitempty"`
}

// DependencyPullRequestStats summarizes dependency repository pull request activity.
//
// The counters and *Capped flags are always encoded; the pointer-valued
// timestamp and median fields are omitted from the JSON encoding when nil.
type DependencyPullRequestStats struct {
	OpenCount                           int        `json:"open_count"`
	OpenCountCapped                     bool       `json:"open_count_capped"`
	OldestOpenCreatedAt                 *time.Time `json:"oldest_open_created_at,omitempty"`
	RecentClosedCount                   int        `json:"recent_closed_count"`
	RecentClosedCountCapped             bool       `json:"recent_closed_count_capped"`
	MedianDaysToClose                   *float64   `json:"median_days_to_close,omitempty"`
	MedianHoursToFirstInteraction       *float64   `json:"median_hours_to_first_interaction,omitempty"`
	FirstInteractionSampledPullRequests int        `json:"first_interaction_sampled_pull_requests"`
}

// DependencySupplyChain captures dependency license and SBOM evidence.
type DependencySupplyChain struct {
	License *DependencyLicenseSummary `json:"license,omitempty"`
	SBOM    *DependencySBOMSummary    `json:"sbom,omitempty"`
}

// DependencyLicenseSummary captures collected dependency license metadata.
type DependencyLicenseSummary struct {
	SPDXID    string `json:"spdx_id,omitempty"`
	Name      string `json:"name,omitempty"`
	URL       string `json:"url,omitempty"`
	Collected bool   `json:"collected"`
}

// DependencySBOMSummary captures collected dependency SBOM metadata.
type DependencySBOMSummary struct {
	Available           bool       `json:"available"`
	PackageCount        int        `json:"package_count"`
	SPDXID              string     `json:"spdx_id,omitempty"`
	SPDXVersion         string     `json:"spdx_version,omitempty"`
	CreationInfoCreated *time.Time `json:"creation_info_created,omitempty"`
	Collected           bool       `json:"collected"`
}

// DependencyCollectionStatus records which dependency collection stages completed.
//
// Errors carries no omitempty option, so a nil slice is encoded as JSON null
// rather than being dropped from the output.
type DependencyCollectionStatus struct {
	DependencyParsed   bool                         `json:"dependency_parsed"`
	RepositoryResolved bool                         `json:"repository_resolved"`
	HealthCollected    bool                         `json:"health_collected"`
	LicenseCollected   bool                         `json:"license_collected"`
	SBOMCollected      bool                         `json:"sbom_collected"`
	Errors             []*DependencyCollectionError `json:"errors"`
}

// DependencyCollectionError records a non-fatal dependency collection failure.
type DependencyCollectionError struct {
	Scope   string `json:"scope"`
	Message string `json:"message"`
}