From dd87217e74b7d8d91084740da82beb33f1d008cc Mon Sep 17 00:00:00 2001
From: Niran Babalola <niran.babalola@coinbase.com>
Date: Tue, 19 May 2026 13:04:05 -0500
Subject: [PATCH 1/2] Add sequencer-only benchmark role

---
 README.md                           |   2 +
 configs/examples/snapshot.yml       |   2 +
 docs/benchmark-types.md             |  23 +++++-
 runner/benchmark/definition.go      |  92 ++++++++++++++++++++++++
 runner/benchmark/matrix.go          |  11 +++
 runner/benchmark/matrix_test.go     | 105 ++++++++++++++++++++++++++++
 runner/benchmark/result_metadata.go |  21 ++++--
 runner/network/network_benchmark.go |  54 +++++++++++---
 runner/service.go                   |  29 +++++---
 9 files changed, 311 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 527c4a81..42ea5f15 100644
--- a/README.md
+++ b/README.md
@@ -201,6 +201,8 @@ Each test executes a standardized workflow:
 
 This approach allows precise measurement of performance characteristics for both block production and validation.
 
+By default, each benchmark runs both phases: the sequencer (block-building) phase followed by the validator phase. Set `roles: [sequencer]` on a benchmark definition to run only the sequencer phase; this is useful for snapshot-startup and load-test runs that do not need the validator to replay the generated payloads.
+
 ## Configuration
 
 ### Available Flags
diff --git a/configs/examples/snapshot.yml b/configs/examples/snapshot.yml
index d802945d..252590d6 100644
--- a/configs/examples/snapshot.yml
+++ b/configs/examples/snapshot.yml
@@ -17,6 +17,8 @@ benchmarks:
       # just delete the snapshot directory to force a full copy
       command: ./scripts/copy-local-snapshot.sh --skip-if-nonempty
       genesis_file: ../../sepolia-alpha/sepolia-alpha-genesis.json
+    roles:
+      - sequencer
     # force_clean is true by default to ensure consistency, but we can skip it for testing
     force_clean: false
     variables:
diff --git a/docs/benchmark-types.md b/docs/benchmark-types.md
index 77580b92..fdd8fca0 100644
--- a/docs/benchmark-types.md
+++ b/docs/benchmark-types.md
@@ -23,7 +23,28 @@
     - Collect block metrics
 - Reason we don't need to test mempool for validating node: only used for tx gossip, no logic actually has to be executed
 
+## Role selection
+
+Benchmark definitions run both roles by default:
+
+```yaml
+benchmarks:
+  - variables:
+      # ...
+```
+
+Set `roles: [sequencer]` when a benchmark only needs block-building or snapshot startup coverage and does not need to validate the generated payloads:
+
+```yaml
+benchmarks:
+  - roles: [sequencer]
+    variables:
+      # ...
+```
+
+The validator role cannot run without the sequencer role because validator benchmarks consume payloads produced by the sequencer phase. Proof-program benchmarks also require the validator role.
+
 ## op-challenger test
 
 - batch all blocks in the test to L1
-- run op-program on those batches - verify output root
\ No newline at end of file
+- run op-program on those batches - verify output root
diff --git a/runner/benchmark/definition.go b/runner/benchmark/definition.go
index e9e96ff5..9eae0004 100644
--- a/runner/benchmark/definition.go
+++ b/runner/benchmark/definition.go
@@ -13,6 +13,83 @@ import (
 	"github.com/base/base-bench/runner/payload"
 )
 
+type BenchmarkRole string
+
+const (
+	BenchmarkRoleSequencer BenchmarkRole = "sequencer"
+	BenchmarkRoleValidator BenchmarkRole = "validator"
+)
+
+var defaultBenchmarkRoles = []BenchmarkRole{BenchmarkRoleSequencer, BenchmarkRoleValidator}
+
+func DefaultBenchmarkRoles() []BenchmarkRole {
+	return append([]BenchmarkRole(nil), defaultBenchmarkRoles...)
+}
+
+func NormalizeBenchmarkRoles(roles []BenchmarkRole) ([]BenchmarkRole, error) {
+	if len(roles) == 0 {
+		return DefaultBenchmarkRoles(), nil
+	}
+
+	seen := make(map[BenchmarkRole]bool, len(roles))
+	for _, role := range roles {
+		switch role {
+		case BenchmarkRoleSequencer, BenchmarkRoleValidator:
+		default:
+			return nil, fmt.Errorf("invalid benchmark role %q", role)
+		}
+
+		if seen[role] {
+			return nil, fmt.Errorf("duplicate benchmark role %q", role)
+		}
+		seen[role] = true
+	}
+
+	if !seen[BenchmarkRoleSequencer] {
+		return nil, fmt.Errorf("benchmark roles must include %q", BenchmarkRoleSequencer)
+	}
+
+	normalized := []BenchmarkRole{BenchmarkRoleSequencer}
+	if seen[BenchmarkRoleValidator] {
+		normalized = append(normalized, BenchmarkRoleValidator)
+	}
+
+	return normalized, nil
+}
+
+func BenchmarkRolesContain(roles []BenchmarkRole, role BenchmarkRole) bool {
+	for _, r := range roles {
+		if r == role {
+			return true
+		}
+	}
+	return false
+}
+
+func BenchmarkRoleNames(roles []BenchmarkRole) []string {
+	names := make([]string, 0, len(roles))
+	for _, role := range roles {
+		names = append(names, string(role))
+	}
+	return names
+}
+
+func BenchmarkRolesString(roles []BenchmarkRole) string {
+	return strings.Join(BenchmarkRoleNames(roles), ",")
+}
+
+func IsDefaultBenchmarkRoles(roles []BenchmarkRole) bool {
+	if len(roles) != len(defaultBenchmarkRoles) {
+		return false
+	}
+	for i, role := range roles {
+		if role != defaultBenchmarkRoles[i] {
+			return false
+		}
+	}
+	return true
+}
+
 // Param is a single dimension of a benchmark matrix. It can be a
 // single value or a list of values.
 type Param struct {
@@ -134,11 +211,22 @@ type TestDefinition struct {
 	Snapshot     *SnapshotDefinition  `yaml:"snapshot"`
 	Metrics      *ThresholdConfig     `yaml:"metrics"`
 	Tags         *map[string]string   `yaml:"tags"`
+	Roles        []BenchmarkRole      `yaml:"roles"`
 	Variables    []Param              `yaml:"variables"`
 	ProofProgram *ProofProgramOptions `yaml:"proof_program"`
 }
 
 func (bc *TestDefinition) Check() error {
+	roles, err := NormalizeBenchmarkRoles(bc.Roles)
+	if err != nil {
+		return err
+	}
+
+	proofProgramEnabled := bc.ProofProgram != nil && (bc.ProofProgram.Enabled == nil || *bc.ProofProgram.Enabled)
+	if proofProgramEnabled && !BenchmarkRolesContain(roles, BenchmarkRoleValidator) {
+		return errors.New("proof_program requires the validator benchmark role")
+	}
+
 	for _, b := range bc.Variables {
 		err := b.Check()
 		if err != nil {
@@ -147,3 +235,7 @@ func (bc *TestDefinition) Check() error {
 	}
 	return nil
 }
+
+func (bc *TestDefinition) NormalizedRoles() ([]BenchmarkRole, error) {
+	return NormalizeBenchmarkRoles(bc.Roles)
+}
diff --git a/runner/benchmark/matrix.go b/runner/benchmark/matrix.go
index 430aa892..b406d428 100644
--- a/runner/benchmark/matrix.go
+++ b/runner/benchmark/matrix.go
@@ -17,9 +17,19 @@ type TestPlan struct {
 	Snapshot     *SnapshotDefinition
 	ProofProgram *ProofProgramOptions
 	Thresholds   *ThresholdConfig
+	Roles        []BenchmarkRole
 }
 
 func NewTestPlanFromConfig(c TestDefinition, testFileName string, config *BenchmarkConfig) (*TestPlan, error) {
+	if err := c.Check(); err != nil {
+		return nil, err
+	}
+
+	roles, err := c.NormalizedRoles()
+	if err != nil {
+		return nil, err
+	}
+
 	testRuns, err := ResolveTestRunsFromMatrix(c, testFileName, config)
 	if err != nil {
 		return nil, err
@@ -42,6 +52,7 @@ func NewTestPlanFromConfig(c TestDefinition, testFileName string, config *Benchm
 		Snapshot:     c.Snapshot,
 		ProofProgram: proofProgram,
 		Thresholds:   c.Metrics,
+		Roles:        roles,
 	}, nil
 }
 
diff --git a/runner/benchmark/matrix_test.go b/runner/benchmark/matrix_test.go
index 5440dccf..e5d1e1aa 100644
--- a/runner/benchmark/matrix_test.go
+++ b/runner/benchmark/matrix_test.go
@@ -159,6 +159,111 @@ func TestResolveTestRunsFromMatrix(t *testing.T) {
 	}
 }
 
+func TestNewTestPlanFromConfigRoles(t *testing.T) {
+	config := &benchmark.BenchmarkConfig{Name: "test"}
+	definition := benchmark.TestDefinition{
+		Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer},
+		Variables: []benchmark.Param{
+			{
+				ParamType: "payload",
+				Value:     "simple",
+			},
+		},
+	}
+
+	plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config)
+	require.NoError(t, err)
+	require.Equal(t, []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, plan.Roles)
+
+	metadata := benchmark.RunGroupFromTestPlans([]benchmark.TestPlan{*plan}, nil)
+	require.Len(t, metadata.Runs, 1)
+	require.Equal(t, "sequencer", metadata.Runs[0].TestConfig["Roles"])
+}
+
+func TestNewTestPlanFromConfigDefaultsToBothRoles(t *testing.T) {
+	config := &benchmark.BenchmarkConfig{Name: "test"}
+	definition := benchmark.TestDefinition{
+		Variables: []benchmark.Param{
+			{
+				ParamType: "payload",
+				Value:     "simple",
+			},
+		},
+	}
+
+	plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config)
+	require.NoError(t, err)
+	require.Equal(t, []benchmark.BenchmarkRole{
+		benchmark.BenchmarkRoleSequencer,
+		benchmark.BenchmarkRoleValidator,
+	}, plan.Roles)
+
+	metadata := benchmark.RunGroupFromTestPlans([]benchmark.TestPlan{*plan}, nil)
+	require.Len(t, metadata.Runs, 1)
+	require.NotContains(t, metadata.Runs[0].TestConfig, "Roles")
+}
+
+func TestNewTestPlanFromConfigRejectsInvalidRoles(t *testing.T) {
+	tests := []struct {
+		name  string
+		roles []benchmark.BenchmarkRole
+	}{
+		{
+			name:  "unknown role",
+			roles: []benchmark.BenchmarkRole{"other"},
+		},
+		{
+			name:  "duplicate role",
+			roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer, benchmark.BenchmarkRoleSequencer},
+		},
+		{
+			name:  "validator without sequencer",
+			roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleValidator},
+		},
+	}
+
+	config := &benchmark.BenchmarkConfig{Name: "test"}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			definition := benchmark.TestDefinition{
+				Roles: tt.roles,
+				Variables: []benchmark.Param{
+					{
+						ParamType: "payload",
+						Value:     "simple",
+					},
+				},
+			}
+
+			_, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config)
+			require.Error(t, err)
+		})
+	}
+}
+
+func TestNewTestPlanFromConfigRejectsProofProgramWithoutValidator(t *testing.T) {
+	config := &benchmark.BenchmarkConfig{Name: "test"}
+	definition := benchmark.TestDefinition{
+		Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer},
+		ProofProgram: &benchmark.ProofProgramOptions{
+			Enabled: boolPtr(true),
+		},
+		Variables: []benchmark.Param{
+			{
+				ParamType: "payload",
+				Value:     "simple",
+			},
+		},
+	}
+
+	_, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config)
+	require.ErrorContains(t, err, "proof_program requires the validator benchmark role")
+}
+
 func stringPtr(s string) *string {
 	return &s
 }
+
+func boolPtr(b bool) *bool {
+	return &b
+}
diff --git a/runner/benchmark/result_metadata.go b/runner/benchmark/result_metadata.go
index e31bdbff..1deb4d20 100644
--- a/runner/benchmark/result_metadata.go
+++ b/runner/benchmark/result_metadata.go
@@ -7,11 +7,11 @@ import (
 )
 
 type RunResult struct {
-	Success          bool                      `json:"success"`
-	Complete         bool                      `json:"complete"`
-	SequencerMetrics types.SequencerKeyMetrics `json:"sequencerMetrics"`
-	ValidatorMetrics types.ValidatorKeyMetrics `json:"validatorMetrics"`
-	ClientVersion    string                    `json:"clientVersion,omitempty"`
+	Success          bool                       `json:"success"`
+	Complete         bool                       `json:"complete"`
+	SequencerMetrics *types.SequencerKeyMetrics `json:"sequencerMetrics,omitempty"`
+	ValidatorMetrics *types.ValidatorKeyMetrics `json:"validatorMetrics,omitempty"`
+	ClientVersion    string                     `json:"clientVersion,omitempty"`
 }
 
 // MachineInfo contains information about the machine running the benchmark
@@ -61,13 +61,22 @@ func RunGroupFromTestPlans(testPlans []TestPlan, machineInfo *MachineInfo) RunGr
 	}
 
 	for _, testPlan := range testPlans {
+		roles := testPlan.Roles
+		if len(roles) == 0 {
+			roles = DefaultBenchmarkRoles()
+		}
 		for _, params := range testPlan.Runs {
+			testConfig := params.Params.ToConfig()
+			if !IsDefaultBenchmarkRoles(roles) {
+				testConfig["Roles"] = BenchmarkRolesString(roles)
+			}
+
 			metadata.Runs = append(metadata.Runs, Run{
 				ID:              params.ID,
 				SourceFile:      params.TestFile,
 				TestName:        params.Name,
 				TestDescription: params.Description,
-				TestConfig:      params.Params.ToConfig(),
+				TestConfig:      testConfig,
 				OutputDir:       params.OutputDir,
 				Thresholds:      testPlan.Thresholds,
 				CreatedAt:       &now,
diff --git a/runner/network/network_benchmark.go b/runner/network/network_benchmark.go
index 2bf13775..91ee4e36 100644
--- a/runner/network/network_benchmark.go
+++ b/runner/network/network_benchmark.go
@@ -44,13 +44,28 @@ type NetworkBenchmark struct {
 	testConfig  *benchtypes.TestConfig
 	proofConfig *benchmark.ProofProgramOptions
 
-	transactionPayload     payload.Definition
-	ports                  portmanager.PortManager
-	flashblocksBlockTime   string
+	transactionPayload   payload.Definition
+	ports                portmanager.PortManager
+	roles                []benchmark.BenchmarkRole
+	flashblocksBlockTime string
 }
 
 // NewNetworkBenchmark creates a new network benchmark and initializes the payload worker and consensus client
-func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequencerOptions *config.InternalClientOptions, validatorOptions *config.InternalClientOptions, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, ports portmanager.PortManager, flashblocksBlockTime string) (*NetworkBenchmark, error) {
+func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequencerOptions *config.InternalClientOptions, validatorOptions *config.InternalClientOptions, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, ports portmanager.PortManager, roles []benchmark.BenchmarkRole, flashblocksBlockTime string) (*NetworkBenchmark, error) {
+	normalizedRoles, err := benchmark.NormalizeBenchmarkRoles(roles)
+	if err != nil {
+		return nil, err
+	}
+	if !benchmark.BenchmarkRolesContain(normalizedRoles, benchmark.BenchmarkRoleSequencer) {
+		return nil, errors.New("network benchmark requires the sequencer role")
+	}
+	if benchmark.BenchmarkRolesContain(normalizedRoles, benchmark.BenchmarkRoleValidator) && validatorOptions == nil {
+		return nil, errors.New("validator options are required when the validator role is enabled")
+	}
+	if proofConfig != nil && !benchmark.BenchmarkRolesContain(normalizedRoles, benchmark.BenchmarkRoleValidator) {
+		return nil, errors.New("proof program benchmark requires the validator role")
+	}
+
 	return &NetworkBenchmark{
 		log:                  log,
 		sequencerOptions:     sequencerOptions,
@@ -59,6 +74,7 @@ func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequence
 		proofConfig:          proofConfig,
 		transactionPayload:   transactionPayload,
 		ports:                ports,
+		roles:                normalizedRoles,
 		flashblocksBlockTime: flashblocksBlockTime,
 	}, nil
 }
@@ -81,6 +97,12 @@ func (nb *NetworkBenchmark) Run(ctx context.Context) error {
 		return fmt.Errorf("failed to run sequencer benchmark: %w", err)
 	}
 
+	if !nb.runsValidator() {
+		nb.log.Info("Skipping validator benchmark", "roles", benchmark.BenchmarkRolesString(nb.roles))
+		sequencerClient.Stop()
+		return nil
+	}
+
 	// Benchmark the validator to sync the payloads
 	if err := nb.benchmarkValidator(ctx, payloadResult, lastSetupBlock, l1Chain, sequencerClient); err != nil {
 		return fmt.Errorf("failed to run validator benchmark: %w", err)
@@ -243,16 +265,28 @@ func (nb *NetworkBenchmark) benchmarkValidator(ctx context.Context, payloadResul
 }
 
 func (nb *NetworkBenchmark) GetResult() (*benchmark.RunResult, error) {
-	if nb.collectedSequencerMetrics == nil || nb.collectedValidatorMetrics == nil {
-		return nil, errors.New("metrics not collected")
+	if nb.collectedSequencerMetrics == nil {
+		return nil, errors.New("sequencer metrics not collected")
 	}
 
-	return &benchmark.RunResult{
-		SequencerMetrics: *nb.collectedSequencerMetrics,
-		ValidatorMetrics: *nb.collectedValidatorMetrics,
+	result := &benchmark.RunResult{
+		SequencerMetrics: nb.collectedSequencerMetrics,
 		Success:          true,
 		Complete:         true,
-	}, nil
+	}
+
+	if nb.runsValidator() {
+		if nb.collectedValidatorMetrics == nil {
+			return nil, errors.New("validator metrics not collected")
+		}
+		result.ValidatorMetrics = nb.collectedValidatorMetrics
+	}
+
+	return result, nil
+}
+
+func (nb *NetworkBenchmark) runsValidator() bool {
+	return benchmark.BenchmarkRolesContain(nb.roles, benchmark.BenchmarkRoleValidator)
 }
 
 func setupNode(ctx context.Context, l log.Logger, nodeTypeStr string, params benchtypes.RunParams, options *config.InternalClientOptions, portManager portmanager.PortManager, flashblockServerURL string, flashblocksBlockTime string) (types.ExecutionClient, error) {
diff --git a/runner/service.go b/runner/service.go
index 2e1c0309..6f2ec308 100644
--- a/runner/service.go
+++ b/runner/service.go
@@ -339,7 +339,7 @@ func (s *service) getGenesisForSnapshotConfig(snapshotConfig *benchmark.Snapshot
 	return genesis, nil
 }
 
-func (s *service) setupDataDirs(workingDir string, params types.RunParams, genesis *core.Genesis, snapshot *benchmark.SnapshotDefinition, datadirsConfig *benchmark.DatadirConfig) (*config.InternalClientOptions, *config.InternalClientOptions, error) {
+func (s *service) setupDataDirs(workingDir string, params types.RunParams, genesis *core.Genesis, snapshot *benchmark.SnapshotDefinition, datadirsConfig *benchmark.DatadirConfig, roles []benchmark.BenchmarkRole) (*config.InternalClientOptions, *config.InternalClientOptions, error) {
 	// create temp directory for this test
 	testName := fmt.Sprintf("%d-%s-test", time.Now().Unix(), params.NodeType)
 	sequencerTestDir := path.Join(workingDir, fmt.Sprintf("%s-sequencer", testName))
@@ -350,9 +350,12 @@ func (s *service) setupDataDirs(workingDir string, params types.RunParams, genes
 		return nil, nil, errors.Wrap(err, "failed to setup internal directories")
 	}
 
-	validatorOptions, err := s.setupInternalDirectories(validatorTestDir, params, genesis, snapshot, "validator", datadirsConfig)
-	if err != nil {
-		return nil, nil, errors.Wrap(err, "failed to setup internal directories")
+	var validatorOptions *config.InternalClientOptions
+	if benchmark.BenchmarkRolesContain(roles, benchmark.BenchmarkRoleValidator) {
+		validatorOptions, err = s.setupInternalDirectories(validatorTestDir, params, genesis, snapshot, "validator", datadirsConfig)
+		if err != nil {
+			return nil, nil, errors.Wrap(err, "failed to setup internal directories")
+		}
 	}
 
 	return sequencerOptions, validatorOptions, nil
@@ -368,7 +371,7 @@ func (s *service) setupBlobsDir(workingDir string) error {
 	return nil
 }
 
-func (s *service) runTest(ctx context.Context, params types.RunParams, workingDir string, outputDir string, snapshotConfig *benchmark.SnapshotDefinition, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, datadirsConfig *benchmark.DatadirConfig, flashblocksBlockTime string) (*benchmark.RunResult, error) {
+func (s *service) runTest(ctx context.Context, params types.RunParams, workingDir string, outputDir string, snapshotConfig *benchmark.SnapshotDefinition, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, datadirsConfig *benchmark.DatadirConfig, roles []benchmark.BenchmarkRole, flashblocksBlockTime string) (*benchmark.RunResult, error) {
 
 	s.log.Info(fmt.Sprintf("Running benchmark with params: %+v", params))
 
@@ -384,7 +387,7 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi
 	validatorTestDir := path.Join(workingDir, fmt.Sprintf("%s-validator", testName))
 
 	// setup data directories (restore from snapshot if needed)
-	sequencerOptions, validatorOptions, err := s.setupDataDirs(workingDir, params, genesis, snapshotConfig, datadirsConfig)
+	sequencerOptions, validatorOptions, err := s.setupDataDirs(workingDir, params, genesis, snapshotConfig, datadirsConfig, roles)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to setup data dirs")
 	}
@@ -432,7 +435,7 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi
 	}
 
 	// Run benchmark
-	benchmark, err := network.NewNetworkBenchmark(config, s.log, sequencerOptions, validatorOptions, proofConfig, transactionPayload, s.portState, flashblocksBlockTime)
+	benchmark, err := network.NewNetworkBenchmark(config, s.log, sequencerOptions, validatorOptions, proofConfig, transactionPayload, s.portState, roles, flashblocksBlockTime)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to create network benchmark")
 	}
@@ -444,13 +447,17 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi
 		s.log.Error("failed to export sequencer output", "err", exportErr)
 	}
 
-	if exportErr := s.exportOutput(testName, runErr, validatorOptions, outputDir, "validator"); exportErr != nil {
-		s.log.Error("failed to export validator output", "err", exportErr)
+	if validatorOptions != nil {
+		if exportErr := s.exportOutput(testName, runErr, validatorOptions, outputDir, "validator"); exportErr != nil {
+			s.log.Error("failed to export validator output", "err", exportErr)
+		}
 	}
 
 	if runErr != nil {
 		s.dumpLogFile(sequencerOptions, "sequencer")
-		s.dumpLogFile(validatorOptions, "validator")
+		if validatorOptions != nil {
+			s.dumpLogFile(validatorOptions, "validator")
+		}
 		return nil, errors.Wrap(runErr, "failed to run benchmark")
 	}
 
@@ -617,7 +624,7 @@ outerLoop:
 				return errors.Wrap(err, "failed to create output directory")
 			}
 
-			metricSummary, err := s.runTest(ctx, c.Params, s.config.DataDir(), outputDir, testPlan.Snapshot, testPlan.ProofProgram, transactionPayloads[c.Params.PayloadID], testPlan.Datadir, config.FlashblocksBlockTime())
+			metricSummary, err := s.runTest(ctx, c.Params, s.config.DataDir(), outputDir, testPlan.Snapshot, testPlan.ProofProgram, transactionPayloads[c.Params.PayloadID], testPlan.Datadir, testPlan.Roles, config.FlashblocksBlockTime())
 			if err != nil {
 				log.Error("Failed to run test", "err", err)
 				metricSummary = &benchmark.RunResult{

From 7efe94bd83ef382afec5ab52cff117d84a9b2e02 Mon Sep 17 00:00:00 2001
From: Niran Babalola <niran.babalola@coinbase.com>
Date: Wed, 27 May 2026 00:23:54 -0500
Subject: [PATCH 2/2] Refine benchmark role execution model

---
 docs/benchmark-types.md             |   6 +-
 runner/benchmark/definition.go      | 111 +++++++++++++++++-----------
 runner/benchmark/matrix.go          |   8 +-
 runner/benchmark/matrix_test.go     |  50 +++++++++++--
 runner/benchmark/result_metadata.go |   8 +-
 runner/network/network_benchmark.go |  27 +++----
 runner/service.go                   |  14 ++--
 7 files changed, 144 insertions(+), 80 deletions(-)

diff --git a/docs/benchmark-types.md b/docs/benchmark-types.md
index fdd8fca0..dc3cf2c1 100644
--- a/docs/benchmark-types.md
+++ b/docs/benchmark-types.md
@@ -25,7 +25,9 @@
 
 ## Role selection
 
-Benchmark definitions run both roles by default:
+Benchmark definitions always run the sequencer role. The sequencer phase builds the payloads used by the rest of the benchmark.
+
+By default, benchmarks also run the validator role after the sequencer phase:
 
 ```yaml
 benchmarks:
@@ -42,7 +44,7 @@ benchmarks:
       # ...
 ```
 
-The validator role cannot run without the sequencer role because validator benchmarks consume payloads produced by the sequencer phase. Proof-program benchmarks also require the validator role.
+The validator role cannot run by itself because validator benchmarks consume payloads produced by the sequencer phase, so `roles: [validator]` is rejected. Proof-program benchmarks and metric thresholds whose names start with `validator/` also require the validator role.
 
 ## op-challenger test
 
diff --git a/runner/benchmark/definition.go b/runner/benchmark/definition.go
index 9eae0004..6e5decf5 100644
--- a/runner/benchmark/definition.go
+++ b/runner/benchmark/definition.go
@@ -16,19 +16,30 @@ import (
 type BenchmarkRole string
 
 const (
+	// BenchmarkRoleSequencer is always required. Every benchmark starts by
+	// running the sequencer phase, which builds the payloads consumed by any
+	// later validator phase.
 	BenchmarkRoleSequencer BenchmarkRole = "sequencer"
+
+	// BenchmarkRoleValidator is optional. When enabled, the validator phase
+	// replays the payloads produced by the sequencer phase.
 	BenchmarkRoleValidator BenchmarkRole = "validator"
 )
 
-var defaultBenchmarkRoles = []BenchmarkRole{BenchmarkRoleSequencer, BenchmarkRoleValidator}
-
-func DefaultBenchmarkRoles() []BenchmarkRole {
-	return append([]BenchmarkRole(nil), defaultBenchmarkRoles...)
+// BenchmarkExecutionMode is the normalized internal execution model.
+//
+// The YAML config exposes "roles", but the runner does not support arbitrary
+// role combinations: the sequencer phase always runs, and the only real choice
+// is whether to also run the validator phase after it.
+type BenchmarkExecutionMode struct {
+	RunValidator bool
 }
 
-func NormalizeBenchmarkRoles(roles []BenchmarkRole) ([]BenchmarkRole, error) {
+var defaultBenchmarkExecutionMode = BenchmarkExecutionMode{RunValidator: true}
+
+func BenchmarkExecutionModeFromRoles(roles []BenchmarkRole) (BenchmarkExecutionMode, error) {
 	if len(roles) == 0 {
-		return DefaultBenchmarkRoles(), nil
+		return defaultBenchmarkExecutionMode, nil
 	}
 
 	seen := make(map[BenchmarkRole]bool, len(roles))
@@ -36,58 +47,44 @@ func NormalizeBenchmarkRoles(roles []BenchmarkRole) ([]BenchmarkRole, error) {
 		switch role {
 		case BenchmarkRoleSequencer, BenchmarkRoleValidator:
 		default:
-			return nil, fmt.Errorf("invalid benchmark role %q", role)
+			return BenchmarkExecutionMode{}, fmt.Errorf("invalid benchmark role %q", role)
 		}
 
 		if seen[role] {
-			return nil, fmt.Errorf("duplicate benchmark role %q", role)
+			return BenchmarkExecutionMode{}, fmt.Errorf("duplicate benchmark role %q", role)
 		}
 		seen[role] = true
 	}
 
 	if !seen[BenchmarkRoleSequencer] {
-		return nil, fmt.Errorf("benchmark roles must include %q", BenchmarkRoleSequencer)
+		return BenchmarkExecutionMode{}, fmt.Errorf("benchmark roles must include %q", BenchmarkRoleSequencer)
 	}
 
-	normalized := []BenchmarkRole{BenchmarkRoleSequencer}
-	if seen[BenchmarkRoleValidator] {
-		normalized = append(normalized, BenchmarkRoleValidator)
-	}
-
-	return normalized, nil
+	// A validator-only benchmark is invalid because the validator phase consumes
+	// payloads and setup state produced by the sequencer phase.
+	return BenchmarkExecutionMode{RunValidator: seen[BenchmarkRoleValidator]}, nil
 }
 
-func BenchmarkRolesContain(roles []BenchmarkRole, role BenchmarkRole) bool {
-	for _, r := range roles {
-		if r == role {
-			return true
-		}
+// Roles returns the config-facing role list for metadata and logs. Internally,
+// callers should use RunValidator instead of reinterpreting the role slice.
+func (mode BenchmarkExecutionMode) Roles() []BenchmarkRole {
+	roles := []BenchmarkRole{BenchmarkRoleSequencer}
+	if mode.RunValidator {
+		roles = append(roles, BenchmarkRoleValidator)
 	}
-	return false
+	return roles
 }
 
-func BenchmarkRoleNames(roles []BenchmarkRole) []string {
-	names := make([]string, 0, len(roles))
-	for _, role := range roles {
+func (mode BenchmarkExecutionMode) RolesString() string {
+	names := make([]string, 0, 2)
+	for _, role := range mode.Roles() {
 		names = append(names, string(role))
 	}
-	return names
-}
-
-func BenchmarkRolesString(roles []BenchmarkRole) string {
-	return strings.Join(BenchmarkRoleNames(roles), ",")
+	return strings.Join(names, ",")
 }
 
-func IsDefaultBenchmarkRoles(roles []BenchmarkRole) bool {
-	if len(roles) != len(defaultBenchmarkRoles) {
-		return false
-	}
-	for i, role := range roles {
-		if role != defaultBenchmarkRoles[i] {
-			return false
-		}
-	}
-	return true
+func (mode BenchmarkExecutionMode) IsDefault() bool {
+	return mode == defaultBenchmarkExecutionMode
 }
 
 // Param is a single dimension of a benchmark matrix. It can be a
@@ -217,16 +214,20 @@ type TestDefinition struct {
 }
 
 func (bc *TestDefinition) Check() error {
-	roles, err := NormalizeBenchmarkRoles(bc.Roles)
+	mode, err := bc.ExecutionMode()
 	if err != nil {
 		return err
 	}
 
 	proofProgramEnabled := bc.ProofProgram != nil && (bc.ProofProgram.Enabled == nil || *bc.ProofProgram.Enabled)
-	if proofProgramEnabled && !BenchmarkRolesContain(roles, BenchmarkRoleValidator) {
+	if proofProgramEnabled && !mode.RunValidator {
 		return errors.New("proof_program requires the validator benchmark role")
 	}
 
+	if err := bc.validateThresholdRoles(mode); err != nil {
+		return err
+	}
+
 	for _, b := range bc.Variables {
 		err := b.Check()
 		if err != nil {
@@ -236,6 +237,30 @@ func (bc *TestDefinition) Check() error {
 	return nil
 }
 
-func (bc *TestDefinition) NormalizedRoles() ([]BenchmarkRole, error) {
-	return NormalizeBenchmarkRoles(bc.Roles)
+func (bc *TestDefinition) ExecutionMode() (BenchmarkExecutionMode, error) {
+	return BenchmarkExecutionModeFromRoles(bc.Roles)
+}
+
+func (bc *TestDefinition) validateThresholdRoles(mode BenchmarkExecutionMode) error {
+	if bc.Metrics == nil {
+		return nil
+	}
+
+	for level, thresholds := range map[string]map[string]float64{
+		"warning": bc.Metrics.Warning,
+		"error":   bc.Metrics.Error,
+	} {
+		for metric := range thresholds {
+			role, _, ok := strings.Cut(metric, "/")
+			if !ok {
+				continue
+			}
+
+			if BenchmarkRole(role) == BenchmarkRoleValidator && !mode.RunValidator {
+				return fmt.Errorf("%s threshold %q requires the validator benchmark role", level, metric)
+			}
+		}
+	}
+
+	return nil
 }
diff --git a/runner/benchmark/matrix.go b/runner/benchmark/matrix.go
index b406d428..180f78f1 100644
--- a/runner/benchmark/matrix.go
+++ b/runner/benchmark/matrix.go
@@ -17,7 +17,9 @@ type TestPlan struct {
 	Snapshot     *SnapshotDefinition
 	ProofProgram *ProofProgramOptions
 	Thresholds   *ThresholdConfig
-	Roles        []BenchmarkRole
+	// Mode is normalized from the YAML roles field. The sequencer phase is
+	// always part of a test plan; Mode only controls whether validator replay runs.
+	Mode BenchmarkExecutionMode
 }
 
 func NewTestPlanFromConfig(c TestDefinition, testFileName string, config *BenchmarkConfig) (*TestPlan, error) {
@@ -25,7 +27,7 @@ func NewTestPlanFromConfig(c TestDefinition, testFileName string, config *Benchm
 		return nil, err
 	}
 
-	roles, err := c.NormalizedRoles()
+	mode, err := c.ExecutionMode()
 	if err != nil {
 		return nil, err
 	}
@@ -52,7 +54,7 @@ func NewTestPlanFromConfig(c TestDefinition, testFileName string, config *Benchm
 		Snapshot:     c.Snapshot,
 		ProofProgram: proofProgram,
 		Thresholds:   c.Metrics,
-		Roles:        roles,
+		Mode:         mode,
 	}, nil
 }
 
diff --git a/runner/benchmark/matrix_test.go b/runner/benchmark/matrix_test.go
index e5d1e1aa..ec3b05e4 100644
--- a/runner/benchmark/matrix_test.go
+++ b/runner/benchmark/matrix_test.go
@@ -173,7 +173,7 @@ func TestNewTestPlanFromConfigRoles(t *testing.T) {
 
 	plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config)
 	require.NoError(t, err)
-	require.Equal(t, []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, plan.Roles)
+	require.False(t, plan.Mode.RunValidator)
 
 	metadata := benchmark.RunGroupFromTestPlans([]benchmark.TestPlan{*plan}, nil)
 	require.Len(t, metadata.Runs, 1)
@@ -193,10 +193,7 @@ func TestNewTestPlanFromConfigDefaultsToBothRoles(t *testing.T) {
 
 	plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config)
 	require.NoError(t, err)
-	require.Equal(t, []benchmark.BenchmarkRole{
-		benchmark.BenchmarkRoleSequencer,
-		benchmark.BenchmarkRoleValidator,
-	}, plan.Roles)
+	require.True(t, plan.Mode.RunValidator)
 
 	metadata := benchmark.RunGroupFromTestPlans([]benchmark.TestPlan{*plan}, nil)
 	require.Len(t, metadata.Runs, 1)
@@ -260,6 +257,49 @@ func TestNewTestPlanFromConfigRejectsProofProgramWithoutValidator(t *testing.T)
 	require.ErrorContains(t, err, "proof_program requires the validator benchmark role")
 }
 
+func TestNewTestPlanFromConfigRejectsValidatorThresholdsWithoutValidator(t *testing.T) {
+	config := &benchmark.BenchmarkConfig{Name: "test"}
+	definition := benchmark.TestDefinition{
+		Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, // sequencer only: the validator role is left out
+		Metrics: &benchmark.ThresholdConfig{
+			Error: map[string]float64{
+				"validator/latency/new_payload": 1e9, // validator-prefixed threshold key that the expected error below names
+			},
+		},
+		Variables: []benchmark.Param{
+			{
+				ParamType: "payload",
+				Value:     "simple",
+			},
+		},
+	}
+
+	_, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config) // the plan is discarded; only the error is inspected
+	require.ErrorContains(t, err, `error threshold "validator/latency/new_payload" requires the validator benchmark role`)
+}
+
+func TestNewTestPlanFromConfigAllowsSequencerThresholdsWithoutValidator(t *testing.T) {
+	config := &benchmark.BenchmarkConfig{Name: "test"}
+	definition := benchmark.TestDefinition{
+		Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, // sequencer only: the validator role is left out
+		Metrics: &benchmark.ThresholdConfig{
+			Error: map[string]float64{
+				"sequencer/latency/get_payload": 1e9, // sequencer-prefixed threshold key; expected to be accepted with these roles
+			},
+		},
+		Variables: []benchmark.Param{
+			{
+				ParamType: "payload",
+				Value:     "simple",
+			},
+		},
+	}
+
+	plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config)
+	require.NoError(t, err)
+	require.False(t, plan.Mode.RunValidator) // the sequencer-only plan must not request the validator phase
+}
+
 func stringPtr(s string) *string {
 	return &s
 }
diff --git a/runner/benchmark/result_metadata.go b/runner/benchmark/result_metadata.go
index 1deb4d20..22f79e6f 100644
--- a/runner/benchmark/result_metadata.go
+++ b/runner/benchmark/result_metadata.go
@@ -61,14 +61,10 @@ func RunGroupFromTestPlans(testPlans []TestPlan, machineInfo *MachineInfo) RunGr
 	}
 
 	for _, testPlan := range testPlans {
-		roles := testPlan.Roles
-		if len(roles) == 0 {
-			roles = DefaultBenchmarkRoles()
-		}
 		for _, params := range testPlan.Runs {
 			testConfig := params.Params.ToConfig()
-			if !IsDefaultBenchmarkRoles(roles) {
-				testConfig["Roles"] = BenchmarkRolesString(roles)
+			if !testPlan.Mode.IsDefault() {
+				testConfig["Roles"] = testPlan.Mode.RolesString()
 			}
 
 			metadata.Runs = append(metadata.Runs, Run{
diff --git a/runner/network/network_benchmark.go b/runner/network/network_benchmark.go
index 91ee4e36..37b336c7 100644
--- a/runner/network/network_benchmark.go
+++ b/runner/network/network_benchmark.go
@@ -46,23 +46,20 @@ type NetworkBenchmark struct {
 
 	transactionPayload   payload.Definition
 	ports                portmanager.PortManager
-	roles                []benchmark.BenchmarkRole
+	mode                 benchmark.BenchmarkExecutionMode
 	flashblocksBlockTime string
 }
 
-// NewNetworkBenchmark creates a new network benchmark and initializes the payload worker and consensus client
-func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequencerOptions *config.InternalClientOptions, validatorOptions *config.InternalClientOptions, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, ports portmanager.PortManager, roles []benchmark.BenchmarkRole, flashblocksBlockTime string) (*NetworkBenchmark, error) {
-	normalizedRoles, err := benchmark.NormalizeBenchmarkRoles(roles)
-	if err != nil {
-		return nil, err
-	}
-	if !benchmark.BenchmarkRolesContain(normalizedRoles, benchmark.BenchmarkRoleSequencer) {
-		return nil, errors.New("network benchmark requires the sequencer role")
-	}
-	if benchmark.BenchmarkRolesContain(normalizedRoles, benchmark.BenchmarkRoleValidator) && validatorOptions == nil {
+// NewNetworkBenchmark creates a new network benchmark.
+//
+// The sequencer phase always runs and produces the payload stream. The
+// normalized execution mode only controls whether the validator phase is run
+// afterward to replay that stream.
+func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequencerOptions *config.InternalClientOptions, validatorOptions *config.InternalClientOptions, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, ports portmanager.PortManager, mode benchmark.BenchmarkExecutionMode, flashblocksBlockTime string) (*NetworkBenchmark, error) {
+	if mode.RunValidator && validatorOptions == nil {
 		return nil, errors.New("validator options are required when the validator role is enabled")
 	}
-	if proofConfig != nil && !benchmark.BenchmarkRolesContain(normalizedRoles, benchmark.BenchmarkRoleValidator) {
+	if proofConfig != nil && !mode.RunValidator {
 		return nil, errors.New("proof program benchmark requires the validator role")
 	}
 
@@ -74,7 +71,7 @@ func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequence
 		proofConfig:          proofConfig,
 		transactionPayload:   transactionPayload,
 		ports:                ports,
-		roles:                normalizedRoles,
+		mode:                 mode,
 		flashblocksBlockTime: flashblocksBlockTime,
 	}, nil
 }
@@ -98,7 +95,7 @@ func (nb *NetworkBenchmark) Run(ctx context.Context) error {
 	}
 
 	if !nb.runsValidator() {
-		nb.log.Info("Skipping validator benchmark", "roles", benchmark.BenchmarkRolesString(nb.roles))
+		nb.log.Info("Skipping validator benchmark", "roles", nb.mode.RolesString())
 		sequencerClient.Stop()
 		return nil
 	}
@@ -286,7 +283,7 @@ func (nb *NetworkBenchmark) GetResult() (*benchmark.RunResult, error) {
 }
 
 func (nb *NetworkBenchmark) runsValidator() bool {
-	return benchmark.BenchmarkRolesContain(nb.roles, benchmark.BenchmarkRoleValidator)
+	return nb.mode.RunValidator // mode is the execution mode NewNetworkBenchmark stored on the struct
 }
 
 func setupNode(ctx context.Context, l log.Logger, nodeTypeStr string, params benchtypes.RunParams, options *config.InternalClientOptions, portManager portmanager.PortManager, flashblockServerURL string, flashblocksBlockTime string) (types.ExecutionClient, error) {
diff --git a/runner/service.go b/runner/service.go
index 6f2ec308..2d7f64fa 100644
--- a/runner/service.go
+++ b/runner/service.go
@@ -339,7 +339,7 @@ func (s *service) getGenesisForSnapshotConfig(snapshotConfig *benchmark.Snapshot
 	return genesis, nil
 }
 
-func (s *service) setupDataDirs(workingDir string, params types.RunParams, genesis *core.Genesis, snapshot *benchmark.SnapshotDefinition, datadirsConfig *benchmark.DatadirConfig, roles []benchmark.BenchmarkRole) (*config.InternalClientOptions, *config.InternalClientOptions, error) {
+func (s *service) setupDataDirs(workingDir string, params types.RunParams, genesis *core.Genesis, snapshot *benchmark.SnapshotDefinition, datadirsConfig *benchmark.DatadirConfig, mode benchmark.BenchmarkExecutionMode) (*config.InternalClientOptions, *config.InternalClientOptions, error) {
 	// create temp directory for this test
 	testName := fmt.Sprintf("%d-%s-test", time.Now().Unix(), params.NodeType)
 	sequencerTestDir := path.Join(workingDir, fmt.Sprintf("%s-sequencer", testName))
@@ -351,7 +351,9 @@ func (s *service) setupDataDirs(workingDir string, params types.RunParams, genes
 	}
 
 	var validatorOptions *config.InternalClientOptions
-	if benchmark.BenchmarkRolesContain(roles, benchmark.BenchmarkRoleValidator) {
+	// The sequencer datadir is always required. Only create validator state when
+	// the normalized execution mode includes validator replay.
+	if mode.RunValidator {
 		validatorOptions, err = s.setupInternalDirectories(validatorTestDir, params, genesis, snapshot, "validator", datadirsConfig)
 		if err != nil {
 			return nil, nil, errors.Wrap(err, "failed to setup internal directories")
@@ -371,7 +373,7 @@ func (s *service) setupBlobsDir(workingDir string) error {
 	return nil
 }
 
-func (s *service) runTest(ctx context.Context, params types.RunParams, workingDir string, outputDir string, snapshotConfig *benchmark.SnapshotDefinition, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, datadirsConfig *benchmark.DatadirConfig, roles []benchmark.BenchmarkRole, flashblocksBlockTime string) (*benchmark.RunResult, error) {
+func (s *service) runTest(ctx context.Context, params types.RunParams, workingDir string, outputDir string, snapshotConfig *benchmark.SnapshotDefinition, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, datadirsConfig *benchmark.DatadirConfig, mode benchmark.BenchmarkExecutionMode, flashblocksBlockTime string) (*benchmark.RunResult, error) {
 
 	s.log.Info(fmt.Sprintf("Running benchmark with params: %+v", params))
 
@@ -387,7 +389,7 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi
 	validatorTestDir := path.Join(workingDir, fmt.Sprintf("%s-validator", testName))
 
 	// setup data directories (restore from snapshot if needed)
-	sequencerOptions, validatorOptions, err := s.setupDataDirs(workingDir, params, genesis, snapshotConfig, datadirsConfig, roles)
+	sequencerOptions, validatorOptions, err := s.setupDataDirs(workingDir, params, genesis, snapshotConfig, datadirsConfig, mode)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to setup data dirs")
 	}
@@ -435,7 +437,7 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi
 	}
 
 	// Run benchmark
-	benchmark, err := network.NewNetworkBenchmark(config, s.log, sequencerOptions, validatorOptions, proofConfig, transactionPayload, s.portState, roles, flashblocksBlockTime)
+	benchmark, err := network.NewNetworkBenchmark(config, s.log, sequencerOptions, validatorOptions, proofConfig, transactionPayload, s.portState, mode, flashblocksBlockTime)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to create network benchmark")
 	}
@@ -624,7 +626,7 @@ outerLoop:
 				return errors.Wrap(err, "failed to create output directory")
 			}
 
-			metricSummary, err := s.runTest(ctx, c.Params, s.config.DataDir(), outputDir, testPlan.Snapshot, testPlan.ProofProgram, transactionPayloads[c.Params.PayloadID], testPlan.Datadir, testPlan.Roles, config.FlashblocksBlockTime())
+			metricSummary, err := s.runTest(ctx, c.Params, s.config.DataDir(), outputDir, testPlan.Snapshot, testPlan.ProofProgram, transactionPayloads[c.Params.PayloadID], testPlan.Datadir, testPlan.Mode, config.FlashblocksBlockTime())
 			if err != nil {
 				log.Error("Failed to run test", "err", err)
 				metricSummary = &benchmark.RunResult{