diff --git a/README.md b/README.md index 527c4a81..42ea5f15 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,8 @@ Each test executes a standardized workflow: This approach allows precise measurement of performance characteristics for both block production and validation. +Benchmarks run both phases by default. Set `roles: [sequencer]` on a benchmark definition to run only the sequencer/block-building phase, which is useful for snapshot startup and load-test coverage that does not need validator payload replay. + ## Configuration ### Available Flags diff --git a/configs/examples/snapshot.yml b/configs/examples/snapshot.yml index d802945d..252590d6 100644 --- a/configs/examples/snapshot.yml +++ b/configs/examples/snapshot.yml @@ -17,6 +17,8 @@ benchmarks: # just delete the snapshot directory to force a full copy command: ./scripts/copy-local-snapshot.sh --skip-if-nonempty genesis_file: ../../sepolia-alpha/sepolia-alpha-genesis.json + roles: + - sequencer # force_clean is true by default to ensure consistency, but we can skip it for testing force_clean: false variables: diff --git a/docs/benchmark-types.md b/docs/benchmark-types.md index 77580b92..dc3cf2c1 100644 --- a/docs/benchmark-types.md +++ b/docs/benchmark-types.md @@ -23,7 +23,30 @@ - Collect block metrics - Reason we don't need to test mempool for validating node: only used for tx gossip, no logic actually has to be executed +## Role selection + +Benchmark definitions always run the sequencer role. The sequencer phase builds the payloads used by the rest of the benchmark. + +By default, benchmarks also run the validator role after the sequencer phase: + +```yaml +benchmarks: + - variables: + # ... +``` + +Set `roles: [sequencer]` when a benchmark only needs block-building or snapshot startup coverage and does not need to validate the generated payloads: + +```yaml +benchmarks: + - roles: [sequencer] + variables: + # ... +``` + +The validator role cannot run by itself because validator benchmarks consume payloads produced by the sequencer phase. Proof-program benchmarks also require the validator role. + ## op-challenger test - batch all blocks in the test to L1 -- run op-program on those batches - verify output root \ No newline at end of file +- run op-program on those batches - verify output root diff --git a/runner/benchmark/definition.go b/runner/benchmark/definition.go index e9e96ff5..6e5decf5 100644 --- a/runner/benchmark/definition.go +++ b/runner/benchmark/definition.go @@ -13,6 +13,80 @@ import ( "github.com/base/base-bench/runner/payload" ) +type BenchmarkRole string + +const ( + // BenchmarkRoleSequencer is always required. Every benchmark starts by + // running the sequencer phase, which builds the payloads consumed by any + // later validator phase. + BenchmarkRoleSequencer BenchmarkRole = "sequencer" + + // BenchmarkRoleValidator is optional. When enabled, the validator phase + // replays the payloads produced by the sequencer phase. + BenchmarkRoleValidator BenchmarkRole = "validator" +) + +// BenchmarkExecutionMode is the normalized internal execution model. +// +// The YAML config exposes "roles", but the runner does not support arbitrary +// role combinations: the sequencer phase always runs, and the only real choice +// is whether to also run the validator phase after it. +type BenchmarkExecutionMode struct { + RunValidator bool +} + +var defaultBenchmarkExecutionMode = BenchmarkExecutionMode{RunValidator: true} + +func BenchmarkExecutionModeFromRoles(roles []BenchmarkRole) (BenchmarkExecutionMode, error) { + if len(roles) == 0 { + return defaultBenchmarkExecutionMode, nil + } + + seen := make(map[BenchmarkRole]bool, len(roles)) + for _, role := range roles { + switch role { + case BenchmarkRoleSequencer, BenchmarkRoleValidator: + default: + return BenchmarkExecutionMode{}, fmt.Errorf("invalid benchmark role %q", role) + } + + if seen[role] { + return BenchmarkExecutionMode{}, fmt.Errorf("duplicate benchmark role %q", role) + } + seen[role] = true + } + + if !seen[BenchmarkRoleSequencer] { + return BenchmarkExecutionMode{}, fmt.Errorf("benchmark roles must include %q", BenchmarkRoleSequencer) + } + + // A validator-only benchmark is invalid because the validator phase consumes + // payloads and setup state produced by the sequencer phase. + return BenchmarkExecutionMode{RunValidator: seen[BenchmarkRoleValidator]}, nil +} + +// Roles returns the config-facing role list for metadata and logs. Internally, +// callers should use RunValidator instead of reinterpreting the role slice. +func (mode BenchmarkExecutionMode) Roles() []BenchmarkRole { + roles := []BenchmarkRole{BenchmarkRoleSequencer} + if mode.RunValidator { + roles = append(roles, BenchmarkRoleValidator) + } + return roles +} + +func (mode BenchmarkExecutionMode) RolesString() string { + names := make([]string, 0, 2) + for _, role := range mode.Roles() { + names = append(names, string(role)) + } + return strings.Join(names, ",") +} + +func (mode BenchmarkExecutionMode) IsDefault() bool { + return mode == defaultBenchmarkExecutionMode +} + // Param is a single dimension of a benchmark matrix. It can be a // single value or a list of values. type Param struct { @@ -134,11 +208,26 @@ type TestDefinition struct { Snapshot *SnapshotDefinition `yaml:"snapshot"` Metrics *ThresholdConfig `yaml:"metrics"` Tags *map[string]string `yaml:"tags"` + Roles []BenchmarkRole `yaml:"roles"` Variables []Param `yaml:"variables"` ProofProgram *ProofProgramOptions `yaml:"proof_program"` } func (bc *TestDefinition) Check() error { + mode, err := bc.ExecutionMode() + if err != nil { + return err + } + + proofProgramEnabled := bc.ProofProgram != nil && (bc.ProofProgram.Enabled == nil || *bc.ProofProgram.Enabled) + if proofProgramEnabled && !mode.RunValidator { + return errors.New("proof_program requires the validator benchmark role") + } + + if err := bc.validateThresholdRoles(mode); err != nil { + return err + } + for _, b := range bc.Variables { err := b.Check() if err != nil { @@ -147,3 +236,31 @@ func (bc *TestDefinition) Check() error { } return nil } + +func (bc *TestDefinition) ExecutionMode() (BenchmarkExecutionMode, error) { + return BenchmarkExecutionModeFromRoles(bc.Roles) +} + +func (bc *TestDefinition) validateThresholdRoles(mode BenchmarkExecutionMode) error { + if bc.Metrics == nil { + return nil + } + + for level, thresholds := range map[string]map[string]float64{ + "warning": bc.Metrics.Warning, + "error": bc.Metrics.Error, + } { + for metric := range thresholds { + role, _, ok := strings.Cut(metric, "/") + if !ok { + continue + } + + if BenchmarkRole(role) == BenchmarkRoleValidator && !mode.RunValidator { + return fmt.Errorf("%s threshold %q requires the validator benchmark role", level, metric) + } + } + } + + return nil +} diff --git a/runner/benchmark/matrix.go b/runner/benchmark/matrix.go index 430aa892..180f78f1 100644 --- a/runner/benchmark/matrix.go +++ b/runner/benchmark/matrix.go @@ -17,9 +17,21 @@ type TestPlan struct { Snapshot *SnapshotDefinition ProofProgram *ProofProgramOptions Thresholds *ThresholdConfig + // Mode is normalized from the YAML roles field. The sequencer phase is + // always part of a test plan; Mode only controls whether validator replay runs. + Mode BenchmarkExecutionMode } func NewTestPlanFromConfig(c TestDefinition, testFileName string, config *BenchmarkConfig) (*TestPlan, error) { + if err := c.Check(); err != nil { + return nil, err + } + + mode, err := c.ExecutionMode() + if err != nil { + return nil, err + } + testRuns, err := ResolveTestRunsFromMatrix(c, testFileName, config) if err != nil { return nil, err @@ -42,6 +54,7 @@ func NewTestPlanFromConfig(c TestDefinition, testFileName string, config *Benchm Snapshot: c.Snapshot, ProofProgram: proofProgram, Thresholds: c.Metrics, + Mode: mode, }, nil } diff --git a/runner/benchmark/matrix_test.go b/runner/benchmark/matrix_test.go index 5440dccf..ec3b05e4 100644 --- a/runner/benchmark/matrix_test.go +++ b/runner/benchmark/matrix_test.go @@ -159,6 +159,151 @@ func TestResolveTestRunsFromMatrix(t *testing.T) { } } +func TestNewTestPlanFromConfigRoles(t *testing.T) { + config := &benchmark.BenchmarkConfig{Name: "test"} + definition := benchmark.TestDefinition{ + Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, + Variables: []benchmark.Param{ + { + ParamType: "payload", + Value: "simple", + }, + }, + } + + plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config) + require.NoError(t, err) + require.False(t, plan.Mode.RunValidator) + + metadata := benchmark.RunGroupFromTestPlans([]benchmark.TestPlan{*plan}, nil) + require.Len(t, metadata.Runs, 1) + require.Equal(t, "sequencer", metadata.Runs[0].TestConfig["Roles"]) +} + +func TestNewTestPlanFromConfigDefaultsToBothRoles(t *testing.T) { + config := &benchmark.BenchmarkConfig{Name: "test"} + definition := benchmark.TestDefinition{ + Variables: []benchmark.Param{ + { + ParamType: "payload", + Value: "simple", + }, + }, + } + + plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config) + require.NoError(t, err) + require.True(t, plan.Mode.RunValidator) + + metadata := benchmark.RunGroupFromTestPlans([]benchmark.TestPlan{*plan}, nil) + require.Len(t, metadata.Runs, 1) + require.NotContains(t, metadata.Runs[0].TestConfig, "Roles") +} + +func TestNewTestPlanFromConfigRejectsInvalidRoles(t *testing.T) { + tests := []struct { + name string + roles []benchmark.BenchmarkRole + }{ + { + name: "unknown role", + roles: []benchmark.BenchmarkRole{"other"}, + }, + { + name: "duplicate role", + roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer, benchmark.BenchmarkRoleSequencer}, + }, + { + name: "validator without sequencer", + roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleValidator}, + }, + } + + config := &benchmark.BenchmarkConfig{Name: "test"} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + definition := benchmark.TestDefinition{ + Roles: tt.roles, + Variables: []benchmark.Param{ + { + ParamType: "payload", + Value: "simple", + }, + }, + } + + _, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config) + require.Error(t, err) + }) + } +} + +func TestNewTestPlanFromConfigRejectsProofProgramWithoutValidator(t *testing.T) { + config := &benchmark.BenchmarkConfig{Name: "test"} + definition := benchmark.TestDefinition{ + Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, + ProofProgram: &benchmark.ProofProgramOptions{ + Enabled: boolPtr(true), + }, + Variables: []benchmark.Param{ + { + ParamType: "payload", + Value: "simple", + }, + }, + } + + _, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config) + require.ErrorContains(t, err, "proof_program requires the validator benchmark role") +} + +func TestNewTestPlanFromConfigRejectsValidatorThresholdsWithoutValidator(t *testing.T) { + config := &benchmark.BenchmarkConfig{Name: "test"} + definition := benchmark.TestDefinition{ + Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, + Metrics: &benchmark.ThresholdConfig{ + Error: map[string]float64{ + "validator/latency/new_payload": 1e9, + }, + }, + Variables: []benchmark.Param{ + { + ParamType: "payload", + Value: "simple", + }, + }, + } + + _, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config) + require.ErrorContains(t, err, `error threshold "validator/latency/new_payload" requires the validator benchmark role`) +} + +func TestNewTestPlanFromConfigAllowsSequencerThresholdsWithoutValidator(t *testing.T) { + config := &benchmark.BenchmarkConfig{Name: "test"} + definition := benchmark.TestDefinition{ + Roles: []benchmark.BenchmarkRole{benchmark.BenchmarkRoleSequencer}, + Metrics: &benchmark.ThresholdConfig{ + Error: map[string]float64{ + "sequencer/latency/get_payload": 1e9, + }, + }, + Variables: []benchmark.Param{ + { + ParamType: "payload", + Value: "simple", + }, + }, + } + + plan, err := benchmark.NewTestPlanFromConfig(definition, "config.yml", config) + require.NoError(t, err) + require.False(t, plan.Mode.RunValidator) +} + func stringPtr(s string) *string { return &s } + +func boolPtr(b bool) *bool { + return &b +} diff --git a/runner/benchmark/result_metadata.go b/runner/benchmark/result_metadata.go index e31bdbff..22f79e6f 100644 --- a/runner/benchmark/result_metadata.go +++ b/runner/benchmark/result_metadata.go @@ -7,11 +7,11 @@ import ( ) type RunResult struct { - Success bool `json:"success"` - Complete bool `json:"complete"` - SequencerMetrics types.SequencerKeyMetrics `json:"sequencerMetrics"` - ValidatorMetrics types.ValidatorKeyMetrics `json:"validatorMetrics"` - ClientVersion string `json:"clientVersion,omitempty"` + Success bool `json:"success"` + Complete bool `json:"complete"` + SequencerMetrics *types.SequencerKeyMetrics `json:"sequencerMetrics,omitempty"` + ValidatorMetrics *types.ValidatorKeyMetrics `json:"validatorMetrics,omitempty"` + ClientVersion string `json:"clientVersion,omitempty"` } // MachineInfo contains information about the machine running the benchmark @@ -62,12 +62,17 @@ func RunGroupFromTestPlans(testPlans []TestPlan, machineInfo *MachineInfo) RunGr for _, testPlan := range testPlans { for _, params := range testPlan.Runs { + testConfig := params.Params.ToConfig() + if !testPlan.Mode.IsDefault() { + testConfig["Roles"] = testPlan.Mode.RolesString() + } + metadata.Runs = append(metadata.Runs, Run{ ID: params.ID, SourceFile: params.TestFile, TestName: params.Name, TestDescription: params.Description, - TestConfig: params.Params.ToConfig(), + TestConfig: testConfig, OutputDir: params.OutputDir, Thresholds: testPlan.Thresholds, CreatedAt: &now, diff --git a/runner/network/network_benchmark.go b/runner/network/network_benchmark.go index 2bf13775..37b336c7 100644 --- a/runner/network/network_benchmark.go +++ b/runner/network/network_benchmark.go @@ -44,13 +44,25 @@ type NetworkBenchmark struct { testConfig *benchtypes.TestConfig proofConfig *benchmark.ProofProgramOptions - transactionPayload payload.Definition - ports portmanager.PortManager - flashblocksBlockTime string + transactionPayload payload.Definition + ports portmanager.PortManager + mode benchmark.BenchmarkExecutionMode + flashblocksBlockTime string } -// NewNetworkBenchmark creates a new network benchmark and initializes the payload worker and consensus client -func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequencerOptions *config.InternalClientOptions, validatorOptions *config.InternalClientOptions, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, ports portmanager.PortManager, flashblocksBlockTime string) (*NetworkBenchmark, error) { +// NewNetworkBenchmark creates a new network benchmark. +// +// The sequencer phase always runs and produces the payload stream. The +// normalized execution mode only controls whether the validator phase is run +// afterward to replay that stream. +func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequencerOptions *config.InternalClientOptions, validatorOptions *config.InternalClientOptions, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, ports portmanager.PortManager, mode benchmark.BenchmarkExecutionMode, flashblocksBlockTime string) (*NetworkBenchmark, error) { + if mode.RunValidator && validatorOptions == nil { + return nil, errors.New("validator options are required when the validator role is enabled") + } + if proofConfig != nil && !mode.RunValidator { + return nil, errors.New("proof program benchmark requires the validator role") + } + return &NetworkBenchmark{ log: log, sequencerOptions: sequencerOptions, @@ -59,6 +71,7 @@ func NewNetworkBenchmark(config *benchtypes.TestConfig, log log.Logger, sequence proofConfig: proofConfig, transactionPayload: transactionPayload, ports: ports, + mode: mode, flashblocksBlockTime: flashblocksBlockTime, }, nil } @@ -81,6 +94,12 @@ func (nb *NetworkBenchmark) Run(ctx context.Context) error { return fmt.Errorf("failed to run sequencer benchmark: %w", err) } + if !nb.runsValidator() { + nb.log.Info("Skipping validator benchmark", "roles", nb.mode.RolesString()) + sequencerClient.Stop() + return nil + } + // Benchmark the validator to sync the payloads if err := nb.benchmarkValidator(ctx, payloadResult, lastSetupBlock, l1Chain, sequencerClient); err != nil { return fmt.Errorf("failed to run validator benchmark: %w", err) @@ -243,16 +262,28 @@ func (nb *NetworkBenchmark) benchmarkValidator(ctx context.Context, payloadResul } func (nb *NetworkBenchmark) GetResult() (*benchmark.RunResult, error) { - if nb.collectedSequencerMetrics == nil || nb.collectedValidatorMetrics == nil { - return nil, errors.New("metrics not collected") + if nb.collectedSequencerMetrics == nil { + return nil, errors.New("sequencer metrics not collected") } - return &benchmark.RunResult{ - SequencerMetrics: *nb.collectedSequencerMetrics, - ValidatorMetrics: *nb.collectedValidatorMetrics, + result := &benchmark.RunResult{ + SequencerMetrics: nb.collectedSequencerMetrics, Success: true, Complete: true, - }, nil + } + + if nb.runsValidator() { + if nb.collectedValidatorMetrics == nil { + return nil, errors.New("validator metrics not collected") + } + result.ValidatorMetrics = nb.collectedValidatorMetrics + } + + return result, nil +} + +func (nb *NetworkBenchmark) runsValidator() bool { + return nb.mode.RunValidator } func setupNode(ctx context.Context, l log.Logger, nodeTypeStr string, params benchtypes.RunParams, options *config.InternalClientOptions, portManager portmanager.PortManager, flashblockServerURL string, flashblocksBlockTime string) (types.ExecutionClient, error) { diff --git a/runner/service.go b/runner/service.go index 2e1c0309..2d7f64fa 100644 --- a/runner/service.go +++ b/runner/service.go @@ -339,7 +339,7 @@ func (s *service) getGenesisForSnapshotConfig(snapshotConfig *benchmark.Snapshot return genesis, nil } -func (s *service) setupDataDirs(workingDir string, params types.RunParams, genesis *core.Genesis, snapshot *benchmark.SnapshotDefinition, datadirsConfig *benchmark.DatadirConfig) (*config.InternalClientOptions, *config.InternalClientOptions, error) { +func (s *service) setupDataDirs(workingDir string, params types.RunParams, genesis *core.Genesis, snapshot *benchmark.SnapshotDefinition, datadirsConfig *benchmark.DatadirConfig, mode benchmark.BenchmarkExecutionMode) (*config.InternalClientOptions, *config.InternalClientOptions, error) { // create temp directory for this test testName := fmt.Sprintf("%d-%s-test", time.Now().Unix(), params.NodeType) sequencerTestDir := path.Join(workingDir, fmt.Sprintf("%s-sequencer", testName)) @@ -350,9 +350,14 @@ func (s *service) setupDataDirs(workingDir string, params types.RunParams, genes return nil, nil, errors.Wrap(err, "failed to setup internal directories") } - validatorOptions, err := s.setupInternalDirectories(validatorTestDir, params, genesis, snapshot, "validator", datadirsConfig) - if err != nil { - return nil, nil, errors.Wrap(err, "failed to setup internal directories") + var validatorOptions *config.InternalClientOptions + // The sequencer datadir is always required. Only create validator state when + // the normalized execution mode includes validator replay. + if mode.RunValidator { + validatorOptions, err = s.setupInternalDirectories(validatorTestDir, params, genesis, snapshot, "validator", datadirsConfig) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to setup internal directories") + } } return sequencerOptions, validatorOptions, nil @@ -368,7 +373,7 @@ func (s *service) setupBlobsDir(workingDir string) error { return nil } -func (s *service) runTest(ctx context.Context, params types.RunParams, workingDir string, outputDir string, snapshotConfig *benchmark.SnapshotDefinition, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, datadirsConfig *benchmark.DatadirConfig, flashblocksBlockTime string) (*benchmark.RunResult, error) { +func (s *service) runTest(ctx context.Context, params types.RunParams, workingDir string, outputDir string, snapshotConfig *benchmark.SnapshotDefinition, proofConfig *benchmark.ProofProgramOptions, transactionPayload payload.Definition, datadirsConfig *benchmark.DatadirConfig, mode benchmark.BenchmarkExecutionMode, flashblocksBlockTime string) (*benchmark.RunResult, error) { s.log.Info(fmt.Sprintf("Running benchmark with params: %+v", params)) @@ -384,7 +389,7 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi validatorTestDir := path.Join(workingDir, fmt.Sprintf("%s-validator", testName)) // setup data directories (restore from snapshot if needed) - sequencerOptions, validatorOptions, err := s.setupDataDirs(workingDir, params, genesis, snapshotConfig, datadirsConfig) + sequencerOptions, validatorOptions, err := s.setupDataDirs(workingDir, params, genesis, snapshotConfig, datadirsConfig, mode) if err != nil { return nil, errors.Wrap(err, "failed to setup data dirs") } @@ -432,7 +437,7 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi } // Run benchmark - benchmark, err := network.NewNetworkBenchmark(config, s.log, sequencerOptions, validatorOptions, proofConfig, transactionPayload, s.portState, flashblocksBlockTime) + benchmark, err := network.NewNetworkBenchmark(config, s.log, sequencerOptions, validatorOptions, proofConfig, transactionPayload, s.portState, mode, flashblocksBlockTime) if err != nil { return nil, errors.Wrap(err, "failed to create network benchmark") } @@ -444,13 +449,17 @@ func (s *service) runTest(ctx context.Context, params types.RunParams, workingDi s.log.Error("failed to export sequencer output", "err", exportErr) } - if exportErr := s.exportOutput(testName, runErr, validatorOptions, outputDir, "validator"); exportErr != nil { - s.log.Error("failed to export validator output", "err", exportErr) + if validatorOptions != nil { + if exportErr := s.exportOutput(testName, runErr, validatorOptions, outputDir, "validator"); exportErr != nil { + s.log.Error("failed to export validator output", "err", exportErr) + } } if runErr != nil { s.dumpLogFile(sequencerOptions, "sequencer") - s.dumpLogFile(validatorOptions, "validator") + if validatorOptions != nil { + s.dumpLogFile(validatorOptions, "validator") + } return nil, errors.Wrap(runErr, "failed to run benchmark") } @@ -617,7 +626,7 @@ outerLoop: return errors.Wrap(err, "failed to create output directory") } - metricSummary, err := s.runTest(ctx, c.Params, s.config.DataDir(), outputDir, testPlan.Snapshot, testPlan.ProofProgram, transactionPayloads[c.Params.PayloadID], testPlan.Datadir, config.FlashblocksBlockTime()) + metricSummary, err := s.runTest(ctx, c.Params, s.config.DataDir(), outputDir, testPlan.Snapshot, testPlan.ProofProgram, transactionPayloads[c.Params.PayloadID], testPlan.Datadir, testPlan.Mode, config.FlashblocksBlockTime()) if err != nil { log.Error("Failed to run test", "err", err) metricSummary = &benchmark.RunResult{