From 9a27a18f9c2cb3feaeafb6804c2b3f5226ee32e3 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Sun, 14 Jun 2026 22:00:30 +0800 Subject: [PATCH 01/20] docs: add fault injection test plan --- e2e/FAULT_INJECTION_TEST_PLAN.md | 909 +++++++++++++++++++++++++++++++ 1 file changed, 909 insertions(+) create mode 100644 e2e/FAULT_INJECTION_TEST_PLAN.md diff --git a/e2e/FAULT_INJECTION_TEST_PLAN.md b/e2e/FAULT_INJECTION_TEST_PLAN.md new file mode 100644 index 0000000..6038587 --- /dev/null +++ b/e2e/FAULT_INJECTION_TEST_PLAN.md @@ -0,0 +1,909 @@ + + +# RustFS Operator 故障注入测试方案 + +本文档描述如何在 RustFS Operator 当前 e2e 框架中落地一套可执行、可诊断、可逐步增强的故障注入测试体系。 + +核心原则: + +- **Operator 负责测试环境编排**:创建 Tenant、准备本地 PV、暴露 RustFS S3 服务、等待状态、收集诊断现场。 +- **故障注入器负责制造故障**:优先使用 Kubernetes-native 的 Chaos Mesh。 +- **S3 workload 负责产生真实对象访问流量**:持续执行 `PUT`、`GET`、`HEAD`、`LIST` 等操作。 +- **Jepsen-like checker 负责判断正确性**:它不制造故障,只基于操作历史和最终读取结果判断 RustFS 是否丢数据、读错数据或返回假成功。 + +也就是说,这套测试不是单纯验证 Operator 是否能拉起 StatefulSet,而是通过 Operator 部署出来的 RustFS 集群来验证 RustFS 在故障下的数据正确性。 + +## 目标 + +故障注入测试需要回答这些问题: + +1. RustFS 在 Pod、节点、网络、磁盘 I/O 故障下,已经成功写入的数据是否仍然存在。 +2. RustFS 是否会在磁盘损坏或网络分区后,把错误对象内容以 `200 OK` 返回给客户端。 +3. RustFS 在请求超时、连接中断、部分失败后,是否存在“客户端认为失败但服务端实际写入”的未知状态。 +4. Operator 是否能在故障期间正确观测 Tenant 状态,并在故障解除后回到 Ready。 +5. 
当测试失败时,e2e harness 是否能留下足够的日志、事件、历史记录和 checker 报告用于定位。 + +最重要的判定不是“故障期间所有请求都成功”,而是: + +```text +可以失败,但不能假成功。 +可以超时,但不能返回错误数据。 +故障恢复后,已经确认成功的数据必须一致。 +``` + +## 非目标 + +第一阶段不做这些事: + +- 不替代 RustFS 自身的单元测试、集成测试或存储引擎内部测试。 +- 不直接引入完整 Clojure Jepsen 测试套件。 +- 不在普通开发集群上运行 destructive 测试。 +- 不把性能压测结果当成 correctness 结论。 +- 不在第一版验证所有 S3 线性一致性细节。 +- 不默认测试多 Tenant、跨集群、真实块设备故障。 + +第一阶段的目标是补齐当前最大缺口:**真实故障注入 + 对象内容正确性检查**。 + +## 当前 e2e 可复用基础 + +当前项目已经有适合故障测试的骨架,不需要另起一套测试系统。 + +已有能力: + +| 能力 | 当前位置 | 用途 | +| --- | --- | --- | +| destructive 入口 | `make e2e-live-faults` | 专门运行破坏性故障测试。 | +| fault suite 占位 | `e2e/tests/faults.rs` | 后续真实故障测试入口。 | +| live/destructive/context guard | `e2e/src/framework/live.rs` | 防止误跑到非专用集群。 | +| local PV 准备 | `e2e/src/framework/storage.rs` | 为 RustFS Tenant 准备本地卷。 | +| Tenant/Secret 创建 | `e2e/src/framework/resources.rs` | 创建 e2e namespace、凭据和 Tenant。 | +| S3 port-forward | `e2e/src/framework/port_forward.rs` | 将 Tenant S3 服务暴露到本地。 | +| artifact collector | `e2e/src/framework/artifacts.rs` | 测试失败后收集 Kubernetes 现场。 | + +关键约定: + +- RustFS Pod selector 可使用 `rustfs.tenant=`。 +- RustFS 容器名是 `rustfs`。 +- RustFS 数据卷路径遵循 `/data/rustfs0`、`/data/rustfs1`。 +- Kind worker 将宿主机 `/tmp/rustfs-e2e-storage-*` 挂载到 worker 内部 `/mnt/data`。 +- local PV 最终落在 worker 内部 `/mnt/data/volN`。 + +因此推荐方案是: + +```text +复用当前 e2e harness + + 新增 Chaos Mesh 故障注入模块 + + 新增 S3 workload + + 新增 operation history + + 新增对象存储 checker +``` + +## 总体架构 + +```text +e2e/tests/faults.rs + | + +-- 环境保护:live / destructive / dedicated Kind context + +-- 环境准备:local PV / Tenant / Secret / Ready 等待 + +-- S3 workload:持续读写对象 + +-- history recorder:记录每次操作的开始、结束、结果、hash + +-- nemesis:通过 Chaos Mesh 注入故障 + +-- checker:基于 history 和最终读回结果判断正确性 + +-- artifact collector:失败时收集诊断现场 +``` + +建议新增模块: + +```text +e2e/src/framework/chaos_mesh.rs +e2e/src/framework/fault_scenarios.rs +e2e/src/framework/s3_workload.rs +e2e/src/framework/history.rs +e2e/src/framework/checker.rs +``` + +模块职责: + +| 模块 | 职责 | +| --- | --- | +| 
`chaos_mesh` | 生成、apply、describe、delete Chaos Mesh 资源。 | +| `fault_scenarios` | 定义故障场景名称、默认参数、目标对象和执行顺序。 | +| `s3_workload` | 对 RustFS Tenant S3 endpoint 执行对象读写流量。 | +| `history` | 将每个 S3 操作记录成 JSON Lines。 | +| `checker` | 基于 history 和最终读回结果验证对象存储不变量。 | +| `faults.rs` | 编排完整测试流程,不承载底层实现细节。 | + +## 为什么优先用 Chaos Mesh + +当前场景是在 Kubernetes 中通过 Operator 部署 RustFS,因此故障注入也应该尽量 Kubernetes-native。 + +Chaos Mesh 适合第一阶段,原因: + +- 可以通过 namespace 和 label 精准选择 RustFS Pod。 +- 可以指定容器名,避免影响非目标 sidecar 或其他组件。 +- 支持 `PodChaos`、`NetworkChaos`、`IOChaos`。 +- `IOChaos` 能对指定挂载路径返回 `EIO`,适合模拟磁盘坏块或磁盘 I/O 错误。 +- `IOChaos mistake` 能模拟读写返回错误字节,适合模拟 bit rot / 静默损坏。 +- 以 CRD 形式管理故障,方便 e2e harness apply/delete/describe/collect。 + +第一阶段建议只要求: + +```text +Chaos Mesh 已安装 +iochaos.chaos-mesh.org CRD 存在 +podchaos.chaos-mesh.org CRD 存在 +networkchaos.chaos-mesh.org CRD 存在 +``` + +如果 CRD 不存在,测试应明确失败并给出提示,而不是静默跳过。 + +## 为什么不是直接上完整 Jepsen + +完整 Jepsen 很强,但第一阶段不建议直接引入,原因: + +- 当前项目 e2e 是 Rust-native,直接接入 Clojure Jepsen 成本高。 +- 当前最大的缺口是“没有真实故障注入”和“没有对象内容正确性 checker”。 +- 对象存储第一阶段最关键的不变量可以用更轻量的 checker 覆盖。 +- 先把 `PUT/GET/hash` 这条基本正确性链路跑通,收益更高。 + +因此建议路线是: + +```text +先做 Jepsen-like checker +后续再逐步增强为更完整的并发历史模型 +``` + +Jepsen-like 的含义是: + +- 有 workload。 +- 有 nemesis。 +- 有 operation history。 +- 有明确 correctness model。 +- 有自动 checker。 + +它不是简单 chaos smoke test。 + +## 安全模型 + +故障测试必须默认安全,不能误伤开发者当前 kube context。 + +必须保留并强化这些保护: + +1. 必须设置 `RUSTFS_E2E_LIVE=1`。 +2. 必须设置 `RUSTFS_E2E_DESTRUCTIVE=1`。 +3. 当前 kube context 必须是专用 Kind:`kind-rustfs-e2e`。 +4. 目标 namespace 必须来自 e2e 配置,例如 `rustfs-e2e-smoke`。 +5. 所有故障资源必须带唯一 run id label。 +6. 每个 Chaos 资源必须有 RAII-style cleanup guard。 +7. 正常结束和异常失败都必须 best-effort 删除故障资源。 +8. 默认故障持续时间要短,默认故障比例要小。 +9. 测试失败时必须先收集 artifacts,再清理会影响诊断的信息。 +10. 
destructive 场景保持 `#[ignore]`,只能通过显式 Make 目标执行。 + +建议增加环境变量: + +| 变量 | 默认值 | 作用 | +| --- | --- | --- | +| `RUSTFS_E2E_FAULT_SCENARIO` | `io-eio` | 选择故障场景。 | +| `RUSTFS_E2E_FAULT_DURATION_SECONDS` | `60` | 故障持续时间。 | +| `RUSTFS_E2E_FAULT_PERCENT` | `20` | 支持百分比注入的场景使用。 | +| `RUSTFS_E2E_WORKLOAD_OBJECTS` | `200` | 写入或校验对象数量。 | +| `RUSTFS_E2E_WORKLOAD_CONCURRENCY` | `8` | S3 并发度。 | +| `RUSTFS_E2E_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh 资源所在 namespace。 | + +## 操作历史模型 + +每个客户端可见的 S3 操作都应记录一条 JSON Lines。 + +示例: + +```json +{ + "id": "op-000001", + "scenario": "io-eio", + "kind": "put", + "bucket": "rustfs-fault-e2e", + "key": "fault-e2e/run-123/object-1", + "value_sha256": "abc123", + "size_bytes": 1048576, + "started_at_ms": 1710000000000, + "ended_at_ms": 1710000001234, + "outcome": "ok", + "http_status": 200, + "error": null +} +``` + +`outcome` 建议只保留四类,语义必须清晰: + +| outcome | 含义 | checker 处理 | +| --- | --- | --- | +| `ok` | 客户端收到明确成功响应。 | 作为强正确性输入。 | +| `failed` | 客户端收到明确失败响应。 | 不要求最终存在。 | +| `timeout` | 客户端超时,不知道服务端是否完成。 | 作为 unknown 处理。 | +| `unknown` | 连接中断、body 未读完、port-forward 中断等。 | 作为 unknown 处理。 | + +第一版 checker 只对 `ok` 的 `PUT` 做强校验。 + +对于 `timeout` 和 `unknown` 的写入: + +- 最终存在可以接受。 +- 最终不存在也可以接受。 +- 需要在 report 中单独列出,方便后续分析。 + +这样可以避免把网络中断导致的“未知成功”误判为 RustFS 数据错误。 + +## Checker 不变量 + +### 不变量 1:成功写入的数据不能丢 + +如果客户端收到了成功写入: + +```text +PUT key value_hash=H -> ok +``` + +故障解除并等待 Tenant 恢复后,必须满足: + +```text +GET key -> 200 +sha256(body) == H +``` + +否则 hard fail。 + +### 不变量 2:成功读取不能返回错误内容 + +任何一次 `GET` 只要返回 `200 OK`,并且该 key 有已知成功写入值,则: + +```text +sha256(body) == expected_hash +``` + +如果 `GET` 返回 `200` 但 hash 不一致,这是最高优先级失败。 + +这比“请求是否成功”更重要,因为对象存储最危险的问题不是失败,而是**成功返回错误数据**。 + +### 不变量 3:明确失败的写入不要求存在 + +如果 `PUT` 返回明确失败: + +```text +PUT key -> failed +``` + +那么最终这个 key 存在或不存在,都不作为第一版 hard fail。 + +### 不变量 4:未知结果单独记录 + +如果 `PUT` 是: + +```text +timeout +unknown +``` + +则 checker 记录它最终是否 materialized,但不作为第一版 hard fail。 + +### 不变量 5:恢复后的 LIST 先作为 warning + +故障解除并等待 
Tenant Ready 后: + +```text +LIST prefix +``` + +理论上应包含所有成功 `PUT` 且未成功 `DELETE` 的 key。 + +第一版可以将 LIST 缺失作为 warning,而不是 hard fail。等 RustFS 对 LIST 一致性的目标语义确认后,再升级为 hard fail。 + +## S3 workload 设计 + +第一阶段建议使用 Rust 代码实现 S3 workload,而不是依赖外部 `aws` 或 `mc` CLI。 + +原因: + +- 操作历史更容易结构化记录。 +- 请求 timeout、transport error、body error 更容易准确分类。 +- 对象 hash 和操作结果可以在同一进程中关联。 +- CI 和本地依赖更少。 +- 后续可以扩展为并发 workload 和 checker replay。 + +建议在 `e2e/Cargo.toml` 后续增加: + +```text +aws-sdk-s3 +aws-config +aws-credential-types +sha2 +rand +hex +``` + +第一版 workload 操作: + +```text +CreateBucket +PutObject +GetObject +HeadObject +ListObjectsV2 +DeleteObject +``` + +第一版建议使用唯一 key,不要并发覆盖同一个 key。 + +key 格式: + +```text +fault-e2e/<run-id>/small/<object-id> +fault-e2e/<run-id>/medium/<object-id> +fault-e2e/<run-id>/large/<object-id> +``` + +对象大小建议: + +| 类型 | 大小 | +| --- | --- | +| small | 4 KiB | +| medium | 64 KiB | +| large | 1 MiB | +| xlarge | 8 MiB | + +第一版不建议默认使用太大对象,避免 e2e 运行过慢。 + +## 初始故障场景优先级 + +| 优先级 | 场景 | 后端 | 目的 | +| --- | --- | --- | --- | +| P0 | `io-eio` | Chaos Mesh `IOChaos` | 模拟单个 RustFS 数据卷读写返回 `EIO`。 | +| P0 | `pod-kill-one` | Chaos Mesh `PodChaos` | 模拟一个 RustFS Pod 死亡和 StatefulSet 恢复。 | +| P0 | `operator-restart` | Kubernetes delete/rollout | 验证控制面重启后状态恢复。 | +| P1 | `network-partition-one` | Chaos Mesh `NetworkChaos` | 模拟一个 RustFS Pod 与集群网络分区。 | +| P1 | `io-read-mistake` | Chaos Mesh `IOChaos` | 模拟读路径返回错误字节,即静默坏块。 | +| P1 | `disk-full` | local PV 填充或 IOChaos | 验证单盘空间耗尽行为。 | +| P2 | `direct-pv-corruption` | Kind worker 文件系统改写 | 模拟已经落盘的数据被破坏。 | +| P2 | `worker-restart` | Docker restart Kind worker | 模拟节点重启。 | +| P3 | `dm-flakey` | device mapper / loop device | 更接近真实块设备故障。 | +| P3 | `warp-under-chaos` | MinIO Warp + chaos | 故障期间性能退化分析。 | + +## P0 场景:磁盘 EIO + +这是建议最先实现的场景。 + +它能直接验证 RustFS 在磁盘读写失败下是否会丢失已提交对象,且非常适合当前 Kind local PV 结构。 + +目标: + +```text +让某一个 RustFS Pod 的某一块数据卷,在部分 READ/WRITE 调用上返回 EIO。 +``` + +Chaos Mesh `IOChaos` 示例: + +```yaml +apiVersion: chaos-mesh.org/v1alpha1 +kind: IOChaos +metadata: + name: rustfs-e2e-io-eio + namespace: 
chaos-mesh + labels: + rustfs-e2e/run-id: "" +spec: + action: fault + mode: one + selector: + namespaces: + - rustfs-e2e-smoke + labelSelectors: + rustfs.tenant: e2e-tenant + containerNames: + - rustfs + volumePath: /data/rustfs0 + path: /data/rustfs0/**/* + methods: + - READ + - WRITE + errno: 5 + percent: 20 + duration: "60s" +``` + +关键点: + +- `volumePath` 是 RustFS 容器内挂载路径,不是宿主机 `/tmp/rustfs-e2e-storage-*`。 +- `errno: 5` 对应 Linux `EIO`。 +- `mode: one` 表示只选择一个匹配 Pod,避免第一版故障面过大。 +- `percent: 20` 表示只影响部分 I/O 调用,避免全量不可用。 + +预期行为: + +- 故障期间 S3 请求可以失败、超时或返回 5xx。 +- RustFS 不能把错误数据作为成功响应返回。 +- 已经成功 `PUT` 的对象,在故障解除后必须 hash 一致。 +- Tenant 可以短暂 Degraded,但最终应回到 Ready。 +- Chaos 资源必须被删除。 + +## P1 场景:静默坏块 / bit rot + +EIO 是显式错误,比较容易处理;更危险的是静默损坏。 + +静默坏块的模拟方式: + +```text +磁盘读操作看起来成功,但返回的字节是错的。 +``` + +Chaos Mesh `IOChaos mistake` 示例: + +```yaml +apiVersion: chaos-mesh.org/v1alpha1 +kind: IOChaos +metadata: + name: rustfs-e2e-io-read-mistake + namespace: chaos-mesh +spec: + action: mistake + mode: one + selector: + namespaces: + - rustfs-e2e-smoke + labelSelectors: + rustfs.tenant: e2e-tenant + containerNames: + - rustfs + volumePath: /data/rustfs0 + path: /data/rustfs0/**/* + methods: + - READ + mistake: + filling: random + maxOccurrences: 1 + maxLength: 4096 + percent: 5 + duration: "60s" +``` + +预期行为: + +- RustFS 可以返回错误。 +- RustFS 可以从健康 shard 修复或读取。 +- RustFS 不能返回 `200 OK` 且 body hash 错误。 + +这个场景是对象存储非常关键的测试,因为它验证的是“不要静默返回坏数据”。 + +## P2 场景:直接破坏 local PV 文件 + +当前 Kind worker 将宿主机目录挂载到 worker 内部: + +```text +/tmp/rustfs-e2e-storage-1 -> /mnt/data +/tmp/rustfs-e2e-storage-2 -> /mnt/data +/tmp/rustfs-e2e-storage-3 -> /mnt/data +``` + +local PV 位于 worker 内部: + +```text +/mnt/data/vol1 +/mnt/data/vol2 +... 
+``` + +后续可以通过直接改写某个 PV 文件模拟已经落盘的数据损坏: + +```bash +docker exec rustfs-e2e-worker sh -c ' + f=$(find /mnt/data/vol1 -type f -size +4096c | head -n1) + dd if=/dev/urandom of="$f" bs=4096 count=1 seek=1 conv=notrunc +' +``` + +这个场景比 `IOChaos mistake` 更接近真实“落盘数据已经损坏”,但也更危险: + +- 可能破坏 RustFS 元数据。 +- 可能导致恢复语义更复杂。 +- 需要更明确的预期结果。 +- 适合作为 P2,不适合作为第一版。 + +## 测试流程 + +第一版完整流程建议如下: + +```text +1. 读取 E2eConfig +2. 检查 RUSTFS_E2E_LIVE=1 +3. 检查 RUSTFS_E2E_DESTRUCTIVE=1 +4. 检查 kube context == kind-rustfs-e2e +5. 检查 Chaos Mesh CRD 存在 +6. 准备 local PV +7. 创建 e2e Tenant +8. 等待 Tenant Ready +9. 启动 Tenant S3 port-forward +10. 创建测试 bucket +11. 预写入一批对象,记录 key 和 sha256 +12. 启动后台 verifier 持续读取已提交对象 +13. apply Chaos Mesh 故障资源 +14. 故障期间继续执行混合 S3 workload +15. delete Chaos Mesh 故障资源 +16. 等待 Tenant 再次 Ready +17. 对所有成功 PUT 对象做最终 GET + sha256 校验 +18. 生成 checker report +19. 成功则清理测试资源 +20. 失败则收集 Kubernetes artifacts +``` + +伪代码: + +```rust +#[tokio::test] +#[ignore = "destructive fault scenario; run through `make e2e-live-faults`"] +async fn fault_io_eio_preserves_committed_objects() -> Result<()> { + let config = E2eConfig::from_env(); + + live::require_live_enabled(&config)?; + live::ensure_dedicated_context(&config)?; + live::require_destructive_enabled(&config)?; + chaos_mesh::require_iochaos_crd(&config)?; + + let result = async { + storage::prepare_local_storage(&config)?; + resources::apply_smoke_tenant_resources(&config)?; + + let client = kube_client::default_client().await?; + let tenants = kube_client::tenant_api(client.clone(), &config.test_namespace); + wait::wait_for_tenant_ready(tenants, &config.tenant_name, config.timeout).await?; + + let mut port_forward = PortForwardSpec::start_tenant_io(&config)?; + let s3 = s3_workload::Client::from_tenant_port_forward(&config, &mut port_forward).await?; + + let mut history = history::Recorder::new("io-eio")?; + s3.create_bucket().await?; + s3.prefill_objects(&mut history).await?; + + let chaos = chaos_mesh::IoChaos::eio_on_rustfs_volume( + 
&config, + "/data/rustfs0", + 20, + Duration::from_secs(60), + ); + + let guard = chaos.apply()?; + s3.run_mixed_workload(&mut history).await?; + drop(guard); + + wait::wait_for_tenant_ready( + kube_client::tenant_api(client, &config.test_namespace), + &config.tenant_name, + config.timeout, + ) + .await?; + + let report = checker::check_s3_history(&s3, &history).await?; + report.require_success()?; + + Ok(()) + } + .await; + + if result.is_err() { + ArtifactCollector::new(&config.artifacts_dir) + .collect_kubernetes_snapshot("fault_io_eio_preserves_committed_objects", &config)?; + } + + result +} +``` + +## Chaos Mesh 模块设计 + +`chaos_mesh.rs` 建议提供这些能力: + +```rust +pub fn require_iochaos_crd(config: &E2eConfig) -> Result<()>; +pub fn require_podchaos_crd(config: &E2eConfig) -> Result<()>; +pub fn require_networkchaos_crd(config: &E2eConfig) -> Result<()>; + +pub struct ChaosGuard { + name: String, + namespace: String, + kind: String, +} + +impl Drop for ChaosGuard { + fn drop(&mut self) { + // best-effort kubectl delete + } +} + +pub struct IoChaosSpec { + pub name: String, + pub target_namespace: String, + pub tenant_name: String, + pub container_name: String, + pub volume_path: String, + pub methods: Vec<String>, + pub action: IoChaosAction, + pub percent: u8, + pub duration: Duration, +} +``` + +实现要求: + +- 所有 `kubectl` 命令必须通过现有 `framework::kubectl` 和 `framework::command` 边界。 +- apply 前检查 CRD 是否存在。 +- apply 后可以 `kubectl describe` 保存到 artifacts。 +- 删除时必须 best-effort,不应 panic。 +- 每个资源都带 `rustfs-e2e/run-id` label。 +- 允许按 label 清理上一次异常残留。 + +## S3 workload 模块设计 + +`s3_workload.rs` 建议提供: + +```rust +pub struct S3WorkloadClient { + bucket: String, + endpoint: String, + timeout: Duration, +} + +pub struct ObjectSpec { + key: String, + size_bytes: usize, + sha256: String, +} + +impl S3WorkloadClient { + pub async fn create_bucket(&self) -> Result<()>; + pub async fn put_object(&self, object: &ObjectSpec, history: &mut Recorder) -> Result<()>; + pub async fn get_object(&self, key: 
&str, history: &mut Recorder) -> Result<Option<Vec<u8>>>; + pub async fn head_object(&self, key: &str, history: &mut Recorder) -> Result<()>; + pub async fn list_prefix(&self, prefix: &str, history: &mut Recorder) -> Result<Vec<String>>; +} +``` + +注意点: + +- 每个请求必须有明确 timeout。 +- 不要在 workload 层做无限 retry。 +- 如果要 retry,必须记录每次尝试,而不是只记录最终结果。 +- body 读取失败不能记为 `failed`,应记为 `unknown`。 +- `PUT` 返回成功后才进入 committed set。 + +## Checker report 设计 + +最终 report 建议保存为 JSON: + +```json +{ + "scenario": "io-eio", + "run_id": "run-123", + "committed_puts": 200, + "missing_committed_objects": [], + "hash_mismatches": [], + "successful_corrupted_reads": [], + "unknown_writes_materialized": [], + "list_warnings": [], + "tenant_recovered": true, + "passed": true +} +``` + +hard fail 条件: + +1. 成功 `PUT` 的对象最终 `GET` 不到。 +2. 成功 `PUT` 的对象最终 `GET` hash 不一致。 +3. 任意成功 `GET` 返回的 body hash 与预期不一致。 +4. 故障解除后 Tenant 在 timeout 内没有回到 Ready。 +5. Chaos 资源删除失败并仍然残留。 +6. RustFS Pod 进入不可恢复 CrashLoopBackOff。 + +允许出现: + +1. 故障期间 S3 请求失败。 +2. 故障期间 S3 请求 timeout。 +3. 故障期间 port-forward 连接中断。 +4. Tenant 短暂 Degraded。 +5. unknown write 最终存在或不存在。 +6. 
故障期间 LIST 不完整。 + +## artifacts 设计 + +每次 fault run 至少应该保存: + +```text +history.jsonl +checker-report.json +chaos-manifest.yaml +chaos-describe.txt +events.yaml +pv-paths.txt +rustfs-pods-current.log +rustfs-pods-previous.log +tenant-describe.txt +pods-describe.txt +``` + +其中最关键的是: + +- `history.jsonl`:复盘客户端看到的世界。 +- `checker-report.json`:复盘 correctness verdict。 +- `rustfs-pods-current.log`:定位 RustFS 如何处理故障。 +- `events.yaml`:定位 Kubernetes 层是否出现调度、挂载、重启问题。 +- `pv-paths.txt`:定位具体 PVC/PV/worker/hostPath 映射。 + +## Makefile 入口 + +保留现有总入口: + +```bash +make e2e-live-faults +``` + +后续可以增加聚焦入口,方便本地调试: + +```makefile +e2e-live-faults-io: + RUSTFS_E2E_LIVE=1 RUSTFS_E2E_DESTRUCTIVE=1 RUSTFS_E2E_FAULT_SCENARIO=io-eio \ + cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --nocapture + +e2e-live-faults-pod: + RUSTFS_E2E_LIVE=1 RUSTFS_E2E_DESTRUCTIVE=1 RUSTFS_E2E_FAULT_SCENARIO=pod-kill-one \ + cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --nocapture +``` + +普通开发检查仍然使用: + +```bash +make e2e-check +make pre-commit +``` + +不要把 destructive 场景混进普通 `make e2e-live-run`。 + +## 第一版最小可交付范围 + +建议第一版只交付一个真实场景: + +```text +fault_io_eio_preserves_committed_objects +``` + +它应该包含: + +1. live/destructive/context guard。 +2. Chaos Mesh `IOChaos` CRD 检查。 +3. Tenant 创建和 Ready 等待。 +4. S3 bucket 创建。 +5. S3 prefill 对象并记录 hash。 +6. apply `IOChaos fault errno=5`。 +7. 故障期间持续读写。 +8. delete `IOChaos`。 +9. Tenant 恢复 Ready 等待。 +10. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 +11. history 和 checker report 输出。 +12. 
失败时 artifacts 收集。 + +这个版本已经能证明系统从“占位骨架”升级为“真实故障注入 + 数据正确性校验”。 + +## 分阶段实施计划 + +### Phase 1:磁盘 EIO 基线 + +- 新增 `chaos_mesh`。 +- 新增 `history`。 +- 新增 `checker`。 +- 新增 `s3_workload`。 +- 实现 `io-eio`。 +- 使用唯一对象 key。 +- 默认小对象数、短持续时间、低故障比例。 + +验收: + +- `make e2e-check` 通过。 +- `make e2e-live-faults` 可在专用 Kind 集群运行 `io-eio`。 +- 如果 committed object 丢失,测试失败。 +- 如果 successful GET 返回错误字节,测试失败。 + +### Phase 2:进程和网络故障 + +- 新增 `pod-kill-one`。 +- 新增 `network-partition-one`。 +- 复用同一套 workload/history/checker。 + +验收: + +- Pod 死亡后 StatefulSet 能恢复。 +- 网络分区期间可以失败,但不能返回错误数据。 +- 网络恢复后 committed object 可读回。 + +### Phase 3:静默损坏 + +- 新增 `io-read-mistake`。 +- 新增 direct local-PV corruption。 +- 强化 hash mismatch 和 repair behavior 报告。 + +验收: + +- RustFS 对错误字节返回错误或修复。 +- 不允许 `200 OK` 返回错误对象内容。 + +### Phase 4:长稳和性能 + +- 增加随机组合故障。 +- 增加长时间 soak。 +- 可选接入 MinIO Warp 或 COSBench。 + +注意: + +- 性能结果和 correctness verdict 必须分离。 +- 压测失败不等于数据错误。 +- 数据错误永远是 hard fail。 + +### Phase 5:块设备级故障 + +- 研究 `dm-flakey`、`dm-error`、loop device-backed PV。 +- 只在 Linux runner 或专用环境启用。 +- 不进入默认本地 Kind 流程。 + +这个阶段更接近真实磁盘坏块,但环境成本明显更高。 + +## 与其他测试框架的关系 + +| 框架或工具 | 当前项目定位 | +| --- | --- | +| 当前 e2e harness | Operator 编排、Tenant 生命周期、artifacts 收集。 | +| Chaos Mesh | Kubernetes-native nemesis,负责制造故障。 | +| Jepsen-like checker | 判断对象存储 correctness,不制造故障。 | +| MinIO Mint | 后续用于 S3 API 兼容性,不作为故障 checker。 | +| MinIO Warp | 后续用于故障期间性能压测,不作为 correctness verdict。 | +| COSBench | 后续用于大规模对象存储压测。 | +| Ceph s3-tests | 后续用于 S3 行为兼容性参考。 | +| Ceph Teuthology | 借鉴大规模编排思想,当前不直接引入。 | +| Ozone fault injection | 借鉴 FUSE/agent 精细磁盘故障思想,作为后续增强。 | + +当前最优组合: + +```text +RustFS Operator e2e + + Chaos Mesh + + Rust-native S3 workload + + Jepsen-like object checker +``` + +## 实现注意事项 + +- 所有外部调用必须有 timeout。 +- workload 不要无限 retry。 +- retry 必须记录每次尝试。 +- 不要把 transport unknown 错误归类为 definite failed。 +- 不要把 performance degradation 误判为 correctness failure。 +- 故障资源必须总是 best-effort cleanup。 +- artifacts 中不要记录密钥明文。 +- 第一版避免覆盖同一个 key,降低 checker 复杂度。 +- 后续再逐步加入 same-key 
overwrite、delete、multipart、LIST consistency。 + +## 参考资料 + +- [Chaos Mesh IOChaos](https://chaos-mesh.org/docs/simulate-io-chaos-on-kubernetes/) +- [Chaos Mesh Documentation](https://chaos-mesh.org/docs/) +- [Jepsen](https://jepsen.io/) +- [MinIO Warp](https://docs.min.io/warp/) +- [COSBench](https://github.com/intel-cloud/cosbench) +- [Ceph s3-tests](https://github.com/ceph/s3-tests) From 5a1da86413cbbd0f6e5b518f42781fcf01cecd15 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Tue, 16 Jun 2026 23:08:40 +0800 Subject: [PATCH 02/20] test(chaos): add test arch --- Makefile | 3 +- README.md | 2 +- e2e/Cargo.lock | 1136 ++++++++++++++++++++++++-- e2e/Cargo.toml | 5 + e2e/FAULT_INJECTION_TEST_PLAN.md | 92 ++- e2e/src/cases/faults.rs | 40 + e2e/src/cases/mod.rs | 5 + e2e/src/framework/chaos_mesh.rs | 359 ++++++++ e2e/src/framework/checker.rs | 218 +++++ e2e/src/framework/config.rs | 44 + e2e/src/framework/fault_scenarios.rs | 92 +++ e2e/src/framework/history.rs | 195 +++++ e2e/src/framework/live.rs | 9 + e2e/src/framework/mod.rs | 5 + e2e/src/framework/resources.rs | 4 + e2e/src/framework/s3_workload.rs | 446 ++++++++++ e2e/tests/faults.rs | 401 ++++++++- 17 files changed, 2961 insertions(+), 95 deletions(-) create mode 100644 e2e/src/cases/faults.rs create mode 100644 e2e/src/framework/chaos_mesh.rs create mode 100644 e2e/src/framework/checker.rs create mode 100644 e2e/src/framework/fault_scenarios.rs create mode 100644 e2e/src/framework/history.rs create mode 100644 e2e/src/framework/s3_workload.rs diff --git a/Makefile b/Makefile index ec0028e..b7df9b7 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ help: @echo " make e2e-check - Check Rust-native e2e harness (fmt + test + clippy)" @echo " make e2e-live-create - Clean dedicated storage, recreate live Kind environment, install cert-manager, and load e2e image" @echo " make e2e-live-run - Run all non-destructive live suites in the existing live environment" - @echo " make 
e2e-live-faults - Run destructive live fault suites with RUSTFS_E2E_DESTRUCTIVE=1" + @echo " make e2e-live-faults - Run destructive fault suites against the current kubectl context" @echo " make e2e-live-update - Rebuild image and update the live environment (load + rollout)" @echo " make e2e-live-delete - Delete live Kind environment and clean dedicated storage" @@ -131,6 +131,7 @@ e2e-live-run: @echo "configured live e2e suites passed." e2e-live-faults: + @echo "running destructive fault e2e against current kubectl context: $$(kubectl config current-context)" RUSTFS_E2E_LIVE=1 RUSTFS_E2E_DESTRUCTIVE=1 cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --test-threads=$(E2E_TEST_THREADS) --nocapture e2e-live-update: diff --git a/README.md b/README.md index f1d2246..abece1b 100755 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ From the repo root: | `make e2e-check` | Validate the e2e harness without creating a live cluster. | | `make e2e-live-create` | Build e2e images, recreate the dedicated Kind cluster, install cert-manager, and load images. | | `make e2e-live-run` | Deploy the dev control plane and run all non-destructive live suites. | -| `make e2e-live-faults` | Run destructive live fault suites with `RUSTFS_E2E_DESTRUCTIVE=1`. | +| `make e2e-live-faults` | Run destructive fault suites against the current kubectl context. | | `make e2e-live-update` | Rebuild images, reload them into Kind, and roll out control-plane deployments. | | `make e2e-live-delete` | Delete the dedicated Kind cluster and its local storage. 
| diff --git a/e2e/Cargo.lock b/e2e/Cargo.lock index fb8ae15..8ca0435 100644 --- a/e2e/Cargo.lock +++ b/e2e/Cargo.lock @@ -110,6 +110,15 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "async-broadcast" version = "0.7.2" @@ -179,6 +188,49 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-config" +version = "1.8.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33f815b73a3899c03b380d543532e5865f230dce9678d108dc10732a8682275" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.4.0", + "sha1 0.10.6", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + [[package]] name = "aws-lc-rs" version = "1.17.0" @@ -201,6 +253,414 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "aws-runtime" +version = "1.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c9b9de216a988dd54b754a82a7660cfe14cee4f6782ae4524470972fa0ccb39" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", 
+ "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.137.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd7213994e2ff9382ff100403b78c30d1b74cdfcd8fa9d0d1dc3a94a5c4874" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac 0.13.0", + "http 0.2.12", + "http 1.4.0", + "http-body 1.0.1", + "lru", + "percent-encoding", + "regex-lite", + "sha2 0.11.0", + "tracing", + "url", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.102.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c82b3ac19f1431854f7ace3a7531674633e286bfdde21976893bfee36fd493b" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.104.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "321000d2b4c5519ee573f73167f612efd7329322d9b26969ad1979f0427f1913" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + 
"aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.107.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0d328ba962af23ecfa3c9f23b98d3d35e325fa218d7f13d17a6bf522f8a560" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bae38512beae0ffee7010fc24e7a8a123c53efdfef42a61e80fda4882418dc71" +dependencies = [ + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint", + "form_urlencoded", + "hex", + "hmac 0.13.0", + "http 0.2.12", + "http 1.4.0", + "p256", + "percent-encoding", + "sha2 0.11.0", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-checksums" +version = "0.64.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e8e65f4f81fcccdeb6c3eca2af17ac21d421a1786a26a394aecf421d616d3a" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "md-5", + "pin-project-lite", + "sha1 0.11.0", + "sha2 0.11.0", + 
"tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78d8391e65fcea47c586a22e1a41f173b38615b112b2c6b7a44e80cec3e6b706" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3ef8931ad1c98aa6a55b4256f847f3116090819844e0dd41ea682cac5dd2d3" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.14", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.9.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.9", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.40", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "701a947f4797e52a911e114a898667c746c39feea467bbd1abd7b3721f702ffa" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + 
"aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9db177daa6ba8afb9ee1aefcf548c907abcf52065e394ee11a92780057fe0e8c" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api-macros", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "aws-smithy-schema" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "http 1.4.0", +] + +[[package]] +name = "aws-smithy-types" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "32b42fcf341259d85ca10fac9a2f6448a8ec691c6955a18e45bc3b71a85fab85" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16bf10b03a3c01e6b3b7d47cd964e873ffe9e7d4e80fad16bd4c077cb068531" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "rustc_version", + "tracing", +] + [[package]] name = "axum" version = "0.7.9" @@ -212,10 +672,10 @@ dependencies = [ "axum-macros", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "itoa", "matchit", @@ -245,8 +705,8 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -279,12 +739,34 @@ dependencies = [ "tokio", ] +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bitflags" version = "2.11.1" @@ -300,6 +782,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" +dependencies = [ + "hybrid-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -312,6 +803,16 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "cc" version = "1.2.62" @@ -399,6 +900,12 @@ dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a" + [[package]] name = "colorchoice" version = "1.0.5" @@ -431,6 +938,18 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + 
[[package]] name = "const-str" version = "1.1.0" @@ -512,6 +1031,25 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crc-fast" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e75b2483e97a5a7da73ac68a05b629f9c53cff58d8ed1c77866079e18b00dba5" +dependencies = [ + "digest 0.10.7", + "spin", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -527,6 +1065,18 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + [[package]] name = "crypto-common" version = "0.1.7" @@ -537,6 +1087,24 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + [[package]] name = "darling" version = "0.21.3" @@ -572,6 +1140,17 @@ dependencies = [ "syn", ] +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid 
0.9.6", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.8" @@ -619,11 +1198,24 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", + "block-buffer 0.10.4", + "const-oid 0.9.6", + "crypto-common 0.1.7", "subtle", ] +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.1", + "const-oid 0.10.2", + "crypto-common 0.2.2", + "ctutils", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -656,6 +1248,20 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der", + "digest 0.10.7", + "elliptic-curve", + "rfc6979", + "signature", + "spki", +] + [[package]] name = "educe" version = "0.6.0" @@ -674,6 +1280,26 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pem-rfc7468", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "enum-ordinalize" version = "4.3.2" @@ -735,7 +1361,17 @@ dependencies = [ name = "fastrand" 
version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] [[package]] name = "find-msvc-tools" @@ -765,6 +1401,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -876,6 +1518,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", + "zeroize", ] [[package]] @@ -943,6 +1586,36 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.14" @@ -954,7 +1627,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.4.0", "indexmap", "slab", "tokio", @@ 
-970,7 +1643,18 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", ] [[package]] @@ -997,7 +1681,16 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest", + "digest 0.10.7", +] + +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.3", ] [[package]] @@ -1020,6 +1713,17 @@ dependencies = [ "windows-link", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -1030,6 +1734,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -1037,7 +1752,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -1048,8 +1763,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" 
dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -1071,6 +1786,39 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.9.0" @@ -1081,9 +1829,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", - "http", - "http-body", + "h2 0.4.14", + "http 1.4.0", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -1093,20 +1841,35 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http", - "hyper", + "http 1.4.0", + "hyper 1.9.0", "hyper-util", "log", - "rustls", + "rustls 0.23.40", 
"rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots", ] @@ -1117,7 +1880,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper", + "hyper 1.9.0", "hyper-util", "pin-project-lite", "tokio", @@ -1134,14 +1897,14 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", - "hyper", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.9.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -1432,18 +2195,18 @@ dependencies = [ "either", "futures", "home", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.9.0", + "hyper-rustls 0.27.9", "hyper-timeout", "hyper-util", "jsonpath-rust", "k8s-openapi", "kube-core", "pem", - "rustls", + "rustls 0.23.40", "secrecy", "serde", "serde_json", @@ -1465,7 +2228,7 @@ dependencies = [ "chrono", "derive_more", "form_urlencoded", - "http", + "http 1.4.0", "json-patch", "k8s-openapi", "schemars", @@ -1608,6 +2371,15 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +dependencies = [ + "hashbrown 0.16.1", +] + [[package]] name = "lru-slab" version = "0.1.2" @@ -1629,6 +2401,16 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "md-5" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" +dependencies = [ + "cfg-if", + "digest 0.11.3", +] + [[package]] name = "memchr" version = "2.8.0" @@ -1687,6 +2469,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1735,10 +2526,10 @@ dependencies = [ "const-str", "futures", "hex", - "hmac", + "hmac 0.12.1", "hostname", - "http", - "hyper", + "http 1.4.0", + "hyper 1.9.0", "hyper-util", "k8s-openapi", "kube", @@ -1746,19 +2537,19 @@ dependencies = [ "rcgen", "reqwest", "ring", - "rustls", + "rustls 0.23.40", "rustls-pemfile", - "rustls-webpki", + "rustls-webpki 0.103.13", "schemars", "serde", "serde_json", "serde_yaml_ng", - "sha2", + "sha2 0.10.9", "shadow-rs", "snafu", "strum", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-stream", "tokio-util", "tower", @@ -1779,6 +2570,24 @@ dependencies = [ "num-traits", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "p256" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2 0.10.9", +] + [[package]] name = "parking" version = "2.2.1" @@ -1818,6 +2627,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1864,7 +2682,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", - "sha2", + "sha2 0.10.9", ] [[package]] @@ -1893,6 +2711,22 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.33" @@ -1933,6 +2767,15 @@ dependencies = [ "syn", ] +[[package]] +name = "primeorder" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" +dependencies = [ + "elliptic-curve", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -1970,8 +2813,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", - "socket2", + "rustls 0.23.40", + "socket2 0.6.3", "thiserror", "tokio", "tracing", @@ -1990,7 +2833,7 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls", + "rustls 0.23.40", "rustls-pki-types", "slab", "thiserror", @@ -2008,7 +2851,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] @@ -2041,7 +2884,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.5", ] [[package]] @@ -2051,7 +2894,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -2128,6 +2980,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.10" @@ -2146,25 +3004,25 @@ dependencies = [ "cookie_store", "futures-core", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.9.0", + "hyper-rustls 0.27.9", "hyper-util", "js-sys", "log", "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.40", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http", @@ -2177,6 +3035,16 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac 0.12.1", + "subtle", +] + [[package]] name = "ring" version = "0.17.14" @@ -2221,7 +3089,7 @@ version = "8.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5bcdef0be6fe7f6fa333b1073c949729274b05f123a0ad7efcb8efd878e5c3b1" dependencies = [ - "sha2", + "sha2 0.10.9", "walkdir", ] @@ -2245,8 +3113,12 @@ name = "rustfs-operator-e2e" version = "0.1.0" dependencies = [ "anyhow", + "aws-config", + "aws-credential-types", + "aws-sdk-s3", "axum", "futures", + "hex", "k8s-openapi", "kube", "operator", @@ -2254,6 +3126,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml_ng", + "sha2 0.10.9", "tempfile", "tokio", "tower", @@ -2273,6 +3146,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.40" @@ -2284,7 +3169,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.13", "subtle", "zeroize", ] @@ -2320,6 +3205,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.13" @@ -2393,6 +3288,30 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + 
"base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + [[package]] name = "secrecy" version = "0.10.3" @@ -2544,6 +3463,28 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + [[package]] name = "sha2" version = "0.10.9" @@ -2551,8 +3492,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", - "digest", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", ] [[package]] @@ -2593,6 +3545,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest 0.10.7", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.9" @@ -2634,6 +3596,16 @@ dependencies = [ "syn", ] +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] 
name = "socket2" version = "0.6.3" @@ -2644,6 +3616,22 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -2825,7 +3813,7 @@ dependencies = [ "mio", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -2841,13 +3829,23 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.40", "tokio", ] @@ -2908,8 +3906,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "http-range-header", "httpdate", @@ -3086,6 +4084,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -3175,6 +4179,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 
+[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "walkdir" version = "2.5.0" @@ -3673,6 +4683,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "yasna" version = "0.5.2" diff --git a/e2e/Cargo.toml b/e2e/Cargo.toml index d8e2c64..5793d7c 100644 --- a/e2e/Cargo.toml +++ b/e2e/Cargo.toml @@ -8,11 +8,16 @@ publish = false operator = { path = ".." } anyhow = "1" +aws-config = "1" +aws-credential-types = "1" +aws-sdk-s3 = "1" axum = { version = "0.7", features = ["macros"] } futures = "0.3.31" +hex = "0.4" serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.148" serde_yaml_ng = "0.10.0" +sha2 = "0.10" tempfile = "3" tokio = { version = "1.49.0", features = ["rt-multi-thread", "macros", "process", "time", "io-util"] } tower = "0.5" diff --git a/e2e/FAULT_INJECTION_TEST_PLAN.md b/e2e/FAULT_INJECTION_TEST_PLAN.md index 6038587..9c7035d 100644 --- a/e2e/FAULT_INJECTION_TEST_PLAN.md +++ b/e2e/FAULT_INJECTION_TEST_PLAN.md @@ -27,6 +27,19 @@ limitations under the License. 
也就是说,这套测试不是单纯验证 Operator 是否能拉起 StatefulSet,而是通过 Operator 部署出来的 RustFS 集群来验证 RustFS 在故障下的数据正确性。 +## 边界澄清 + +这套故障测试的测试对象是 **Operator 编排出的 RustFS workload**,不是 Operator 控制面自身。 + +边界如下: + +- Operator 只负责把 RustFS Tenant、Service、PVC、Secret 等测试环境编排出来。 +- 故障注入作用于 RustFS Pod、RustFS 容器、RustFS 数据卷和 RustFS 服务路径。 +- checker 判断的是 RustFS 对象读写正确性:已经确认成功写入的数据不能丢,成功读取不能返回错误内容。 +- Operator 状态只作为恢复观察信号,例如故障解除后 Tenant 是否重新回到 Ready;它不是第一阶段 correctness verdict 的主体。 +- 不在 Tenant Console 或生产 Operator Console 中提供 destructive fault test 入口。 +- Chaos Mesh Dashboard 可以作为观察 Chaos 资源的外部工具,但 e2e 的权威输出是 `history.jsonl`、`checker-report.json` 和 Kubernetes artifacts。 + ## 目标 故障注入测试需要回答这些问题: @@ -34,7 +47,7 @@ limitations under the License. 1. RustFS 在 Pod、节点、网络、磁盘 I/O 故障下,已经成功写入的数据是否仍然存在。 2. RustFS 是否会在磁盘损坏或网络分区后,把错误对象内容以 `200 OK` 返回给客户端。 3. RustFS 在请求超时、连接中断、部分失败后,是否存在“客户端认为失败但服务端实际写入”的未知状态。 -4. Operator 是否能在故障期间正确观测 Tenant 状态,并在故障解除后回到 Ready。 +4. Operator 编排出的 Tenant 是否能在故障解除后回到 Ready,作为 RustFS workload 恢复观察信号。 5. 当测试失败时,e2e harness 是否能留下足够的日志、事件、历史记录和 checker 报告用于定位。 最重要的判定不是“故障期间所有请求都成功”,而是: @@ -51,10 +64,13 @@ limitations under the License. - 不替代 RustFS 自身的单元测试、集成测试或存储引擎内部测试。 - 不直接引入完整 Clojure Jepsen 测试套件。 -- 不在普通开发集群上运行 destructive 测试。 +- 不在共享开发集群或生产集群上运行 destructive 测试;真实 Kubernetes 集群也必须使用专用测试 namespace、Tenant 和 StorageClass。 - 不把性能压测结果当成 correctness 结论。 - 不在第一版验证所有 S3 线性一致性细节。 - 不默认测试多 Tenant、跨集群、真实块设备故障。 +- 不把故障测试放进 Tenant Console。 +- 不在生产 Operator Console 提供运行 destructive 测试的入口。 +- 不把 Operator 控制面重启、升级、Leader Election 等作为第一阶段核心验证对象。 第一阶段的目标是补齐当前最大缺口:**真实故障注入 + 对象内容正确性检查**。 @@ -68,7 +84,7 @@ limitations under the License. 
| --- | --- | --- | | destructive 入口 | `make e2e-live-faults` | 专门运行破坏性故障测试。 | | fault suite 占位 | `e2e/tests/faults.rs` | 后续真实故障测试入口。 | -| live/destructive/context guard | `e2e/src/framework/live.rs` | 防止误跑到非专用集群。 | +| live/destructive/context guard | `e2e/src/framework/live.rs` | 强制显式 live/destructive opt-in,并让 fault suite 绑定当前 kubectl context。 | | local PV 准备 | `e2e/src/framework/storage.rs` | 为 RustFS Tenant 准备本地卷。 | | Tenant/Secret 创建 | `e2e/src/framework/resources.rs` | 创建 e2e namespace、凭据和 Tenant。 | | S3 port-forward | `e2e/src/framework/port_forward.rs` | 将 Tenant S3 服务暴露到本地。 | @@ -97,12 +113,12 @@ limitations under the License. ```text e2e/tests/faults.rs | - +-- 环境保护:live / destructive / dedicated Kind context - +-- 环境准备:local PV / Tenant / Secret / Ready 等待 + +-- 环境保护:live / destructive / current kubectl context + +-- 环境准备:强故障 case reset;Kind 使用 local PV,真实集群使用配置的 StorageClass +-- S3 workload:持续读写对象 +-- history recorder:记录每次操作的开始、结束、结果、hash - +-- nemesis:通过 Chaos Mesh 注入故障 - +-- checker:基于 history 和最终读回结果判断正确性 + +-- nemesis:通过 Chaos Mesh 对 RustFS workload 注入故障 + +-- checker:基于 history 和最终读回结果判断 RustFS 对象正确性 +-- artifact collector:失败时收集诊断现场 ``` @@ -124,7 +140,7 @@ e2e/src/framework/checker.rs | `fault_scenarios` | 定义故障场景名称、默认参数、目标对象和执行顺序。 | | `s3_workload` | 对 RustFS Tenant S3 endpoint 执行对象读写流量。 | | `history` | 将每个 S3 操作记录成 JSON Lines。 | -| `checker` | 基于 history 和最终读回结果验证对象存储不变量。 | +| `checker` | 基于 history 和最终读回结果验证 RustFS 对象存储不变量。 | | `faults.rs` | 编排完整测试流程,不承载底层实现细节。 | ## 为什么优先用 Chaos Mesh @@ -185,24 +201,26 @@ Jepsen-like 的含义是: 1. 必须设置 `RUSTFS_E2E_LIVE=1`。 2. 必须设置 `RUSTFS_E2E_DESTRUCTIVE=1`。 -3. 当前 kube context 必须是专用 Kind:`kind-rustfs-e2e`。 +3. fault suite 使用当前 `kubectl config current-context`;可以是 dedicated Kind,也可以是真实 Kubernetes 测试集群。 4. 目标 namespace 必须来自 e2e 配置,例如 `rustfs-e2e-smoke`。 5. 所有故障资源必须带唯一 run id label。 6. 每个 Chaos 资源必须有 RAII-style cleanup guard。 7. 正常结束和异常失败都必须 best-effort 删除故障资源。 -8. 默认故障持续时间要短,默认故障比例要小。 -9. 
测试失败时必须先收集 artifacts,再清理会影响诊断的信息。 -10. destructive 场景保持 `#[ignore]`,只能通过显式 Make 目标执行。 +8. `io-eio` 这类存储破坏/强干扰 case 必须在 case 前 reset Tenant/PVC/PV;后续 pod kill、network delay、短暂 disconnect 可以按场景复用 Tenant。 +9. 默认故障持续时间要覆盖 workload 窗口,默认故障比例要小。 +10. 测试失败时必须先收集 artifacts,再清理会影响诊断的信息。 +11. destructive 场景保持 `#[ignore]`,只能通过显式 Make 目标执行。 建议增加环境变量: | 变量 | 默认值 | 作用 | | --- | --- | --- | | `RUSTFS_E2E_FAULT_SCENARIO` | `io-eio` | 选择故障场景。 | -| `RUSTFS_E2E_FAULT_DURATION_SECONDS` | `60` | 故障持续时间。 | +| `RUSTFS_E2E_FAULT_DURATION_SECONDS` | `180` | 故障持续时间,默认覆盖串行小对象 workload。 | | `RUSTFS_E2E_FAULT_PERCENT` | `20` | 支持百分比注入的场景使用。 | -| `RUSTFS_E2E_WORKLOAD_OBJECTS` | `200` | 写入或校验对象数量。 | -| `RUSTFS_E2E_WORKLOAD_CONCURRENCY` | `8` | S3 并发度。 | +| `RUSTFS_E2E_WORKLOAD_OBJECTS` | `40` | 写入或校验对象数量。 | +| `RUSTFS_E2E_FAULT_REQUEST_TIMEOUT_SECONDS` | `3` | 单次 S3 请求超时时间。 | +| `RUSTFS_E2E_FAULT_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求故障期间至少出现一次客户端可见失败/超时/unknown。 | | `RUSTFS_E2E_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh 资源所在 namespace。 | ## 操作历史模型 @@ -342,7 +360,6 @@ PutObject GetObject HeadObject ListObjectsV2 -DeleteObject ``` 第一版建议使用唯一 key,不要并发覆盖同一个 key。 @@ -372,7 +389,6 @@ fault-e2e//large/ | --- | --- | --- | --- | | P0 | `io-eio` | Chaos Mesh `IOChaos` | 模拟单个 RustFS 数据卷读写返回 `EIO`。 | | P0 | `pod-kill-one` | Chaos Mesh `PodChaos` | 模拟一个 RustFS Pod 死亡和 StatefulSet 恢复。 | -| P0 | `operator-restart` | Kubernetes delete/rollout | 验证控制面重启后状态恢复。 | | P1 | `network-partition-one` | Chaos Mesh `NetworkChaos` | 模拟一个 RustFS Pod 与集群网络分区。 | | P1 | `io-read-mistake` | Chaos Mesh `IOChaos` | 模拟读路径返回错误字节,即静默坏块。 | | P1 | `disk-full` | local PV 填充或 IOChaos | 验证单盘空间耗尽行为。 | @@ -381,6 +397,8 @@ fault-e2e//large/ | P3 | `dm-flakey` | device mapper / loop device | 更接近真实块设备故障。 | | P3 | `warp-under-chaos` | MinIO Warp + chaos | 故障期间性能退化分析。 | +`operator-restart` 可以作为独立 Operator 控制面韧性测试,但不放入本方案第一阶段的 RustFS workload fault matrix,避免混淆测试对象。 + ## P0 场景:磁盘 EIO 这是建议最先实现的场景。 @@ -732,6 +750,8 @@ history.jsonl 
checker-report.json chaos-manifest.yaml chaos-describe.txt +chaos-describe-.txt +chaos-.yaml events.yaml pv-paths.txt rustfs-pods-current.log @@ -744,6 +764,7 @@ pods-describe.txt - `history.jsonl`:复盘客户端看到的世界。 - `checker-report.json`:复盘 correctness verdict。 +- `chaos-describe-.txt` / `chaos-.yaml`:在故障资源被清理前保留 Chaos Mesh 现场。 - `rustfs-pods-current.log`:定位 RustFS 如何处理故障。 - `events.yaml`:定位 Kubernetes 层是否出现调度、挂载、重启问题。 - `pv-paths.txt`:定位具体 PVC/PV/worker/hostPath 映射。 @@ -756,6 +777,8 @@ pods-describe.txt make e2e-live-faults ``` +该入口使用当前 `kubectl` context。Kind context 会重置 e2e local PV;非 Kind context 会跳过 Kind local storage reset,并使用 `RUSTFS_E2E_STORAGE_CLASS` 指向的集群 StorageClass。 + 后续可以增加聚焦入口,方便本地调试: ```makefile @@ -787,21 +810,30 @@ fault_io_eio_preserves_committed_objects 它应该包含: -1. live/destructive/context guard。 +1. live/destructive/current context guard。 2. Chaos Mesh `IOChaos` CRD 检查。 -3. Tenant 创建和 Ready 等待。 -4. S3 bucket 创建。 -5. S3 prefill 对象并记录 hash。 -6. apply `IOChaos fault errno=5`。 -7. 故障期间持续读写。 -8. delete `IOChaos`。 -9. Tenant 恢复 Ready 等待。 -10. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 -11. history 和 checker report 输出。 -12. 失败时 artifacts 收集。 +3. 启动前按 `app.kubernetes.io/managed-by=rustfs-operator-e2e` 清理上次异常残留的 `IOChaos`。 +4. `io-eio` case 前 reset Tenant/PVC/PV;Kind context 同时 reset local PV,真实集群使用配置的 StorageClass。 +5. Tenant 创建和 Ready 等待。 +6. S3 bucket 创建。 +7. S3 prefill 对象并记录 hash;prefill 阶段必须明确成功,避免空用例通过。 +8. apply `IOChaos fault errno=5`。 +9. 等待 `IOChaos` 进入已选择目标且已注入状态,再开始故障 workload。 +10. 故障期间持续读写并输出 `workload-summary.json`。 +11. workload 结束后确认 `IOChaos` 仍处于 active,避免 workload 跑出故障窗口。 +12. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 Chaos Mesh describe/yaml,再触发 cleanup。 +13. delete `IOChaos`。 +14. Tenant 恢复 Ready 等待。 +15. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 +16. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 +17. AWS SDK error 按 service failure / timeout / dispatch-response unknown 分类写入 history。 +18. history、workload summary 和 checker report 输出。 +19. 
失败时 artifacts 收集。 这个版本已经能证明系统从“占位骨架”升级为“真实故障注入 + 数据正确性校验”。 +当前可执行用例只包含 `io-eio`。上面的故障矩阵保留为后续路线图,不表示 Phase 1 已经实现所有矩阵项。 + ## 分阶段实施计划 ### Phase 1:磁盘 EIO 基线 @@ -817,9 +849,11 @@ fault_io_eio_preserves_committed_objects 验收: - `make e2e-check` 通过。 -- `make e2e-live-faults` 可在专用 Kind 集群运行 `io-eio`。 +- `make e2e-live-faults` 可在当前 kubectl context 运行 `io-eio`;Kind 和真实 Kubernetes 测试集群均可。 - 如果 committed object 丢失,测试失败。 - 如果 successful GET 返回错误字节,测试失败。 +- 如果 workload 跑出 IOChaos active 窗口,测试失败。 +- case inventory 中清晰标注该用例边界为 `rustfs-workload/fault-injection`,不归类为 Operator 控制面测试。 ### Phase 2:进程和网络故障 diff --git a/e2e/src/cases/faults.rs b/e2e/src/cases/faults.rs new file mode 100644 index 0000000..c89d9c3 --- /dev/null +++ b/e2e/src/cases/faults.rs @@ -0,0 +1,40 @@ +// Copyright 2025 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::{CaseSpec, Suite}; + +pub fn cases() -> Vec { + vec![CaseSpec::new( + Suite::Faults, + "fault_io_eio_preserves_committed_objects", + "Inject IOChaos EIO into one RustFS data volume and verify committed S3 objects remain readable with matching hashes after recovery.", + "rustfs-workload/fault-injection", + "faults", + )] +} + +#[cfg(test)] +mod tests { + use super::cases; + + #[test] + fn fault_case_inventory_matches_executable_tests() { + let names = cases() + .into_iter() + .map(|case| case.name) + .collect::>(); + + assert_eq!(names, vec!["fault_io_eio_preserves_committed_objects"]); + } +} diff --git a/e2e/src/cases/mod.rs b/e2e/src/cases/mod.rs index 04933f3..022ba73 100644 --- a/e2e/src/cases/mod.rs +++ b/e2e/src/cases/mod.rs @@ -14,6 +14,7 @@ pub mod cert_manager_tls; pub mod console; +pub mod faults; pub mod operator; pub mod smoke; pub mod sts; @@ -25,6 +26,7 @@ pub enum Suite { Console, Sts, CertManagerTls, + Faults, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -61,6 +63,7 @@ pub fn all_cases() -> Vec { cases.extend(sts::cases()); cases.extend(console::cases()); cases.extend(cert_manager_tls::cases()); + cases.extend(faults::cases()); cases } @@ -79,6 +82,7 @@ mod tests { assert!(suites.contains(&Suite::Sts)); assert!(suites.contains(&Suite::Console)); assert!(suites.contains(&Suite::CertManagerTls)); + assert!(suites.contains(&Suite::Faults)); } #[test] @@ -127,5 +131,6 @@ mod tests { .unwrap_or_default(), 9 ); + assert_eq!(counts.get(&Suite::Faults).copied().unwrap_or_default(), 1); } } diff --git a/e2e/src/framework/chaos_mesh.rs b/e2e/src/framework/chaos_mesh.rs new file mode 100644 index 0000000..6725a8e --- /dev/null +++ b/e2e/src/framework/chaos_mesh.rs @@ -0,0 +1,359 @@ +// Copyright 2025 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::{Context, Result, bail, ensure}; +use serde_json::Value; +use std::thread::sleep; +use std::time::{Duration, Instant}; + +use crate::framework::{config::E2eConfig, kubectl::Kubectl}; + +const IOCHAOS_CRD: &str = "iochaos.chaos-mesh.org"; +const RUN_ID_LABEL: &str = "rustfs-e2e/run-id"; +const SCENARIO_LABEL: &str = "rustfs-e2e/scenario"; +const MANAGED_BY_LABEL: &str = "app.kubernetes.io/managed-by"; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IoChaosSpec { + pub name: String, + pub namespace: String, + pub run_id: String, + pub scenario: String, + pub target_namespace: String, + pub tenant_name: String, + pub container_name: String, + pub volume_path: String, + pub methods: Vec, + pub errno: u8, + pub percent: u8, + pub duration: Duration, +} + +#[derive(Debug, Clone)] +pub struct ChaosGuard { + config: E2eConfig, + kind: &'static str, + namespace: String, + name: String, + deleted: bool, +} + +impl IoChaosSpec { + pub fn eio_on_rustfs_volume( + config: &E2eConfig, + run_id: impl Into, + scenario: impl Into, + volume_path: impl Into, + percent: u8, + duration: Duration, + ) -> Result { + ensure!( + (1..=100).contains(&percent), + "IOChaos percent must be in 1..=100, got {percent}" + ); + ensure!( + duration > Duration::ZERO, + "IOChaos duration must be positive" + ); + + let run_id = run_id.into(); + let short_run_id = run_id.chars().take(12).collect::(); + let scenario = scenario.into(); + + Ok(Self { + name: format!("rustfs-e2e-io-eio-{short_run_id}"), + namespace: config.chaos_namespace.clone(), + run_id, + scenario, + 
target_namespace: config.test_namespace.clone(), + tenant_name: config.tenant_name.clone(), + container_name: "rustfs".to_string(), + volume_path: volume_path.into(), + methods: vec!["READ".to_string(), "WRITE".to_string()], + errno: 5, + percent, + duration, + }) + } + + pub fn manifest(&self) -> String { + let methods = self + .methods + .iter() + .map(|method| format!(" - {method}")) + .collect::>() + .join("\n"); + let seconds = self.duration.as_secs(); + + format!( + r#"apiVersion: chaos-mesh.org/v1alpha1 +kind: IOChaos +metadata: + name: {name} + namespace: {namespace} + labels: + {run_id_label}: "{run_id}" + {scenario_label}: "{scenario}" + {managed_by_label}: rustfs-operator-e2e +spec: + action: fault + mode: one + selector: + namespaces: + - {target_namespace} + labelSelectors: + rustfs.tenant: {tenant_name} + containerNames: + - {container_name} + volumePath: {volume_path} + path: {volume_path}/**/* + methods: +{methods} + errno: {errno} + percent: {percent} + duration: "{seconds}s" +"#, + name = self.name, + namespace = self.namespace, + run_id_label = RUN_ID_LABEL, + run_id = self.run_id, + scenario_label = SCENARIO_LABEL, + scenario = self.scenario, + managed_by_label = MANAGED_BY_LABEL, + target_namespace = self.target_namespace, + tenant_name = self.tenant_name, + container_name = self.container_name, + volume_path = self.volume_path, + methods = methods, + errno = self.errno, + percent = self.percent, + ) + } +} + +pub fn require_iochaos_crd(config: &E2eConfig) -> Result<()> { + let output = Kubectl::new(config) + .command(["get", "crd", IOCHAOS_CRD]) + .run()?; + ensure!( + output.code == Some(0), + "Chaos Mesh IOChaos CRD {IOCHAOS_CRD} is required for fault e2e; install Chaos Mesh before running faults\nstdout:\n{}\nstderr:\n{}", + output.stdout, + output.stderr + ); + Ok(()) +} + +pub fn cleanup_run(config: &E2eConfig, namespace: &str, run_id: &str) -> Result<()> { + let selector = format!("{RUN_ID_LABEL}={run_id}"); + Kubectl::new(config) + 
.namespaced(namespace) + .command(["delete", "iochaos", "-l", &selector, "--ignore-not-found"]) + .run_checked()?; + Ok(()) +} + +pub fn cleanup_managed_iochaos(config: &E2eConfig, namespace: &str) -> Result<()> { + let selector = format!("{MANAGED_BY_LABEL}=rustfs-operator-e2e"); + Kubectl::new(config) + .namespaced(namespace) + .command(["delete", "iochaos", "-l", &selector, "--ignore-not-found"]) + .run_checked()?; + Ok(()) +} + +pub fn apply_iochaos(config: &E2eConfig, spec: &IoChaosSpec) -> Result { + cleanup_run(config, &spec.namespace, &spec.run_id)?; + Kubectl::new(config) + .namespaced(&spec.namespace) + .apply_yaml_command(spec.manifest()) + .run_checked()?; + + Ok(ChaosGuard { + config: config.clone(), + kind: "iochaos", + namespace: spec.namespace.clone(), + name: spec.name.clone(), + deleted: false, + }) +} + +impl ChaosGuard { + pub fn wait_active(&self, timeout: Duration) -> Result<()> { + let deadline = Instant::now() + timeout; + + loop { + let status_snapshot = match self.json() { + Ok(status) => { + if iochaos_is_active(&status)? 
{ + return Ok(()); + } + status + } + Err(error) => format!("failed to read IOChaos status: {error}"), + }; + + if Instant::now() >= deadline { + let describe = self + .describe() + .unwrap_or_else(|error| format!("failed to describe IOChaos: {error}")); + bail!( + "timed out waiting for {kind}/{name} to become active after {timeout:?}\nlast status:\n{status_snapshot}\n\ndescribe:\n{describe}", + kind = self.kind, + name = self.name, + ); + } + + sleep(Duration::from_secs(1)); + } + } + + pub fn ensure_active(&self, stage: &str) -> Result<()> { + let status = self.json()?; + ensure!( + iochaos_is_active(&status)?, + "{kind}/{name} is not active at {stage}; status:\n{status}", + kind = self.kind, + name = self.name + ); + Ok(()) + } + + pub fn describe(&self) -> Result { + let output = Kubectl::new(&self.config) + .namespaced(&self.namespace) + .command(["describe", self.kind, &self.name]) + .run_checked()?; + Ok(output.stdout) + } + + pub fn yaml(&self) -> Result { + let output = Kubectl::new(&self.config) + .namespaced(&self.namespace) + .command(["get", self.kind, &self.name, "-o", "yaml"]) + .run_checked()?; + Ok(output.stdout) + } + + pub fn delete(&mut self) -> Result<()> { + self.delete_inner()?; + self.deleted = true; + Ok(()) + } + + fn json(&self) -> Result { + let output = Kubectl::new(&self.config) + .namespaced(&self.namespace) + .command(["get", self.kind, &self.name, "-o", "json"]) + .run_checked()?; + Ok(output.stdout) + } + + fn delete_inner(&self) -> Result<()> { + Kubectl::new(&self.config) + .namespaced(&self.namespace) + .command(["delete", self.kind, &self.name, "--ignore-not-found"]) + .run_checked()?; + Ok(()) + } +} + +fn iochaos_is_active(raw: &str) -> Result { + let value = serde_json::from_str::(raw).context("parse IOChaos status json")?; + let selected = condition_status(&value, "Selected").is_some_and(|status| status == "True"); + let injected = condition_status(&value, "AllInjected") + .or_else(|| condition_status(&value, "Injected")) 
+ .is_some_and(|status| status == "True"); + let recovered = condition_status(&value, "AllRecovered").is_some_and(|status| status == "True"); + + Ok(selected && injected && !recovered) +} + +fn condition_status(value: &Value, condition_type: &str) -> Option { + value + .pointer("/status/conditions")? + .as_array()? + .iter() + .find(|condition| condition.get("type").and_then(Value::as_str) == Some(condition_type))? + .get("status")? + .as_str() + .map(str::to_string) +} + +impl Drop for ChaosGuard { + fn drop(&mut self) { + if !self.deleted { + let _ = self.delete_inner(); + } + } +} + +#[cfg(test)] +mod tests { + use super::{IoChaosSpec, iochaos_is_active}; + use crate::framework::config::E2eConfig; + use std::time::Duration; + + #[test] + fn iochaos_manifest_targets_rustfs_workload_only() { + let config = E2eConfig::defaults(); + let spec = IoChaosSpec::eio_on_rustfs_volume( + &config, + "run-1234567890", + "io-eio", + "/data/rustfs0", + 20, + Duration::from_secs(60), + ) + .expect("valid io chaos"); + let manifest = spec.manifest(); + + assert!(manifest.contains("kind: IOChaos")); + assert!(manifest.contains("namespace: chaos-mesh")); + assert!(manifest.contains("rustfs.tenant: e2e-tenant")); + assert!(manifest.contains("containerNames:\n - rustfs")); + assert!(manifest.contains("volumePath: /data/rustfs0")); + assert!(manifest.contains("errno: 5")); + assert!(manifest.contains("percent: 20")); + } + + #[test] + fn iochaos_active_requires_selected_and_injected_not_recovered() { + let status = r#"{ + "status": { + "conditions": [ + {"type": "Selected", "status": "True"}, + {"type": "AllInjected", "status": "True"}, + {"type": "AllRecovered", "status": "False"} + ] + } + }"#; + + assert!(iochaos_is_active(status).expect("valid status")); + } + + #[test] + fn iochaos_active_rejects_unselected_experiment() { + let status = r#"{ + "status": { + "conditions": [ + {"type": "Selected", "status": "False"}, + {"type": "AllInjected", "status": "True"} + ] + } + }"#; + + 
assert!(!iochaos_is_active(status).expect("valid status")); + } +} diff --git a/e2e/src/framework/checker.rs b/e2e/src/framework/checker.rs new file mode 100644 index 0000000..140b15c --- /dev/null +++ b/e2e/src/framework/checker.rs @@ -0,0 +1,218 @@ +// Copyright 2025 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::{Result, ensure}; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; + +use crate::framework::{ + history::{OperationKind, OperationOutcome, OperationRecord, Recorder}, + s3_workload::{S3WorkloadClient, sha256_hex}, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CheckerReport { + pub scenario: String, + pub run_id: String, + pub committed_puts: usize, + pub missing_committed_objects: Vec, + pub hash_mismatches: Vec, + pub successful_corrupted_reads: Vec, + pub unknown_writes_materialized: Vec, + pub list_warnings: Vec, + pub tenant_recovered: bool, + pub passed: bool, +} + +impl CheckerReport { + pub fn require_success(&self) -> Result<()> { + ensure!( + self.passed, + "fault checker failed for scenario {} run {}: {}", + self.scenario, + self.run_id, + serde_json::to_string_pretty(self)? 
+ ); + Ok(()) + } +} + +pub async fn check_s3_history( + s3: &S3WorkloadClient, + recorder: &mut Recorder, + tenant_recovered: bool, +) -> Result { + let initial_records = recorder.records().to_vec(); + let committed = committed_puts(&initial_records); + let unknown_writes = unknown_puts(&initial_records); + let mut report = CheckerReport { + scenario: recorder.scenario().to_string(), + run_id: recorder.run_id().to_string(), + committed_puts: committed.len(), + missing_committed_objects: Vec::new(), + hash_mismatches: Vec::new(), + successful_corrupted_reads: successful_corrupted_reads(&initial_records, &committed), + unknown_writes_materialized: Vec::new(), + list_warnings: Vec::new(), + tenant_recovered, + passed: false, + }; + + for (key, expected_hash) in &committed { + match s3.get_object(key, recorder).await? { + Some(body) => { + let actual_hash = sha256_hex(&body); + if actual_hash != *expected_hash { + report.hash_mismatches.push(format!( + "{key}: expected {expected_hash}, got {actual_hash}" + )); + } + } + None => report.missing_committed_objects.push(key.clone()), + } + } + + for (key, attempted_hash) in &unknown_writes { + if let Some(body) = s3.get_object(key, recorder).await? { + let actual_hash = sha256_hex(&body); + report.unknown_writes_materialized.push(format!( + "{key}: attempted {attempted_hash}, got {actual_hash}" + )); + } + } + + let prefix = format!("fault-e2e/{}/", recorder.run_id()); + match s3.list_prefix(&prefix, recorder).await? 
{ + Some(keys) => { + let listed = keys.into_iter().collect::>(); + for key in committed.keys() { + if !listed.contains(key) { + report.list_warnings.push(format!( + "LIST prefix {prefix} did not include committed key {key}" + )); + } + } + } + None => report + .list_warnings + .push(format!("LIST prefix {prefix} did not complete")), + } + + report.passed = report.tenant_recovered + && report.missing_committed_objects.is_empty() + && report.hash_mismatches.is_empty() + && report.successful_corrupted_reads.is_empty(); + + Ok(report) +} + +fn committed_puts(records: &[OperationRecord]) -> BTreeMap { + records + .iter() + .filter(|record| { + record.kind == OperationKind::Put && record.outcome == OperationOutcome::Ok + }) + .filter_map(|record| Some((record.key.clone()?, record.value_sha256.clone()?))) + .collect() +} + +fn unknown_puts(records: &[OperationRecord]) -> BTreeMap { + records + .iter() + .filter(|record| { + record.kind == OperationKind::Put + && matches!( + record.outcome, + OperationOutcome::Timeout | OperationOutcome::Unknown + ) + }) + .filter_map(|record| Some((record.key.clone()?, record.value_sha256.clone()?))) + .collect() +} + +fn successful_corrupted_reads( + records: &[OperationRecord], + committed: &BTreeMap, +) -> Vec { + records + .iter() + .filter(|record| { + record.kind == OperationKind::Get && record.outcome == OperationOutcome::Ok + }) + .filter_map(|record| { + let key = record.key.as_ref()?; + let expected_hash = committed.get(key)?; + let actual_hash = record.value_sha256.as_ref()?; + (expected_hash != actual_hash) + .then(|| format!("{key}: expected {expected_hash}, got {actual_hash}")) + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::{CheckerReport, successful_corrupted_reads}; + use crate::framework::history::{OperationKind, OperationOutcome, OperationRecord}; + use std::collections::BTreeMap; + + fn record( + kind: OperationKind, + key: &str, + hash: &str, + outcome: OperationOutcome, + ) -> OperationRecord { + 
OperationRecord { + id: "op-1".to_string(), + scenario: "io-eio".to_string(), + kind, + bucket: "bucket".to_string(), + key: Some(key.to_string()), + value_sha256: Some(hash.to_string()), + size_bytes: Some(1), + started_at_ms: 1, + ended_at_ms: 2, + outcome, + http_status: Some(200), + error: None, + } + } + + #[test] + fn corrupted_successful_get_is_hard_failure_input() { + let records = vec![record(OperationKind::Get, "k", "bad", OperationOutcome::Ok)]; + let committed = BTreeMap::from([("k".to_string(), "good".to_string())]); + + let corrupted = successful_corrupted_reads(&records, &committed); + + assert_eq!(corrupted, vec!["k: expected good, got bad"]); + } + + #[test] + fn report_requires_clean_correctness_verdict() { + let report = CheckerReport { + scenario: "io-eio".to_string(), + run_id: "run-1".to_string(), + committed_puts: 1, + missing_committed_objects: Vec::new(), + hash_mismatches: Vec::new(), + successful_corrupted_reads: Vec::new(), + unknown_writes_materialized: Vec::new(), + list_warnings: Vec::new(), + tenant_recovered: true, + passed: true, + }; + + assert!(report.require_success().is_ok()); + } +} diff --git a/e2e/src/framework/config.rs b/e2e/src/framework/config.rs index c3c4cfe..4b45898 100644 --- a/e2e/src/framework/config.rs +++ b/e2e/src/framework/config.rs @@ -41,6 +41,13 @@ pub struct E2eConfig { pub artifacts_dir: PathBuf, pub live_enabled: bool, pub destructive_enabled: bool, + pub fault_scenario: String, + pub fault_duration: Duration, + pub fault_percent: u8, + pub fault_workload_objects: usize, + pub fault_request_timeout: Duration, + pub fault_require_client_disruption: bool, + pub chaos_namespace: String, pub timeout: Duration, } @@ -93,6 +100,24 @@ impl E2eConfig { pod_management_policy: parse_pod_management_policy(&get_env), live_enabled: env_bool(&get_env, "RUSTFS_E2E_LIVE"), destructive_enabled: env_bool(&get_env, "RUSTFS_E2E_DESTRUCTIVE"), + fault_scenario: env_or(&get_env, "RUSTFS_E2E_FAULT_SCENARIO", "io-eio"), + 
/// Reads `name` through `get_env` and parses it as a `u8`.
///
/// Returns `default` when the variable is absent or when its value does not
/// parse as a `u8` (non-numeric text, a negative number, or a value above 255).
fn env_u8<F>(get_env: &F, name: &str, default: u8) -> u8
where
    F: Fn(&str) -> Option<String>,
{
    get_env(name)
        .and_then(|value| value.parse::<u8>().ok())
        .unwrap_or(default)
}
License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::{Result, ensure}; +use std::time::Duration; + +use crate::framework::config::E2eConfig; + +pub const IO_EIO_SCENARIO: &str = "io-eio"; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FaultScenario { + pub name: String, + pub duration: Duration, + pub percent: u8, + pub object_count: usize, +} + +impl FaultScenario { + pub fn from_config(config: &E2eConfig) -> Result { + ensure!( + config.fault_scenario == IO_EIO_SCENARIO, + "unsupported fault scenario {:?}; first implementation supports only {IO_EIO_SCENARIO:?}", + config.fault_scenario + ); + ensure!( + (1..=100).contains(&config.fault_percent), + "RUSTFS_E2E_FAULT_PERCENT must be in 1..=100, got {}", + config.fault_percent + ); + ensure!( + config.fault_duration > Duration::ZERO, + "RUSTFS_E2E_FAULT_DURATION_SECONDS must be greater than zero" + ); + ensure!( + config.fault_workload_objects >= 4, + "RUSTFS_E2E_WORKLOAD_OBJECTS must be at least 4" + ); + + Ok(Self { + name: config.fault_scenario.clone(), + duration: config.fault_duration, + percent: config.fault_percent, + object_count: config.fault_workload_objects, + }) + } + + pub fn prefill_count(&self) -> usize { + self.object_count / 2 + } + + pub fn mixed_workload_count(&self) -> usize { + self.object_count - self.prefill_count() + } +} + +#[cfg(test)] +mod tests { + use super::{FaultScenario, IO_EIO_SCENARIO}; + use crate::framework::config::E2eConfig; + use std::time::Duration; + + #[test] + fn default_fault_scenario_is_io_eio_with_split_workload() { + let scenario = 
FaultScenario::from_config(&E2eConfig::defaults()).expect("valid scenario"); + + assert_eq!(scenario.name, IO_EIO_SCENARIO); + assert_eq!(scenario.duration, Duration::from_secs(180)); + assert_eq!(scenario.percent, 20); + assert_eq!(scenario.prefill_count(), 20); + assert_eq!(scenario.mixed_workload_count(), 20); + } + + #[test] + fn unsupported_fault_scenario_is_rejected() { + let mut config = E2eConfig::defaults(); + config.fault_scenario = "operator-restart".to_string(); + + assert!(FaultScenario::from_config(&config).is_err()); + } +} diff --git a/e2e/src/framework/history.rs b/e2e/src/framework/history.rs new file mode 100644 index 0000000..22468b2 --- /dev/null +++ b/e2e/src/framework/history.rs @@ -0,0 +1,195 @@ +// Copyright 2025 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::fs::{self, File}; +use std::io::{BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::time::{SystemTime, UNIX_EPOCH}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum OperationKind { + CreateBucket, + Put, + Get, + Head, + List, + Delete, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum OperationOutcome { + Ok, + Failed, + Timeout, + Unknown, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct OperationRecord { + pub id: String, + pub scenario: String, + pub kind: OperationKind, + pub bucket: String, + pub key: Option, + pub value_sha256: Option, + pub size_bytes: Option, + pub started_at_ms: u64, + pub ended_at_ms: u64, + pub outcome: OperationOutcome, + pub http_status: Option, + pub error: Option, +} + +#[derive(Debug)] +pub struct Recorder { + path: PathBuf, + scenario: String, + run_id: String, + next_id: usize, + records: Vec, + writer: BufWriter, +} + +impl Recorder { + pub fn create( + path: impl Into, + scenario: impl Into, + run_id: impl Into, + ) -> Result { + let path = path.into(); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + let writer = BufWriter::new(File::create(&path)?); + Ok(Self { + path, + scenario: scenario.into(), + run_id: run_id.into(), + next_id: 1, + records: Vec::new(), + writer, + }) + } + + pub fn begin( + &mut self, + kind: OperationKind, + bucket: impl Into, + key: Option, + value_sha256: Option, + size_bytes: Option, + ) -> OperationRecord { + let id = format!("op-{:06}", self.next_id); + self.next_id += 1; + let started_at_ms = now_ms(); + + OperationRecord { + id, + scenario: self.scenario.clone(), + kind, + bucket: bucket.into(), + key, + value_sha256, + size_bytes, + started_at_ms, + ended_at_ms: started_at_ms, + outcome: OperationOutcome::Unknown, + 
http_status: None, + error: None, + } + } + + pub fn finish( + &mut self, + mut record: OperationRecord, + outcome: OperationOutcome, + http_status: Option, + error: Option, + ) -> Result<()> { + record.ended_at_ms = now_ms(); + record.outcome = outcome; + record.http_status = http_status; + record.error = error.map(|message| truncate_error(&message)); + + serde_json::to_writer(&mut self.writer, &record)?; + self.writer.write_all(b"\n")?; + self.writer.flush()?; + self.records.push(record); + Ok(()) + } + + pub fn records(&self) -> &[OperationRecord] { + &self.records + } + + pub fn scenario(&self) -> &str { + &self.scenario + } + + pub fn run_id(&self) -> &str { + &self.run_id + } + + pub fn path(&self) -> &Path { + &self.path + } +} + +fn now_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_millis() as u64) + .unwrap_or_default() +} + +fn truncate_error(message: &str) -> String { + const MAX_ERROR_LEN: usize = 300; + if message.len() <= MAX_ERROR_LEN { + message.to_string() + } else { + format!("{}...", &message[..MAX_ERROR_LEN]) + } +} + +#[cfg(test)] +mod tests { + use super::{OperationKind, OperationOutcome, Recorder}; + + #[test] + fn recorder_writes_jsonl_records() { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("history.jsonl"); + let mut recorder = Recorder::create(&path, "io-eio", "run-1").expect("recorder"); + let record = recorder.begin( + OperationKind::Put, + "bucket", + Some("key".to_string()), + Some("abc".to_string()), + Some(3), + ); + + recorder + .finish(record, OperationOutcome::Ok, Some(200), None) + .expect("finish"); + + let content = std::fs::read_to_string(path).expect("history"); + assert!(content.contains("\"scenario\":\"io-eio\"")); + assert!(content.contains("\"kind\":\"put\"")); + assert_eq!(recorder.records().len(), 1); + } +} diff --git a/e2e/src/framework/live.rs b/e2e/src/framework/live.rs index 4c2e9bd..1cfb24c 100644 --- a/e2e/src/framework/live.rs 
+++ b/e2e/src/framework/live.rs @@ -39,6 +39,15 @@ pub fn current_context() -> Result { Ok(output.stdout.trim().to_string()) } +pub fn use_current_context(config: &mut E2eConfig) -> Result { + let actual = current_context()?; + config.context = actual.clone(); + if let Some(kind_cluster) = actual.strip_prefix("kind-") { + config.cluster_name = kind_cluster.to_string(); + } + Ok(actual) +} + pub fn ensure_dedicated_context(config: &E2eConfig) -> Result { let actual = current_context()?; ensure!( diff --git a/e2e/src/framework/mod.rs b/e2e/src/framework/mod.rs index d7d557d..5f21fd2 100644 --- a/e2e/src/framework/mod.rs +++ b/e2e/src/framework/mod.rs @@ -15,10 +15,14 @@ pub mod artifacts; pub mod assertions; pub mod cert_manager_tls; +pub mod chaos_mesh; +pub mod checker; pub mod command; pub mod config; pub mod console_client; pub mod deploy; +pub mod fault_scenarios; +pub mod history; pub mod images; pub mod kind; pub mod kube_client; @@ -26,6 +30,7 @@ pub mod kubectl; pub mod live; pub mod port_forward; pub mod resources; +pub mod s3_workload; pub mod storage; pub mod tenant_factory; pub mod tools; diff --git a/e2e/src/framework/resources.rs b/e2e/src/framework/resources.rs index c6b36bb..2b0cefd 100644 --- a/e2e/src/framework/resources.rs +++ b/e2e/src/framework/resources.rs @@ -33,6 +33,10 @@ pub fn credential_secret_name(config: &E2eConfig) -> String { format!("{}-credentials", config.tenant_name) } +pub fn e2e_credentials() -> (&'static str, &'static str) { + (E2E_ACCESS_KEY, E2E_SECRET_KEY) +} + pub fn namespace_manifest(namespace: &str) -> String { format!( r#"apiVersion: v1 diff --git a/e2e/src/framework/s3_workload.rs b/e2e/src/framework/s3_workload.rs new file mode 100644 index 0000000..f7fe597 --- /dev/null +++ b/e2e/src/framework/s3_workload.rs @@ -0,0 +1,446 @@ +// Copyright 2025 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::{Context, Result}; +use aws_config::BehaviorVersion; +use aws_credential_types::Credentials; +use aws_sdk_s3::{Client, config::Region, error::SdkError, primitives::ByteStream}; +use sha2::{Digest, Sha256}; +use std::time::Duration; +use tokio::time::timeout; + +use crate::framework::history::{OperationKind, OperationOutcome, Recorder}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ObjectSpec { + pub key: String, + pub size_bytes: usize, + pub sha256: String, + body: Vec, +} + +#[derive(Clone)] +pub struct S3WorkloadClient { + client: Client, + bucket: String, + request_timeout: Duration, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct GetObjectResult { + pub outcome: OperationOutcome, + pub body: Option>, +} + +impl ObjectSpec { + pub fn deterministic(run_id: &str, index: usize, size_bytes: usize) -> Self { + let key = format!("fault-e2e/{run_id}/object-{index:06}"); + let body = deterministic_bytes(index, size_bytes); + let sha256 = sha256_hex(&body); + + Self { + key, + size_bytes, + sha256, + body, + } + } +} + +impl S3WorkloadClient { + pub async fn new( + endpoint: impl Into, + bucket: impl Into, + access_key: impl Into, + secret_key: impl Into, + request_timeout: Duration, + ) -> Result { + let credentials = Credentials::new( + access_key.into(), + secret_key.into(), + None, + None, + "rustfs-e2e-static-credentials", + ); + let shared_config = aws_config::defaults(BehaviorVersion::latest()) + .region(Region::new("us-east-1")) + .credentials_provider(credentials) + .endpoint_url(endpoint.into()) + .load() + 
.await; + let s3_config = aws_sdk_s3::config::Builder::from(&shared_config) + .force_path_style(true) + .build(); + + Ok(Self { + client: Client::from_conf(s3_config), + bucket: bucket.into(), + request_timeout, + }) + } + + pub async fn create_bucket(&self, recorder: &mut Recorder) -> Result { + let record = recorder.begin( + OperationKind::CreateBucket, + self.bucket.clone(), + None, + None, + None, + ); + let result = timeout( + self.request_timeout, + self.client.create_bucket().bucket(&self.bucket).send(), + ) + .await; + + match result { + Ok(Ok(_)) => { + recorder.finish(record, OperationOutcome::Ok, Some(200), None)?; + Ok(OperationOutcome::Ok) + } + Ok(Err(error)) => { + let outcome = classify_sdk_error(&error); + recorder.finish( + record, + outcome, + sdk_error_status(&error), + Some(format!("create bucket failed: {error}")), + )?; + Ok(outcome) + } + Err(_) => { + recorder.finish( + record, + OperationOutcome::Timeout, + None, + Some("create bucket timed out".to_string()), + )?; + Ok(OperationOutcome::Timeout) + } + } + } + + pub async fn put_object( + &self, + object: &ObjectSpec, + recorder: &mut Recorder, + ) -> Result { + let record = recorder.begin( + OperationKind::Put, + self.bucket.clone(), + Some(object.key.clone()), + Some(object.sha256.clone()), + Some(object.size_bytes), + ); + let result = timeout( + self.request_timeout, + self.client + .put_object() + .bucket(&self.bucket) + .key(&object.key) + .body(ByteStream::from(object.body.clone())) + .send(), + ) + .await; + + match result { + Ok(Ok(_)) => { + recorder.finish(record, OperationOutcome::Ok, Some(200), None)?; + Ok(OperationOutcome::Ok) + } + Ok(Err(error)) => { + let outcome = classify_sdk_error(&error); + recorder.finish( + record, + outcome, + sdk_error_status(&error), + Some(format!("put object failed: {error}")), + )?; + Ok(outcome) + } + Err(_) => { + recorder.finish( + record, + OperationOutcome::Timeout, + None, + Some("put object timed out".to_string()), + )?; + 
Ok(OperationOutcome::Timeout) + } + } + } + + pub async fn get_object(&self, key: &str, recorder: &mut Recorder) -> Result>> { + Ok(self.get_object_result(key, recorder).await?.body) + } + + pub async fn get_object_result( + &self, + key: &str, + recorder: &mut Recorder, + ) -> Result { + let record = recorder.begin( + OperationKind::Get, + self.bucket.clone(), + Some(key.to_string()), + None, + None, + ); + let response = timeout( + self.request_timeout, + self.client + .get_object() + .bucket(&self.bucket) + .key(key) + .send(), + ) + .await; + + let output = match response { + Ok(Ok(output)) => output, + Ok(Err(error)) => { + let outcome = classify_sdk_error(&error); + recorder.finish( + record, + outcome, + sdk_error_status(&error), + Some(format!("get object failed: {error}")), + )?; + return Ok(GetObjectResult { + outcome, + body: None, + }); + } + Err(_) => { + recorder.finish( + record, + OperationOutcome::Timeout, + None, + Some("get object timed out".to_string()), + )?; + return Ok(GetObjectResult { + outcome: OperationOutcome::Timeout, + body: None, + }); + } + }; + + let body = timeout(self.request_timeout, output.body.collect()).await; + match body { + Ok(Ok(bytes)) => { + let body = bytes.into_bytes().to_vec(); + let mut record = record; + record.value_sha256 = Some(sha256_hex(&body)); + record.size_bytes = Some(body.len()); + recorder.finish(record, OperationOutcome::Ok, Some(200), None)?; + Ok(GetObjectResult { + outcome: OperationOutcome::Ok, + body: Some(body), + }) + } + Ok(Err(error)) => { + recorder.finish( + record, + OperationOutcome::Unknown, + Some(200), + Some(format!("get body read failed: {error}")), + )?; + Ok(GetObjectResult { + outcome: OperationOutcome::Unknown, + body: None, + }) + } + Err(_) => { + recorder.finish( + record, + OperationOutcome::Timeout, + Some(200), + Some("get body read timed out".to_string()), + )?; + Ok(GetObjectResult { + outcome: OperationOutcome::Timeout, + body: None, + }) + } + } + } + + pub async fn 
head_object( + &self, + key: &str, + recorder: &mut Recorder, + ) -> Result { + let record = recorder.begin( + OperationKind::Head, + self.bucket.clone(), + Some(key.to_string()), + None, + None, + ); + let result = timeout( + self.request_timeout, + self.client + .head_object() + .bucket(&self.bucket) + .key(key) + .send(), + ) + .await; + + match result { + Ok(Ok(_)) => { + recorder.finish(record, OperationOutcome::Ok, Some(200), None)?; + Ok(OperationOutcome::Ok) + } + Ok(Err(error)) => { + let outcome = classify_sdk_error(&error); + recorder.finish( + record, + outcome, + sdk_error_status(&error), + Some(format!("head object failed: {error}")), + )?; + Ok(outcome) + } + Err(_) => { + recorder.finish( + record, + OperationOutcome::Timeout, + None, + Some("head object timed out".to_string()), + )?; + Ok(OperationOutcome::Timeout) + } + } + } + + pub async fn list_prefix( + &self, + prefix: &str, + recorder: &mut Recorder, + ) -> Result>> { + let record = recorder.begin( + OperationKind::List, + self.bucket.clone(), + Some(prefix.to_string()), + None, + None, + ); + let response = timeout( + self.request_timeout, + self.client + .list_objects_v2() + .bucket(&self.bucket) + .prefix(prefix) + .send(), + ) + .await; + + match response { + Ok(Ok(output)) => { + let keys = output + .contents() + .iter() + .filter_map(|object| object.key().map(str::to_string)) + .collect::>(); + let mut record = record; + record.size_bytes = Some(keys.len()); + recorder.finish(record, OperationOutcome::Ok, Some(200), None)?; + Ok(Some(keys)) + } + Ok(Err(error)) => { + let outcome = classify_sdk_error(&error); + recorder.finish( + record, + outcome, + sdk_error_status(&error), + Some(format!("list prefix failed: {error}")), + )?; + Ok(None) + } + Err(_) => { + recorder.finish( + record, + OperationOutcome::Timeout, + None, + Some("list prefix timed out".to_string()), + )?; + Ok(None) + } + } + } +} + +pub fn sha256_hex(body: &[u8]) -> String { + let mut hasher = Sha256::new(); + 
hasher.update(body); + hex::encode(hasher.finalize()) +} + +pub async fn wait_for_s3_endpoint(endpoint: &str, timeout_duration: Duration) -> Result<()> { + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(2)) + .build() + .context("build S3 readiness HTTP client")?; + let start = std::time::Instant::now(); + + loop { + if client.get(endpoint).send().await.is_ok() { + return Ok(()); + } + if start.elapsed() >= timeout_duration { + anyhow::bail!("timed out waiting for S3 endpoint {endpoint}"); + } + tokio::time::sleep(Duration::from_secs(1)).await; + } +} + +fn deterministic_bytes(index: usize, size_bytes: usize) -> Vec { + (0..size_bytes) + .map(|offset| ((offset + index * 31) % 251) as u8) + .collect() +} + +fn classify_sdk_error(error: &SdkError) -> OperationOutcome { + match error { + SdkError::TimeoutError(_) => OperationOutcome::Timeout, + SdkError::DispatchFailure(_) | SdkError::ResponseError(_) => OperationOutcome::Unknown, + SdkError::ConstructionFailure(_) | SdkError::ServiceError(_) => OperationOutcome::Failed, + _ => OperationOutcome::Unknown, + } +} + +fn sdk_error_status(error: &SdkError) -> Option { + match error { + SdkError::ServiceError(context) => Some(context.raw().status().as_u16()), + SdkError::ResponseError(context) => Some(context.raw().status().as_u16()), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::{ObjectSpec, sha256_hex}; + + #[test] + fn deterministic_objects_have_stable_keys_sizes_and_hashes() { + let object = ObjectSpec::deterministic("run-1", 7, 4096); + let same = ObjectSpec::deterministic("run-1", 7, 4096); + + assert_eq!(object.key, "fault-e2e/run-1/object-000007"); + assert_eq!(object.size_bytes, 4096); + assert_eq!(object.sha256, same.sha256); + assert_eq!(object.sha256, sha256_hex(&same.body)); + } +} diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index b531cfc..88c0f4f 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -12,8 +12,30 @@ // See the License for the specific 
language governing permissions and // limitations under the License. -use anyhow::Result; -use rustfs_operator_e2e::framework::{config::E2eConfig, live}; +use anyhow::{Context, Result, ensure}; +use kube::Api; +use operator::types::v1alpha1::tenant::Tenant; +use rustfs_operator_e2e::framework::{ + artifacts::ArtifactCollector, + chaos_mesh::{self, IoChaosSpec}, + checker, + config::E2eConfig, + fault_scenarios::FaultScenario, + history::OperationOutcome, + history::Recorder, + kube_client, live, + port_forward::{PortForwardGuard, PortForwardSpec}, + resources, + s3_workload::{ObjectSpec, S3WorkloadClient, wait_for_s3_endpoint}, + storage, wait, +}; +use serde::Serialize; +use std::time::Duration; +use uuid::Uuid; + +const IO_EIO_CASE: &str = "fault_io_eio_preserves_committed_objects"; +const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; +const SMALL_OBJECT_SIZE_BYTES: usize = 4 * 1024; #[test] fn faults_are_not_destructive_without_explicit_opt_in() { @@ -26,11 +48,382 @@ fn faults_are_not_destructive_without_explicit_opt_in() { #[test] #[ignore = "reserved for destructive fault scenarios; run through `make e2e-live-faults`"] fn fault_live_suite_requires_explicit_destructive_opt_in() -> Result<()> { - let config = E2eConfig::from_env(); + let mut config = E2eConfig::from_env(); + + live::require_live_enabled(&config)?; + live::require_destructive_enabled(&config)?; + let context = live::use_current_context(&mut config)?; + eprintln!("confirmed destructive fault e2e context: {context}"); + Ok(()) +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; run through `make e2e-live-faults`"] +async fn fault_io_eio_preserves_committed_objects() -> Result<()> { + let mut config = E2eConfig::from_env(); live::require_live_enabled(&config)?; - live::ensure_dedicated_context(&config)?; live::require_destructive_enabled(&config)?; + let context = live::use_current_context(&mut config)?; + eprintln!("running destructive RustFS fault e2e against current 
context: {context}"); + + let collector = ArtifactCollector::new(&config.artifacts_dir); + let result = run_io_eio_case(&config, &collector).await; + + if let Err(error) = &result { + match collector.collect_kubernetes_snapshot(IO_EIO_CASE, &config) { + Ok(report) => { + eprintln!("collected e2e artifacts under {}", report.dir.display()); + eprintln!("{}", report.diagnosis); + } + Err(artifact_error) => { + eprintln!("failed to collect e2e artifacts after {error}: {artifact_error}"); + } + } + } + + result +} + +async fn run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> Result<()> { + let scenario = FaultScenario::from_config(config)?; + chaos_mesh::require_iochaos_crd(config)?; + chaos_mesh::cleanup_managed_iochaos(config, &config.chaos_namespace)?; + + reset_io_eio_fixture(config)?; + wait_for_ready_tenant(config).await?; + + let run_id = format!("run-{}", Uuid::new_v4()); + let bucket = bucket_name(&run_id); + let history_path = collector.case_dir(IO_EIO_CASE).join("history.jsonl"); + let mut history = Recorder::create(history_path, &scenario.name, &run_id)?; + + let port_forward_spec = PortForwardSpec::tenant_io(&config.test_namespace, &config.tenant_name); + let endpoint = port_forward_spec.local_base_url(); + let mut port_forward = PortForwardSpec::start_tenant_io(config)?; + wait_for_tenant_s3(&mut port_forward, &endpoint, config.timeout).await?; + + let (access_key, secret_key) = resources::e2e_credentials(); + let s3 = S3WorkloadClient::new( + &endpoint, + &bucket, + access_key, + secret_key, + config.fault_request_timeout, + ) + .await?; + let bucket_outcome = s3.create_bucket(&mut history).await?; + ensure!( + bucket_outcome == OperationOutcome::Ok, + "fault workload bucket creation did not succeed: {bucket_outcome:?}" + ); + + let prefilled = prefill_objects(&s3, &mut history, &run_id, scenario.prefill_count()).await?; + let chaos = IoChaosSpec::eio_on_rustfs_volume( + config, + &run_id, + &scenario.name, + RUSTFS_DATA_VOLUME, + 
scenario.percent, + scenario.duration, + )?; + collector.write_text(IO_EIO_CASE, "chaos-manifest.yaml", &chaos.manifest())?; + let mut guard = chaos_mesh::apply_iochaos(config, &chaos)?; + match guard.describe() { + Ok(describe) => { + collector.write_text(IO_EIO_CASE, "chaos-describe.txt", &describe)?; + } + Err(error) => { + collector.write_text( + IO_EIO_CASE, + "chaos-describe.txt", + &format!("failed to describe IOChaos: {error}"), + )?; + } + } + if let Err(error) = guard.wait_active(config.timeout) { + collect_active_chaos_artifacts(collector, &guard, "wait-active-failed")?; + return Err(error); + } + + let workload_summary = match run_mixed_workload( + &s3, + &mut history, + &run_id, + &prefilled, + scenario.prefill_count(), + scenario.mixed_workload_count(), + ) + .await + { + Ok(summary) => summary, + Err(error) => { + collect_active_chaos_artifacts(collector, &guard, "workload-failed")?; + return Err(error); + } + }; + collector.write_text( + IO_EIO_CASE, + "workload-summary.json", + &serde_json::to_string_pretty(&workload_summary)?, + )?; + if let Err(error) = + workload_summary.require_fault_evidence(config.fault_require_client_disruption) + { + collect_active_chaos_artifacts(collector, &guard, "workload-no-fault-evidence")?; + return Err(error); + } + if let Err(error) = guard.ensure_active("after fault workload") { + collect_active_chaos_artifacts(collector, &guard, "workload-outlived-chaos")?; + return Err(error); + } + + if let Err(error) = guard.delete() { + collect_active_chaos_artifacts(collector, &guard, "delete-failed")?; + return Err(error); + } + + wait_for_ready_tenant(config).await?; + let report = checker::check_s3_history(&s3, &mut history, true).await?; + collector.write_text( + IO_EIO_CASE, + "checker-report.json", + &serde_json::to_string_pretty(&report)?, + )?; + report.require_success()?; + + Ok(()) +} + +fn reset_io_eio_fixture(config: &E2eConfig) -> Result<()> { + resources::reset_smoke_tenant_resources(config)?; + if 
uses_kind_local_storage(config) { + storage::reset_default_local_storage(config)?; + } else { + eprintln!( + "skipping Kind local storage reset for context {}; using cluster storage class {}", + config.context, config.storage_class + ); + } + resources::apply_smoke_tenant_resources(config)?; + Ok(()) +} + +fn uses_kind_local_storage(config: &E2eConfig) -> bool { + config.context.starts_with("kind-") +} + +fn collect_active_chaos_artifacts( + collector: &ArtifactCollector, + guard: &chaos_mesh::ChaosGuard, + suffix: &str, +) -> Result<()> { + let describe = guard + .describe() + .unwrap_or_else(|error| format!("failed to describe IOChaos before cleanup: {error}")); + collector.write_text( + IO_EIO_CASE, + &format!("chaos-describe-{suffix}.txt"), + &describe, + )?; + + let yaml = guard + .yaml() + .unwrap_or_else(|error| format!("failed to get IOChaos yaml before cleanup: {error}")); + collector.write_text(IO_EIO_CASE, &format!("chaos-{suffix}.yaml"), &yaml)?; Ok(()) } + +async fn wait_for_ready_tenant(config: &E2eConfig) -> Result { + let client = kube_client::default_client().await?; + let tenants: Api = kube_client::tenant_api(client, &config.test_namespace); + wait::wait_for_tenant_ready(tenants, &config.tenant_name, config.timeout).await +} + +async fn wait_for_tenant_s3( + port_forward: &mut PortForwardGuard, + endpoint: &str, + timeout: Duration, +) -> Result<()> { + port_forward.ensure_running()?; + wait_for_s3_endpoint(endpoint, timeout) + .await + .with_context(|| { + format!( + "S3 port-forward was not ready; command: {}; log {}:\n{}", + port_forward.command_display(), + port_forward.log_path().display(), + port_forward.log_contents() + ) + }) +} + +async fn prefill_objects( + s3: &S3WorkloadClient, + history: &mut Recorder, + run_id: &str, + count: usize, +) -> Result> { + let mut objects = Vec::with_capacity(count); + + for index in 0..count { + let object = ObjectSpec::deterministic(run_id, index, SMALL_OBJECT_SIZE_BYTES); + let put_outcome = 
s3.put_object(&object, history).await?; + ensure!( + put_outcome == OperationOutcome::Ok, + "prefill PUT failed before fault injection for key {}: {put_outcome:?}", + object.key + ); + let head_outcome = s3.head_object(&object.key, history).await?; + ensure!( + head_outcome == OperationOutcome::Ok, + "prefill HEAD failed before fault injection for key {}: {head_outcome:?}", + object.key + ); + objects.push(object); + } + + Ok(objects) +} + +async fn run_mixed_workload( + s3: &S3WorkloadClient, + history: &mut Recorder, + run_id: &str, + prefilled: &[ObjectSpec], + start_index: usize, + count: usize, +) -> Result { + let mut summary = WorkloadSummary::default(); + + for offset in 0..count { + let object = + ObjectSpec::deterministic(run_id, start_index + offset, SMALL_OBJECT_SIZE_BYTES); + let put_outcome = s3.put_object(&object, history).await?; + summary.puts.record(put_outcome); + + if let Some(existing) = prefilled.get(offset % prefilled.len()) { + let get_result = s3.get_object_result(&existing.key, history).await?; + summary.gets.record(get_result.outcome); + } + } + + summary.require_exercised()?; + Ok(summary) +} + +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize)] +struct WorkloadSummary { + puts: OutcomeCounts, + gets: OutcomeCounts, +} + +impl WorkloadSummary { + fn require_exercised(&self) -> Result<()> { + ensure!( + self.puts.total() > 0 && self.gets.total() > 0, + "fault workload did not exercise both PUT and GET paths: {self:?}" + ); + Ok(()) + } + + fn require_fault_evidence(&self, require_client_disruption: bool) -> Result<()> { + if require_client_disruption { + ensure!( + self.disrupted() > 0, + "IOChaos became active but the S3 workload observed no client-visible disrupted operation; increase RUSTFS_E2E_WORKLOAD_OBJECTS or RUSTFS_E2E_FAULT_PERCENT, or set RUSTFS_E2E_FAULT_REQUIRE_CLIENT_DISRUPTION=0 if this is expected" + ); + } else if self.disrupted() == 0 { + eprintln!( + "IOChaos was active, but the S3 workload observed no 
client-visible disrupted operation" + ); + } + Ok(()) + } + + fn disrupted(&self) -> usize { + self.puts.disrupted() + self.gets.disrupted() + } +} + +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize)] +struct OutcomeCounts { + ok: usize, + failed: usize, + timeout: usize, + unknown: usize, +} + +impl OutcomeCounts { + fn record(&mut self, outcome: OperationOutcome) { + match outcome { + OperationOutcome::Ok => self.ok += 1, + OperationOutcome::Failed => self.failed += 1, + OperationOutcome::Timeout => self.timeout += 1, + OperationOutcome::Unknown => self.unknown += 1, + } + } + + fn total(&self) -> usize { + self.ok + self.failed + self.timeout + self.unknown + } + + fn disrupted(&self) -> usize { + self.failed + self.timeout + self.unknown + } +} + +fn bucket_name(run_id: &str) -> String { + let suffix = run_id + .chars() + .filter(|ch| ch.is_ascii_alphanumeric()) + .take(16) + .collect::() + .to_ascii_lowercase(); + format!("rustfs-fault-{suffix}") +} + +#[cfg(test)] +mod tests { + use super::{OutcomeCounts, WorkloadSummary, bucket_name}; + use rustfs_operator_e2e::framework::history::OperationOutcome; + + #[test] + fn fault_bucket_name_is_s3_compatible_and_run_scoped() { + assert_eq!( + bucket_name("run-12345678-abcd-efgh"), + "rustfs-fault-run12345678abcde" + ); + } + + #[test] + fn workload_summary_counts_disrupted_operations() { + let mut summary = WorkloadSummary::default(); + summary.puts.record(OperationOutcome::Ok); + summary.gets.record(OperationOutcome::Timeout); + + assert_eq!(summary.puts.total(), 1); + assert_eq!(summary.gets.total(), 1); + assert_eq!(summary.disrupted(), 1); + assert!(summary.require_exercised().is_ok()); + assert!(summary.require_fault_evidence(true).is_ok()); + } + + #[test] + fn workload_summary_can_require_fault_evidence() { + let summary = WorkloadSummary { + puts: OutcomeCounts { + ok: 1, + ..OutcomeCounts::default() + }, + gets: OutcomeCounts { + ok: 1, + ..OutcomeCounts::default() + }, + }; + + 
assert!(summary.require_fault_evidence(false).is_ok()); + assert!(summary.require_fault_evidence(true).is_err()); + } +} From b5c360404f2e4b437f2018093d7d8e358b4ba859 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Thu, 18 Jun 2026 19:39:24 +0800 Subject: [PATCH 03/20] test(chaos): harden real-cluster fault tests --- ...ST_PLAN.md => FAULT_INJECTION_TEST_PLAN.md | 285 +++++++++--------- Makefile | 13 +- README.md | 15 +- e2e/README.md | 38 ++- e2e/src/bin/rustfs-e2e.rs | 2 +- e2e/src/cases/faults.rs | 40 --- e2e/src/cases/mod.rs | 5 - e2e/src/framework/artifacts.rs | 13 +- e2e/src/framework/cert_manager_tls.rs | 4 +- e2e/src/framework/chaos_mesh.rs | 42 +-- e2e/src/framework/config.rs | 112 +++---- e2e/src/framework/fault_config.rs | 273 +++++++++++++++++ e2e/src/framework/fault_scenarios.rs | 39 +-- e2e/src/framework/kubectl.rs | 19 +- e2e/src/framework/live.rs | 22 +- e2e/src/framework/mod.rs | 3 +- e2e/src/framework/port_forward.rs | 8 +- e2e/src/framework/resources.rs | 199 ++++++++++-- e2e/src/framework/s3_workload.rs | 6 +- e2e/src/framework/tenant_factory.rs | 63 +++- e2e/tests/faults.rs | 110 +++---- 21 files changed, 872 insertions(+), 439 deletions(-) rename e2e/FAULT_INJECTION_TEST_PLAN.md => FAULT_INJECTION_TEST_PLAN.md (71%) delete mode 100644 e2e/src/cases/faults.rs create mode 100644 e2e/src/framework/fault_config.rs diff --git a/e2e/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md similarity index 71% rename from e2e/FAULT_INJECTION_TEST_PLAN.md rename to FAULT_INJECTION_TEST_PLAN.md index 9c7035d..8430b70 100644 --- a/e2e/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -14,9 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -# RustFS Operator 故障注入测试方案 +# RustFS 故障注入测试方案 -本文档描述如何在 RustFS Operator 当前 e2e 框架中落地一套可执行、可诊断、可逐步增强的故障注入测试体系。 +本文档描述如何复用 RustFS Operator 测试基础设施,在真实 Kubernetes 测试集群中运行可执行、可诊断、可逐步增强的故障注入测试体系。故障测试不属于 Kind e2e suite。 核心原则: @@ -38,7 +38,7 @@ limitations under the License. - checker 判断的是 RustFS 对象读写正确性:已经确认成功写入的数据不能丢,成功读取不能返回错误内容。 - Operator 状态只作为恢复观察信号,例如故障解除后 Tenant 是否重新回到 Ready;它不是第一阶段 correctness verdict 的主体。 - 不在 Tenant Console 或生产 Operator Console 中提供 destructive fault test 入口。 -- Chaos Mesh Dashboard 可以作为观察 Chaos 资源的外部工具,但 e2e 的权威输出是 `history.jsonl`、`checker-report.json` 和 Kubernetes artifacts。 +- Chaos Mesh Dashboard 可以作为观察 Chaos 资源的外部工具,但 fault-test runner 的权威输出是 `history.jsonl`、`checker-report.json` 和 Kubernetes artifacts。 ## 目标 @@ -48,7 +48,7 @@ limitations under the License. 2. RustFS 是否会在磁盘损坏或网络分区后,把错误对象内容以 `200 OK` 返回给客户端。 3. RustFS 在请求超时、连接中断、部分失败后,是否存在“客户端认为失败但服务端实际写入”的未知状态。 4. Operator 编排出的 Tenant 是否能在故障解除后回到 Ready,作为 RustFS workload 恢复观察信号。 -5. 当测试失败时,e2e harness 是否能留下足够的日志、事件、历史记录和 checker 报告用于定位。 +5. 当测试失败时,fault-test runner 是否能留下足够的日志、事件、历史记录和 checker 报告用于定位。 最重要的判定不是“故障期间所有请求都成功”,而是: @@ -74,19 +74,18 @@ limitations under the License. 
第一阶段的目标是补齐当前最大缺口:**真实故障注入 + 对象内容正确性检查**。 -## 当前 e2e 可复用基础 +## 可复用的测试基础设施 -当前项目已经有适合故障测试的骨架,不需要另起一套测试系统。 +当前项目已经有适合故障测试的底层模块,不需要复制 kubectl、S3、history 和 checker 实现。但故障测试拥有独立配置、命令和安全边界,不属于 Kind e2e case inventory。 已有能力: | 能力 | 当前位置 | 用途 | | --- | --- | --- | -| destructive 入口 | `make e2e-live-faults` | 专门运行破坏性故障测试。 | -| fault suite 占位 | `e2e/tests/faults.rs` | 后续真实故障测试入口。 | -| live/destructive/context guard | `e2e/src/framework/live.rs` | 强制显式 live/destructive opt-in,并让 fault suite 绑定当前 kubectl context。 | -| local PV 准备 | `e2e/src/framework/storage.rs` | 为 RustFS Tenant 准备本地卷。 | -| Tenant/Secret 创建 | `e2e/src/framework/resources.rs` | 创建 e2e namespace、凭据和 Tenant。 | +| destructive 入口 | `make fault-test` | 专门在真实 Kubernetes 测试集群运行破坏性故障测试。 | +| fault runner | `e2e/tests/faults.rs` | 真实集群故障测试入口,不属于 e2e case inventory。 | +| fault config/context guard | `e2e/src/framework/fault_config.rs` | 读取独立 fault-test 配置、绑定当前 context,并拒绝 Kind。 | +| Tenant/Secret 创建 | `e2e/src/framework/resources.rs` | 创建 fault-test namespace、凭据和真实集群 Tenant。 | | S3 port-forward | `e2e/src/framework/port_forward.rs` | 将 Tenant S3 服务暴露到本地。 | | artifact collector | `e2e/src/framework/artifacts.rs` | 测试失败后收集 Kubernetes 现场。 | @@ -95,13 +94,13 @@ limitations under the License. - RustFS Pod selector 可使用 `rustfs.tenant=`。 - RustFS 容器名是 `rustfs`。 - RustFS 数据卷路径遵循 `/data/rustfs0`、`/data/rustfs1`。 -- Kind worker 将宿主机 `/tmp/rustfs-e2e-storage-*` 挂载到 worker 内部 `/mnt/data`。 -- local PV 最终落在 worker 内部 `/mnt/data/volN`。 +- 故障测试要求真实集群提供动态 StorageClass,不操作 Kind hostPath 或 local PV。 因此推荐方案是: ```text -复用当前 e2e harness +复用当前测试基础设施 + + 独立 FaultTestConfig 与 Make 入口 + 新增 Chaos Mesh 故障注入模块 + 新增 S3 workload + 新增 operation history @@ -111,10 +110,10 @@ limitations under the License. 
## 总体架构 ```text -e2e/tests/faults.rs +make fault-test -> e2e/tests/faults.rs | - +-- 环境保护:live / destructive / current kubectl context - +-- 环境准备:强故障 case reset;Kind 使用 local PV,真实集群使用配置的 StorageClass + +-- 环境保护:destructive opt-in / current real Kubernetes context / required StorageClass + +-- 环境准备:强故障 case reset;真实集群使用配置的动态 StorageClass +-- S3 workload:持续读写对象 +-- history recorder:记录每次操作的开始、结束、结果、hash +-- nemesis:通过 Chaos Mesh 对 RustFS workload 注入故障 @@ -126,6 +125,7 @@ e2e/tests/faults.rs ```text e2e/src/framework/chaos_mesh.rs +e2e/src/framework/fault_config.rs e2e/src/framework/fault_scenarios.rs e2e/src/framework/s3_workload.rs e2e/src/framework/history.rs @@ -154,7 +154,7 @@ Chaos Mesh 适合第一阶段,原因: - 支持 `PodChaos`、`NetworkChaos`、`IOChaos`。 - `IOChaos` 能对指定挂载路径返回 `EIO`,适合模拟磁盘坏块或磁盘 I/O 错误。 - `IOChaos mistake` 能模拟读写返回错误字节,适合模拟 bit rot / 静默损坏。 -- 以 CRD 形式管理故障,方便 e2e harness apply/delete/describe/collect。 +- 以 CRD 形式管理故障,方便 fault-test runner apply/delete/describe/collect。 第一阶段建议只要求: @@ -195,33 +195,37 @@ Jepsen-like 的含义是: ## 安全模型 -故障测试必须默认安全,不能误伤开发者当前 kube context。 +故障测试必须默认安全,只能面向当前真实 Kubernetes 测试集群,不能运行在 Kind、共享开发集群或生产集群。 必须保留并强化这些保护: -1. 必须设置 `RUSTFS_E2E_LIVE=1`。 -2. 必须设置 `RUSTFS_E2E_DESTRUCTIVE=1`。 -3. fault suite 使用当前 `kubectl config current-context`;可以是 dedicated Kind,也可以是真实 Kubernetes 测试集群。 -4. 目标 namespace 必须来自 e2e 配置,例如 `rustfs-e2e-smoke`。 -5. 所有故障资源必须带唯一 run id label。 -6. 每个 Chaos 资源必须有 RAII-style cleanup guard。 -7. 正常结束和异常失败都必须 best-effort 删除故障资源。 -8. `io-eio` 这类存储破坏/强干扰 case 必须在 case 前 reset Tenant/PVC/PV;后续 pod kill、network delay、短暂 disconnect 可以按场景复用 Tenant。 -9. 默认故障持续时间要覆盖 workload 窗口,默认故障比例要小。 -10. 测试失败时必须先收集 artifacts,再清理会影响诊断的信息。 -11. destructive 场景保持 `#[ignore]`,只能通过显式 Make 目标执行。 +1. 必须设置 `RUSTFS_FAULT_TEST_DESTRUCTIVE=1`;`make fault-test` 会显式设置。 +2. fault runner 使用当前 `kubectl config current-context`,并拒绝 `kind-*` context。 +3. 必须显式提供 `RUSTFS_FAULT_TEST_STORAGE_CLASS`,目标 StorageClass 应支持动态供给。 +4. 
目标 namespace 必须来自 fault-test 配置,默认 `rustfs-fault-test`;runner 创建 namespace 时必须写入 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` label 和匹配 Tenant 的 `rustfs.com/fault-test-tenant` annotation。 +5. 已存在 namespace 只有在上述所有权标记完全匹配时才允许 reset;runner 不得自动认领未标记 namespace。 +6. 所有故障资源必须带唯一 run id label。 +7. 每个 Chaos 资源必须有 RAII-style cleanup guard。 +8. 正常结束和异常失败都必须 best-effort 删除故障资源。 +9. `io-eio` 这类存储破坏/强干扰 case 必须在 case 前 reset Tenant/PVC/PV;后续 pod kill、network delay、短暂 disconnect 可以按场景复用 Tenant。 +10. 默认故障持续时间要覆盖 workload 窗口,默认故障比例要小。 +11. 测试失败时必须先收集 artifacts,再清理会影响诊断的信息。 +12. destructive 场景保持 `#[ignore]`,只能通过显式 Make 目标执行。 建议增加环境变量: | 变量 | 默认值 | 作用 | | --- | --- | --- | -| `RUSTFS_E2E_FAULT_SCENARIO` | `io-eio` | 选择故障场景。 | -| `RUSTFS_E2E_FAULT_DURATION_SECONDS` | `180` | 故障持续时间,默认覆盖串行小对象 workload。 | -| `RUSTFS_E2E_FAULT_PERCENT` | `20` | 支持百分比注入的场景使用。 | -| `RUSTFS_E2E_WORKLOAD_OBJECTS` | `40` | 写入或校验对象数量。 | -| `RUSTFS_E2E_FAULT_REQUEST_TIMEOUT_SECONDS` | `3` | 单次 S3 请求超时时间。 | -| `RUSTFS_E2E_FAULT_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求故障期间至少出现一次客户端可见失败/超时/unknown。 | -| `RUSTFS_E2E_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh 资源所在 namespace。 | +| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | required | 真实集群动态 StorageClass。 | +| `RUSTFS_FAULT_TEST_NAMESPACE` | `rustfs-fault-test` | 专用测试 namespace。 | +| `RUSTFS_FAULT_TEST_TENANT` | `fault-test-tenant` | 专用测试 Tenant。 | +| `RUSTFS_FAULT_TEST_SCENARIO` | `io-eio` | 选择故障场景。 | +| `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `180` | 故障持续时间,默认覆盖串行小对象 workload。 | +| `RUSTFS_FAULT_TEST_PERCENT` | `20` | 支持百分比注入的场景使用。 | +| `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `40` | 写入或校验对象数量。 | +| `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `3` | 单次 S3 请求超时时间。 | +| `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求故障期间至少出现一次客户端可见失败/超时/unknown。 | +| `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh 资源所在 namespace。 | ## 操作历史模型 @@ -234,8 +238,8 @@ Jepsen-like 的含义是: "id": "op-000001", "scenario": "io-eio", "kind": "put", - 
"bucket": "rustfs-fault-e2e", - "key": "fault-e2e/run-123/object-1", + "bucket": "rustfs-fault-run123", + "key": "fault-test/run-123/object-1", "value_sha256": "abc123", "size_bytes": 1048576, "started_at_ms": 1710000000000, @@ -367,9 +371,9 @@ ListObjectsV2 key 格式: ```text -fault-e2e//small/ -fault-e2e//medium/ -fault-e2e//large/ +fault-test//small/ +fault-test//medium/ +fault-test//large/ ``` 对象大小建议: @@ -381,7 +385,7 @@ fault-e2e//large/ | large | 1 MiB | | xlarge | 8 MiB | -第一版不建议默认使用太大对象,避免 e2e 运行过慢。 +第一版不建议默认使用太大对象,避免故障测试运行过慢。 ## 初始故障场景优先级 @@ -391,9 +395,9 @@ fault-e2e//large/ | P0 | `pod-kill-one` | Chaos Mesh `PodChaos` | 模拟一个 RustFS Pod 死亡和 StatefulSet 恢复。 | | P1 | `network-partition-one` | Chaos Mesh `NetworkChaos` | 模拟一个 RustFS Pod 与集群网络分区。 | | P1 | `io-read-mistake` | Chaos Mesh `IOChaos` | 模拟读路径返回错误字节,即静默坏块。 | -| P1 | `disk-full` | local PV 填充或 IOChaos | 验证单盘空间耗尽行为。 | -| P2 | `direct-pv-corruption` | Kind worker 文件系统改写 | 模拟已经落盘的数据被破坏。 | -| P2 | `worker-restart` | Docker restart Kind worker | 模拟节点重启。 | +| P1 | `disk-full` | IOChaos 或 CSI 后端专用工具 | 验证单盘空间耗尽行为。 | +| P2 | `direct-volume-corruption` | 存储后端专用测试环境 | 模拟已经落盘的数据被破坏。 | +| P2 | `node-restart` | 集群节点运维接口 | 模拟节点重启。 | | P3 | `dm-flakey` | device mapper / loop device | 更接近真实块设备故障。 | | P3 | `warp-under-chaos` | MinIO Warp + chaos | 故障期间性能退化分析。 | @@ -403,7 +407,7 @@ fault-e2e//large/ 这是建议最先实现的场景。 -它能直接验证 RustFS 在磁盘读写失败下是否会丢失已提交对象,且非常适合当前 Kind local PV 结构。 +它能直接验证 RustFS 在真实集群 CSI 数据卷发生读写错误时,是否会丢失已提交对象。 目标: @@ -417,18 +421,18 @@ Chaos Mesh `IOChaos` 示例: apiVersion: chaos-mesh.org/v1alpha1 kind: IOChaos metadata: - name: rustfs-e2e-io-eio + name: rustfs-fault-io-eio namespace: chaos-mesh labels: - rustfs-e2e/run-id: "" + rustfs-fault-test/run-id: "" spec: action: fault mode: one selector: namespaces: - - rustfs-e2e-smoke + - rustfs-fault-test labelSelectors: - rustfs.tenant: e2e-tenant + rustfs.tenant: fault-test-tenant containerNames: - rustfs volumePath: /data/rustfs0 @@ -443,7 +447,7 @@ spec: 关键点: -- 
`volumePath` 是 RustFS 容器内挂载路径,不是宿主机 `/tmp/rustfs-e2e-storage-*`。 +- `volumePath` 是 RustFS 容器内的 CSI 数据卷挂载路径。 - `errno: 5` 对应 Linux `EIO`。 - `mode: one` 表示只选择一个匹配 Pod,避免第一版故障面过大。 - `percent: 20` 表示只影响部分 I/O 调用,避免全量不可用。 @@ -472,16 +476,16 @@ Chaos Mesh `IOChaos mistake` 示例: apiVersion: chaos-mesh.org/v1alpha1 kind: IOChaos metadata: - name: rustfs-e2e-io-read-mistake + name: rustfs-fault-io-read-mistake namespace: chaos-mesh spec: action: mistake mode: one selector: namespaces: - - rustfs-e2e-smoke + - rustfs-fault-test labelSelectors: - rustfs.tenant: e2e-tenant + rustfs.tenant: fault-test-tenant containerNames: - rustfs volumePath: /data/rustfs0 @@ -504,32 +508,9 @@ spec: 这个场景是对象存储非常关键的测试,因为它验证的是“不要静默返回坏数据”。 -## P2 场景:直接破坏 local PV 文件 +## P2 场景:存储后端级数据破坏 -当前 Kind worker 将宿主机目录挂载到 worker 内部: - -```text -/tmp/rustfs-e2e-storage-1 -> /mnt/data -/tmp/rustfs-e2e-storage-2 -> /mnt/data -/tmp/rustfs-e2e-storage-3 -> /mnt/data -``` - -local PV 位于 worker 内部: - -```text -/mnt/data/vol1 -/mnt/data/vol2 -... -``` - -后续可以通过直接改写某个 PV 文件模拟已经落盘的数据损坏: - -```bash -docker exec rustfs-e2e-worker sh -c ' - f=$(find /mnt/data/vol1 -type f -size +4096c | head -n1) - dd if=/dev/urandom of="$f" bs=4096 count=1 seek=1 conv=notrunc -' -``` +真实集群不能假设能够直接访问宿主机或 CSI 后端文件。该场景必须在专用存储测试环境中,通过存储后端提供的故障工具、快照克隆或块设备测试接口实现。 这个场景比 `IOChaos mistake` 更接近真实“落盘数据已经损坏”,但也更危险: @@ -543,58 +524,67 @@ docker exec rustfs-e2e-worker sh -c ' 第一版完整流程建议如下: ```text -1. 读取 E2eConfig -2. 检查 RUSTFS_E2E_LIVE=1 -3. 检查 RUSTFS_E2E_DESTRUCTIVE=1 -4. 检查 kube context == kind-rustfs-e2e +1. 读取 FaultTestConfig +2. 检查 RUSTFS_FAULT_TEST_DESTRUCTIVE=1 +3. 读取当前 kube context 并拒绝 kind-* context +4. 检查 RUSTFS_FAULT_TEST_STORAGE_CLASS 已配置 5. 检查 Chaos Mesh CRD 存在 -6. 准备 local PV -7. 创建 e2e Tenant -8. 等待 Tenant Ready -9. 启动 Tenant S3 port-forward -10. 创建测试 bucket -11. 预写入一批对象,记录 key 和 sha256 -12. 启动后台 verifier 持续读取已提交对象 -13. apply Chaos Mesh 故障资源 -14. 故障期间继续执行混合 S3 workload -15. delete Chaos Mesh 故障资源 -16. 等待 Tenant 再次 Ready -17. 
对所有成功 PUT 对象做最终 GET + sha256 校验 -18. 生成 checker report -19. 成功则清理测试资源 -20. 失败则收集 Kubernetes artifacts +6. 检查 fault-test namespace 不存在,或所有权标记与配置完全匹配 +7. reset 专用 fault-test Tenant/PVC +8. namespace 不存在时由 runner 使用 create 创建带所有权标记的 fault-test namespace;不得通过 apply 认领竞态中出现的同名 namespace +9. 创建真实集群 fault-test Tenant +10. 等待 Tenant Ready +11. 启动 Tenant S3 port-forward +12. 创建测试 bucket +13. 预写入一批对象,记录 key 和 sha256 +14. 启动后台 verifier 持续读取已提交对象 +15. apply Chaos Mesh 故障资源 +16. 故障期间继续执行混合 S3 workload +17. delete Chaos Mesh 故障资源 +18. 等待 Tenant 再次 Ready +19. 对所有成功 PUT 对象做最终 GET + sha256 校验 +20. 生成 checker report +21. 成功则清理测试资源 +22. 失败则收集 Kubernetes artifacts ``` 伪代码: ```rust #[tokio::test] -#[ignore = "destructive fault scenario; run through `make e2e-live-faults`"] +#[ignore = "destructive fault scenario; run through `make fault-test`"] async fn fault_io_eio_preserves_committed_objects() -> Result<()> { - let config = E2eConfig::from_env(); + let config = FaultTestConfig::from_env()?; - live::require_live_enabled(&config)?; - live::ensure_dedicated_context(&config)?; - live::require_destructive_enabled(&config)?; - chaos_mesh::require_iochaos_crd(&config)?; + config.require_destructive_enabled()?; + chaos_mesh::require_iochaos_crd(&config.cluster)?; let result = async { - storage::prepare_local_storage(&config)?; - resources::apply_smoke_tenant_resources(&config)?; + resources::reset_fault_tenant_resources(&config.cluster)?; + resources::apply_fault_tenant_resources(&config.cluster)?; let client = kube_client::default_client().await?; - let tenants = kube_client::tenant_api(client.clone(), &config.test_namespace); - wait::wait_for_tenant_ready(tenants, &config.tenant_name, config.timeout).await?; + let tenants = kube_client::tenant_api(client.clone(), &config.cluster.test_namespace); + wait::wait_for_tenant_ready( + tenants, + &config.cluster.tenant_name, + config.cluster.timeout, + ) + .await?; - let mut port_forward = PortForwardSpec::start_tenant_io(&config)?; - let s3 = 
s3_workload::Client::from_tenant_port_forward(&config, &mut port_forward).await?; + let mut port_forward = PortForwardSpec::start_tenant_io(&config.cluster)?; + let s3 = s3_workload::Client::from_tenant_port_forward( + &config.cluster, + &mut port_forward, + ) + .await?; let mut history = history::Recorder::new("io-eio")?; s3.create_bucket().await?; s3.prefill_objects(&mut history).await?; let chaos = chaos_mesh::IoChaos::eio_on_rustfs_volume( - &config, + &config.cluster, "/data/rustfs0", 20, Duration::from_secs(60), @@ -605,9 +595,9 @@ async fn fault_io_eio_preserves_committed_objects() -> Result<()> { drop(guard); wait::wait_for_tenant_ready( - kube_client::tenant_api(client, &config.test_namespace), - &config.tenant_name, - config.timeout, + kube_client::tenant_api(client, &config.cluster.test_namespace), + &config.cluster.tenant_name, + config.cluster.timeout, ) .await?; @@ -632,9 +622,9 @@ async fn fault_io_eio_preserves_committed_objects() -> Result<()> { `chaos_mesh.rs` 建议提供这些能力: ```rust -pub fn require_iochaos_crd(config: &E2eConfig) -> Result<()>; -pub fn require_podchaos_crd(config: &E2eConfig) -> Result<()>; -pub fn require_networkchaos_crd(config: &E2eConfig) -> Result<()>; +pub fn require_iochaos_crd(config: &ClusterTestConfig) -> Result<()>; +pub fn require_podchaos_crd(config: &ClusterTestConfig) -> Result<()>; +pub fn require_networkchaos_crd(config: &ClusterTestConfig) -> Result<()>; pub struct ChaosGuard { name: String, @@ -667,7 +657,7 @@ pub struct IoChaosSpec { - apply 前检查 CRD 是否存在。 - apply 后可以 `kubectl describe` 保存到 artifacts。 - 删除时必须 best-effort,不应 panic。 -- 每个资源都带 `rustfs-e2e/run-id` label。 +- 每个资源都带 `rustfs-fault-test/run-id` label。 - 允许按 label 清理上一次异常残留。 ## S3 workload 模块设计 @@ -767,27 +757,27 @@ pods-describe.txt - `chaos-describe-.txt` / `chaos-.yaml`:在故障资源被清理前保留 Chaos Mesh 现场。 - `rustfs-pods-current.log`:定位 RustFS 如何处理故障。 - `events.yaml`:定位 Kubernetes 层是否出现调度、挂载、重启问题。 -- `pv-paths.txt`:定位具体 PVC/PV/worker/hostPath 映射。 +- 
`pv-paths.txt`:定位具体 PVC/PV、StorageClass 和节点映射。 ## Makefile 入口 -保留现有总入口: +使用独立入口: ```bash -make e2e-live-faults +RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test ``` -该入口使用当前 `kubectl` context。Kind context 会重置 e2e local PV;非 Kind context 会跳过 Kind local storage reset,并使用 `RUSTFS_E2E_STORAGE_CLASS` 指向的集群 StorageClass。 +该入口使用当前 `kubectl` context,拒绝 Kind,并使用 `RUSTFS_FAULT_TEST_STORAGE_CLASS` 指向的真实集群动态 StorageClass。 后续可以增加聚焦入口,方便本地调试: ```makefile -e2e-live-faults-io: - RUSTFS_E2E_LIVE=1 RUSTFS_E2E_DESTRUCTIVE=1 RUSTFS_E2E_FAULT_SCENARIO=io-eio \ +fault-test-io: + RUSTFS_FAULT_TEST_DESTRUCTIVE=1 RUSTFS_FAULT_TEST_SCENARIO=io-eio \ cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --nocapture -e2e-live-faults-pod: - RUSTFS_E2E_LIVE=1 RUSTFS_E2E_DESTRUCTIVE=1 RUSTFS_E2E_FAULT_SCENARIO=pod-kill-one \ +fault-test-pod: + RUSTFS_FAULT_TEST_DESTRUCTIVE=1 RUSTFS_FAULT_TEST_SCENARIO=pod-kill-one \ cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --nocapture ``` @@ -810,22 +800,23 @@ fault_io_eio_preserves_committed_objects 它应该包含: -1. live/destructive/current context guard。 +1. destructive/current real Kubernetes context guard。 2. Chaos Mesh `IOChaos` CRD 检查。 -3. 启动前按 `app.kubernetes.io/managed-by=rustfs-operator-e2e` 清理上次异常残留的 `IOChaos`。 -4. `io-eio` case 前 reset Tenant/PVC/PV;Kind context 同时 reset local PV,真实集群使用配置的 StorageClass。 -5. Tenant 创建和 Ready 等待。 -6. S3 bucket 创建。 -7. S3 prefill 对象并记录 hash;prefill 阶段必须明确成功,避免空用例通过。 -8. apply `IOChaos fault errno=5`。 -9. 等待 `IOChaos` 进入已选择目标且已注入状态,再开始故障 workload。 -10. 故障期间持续读写并输出 `workload-summary.json`。 -11. workload 结束后确认 `IOChaos` 仍处于 active,避免 workload 跑出故障窗口。 -12. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 Chaos Mesh describe/yaml,再触发 cleanup。 -13. delete `IOChaos`。 -14. Tenant 恢复 Ready 等待。 -15. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 -16. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 +3. 启动前按 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` 清理上次异常残留的 `IOChaos`。 +4. 
reset 前验证 namespace 所有权标记;未标记或 Tenant 不匹配时 fail closed。 +5. `io-eio` case 前 reset Tenant/PVC;真实集群使用配置的动态 StorageClass。 +6. Tenant 创建和 Ready 等待。 +7. S3 bucket 创建。 +8. S3 prefill 对象并记录 hash;prefill 阶段必须明确成功,避免空用例通过。 +9. apply `IOChaos fault errno=5`。 +10. 等待 `IOChaos` 进入已选择目标且已注入状态,再开始故障 workload。 +11. 故障期间持续读写并输出 `workload-summary.json`。 +12. workload 结束后确认 `IOChaos` 仍处于 active,避免 workload 跑出故障窗口。 +13. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 Chaos Mesh describe/yaml,再触发 cleanup。 +14. delete `IOChaos`。 +15. Tenant 恢复 Ready 等待。 +16. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 +17. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 17. AWS SDK error 按 service failure / timeout / dispatch-response unknown 分类写入 history。 18. history、workload summary 和 checker report 输出。 19. 失败时 artifacts 收集。 @@ -849,11 +840,11 @@ fault_io_eio_preserves_committed_objects 验收: - `make e2e-check` 通过。 -- `make e2e-live-faults` 可在当前 kubectl context 运行 `io-eio`;Kind 和真实 Kubernetes 测试集群均可。 +- `RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test` 可在当前真实 Kubernetes 测试集群运行 `io-eio`,并拒绝 Kind。 - 如果 committed object 丢失,测试失败。 - 如果 successful GET 返回错误字节,测试失败。 - 如果 workload 跑出 IOChaos active 窗口,测试失败。 -- case inventory 中清晰标注该用例边界为 `rustfs-workload/fault-injection`,不归类为 Operator 控制面测试。 +- fault runner 不进入 Kind e2e case inventory;其边界是 `rustfs-workload/fault-injection`。 ### Phase 2:进程和网络故障 @@ -894,7 +885,7 @@ fault_io_eio_preserves_committed_objects - 研究 `dm-flakey`、`dm-error`、loop device-backed PV。 - 只在 Linux runner 或专用环境启用。 -- 不进入默认本地 Kind 流程。 +- 不进入默认 fault-test 流程。 这个阶段更接近真实磁盘坏块,但环境成本明显更高。 @@ -902,7 +893,7 @@ fault_io_eio_preserves_committed_objects | 框架或工具 | 当前项目定位 | | --- | --- | -| 当前 e2e harness | Operator 编排、Tenant 生命周期、artifacts 收集。 | +| 共享测试基础设施 | Operator 编排、Tenant 生命周期、artifacts 收集。 | | Chaos Mesh | Kubernetes-native nemesis,负责制造故障。 | | Jepsen-like checker | 判断对象存储 correctness,不制造故障。 | | MinIO Mint | 后续用于 S3 API 兼容性,不作为故障 checker。 | @@ -915,7 +906,7 @@ fault_io_eio_preserves_committed_objects 当前最优组合: ```text -RustFS 
Operator e2e +RustFS real-cluster fault-test runner + Chaos Mesh + Rust-native S3 workload + Jepsen-like object checker diff --git a/Makefile b/Makefile index b7df9b7..6fda94e 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ .PHONY: pre-commit fmt fmt-check clippy test build help .PHONY: docker-build-operator docker-build-console-web docker-build-all .PHONY: console-lint console-build console-fmt console-fmt-check -.PHONY: e2e-check e2e-live-create .e2e-live-install-cert-manager e2e-live-run e2e-live-faults e2e-live-update e2e-live-delete +.PHONY: e2e-check e2e-live-create .e2e-live-install-cert-manager e2e-live-run e2e-live-update e2e-live-delete fault-test # Default target IMAGE_REPO ?= rustfs/operator @@ -43,7 +43,7 @@ help: @echo " make e2e-check - Check Rust-native e2e harness (fmt + test + clippy)" @echo " make e2e-live-create - Clean dedicated storage, recreate live Kind environment, install cert-manager, and load e2e image" @echo " make e2e-live-run - Run all non-destructive live suites in the existing live environment" - @echo " make e2e-live-faults - Run destructive fault suites against the current kubectl context" + @echo " make fault-test - Run destructive fault tests against the current real Kubernetes context" @echo " make e2e-live-update - Rebuild image and update the live environment (load + rollout)" @echo " make e2e-live-delete - Delete live Kind environment and clean dedicated storage" @@ -96,6 +96,8 @@ CERT_MANAGER_VERSION ?= v1.16.2 CERT_MANAGER_MANIFEST_URL ?= https://github.com/cert-manager/cert-manager/releases/download/$(CERT_MANAGER_VERSION)/cert-manager.yaml CERT_MANAGER_ROLLOUT_TIMEOUT ?= 180s E2E_LIVE_ENV ?= RUSTFS_E2E_LIVE=1 RUSTFS_E2E_CERT_MANAGER_VERSION=$(CERT_MANAGER_VERSION) +FAULT_TEST_MANIFEST ?= e2e/Cargo.toml +FAULT_TEST_THREADS ?= 1 # Rust-native e2e harness checks (non-live; ignored live tests remain opt-in) e2e-check: @@ -130,9 +132,10 @@ e2e-live-run: RUSTFS_E2E_LIVE=1 cargo test --manifest-path $(E2E_MANIFEST) 
--test cert_manager_tls -- --ignored --test-threads=$(E2E_TEST_THREADS) --nocapture @echo "configured live e2e suites passed." -e2e-live-faults: - @echo "running destructive fault e2e against current kubectl context: $$(kubectl config current-context)" - RUSTFS_E2E_LIVE=1 RUSTFS_E2E_DESTRUCTIVE=1 cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --test-threads=$(E2E_TEST_THREADS) --nocapture +fault-test: + @test -n "$(RUSTFS_FAULT_TEST_STORAGE_CLASS)" || (echo "RUSTFS_FAULT_TEST_STORAGE_CLASS is required" && exit 1) + @echo "running destructive RustFS fault tests against current Kubernetes context: $$(kubectl config current-context)" + RUSTFS_FAULT_TEST_DESTRUCTIVE=1 cargo test --manifest-path $(FAULT_TEST_MANIFEST) --test faults -- --ignored --test-threads=$(FAULT_TEST_THREADS) --nocapture e2e-live-update: docker build --network host -t rustfs/operator:e2e . diff --git a/README.md b/README.md index abece1b..4b18a6a 100755 --- a/README.md +++ b/README.md @@ -73,12 +73,23 @@ From the repo root: | `make e2e-check` | Validate the e2e harness without creating a live cluster. | | `make e2e-live-create` | Build e2e images, recreate the dedicated Kind cluster, install cert-manager, and load images. | | `make e2e-live-run` | Deploy the dev control plane and run all non-destructive live suites. | -| `make e2e-live-faults` | Run destructive fault suites against the current kubectl context. | +| `make fault-test` | Run destructive RustFS fault tests against the current real Kubernetes context. | | `make e2e-live-update` | Rebuild images, reload them into Kind, and roll out control-plane deployments. | | `make e2e-live-delete` | Delete the dedicated Kind cluster and its local storage. | CI (`.github/workflows/ci.yml`) runs Rust tests (including `nextest`), `cargo fmt --check`, `clippy`, the Rust-native e2e harness checks, and `console-web` lint/build/format checks. Use **`make pre-commit`** before opening a PR so local validation stays aligned. 
+### Run fault tests on a real Kubernetes cluster + +Fault tests are separate from the Kind e2e workflow. They use the current kubectl context, reject `kind-*` contexts, reset a dedicated fault-test Tenant, and require Chaos Mesh plus a dynamic StorageClass: + +```bash +kubectl config use-context <real-cluster-context> +RUSTFS_FAULT_TEST_STORAGE_CLASS=<storage-class> make fault-test +``` + +The test runner creates the default `rustfs-fault-test` namespace with ownership metadata before creating the credential Secret and Tenant. Override it only with another dedicated test namespace using `RUSTFS_FAULT_TEST_NAMESPACE`. If the namespace already exists, destructive reset is allowed only when its `app.kubernetes.io/managed-by` label and `rustfs.com/fault-test-tenant` annotation match the configured fault-test Tenant. The runner never adds these ownership markers to an existing namespace. + Contribution workflow, commit style, and PR expectations: [`CONTRIBUTING.md`](CONTRIBUTING.md). ### Run a local controller against e2e @@ -160,7 +171,7 @@ Then use `http://127.0.0.1:19000` for the Tenant S3 API and `http://127.0.0.1:19 - `deploy/rustfs-operator/` — Helm chart, templates, values, and packaged CRDs. - `deploy/k8s-dev/` — Development manifests used by the dev/e2e deployment flows. - `deploy/kind/` — Kind cluster configuration for local development. -- **e2e/** — Rust-native Kind e2e harness, live test suites, and dedicated manifests. +- **e2e/** — Rust-native Kind e2e harness plus shared implementation modules for the separate real-cluster fault-test runner. - **examples/** — Sample `Tenant` custom resources and usage notes. - **docs/** — Design notes, GA planning material, and supporting images. - **assets/** — README and documentation images.
diff --git a/e2e/README.md b/e2e/README.md index b8f797c..1d02891 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -1,6 +1,6 @@ # RustFS Operator E2E Harness -This crate is the Rust-native integration-test harness for release-grade validation of the RustFS Operator and its Console API. +This crate provides the Rust-native Kind e2e harness and shared primitives used by the separate real-cluster fault-test runner. The harness is intentionally separated from the main operator crate so e2e-only dependencies stay scoped to the `e2e/` manifest while still being validated by `make e2e-check` and the default `make pre-commit` path. It is driven through the reduced live entrypoints `e2e-live-create`, `e2e-live-run`, `e2e-live-update`, and `e2e-live-delete`. @@ -22,7 +22,8 @@ e2e/ lib.rs bin/rustfs-e2e.rs Makefile-internal helper for live workflow steps framework/ - config.rs environment and CI knobs + config.rs dedicated Kind e2e configuration + fault_config.rs real-cluster fault-test configuration and safety checks command.rs safe subprocess wrapper for kind/docker/kubectl kind.rs Kind cluster lifecycle and host mount preparation kubectl.rs kubectl command construction boundary @@ -37,7 +38,7 @@ e2e/ resources.rs namespace/Secret/Tenant apply boundary storage.rs local StorageClass/PV preparation boundary assertions.rs Kubernetes and Tenant status assertions - tenant_factory.rs reusable Tenant manifests for e2e + tenant_factory.rs Kind-local and real-cluster Tenant templates cases/ smoke.rs install and readiness checks operator.rs Tenant status and observed-generation checks @@ -46,7 +47,7 @@ e2e/ smoke.rs ignored live smoke entrypoints operator.rs ignored live Operator assertion console.rs ignored live Console API assertion - faults.rs non-live destructive opt-in guard + faults.rs real-cluster destructive fault-test runner; not part of e2e case inventory ``` ## Boundary rules @@ -55,10 +56,12 @@ e2e/ 2. 
`framework::kubectl` is the shell/Kubernetes YAML boundary and must always pin `--context`. 3. `framework::kube_client` is the typed Kubernetes API boundary. 4. `framework::console_client` is the HTTP boundary for Console API tests. -5. `framework::storage` owns e2e local PV setup; `framework::resources` owns e2e namespace/Secret/Tenant setup. +5. `framework::storage` owns Kind local PV setup; `framework::resources` owns shared namespace/Secret/Tenant lifecycle. 6. `framework::live` owns live-run opt-in and dedicated-context checks. 7. `cases/*` should describe behavior and call framework helpers; avoid shell details there. -8. Destructive tests must use dedicated e2e namespaces and must never run against an arbitrary current context. +8. Kind e2e cases remain in `cases/*`; real-cluster fault tests are intentionally excluded from that inventory. +9. Fault tests use `FaultTestConfig`, reject Kind contexts, require a dedicated namespace and StorageClass, and never use Kind local-volume assumptions. +10. The fault-test runner creates its namespace with ownership metadata. Existing namespaces must already have the matching manager label and Tenant annotation before destructive reset is allowed. ## Safety defaults @@ -85,6 +88,29 @@ make e2e-live-run The harness refuses to run live tests unless the active Kubernetes context matches the configured dedicated Kind context. +Fault tests have separate safety defaults and environment variables: + +```text +context: current non-Kind kubectl context +test namespace: rustfs-fault-test +tenant name: fault-test-tenant +storage class: required via RUSTFS_FAULT_TEST_STORAGE_CLASS +artifacts: target/fault-tests/artifacts +``` + +Run them independently from the Kind lifecycle: + +```bash +RUSTFS_FAULT_TEST_STORAGE_CLASS=<storage-class> make fault-test +``` + +The runner creates an absent namespace through `kubectl create` before applying the credential Secret and Tenant.
It refuses to reset or claim an existing namespace unless these values already match: + +```text +app.kubernetes.io/managed-by=rustfs-operator-fault-test +rustfs.com/fault-test-tenant=<tenant-name> +``` + ## Non-live validation ```bash diff --git a/e2e/src/bin/rustfs-e2e.rs b/e2e/src/bin/rustfs-e2e.rs index a3dc28b..b662ac4 100644 --- a/e2e/src/bin/rustfs-e2e.rs +++ b/e2e/src/bin/rustfs-e2e.rs @@ -110,7 +110,7 @@ fn sanitize_live_storage(config: &E2eConfig) -> Result<()> { fn reset_live_fixtures(config: &E2eConfig) -> Result<()> { live::require_live_enabled(config)?; live::ensure_dedicated_context(config)?; - resources::reset_smoke_tenant_resources(config)?; + resources::reset_tenant_resources(config)?; storage::reset_default_local_storage(config)?; cert_manager_tls::reset_positive_case_resources(config)?; Ok(()) diff --git a/e2e/src/cases/faults.rs b/e2e/src/cases/faults.rs deleted file mode 100644 index c89d9c3..0000000 --- a/e2e/src/cases/faults.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2025 RustFS Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -use super::{CaseSpec, Suite}; - -pub fn cases() -> Vec { - vec![CaseSpec::new( - Suite::Faults, - "fault_io_eio_preserves_committed_objects", - "Inject IOChaos EIO into one RustFS data volume and verify committed S3 objects remain readable with matching hashes after recovery.", - "rustfs-workload/fault-injection", - "faults", - )] -} - -#[cfg(test)] -mod tests { - use super::cases; - - #[test] - fn fault_case_inventory_matches_executable_tests() { - let names = cases() - .into_iter() - .map(|case| case.name) - .collect::>(); - - assert_eq!(names, vec!["fault_io_eio_preserves_committed_objects"]); - } -} diff --git a/e2e/src/cases/mod.rs b/e2e/src/cases/mod.rs index 022ba73..04933f3 100644 --- a/e2e/src/cases/mod.rs +++ b/e2e/src/cases/mod.rs @@ -14,7 +14,6 @@ pub mod cert_manager_tls; pub mod console; -pub mod faults; pub mod operator; pub mod smoke; pub mod sts; @@ -26,7 +25,6 @@ pub enum Suite { Console, Sts, CertManagerTls, - Faults, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -63,7 +61,6 @@ pub fn all_cases() -> Vec { cases.extend(sts::cases()); cases.extend(console::cases()); cases.extend(cert_manager_tls::cases()); - cases.extend(faults::cases()); cases } @@ -82,7 +79,6 @@ mod tests { assert!(suites.contains(&Suite::Sts)); assert!(suites.contains(&Suite::Console)); assert!(suites.contains(&Suite::CertManagerTls)); - assert!(suites.contains(&Suite::Faults)); } #[test] @@ -131,6 +127,5 @@ mod tests { .unwrap_or_default(), 9 ); - assert_eq!(counts.get(&Suite::Faults).copied().unwrap_or_default(), 1); } } diff --git a/e2e/src/framework/artifacts.rs b/e2e/src/framework/artifacts.rs index a7942f4..25455d5 100644 --- a/e2e/src/framework/artifacts.rs +++ b/e2e/src/framework/artifacts.rs @@ -16,7 +16,7 @@ use anyhow::Result; use std::fs; use std::path::{Path, PathBuf}; -use crate::framework::{command::CommandSpec, config::E2eConfig, kubectl::Kubectl}; +use crate::framework::{command::CommandSpec, config::ClusterTestConfig, kubectl::Kubectl}; const 
ERASURE_READ_QUORUM: &str = "erasure read quorum"; const DNS_LOOKUP_FAILURE: &str = "failed to lookup address information"; @@ -62,7 +62,7 @@ impl ArtifactCollector { pub fn collect_kubernetes_snapshot( &self, case_name: &str, - config: &E2eConfig, + config: &ClusterTestConfig, ) -> Result { let mut combined_output = String::new(); @@ -90,7 +90,7 @@ impl ArtifactCollector { } } -fn kubernetes_snapshot_commands(config: &E2eConfig) -> Vec { +fn kubernetes_snapshot_commands(config: &ClusterTestConfig) -> Vec { let kubectl = Kubectl::new(config); let operator_kubectl = Kubectl::new(config).namespaced(&config.operator_namespace); let test_kubectl = Kubectl::new(config).namespaced(&config.test_namespace); @@ -193,7 +193,7 @@ fn kubernetes_snapshot_commands(config: &E2eConfig) -> Vec { fn diagnose_snapshot(snapshot: &str) -> String { let mut lines = vec![ - "RustFS Operator e2e diagnostic summary".to_string(), + "RustFS Operator test diagnostic summary".to_string(), String::new(), ]; let mut matched = false; @@ -203,9 +203,8 @@ fn diagnose_snapshot(snapshot: &str) -> String { lines.extend([ format!("Detected `{ERASURE_READ_QUORUM}` in RustFS pod logs."), "Meaning: RustFS ECStore could not read a majority of matching erasure format metadata during startup.".to_string(), - "Most likely live-e2e causes: stale or partially initialized data in dedicated local PV host paths, peer startup/DNS timing, or a RustFS bootstrap retry window that ended before quorum converged.".to_string(), + "Most likely test causes: stale or partially initialized volumes, peer startup/DNS timing, or a RustFS bootstrap retry window that ended before quorum converged.".to_string(), "Inspect: rustfs-pods-current.log, rustfs-pods-previous.log, tenant-describe.txt, rustfs-pods-describe.txt, and pv-paths.txt.".to_string(), - "Recovery for the dedicated e2e cluster: RUSTFS_E2E_LIVE=1 make e2e-live-delete && RUSTFS_E2E_LIVE=1 make e2e-live-create && RUSTFS_E2E_LIVE=1 make e2e-live-run".to_string(), 
String::new(), ]); } @@ -293,7 +292,7 @@ mod tests { assert!(diagnosis.contains("Detected `erasure read quorum`")); assert!(diagnosis.contains("ECStore could not read a majority")); - assert!(diagnosis.contains("e2e-live-delete")); + assert!(diagnosis.contains("stale or partially initialized volumes")); } #[test] diff --git a/e2e/src/framework/cert_manager_tls.rs b/e2e/src/framework/cert_manager_tls.rs index 15caa66..78acfab 100644 --- a/e2e/src/framework/cert_manager_tls.rs +++ b/e2e/src/framework/cert_manager_tls.rs @@ -101,7 +101,7 @@ pub fn external_secret_storage_layout(config: &E2eConfig) -> storage::LocalStora pub fn reset_positive_case_resources(config: &E2eConfig) -> Result<()> { let managed = managed_certificate_case_config(config); - resources::reset_smoke_tenant_resources(&managed)?; + resources::reset_tenant_resources(&managed)?; storage::reset_local_storage_for_layout( &managed, &managed_certificate_storage_layout(&managed), @@ -109,7 +109,7 @@ pub fn reset_positive_case_resources(config: &E2eConfig) -> Result<()> { .context("reset managed cert-manager TLS e2e storage")?; let external = external_secret_case_config(config); - resources::reset_smoke_tenant_resources(&external)?; + resources::reset_tenant_resources(&external)?; storage::reset_local_storage_for_layout(&external, &external_secret_storage_layout(&external)) .context("reset external Secret cert-manager TLS e2e storage")?; diff --git a/e2e/src/framework/chaos_mesh.rs b/e2e/src/framework/chaos_mesh.rs index 6725a8e..f2bacd9 100644 --- a/e2e/src/framework/chaos_mesh.rs +++ b/e2e/src/framework/chaos_mesh.rs @@ -17,12 +17,13 @@ use serde_json::Value; use std::thread::sleep; use std::time::{Duration, Instant}; -use crate::framework::{config::E2eConfig, kubectl::Kubectl}; +use crate::framework::{config::ClusterTestConfig, kubectl::Kubectl}; const IOCHAOS_CRD: &str = "iochaos.chaos-mesh.org"; -const RUN_ID_LABEL: &str = "rustfs-e2e/run-id"; -const SCENARIO_LABEL: &str = "rustfs-e2e/scenario"; +const 
RUN_ID_LABEL: &str = "rustfs-fault-test/run-id"; +const SCENARIO_LABEL: &str = "rustfs-fault-test/scenario"; const MANAGED_BY_LABEL: &str = "app.kubernetes.io/managed-by"; +const MANAGED_BY_VALUE: &str = "rustfs-operator-fault-test"; #[derive(Debug, Clone, PartialEq, Eq)] pub struct IoChaosSpec { @@ -42,7 +43,7 @@ pub struct IoChaosSpec { #[derive(Debug, Clone)] pub struct ChaosGuard { - config: E2eConfig, + config: ClusterTestConfig, kind: &'static str, namespace: String, name: String, @@ -51,7 +52,8 @@ pub struct ChaosGuard { impl IoChaosSpec { pub fn eio_on_rustfs_volume( - config: &E2eConfig, + config: &ClusterTestConfig, + chaos_namespace: impl Into, run_id: impl Into, scenario: impl Into, volume_path: impl Into, @@ -72,8 +74,8 @@ impl IoChaosSpec { let scenario = scenario.into(); Ok(Self { - name: format!("rustfs-e2e-io-eio-{short_run_id}"), - namespace: config.chaos_namespace.clone(), + name: format!("rustfs-fault-io-eio-{short_run_id}"), + namespace: chaos_namespace.into(), run_id, scenario, target_namespace: config.test_namespace.clone(), @@ -105,7 +107,7 @@ metadata: labels: {run_id_label}: "{run_id}" {scenario_label}: "{scenario}" - {managed_by_label}: rustfs-operator-e2e + {managed_by_label}: {managed_by_value} spec: action: fault mode: one @@ -131,6 +133,7 @@ spec: scenario_label = SCENARIO_LABEL, scenario = self.scenario, managed_by_label = MANAGED_BY_LABEL, + managed_by_value = MANAGED_BY_VALUE, target_namespace = self.target_namespace, tenant_name = self.tenant_name, container_name = self.container_name, @@ -142,20 +145,20 @@ spec: } } -pub fn require_iochaos_crd(config: &E2eConfig) -> Result<()> { +pub fn require_iochaos_crd(config: &ClusterTestConfig) -> Result<()> { let output = Kubectl::new(config) .command(["get", "crd", IOCHAOS_CRD]) .run()?; ensure!( output.code == Some(0), - "Chaos Mesh IOChaos CRD {IOCHAOS_CRD} is required for fault e2e; install Chaos Mesh before running faults\nstdout:\n{}\nstderr:\n{}", + "Chaos Mesh IOChaos CRD 
{IOCHAOS_CRD} is required for fault tests; install Chaos Mesh before running faults\nstdout:\n{}\nstderr:\n{}", output.stdout, output.stderr ); Ok(()) } -pub fn cleanup_run(config: &E2eConfig, namespace: &str, run_id: &str) -> Result<()> { +pub fn cleanup_run(config: &ClusterTestConfig, namespace: &str, run_id: &str) -> Result<()> { let selector = format!("{RUN_ID_LABEL}={run_id}"); Kubectl::new(config) .namespaced(namespace) @@ -164,8 +167,8 @@ pub fn cleanup_run(config: &E2eConfig, namespace: &str, run_id: &str) -> Result< Ok(()) } -pub fn cleanup_managed_iochaos(config: &E2eConfig, namespace: &str) -> Result<()> { - let selector = format!("{MANAGED_BY_LABEL}=rustfs-operator-e2e"); +pub fn cleanup_managed_iochaos(config: &ClusterTestConfig, namespace: &str) -> Result<()> { + let selector = format!("{MANAGED_BY_LABEL}={MANAGED_BY_VALUE}"); Kubectl::new(config) .namespaced(namespace) .command(["delete", "iochaos", "-l", &selector, "--ignore-not-found"]) @@ -173,7 +176,7 @@ pub fn cleanup_managed_iochaos(config: &E2eConfig, namespace: &str) -> Result<() Ok(()) } -pub fn apply_iochaos(config: &E2eConfig, spec: &IoChaosSpec) -> Result { +pub fn apply_iochaos(config: &ClusterTestConfig, spec: &IoChaosSpec) -> Result { cleanup_run(config, &spec.namespace, &spec.run_id)?; Kubectl::new(config) .namespaced(&spec.namespace) @@ -302,14 +305,15 @@ impl Drop for ChaosGuard { #[cfg(test)] mod tests { use super::{IoChaosSpec, iochaos_is_active}; - use crate::framework::config::E2eConfig; + use crate::framework::fault_config::FaultTestConfig; use std::time::Duration; #[test] fn iochaos_manifest_targets_rustfs_workload_only() { - let config = E2eConfig::defaults(); + let config = FaultTestConfig::for_test("real-cluster", "fast-csi"); let spec = IoChaosSpec::eio_on_rustfs_volume( - &config, + &config.cluster, + "chaos-mesh", "run-1234567890", "io-eio", "/data/rustfs0", @@ -321,7 +325,9 @@ mod tests { assert!(manifest.contains("kind: IOChaos")); assert!(manifest.contains("namespace: 
chaos-mesh")); - assert!(manifest.contains("rustfs.tenant: e2e-tenant")); + assert!(manifest.contains("rustfs.tenant: fault-test-tenant")); + assert!(manifest.contains("rustfs-fault-test/run-id")); + assert!(manifest.contains("rustfs-operator-fault-test")); assert!(manifest.contains("containerNames:\n - rustfs")); assert!(manifest.contains("volumePath: /data/rustfs0")); assert!(manifest.contains("errno: 5")); diff --git a/e2e/src/framework/config.rs b/e2e/src/framework/config.rs index 4b45898..c0c079f 100644 --- a/e2e/src/framework/config.rs +++ b/e2e/src/framework/config.rs @@ -13,6 +13,7 @@ // limitations under the License. use operator::types::v1alpha1::k8s::PodManagementPolicy; +use std::ops::{Deref, DerefMut}; use std::path::PathBuf; use std::time::Duration; @@ -23,32 +24,43 @@ pub const DEFAULT_CERT_MANAGER_VERSION: &str = "v1.16.2"; pub const KIND_WORKER_COUNT: usize = 3; #[derive(Debug, Clone)] -pub struct E2eConfig { - pub cluster_name: String, +pub struct ClusterTestConfig { pub context: String, pub operator_namespace: String, pub test_namespace_prefix: String, pub test_namespace: String, pub tenant_name: String, pub storage_class: String, + pub rustfs_image: String, + pub pod_management_policy: Option, + pub artifacts_dir: PathBuf, + pub timeout: Duration, +} + +#[derive(Debug, Clone)] +pub struct E2eConfig { + pub cluster: ClusterTestConfig, + pub cluster_name: String, pub pv_count: usize, pub operator_image: String, pub console_web_image: String, - pub rustfs_image: String, pub cert_manager_version: String, - pub pod_management_policy: Option, pub kind_config: PathBuf, - pub artifacts_dir: PathBuf, pub live_enabled: bool, - pub destructive_enabled: bool, - pub fault_scenario: String, - pub fault_duration: Duration, - pub fault_percent: u8, - pub fault_workload_objects: usize, - pub fault_request_timeout: Duration, - pub fault_require_client_disruption: bool, - pub chaos_namespace: String, - pub timeout: Duration, +} + +impl Deref for E2eConfig { + type 
Target = ClusterTestConfig; + + fn deref(&self) -> &Self::Target { + &self.cluster + } +} + +impl DerefMut for E2eConfig { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.cluster + } } impl E2eConfig { @@ -71,17 +83,30 @@ impl E2eConfig { let test_namespace = env_or(&get_env, "RUSTFS_E2E_NAMESPACE", &test_namespace_default); Self { + cluster: ClusterTestConfig { + context, + operator_namespace: env_or( + &get_env, + "RUSTFS_E2E_OPERATOR_NAMESPACE", + "rustfs-system", + ), + test_namespace_prefix, + test_namespace, + tenant_name: env_or(&get_env, "RUSTFS_E2E_TENANT", "e2e-tenant"), + storage_class: env_or(&get_env, "RUSTFS_E2E_STORAGE_CLASS", "local-storage"), + rustfs_image: env_or(&get_env, "RUSTFS_E2E_SERVER_IMAGE", DEFAULT_RUSTFS_IMAGE), + artifacts_dir: PathBuf::from(env_or( + &get_env, + "RUSTFS_E2E_ARTIFACTS", + "target/e2e/artifacts", + )), + pod_management_policy: parse_pod_management_policy(&get_env), + timeout: Duration::from_secs(env_u64(&get_env, "RUSTFS_E2E_TIMEOUT_SECONDS", 300)), + }, cluster_name, - context, - operator_namespace: env_or(&get_env, "RUSTFS_E2E_OPERATOR_NAMESPACE", "rustfs-system"), - test_namespace_prefix, - test_namespace, - tenant_name: env_or(&get_env, "RUSTFS_E2E_TENANT", "e2e-tenant"), - storage_class: env_or(&get_env, "RUSTFS_E2E_STORAGE_CLASS", "local-storage"), pv_count: env_usize(&get_env, "RUSTFS_E2E_PV_COUNT", 12), operator_image: "rustfs/operator:e2e".to_string(), console_web_image: "rustfs/console-web:e2e".to_string(), - rustfs_image: env_or(&get_env, "RUSTFS_E2E_SERVER_IMAGE", DEFAULT_RUSTFS_IMAGE), cert_manager_version: env_or( &get_env, "RUSTFS_E2E_CERT_MANAGER_VERSION", @@ -92,33 +117,7 @@ impl E2eConfig { "RUSTFS_E2E_KIND_CONFIG", "e2e/manifests/kind-rustfs-e2e.yaml", )), - artifacts_dir: PathBuf::from(env_or( - &get_env, - "RUSTFS_E2E_ARTIFACTS", - "target/e2e/artifacts", - )), - pod_management_policy: parse_pod_management_policy(&get_env), live_enabled: env_bool(&get_env, "RUSTFS_E2E_LIVE"), - 
destructive_enabled: env_bool(&get_env, "RUSTFS_E2E_DESTRUCTIVE"), - fault_scenario: env_or(&get_env, "RUSTFS_E2E_FAULT_SCENARIO", "io-eio"), - fault_duration: Duration::from_secs(env_u64( - &get_env, - "RUSTFS_E2E_FAULT_DURATION_SECONDS", - 180, - )), - fault_percent: env_u8(&get_env, "RUSTFS_E2E_FAULT_PERCENT", 20), - fault_workload_objects: env_usize(&get_env, "RUSTFS_E2E_WORKLOAD_OBJECTS", 40), - fault_request_timeout: Duration::from_secs(env_u64( - &get_env, - "RUSTFS_E2E_FAULT_REQUEST_TIMEOUT_SECONDS", - 3, - )), - fault_require_client_disruption: env_bool( - &get_env, - "RUSTFS_E2E_FAULT_REQUIRE_CLIENT_DISRUPTION", - ), - chaos_namespace: env_or(&get_env, "RUSTFS_E2E_CHAOS_NAMESPACE", "chaos-mesh"), - timeout: Duration::from_secs(env_u64(&get_env, "RUSTFS_E2E_TIMEOUT_SECONDS", 300)), } } @@ -161,15 +160,6 @@ where .unwrap_or(default) } -fn env_u8(get_env: &F, name: &str, default: u8) -> u8 -where - F: Fn(&str) -> Option, -{ - get_env(name) - .and_then(|value| value.parse::().ok()) - .unwrap_or(default) -} - fn parse_pod_management_policy(get_env: &F) -> Option where F: Fn(&str) -> Option, @@ -199,16 +189,6 @@ mod tests { assert_eq!(config.storage_class, "local-storage"); assert_eq!(config.pv_count, 12); assert_eq!(config.rustfs_image, DEFAULT_RUSTFS_IMAGE); - assert_eq!(config.fault_scenario, "io-eio"); - assert_eq!(config.fault_duration, std::time::Duration::from_secs(180)); - assert_eq!(config.fault_percent, 20); - assert_eq!(config.fault_workload_objects, 40); - assert_eq!( - config.fault_request_timeout, - std::time::Duration::from_secs(3) - ); - assert!(!config.fault_require_client_disruption); - assert_eq!(config.chaos_namespace, "chaos-mesh"); assert_eq!(config.cert_manager_version, "v1.16.2"); assert_eq!( config.kind_config, diff --git a/e2e/src/framework/fault_config.rs b/e2e/src/framework/fault_config.rs new file mode 100644 index 0000000..5f79b8e --- /dev/null +++ b/e2e/src/framework/fault_config.rs @@ -0,0 +1,273 @@ +// Copyright 2025 RustFS Team 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::{Context, Result, ensure}; +use serde_json::Value; +use std::path::PathBuf; +use std::time::Duration; + +use crate::framework::{command::CommandSpec, config::ClusterTestConfig, kubectl::Kubectl}; + +#[derive(Debug, Clone)] +pub struct FaultTestConfig { + pub cluster: ClusterTestConfig, + pub destructive_enabled: bool, + pub scenario: String, + pub duration: Duration, + pub percent: u8, + pub workload_objects: usize, + pub request_timeout: Duration, + pub require_client_disruption: bool, + pub chaos_namespace: String, +} + +impl FaultTestConfig { + pub fn from_env() -> Result { + let context = current_context()?; + Self::from_env_with(|name| std::env::var(name).ok(), context) + } + + fn from_env_with(get_env: F, context: String) -> Result + where + F: Fn(&str) -> Option, + { + ensure!( + !context.starts_with("kind-"), + "fault tests require a real Kubernetes cluster; current context {context:?} is a Kind context" + ); + + let storage_class = required_env(&get_env, "RUSTFS_FAULT_TEST_STORAGE_CLASS")?; + let namespace = env_or(&get_env, "RUSTFS_FAULT_TEST_NAMESPACE", "rustfs-fault-test"); + let cluster = ClusterTestConfig { + context, + operator_namespace: env_or( + &get_env, + "RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE", + "rustfs-system", + ), + test_namespace_prefix: namespace.clone(), + test_namespace: namespace, + tenant_name: env_or(&get_env, "RUSTFS_FAULT_TEST_TENANT", "fault-test-tenant"), + 
storage_class, + rustfs_image: env_or( + &get_env, + "RUSTFS_FAULT_TEST_SERVER_IMAGE", + "rustfs/rustfs:latest", + ), + artifacts_dir: PathBuf::from(env_or( + &get_env, + "RUSTFS_FAULT_TEST_ARTIFACTS", + "target/fault-tests/artifacts", + )), + pod_management_policy: None, + timeout: Duration::from_secs(env_u64( + &get_env, + "RUSTFS_FAULT_TEST_TIMEOUT_SECONDS", + 300, + )), + }; + + Ok(Self { + cluster, + destructive_enabled: env_bool(&get_env, "RUSTFS_FAULT_TEST_DESTRUCTIVE"), + scenario: env_or(&get_env, "RUSTFS_FAULT_TEST_SCENARIO", "io-eio"), + duration: Duration::from_secs(env_u64( + &get_env, + "RUSTFS_FAULT_TEST_DURATION_SECONDS", + 180, + )), + percent: env_u8(&get_env, "RUSTFS_FAULT_TEST_PERCENT", 20), + workload_objects: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS", 40), + request_timeout: Duration::from_secs(env_u64( + &get_env, + "RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS", + 3, + )), + require_client_disruption: env_bool( + &get_env, + "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION", + ), + chaos_namespace: env_or(&get_env, "RUSTFS_FAULT_TEST_CHAOS_NAMESPACE", "chaos-mesh"), + }) + } + + pub fn require_destructive_enabled(&self) -> Result<()> { + ensure!( + self.destructive_enabled, + "destructive fault tests are disabled; run through `make fault-test` or set RUSTFS_FAULT_TEST_DESTRUCTIVE=1 explicitly" + ); + Ok(()) + } + + pub fn validate_cluster(&self) -> Result<()> { + Kubectl::new(&self.cluster) + .command(["get", "crd", "tenants.rustfs.com"]) + .run_checked() + .context("RustFS Tenant CRD tenants.rustfs.com is required")?; + + let output = Kubectl::new(&self.cluster) + .command([ + "get", + "storageclass", + &self.cluster.storage_class, + "-o", + "json", + ]) + .run_checked() + .with_context(|| { + format!( + "fault-test StorageClass {:?} is required", + self.cluster.storage_class + ) + })?; + validate_storage_class(&output.stdout) + } + + #[cfg(test)] + pub(crate) fn for_test(context: &str, storage_class: &str) -> Self { + 
Self::from_env_with( + |name| match name { + "RUSTFS_FAULT_TEST_STORAGE_CLASS" => Some(storage_class.to_string()), + _ => None, + }, + context.to_string(), + ) + .expect("fault test config") + } +} + +fn validate_storage_class(raw: &str) -> Result<()> { + let value = serde_json::from_str::(raw).context("parse StorageClass json")?; + let provisioner = value + .get("provisioner") + .and_then(Value::as_str) + .unwrap_or_default(); + ensure!( + !provisioner.is_empty(), + "StorageClass provisioner is missing" + ); + ensure!( + provisioner != "kubernetes.io/no-provisioner", + "fault tests require a dynamically provisioned StorageClass, got {provisioner}" + ); + Ok(()) +} + +fn current_context() -> Result { + let output = CommandSpec::new("kubectl") + .args(["config", "current-context"]) + .run_checked()?; + Ok(output.stdout.trim().to_string()) +} + +fn required_env(get_env: &F, name: &str) -> Result +where + F: Fn(&str) -> Option, +{ + let value = get_env(name).unwrap_or_default(); + ensure!(!value.trim().is_empty(), "{name} is required"); + Ok(value) +} + +fn env_or(get_env: &F, name: &str, default: &str) -> String +where + F: Fn(&str) -> Option, +{ + get_env(name).unwrap_or_else(|| default.to_string()) +} + +fn env_bool(get_env: &F, name: &str) -> bool +where + F: Fn(&str) -> Option, +{ + get_env(name) + .map(|value| matches!(value.as_str(), "1" | "true" | "TRUE" | "yes" | "YES")) + .unwrap_or(false) +} + +fn env_u64(get_env: &F, name: &str, default: u64) -> u64 +where + F: Fn(&str) -> Option, +{ + get_env(name) + .and_then(|value| value.parse::().ok()) + .unwrap_or(default) +} + +fn env_usize(get_env: &F, name: &str, default: usize) -> usize +where + F: Fn(&str) -> Option, +{ + get_env(name) + .and_then(|value| value.parse::().ok()) + .unwrap_or(default) +} + +fn env_u8(get_env: &F, name: &str, default: u8) -> u8 +where + F: Fn(&str) -> Option, +{ + get_env(name) + .and_then(|value| value.parse::().ok()) + .unwrap_or(default) +} + +#[cfg(test)] +mod tests { + use 
super::{FaultTestConfig, validate_storage_class}; + + #[test] + fn real_cluster_fault_defaults_are_isolated() { + let config = FaultTestConfig::from_env_with( + |name| match name { + "RUSTFS_FAULT_TEST_STORAGE_CLASS" => Some("fast-csi".to_string()), + _ => None, + }, + "production-test-cluster".to_string(), + ) + .expect("fault config"); + + assert_eq!(config.cluster.context, "production-test-cluster"); + assert_eq!(config.cluster.test_namespace, "rustfs-fault-test"); + assert_eq!(config.cluster.tenant_name, "fault-test-tenant"); + assert_eq!(config.cluster.storage_class, "fast-csi"); + assert_eq!( + config.cluster.artifacts_dir, + std::path::PathBuf::from("target/fault-tests/artifacts") + ); + assert!(!config.destructive_enabled); + assert!(config.require_destructive_enabled().is_err()); + } + + #[test] + fn kind_context_is_rejected_for_fault_tests() { + let result = FaultTestConfig::from_env_with( + |name| match name { + "RUSTFS_FAULT_TEST_STORAGE_CLASS" => Some("local-storage".to_string()), + _ => None, + }, + "kind-rustfs-e2e".to_string(), + ); + + assert!(result.is_err()); + } + + #[test] + fn dynamic_storage_class_is_required() { + assert!(validate_storage_class(r#"{"provisioner":"ebs.csi.aws.com"}"#).is_ok()); + assert!( + validate_storage_class(r#"{"provisioner":"kubernetes.io/no-provisioner"}"#).is_err() + ); + } +} diff --git a/e2e/src/framework/fault_scenarios.rs b/e2e/src/framework/fault_scenarios.rs index 8a1f43e..e17a957 100644 --- a/e2e/src/framework/fault_scenarios.rs +++ b/e2e/src/framework/fault_scenarios.rs @@ -15,7 +15,7 @@ use anyhow::{Result, ensure}; use std::time::Duration; -use crate::framework::config::E2eConfig; +use crate::framework::fault_config::FaultTestConfig; pub const IO_EIO_SCENARIO: &str = "io-eio"; @@ -28,31 +28,31 @@ pub struct FaultScenario { } impl FaultScenario { - pub fn from_config(config: &E2eConfig) -> Result { + pub fn from_config(config: &FaultTestConfig) -> Result { ensure!( - config.fault_scenario == IO_EIO_SCENARIO, 
+ config.scenario == IO_EIO_SCENARIO, "unsupported fault scenario {:?}; first implementation supports only {IO_EIO_SCENARIO:?}", - config.fault_scenario + config.scenario ); ensure!( - (1..=100).contains(&config.fault_percent), - "RUSTFS_E2E_FAULT_PERCENT must be in 1..=100, got {}", - config.fault_percent + (1..=100).contains(&config.percent), + "RUSTFS_FAULT_TEST_PERCENT must be in 1..=100, got {}", + config.percent ); ensure!( - config.fault_duration > Duration::ZERO, - "RUSTFS_E2E_FAULT_DURATION_SECONDS must be greater than zero" + config.duration > Duration::ZERO, + "RUSTFS_FAULT_TEST_DURATION_SECONDS must be greater than zero" ); ensure!( - config.fault_workload_objects >= 4, - "RUSTFS_E2E_WORKLOAD_OBJECTS must be at least 4" + config.workload_objects >= 4, + "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS must be at least 4" ); Ok(Self { - name: config.fault_scenario.clone(), - duration: config.fault_duration, - percent: config.fault_percent, - object_count: config.fault_workload_objects, + name: config.scenario.clone(), + duration: config.duration, + percent: config.percent, + object_count: config.workload_objects, }) } @@ -68,12 +68,13 @@ impl FaultScenario { #[cfg(test)] mod tests { use super::{FaultScenario, IO_EIO_SCENARIO}; - use crate::framework::config::E2eConfig; + use crate::framework::fault_config::FaultTestConfig; use std::time::Duration; #[test] fn default_fault_scenario_is_io_eio_with_split_workload() { - let scenario = FaultScenario::from_config(&E2eConfig::defaults()).expect("valid scenario"); + let config = FaultTestConfig::for_test("real-cluster", "fast-csi"); + let scenario = FaultScenario::from_config(&config).expect("valid scenario"); assert_eq!(scenario.name, IO_EIO_SCENARIO); assert_eq!(scenario.duration, Duration::from_secs(180)); @@ -84,8 +85,8 @@ mod tests { #[test] fn unsupported_fault_scenario_is_rejected() { - let mut config = E2eConfig::defaults(); - config.fault_scenario = "operator-restart".to_string(); + let mut config = 
FaultTestConfig::for_test("real-cluster", "fast-csi"); + config.scenario = "operator-restart".to_string(); assert!(FaultScenario::from_config(&config).is_err()); } diff --git a/e2e/src/framework/kubectl.rs b/e2e/src/framework/kubectl.rs index 9ab45c3..102014a 100644 --- a/e2e/src/framework/kubectl.rs +++ b/e2e/src/framework/kubectl.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::framework::{command::CommandSpec, config::E2eConfig}; +use crate::framework::{command::CommandSpec, config::ClusterTestConfig}; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Kubectl { @@ -21,7 +21,7 @@ pub struct Kubectl { } impl Kubectl { - pub fn new(config: &E2eConfig) -> Self { + pub fn new(config: &ClusterTestConfig) -> Self { Self { context: config.context.clone(), namespace: None, @@ -50,6 +50,10 @@ impl Kubectl { pub fn apply_yaml_command(&self, yaml: impl Into) -> CommandSpec { self.command(["apply", "-f", "-"]).stdin(yaml) } + + pub fn create_yaml_command(&self, yaml: impl Into) -> CommandSpec { + self.command(["create", "-f", "-"]).stdin(yaml) + } } #[cfg(test)] @@ -78,4 +82,15 @@ mod tests { "kubectl --context kind-rustfs-e2e apply -f -" ); } + + #[test] + fn kubectl_create_yaml_uses_stdin_without_exposing_payload() { + let kubectl = Kubectl::new(&E2eConfig::defaults()); + let command = kubectl.create_yaml_command("kind: Namespace"); + + assert_eq!( + command.display(), + "kubectl --context kind-rustfs-e2e create -f -" + ); + } } diff --git a/e2e/src/framework/live.rs b/e2e/src/framework/live.rs index 1cfb24c..e909cc4 100644 --- a/e2e/src/framework/live.rs +++ b/e2e/src/framework/live.rs @@ -24,14 +24,6 @@ pub fn require_live_enabled(config: &E2eConfig) -> Result<()> { Ok(()) } -pub fn require_destructive_enabled(config: &E2eConfig) -> Result<()> { - ensure!( - config.destructive_enabled, - "destructive e2e faults are disabled; set RUSTFS_E2E_DESTRUCTIVE=1 explicitly" - ); - Ok(()) -} - 
pub fn current_context() -> Result { let output = CommandSpec::new("kubectl") .args(["config", "current-context"]) @@ -39,15 +31,6 @@ pub fn current_context() -> Result { Ok(output.stdout.trim().to_string()) } -pub fn use_current_context(config: &mut E2eConfig) -> Result { - let actual = current_context()?; - config.context = actual.clone(); - if let Some(kind_cluster) = actual.strip_prefix("kind-") { - config.cluster_name = kind_cluster.to_string(); - } - Ok(actual) -} - pub fn ensure_dedicated_context(config: &E2eConfig) -> Result { let actual = current_context()?; ensure!( @@ -60,14 +43,13 @@ pub fn ensure_dedicated_context(config: &E2eConfig) -> Result { #[cfg(test)] mod tests { - use super::{require_destructive_enabled, require_live_enabled}; + use super::require_live_enabled; use crate::framework::config::E2eConfig; #[test] - fn live_and_destructive_guards_are_disabled_by_default() { + fn live_guard_is_disabled_by_default() { let config = E2eConfig::defaults(); assert!(require_live_enabled(&config).is_err()); - assert!(require_destructive_enabled(&config).is_err()); } } diff --git a/e2e/src/framework/mod.rs b/e2e/src/framework/mod.rs index 5f21fd2..893dbdf 100644 --- a/e2e/src/framework/mod.rs +++ b/e2e/src/framework/mod.rs @@ -21,6 +21,7 @@ pub mod command; pub mod config; pub mod console_client; pub mod deploy; +pub mod fault_config; pub mod fault_scenarios; pub mod history; pub mod images; @@ -36,4 +37,4 @@ pub mod tenant_factory; pub mod tools; pub mod wait; -pub use config::E2eConfig; +pub use config::{ClusterTestConfig, E2eConfig}; diff --git a/e2e/src/framework/port_forward.rs b/e2e/src/framework/port_forward.rs index d751ec2..07aecdc 100644 --- a/e2e/src/framework/port_forward.rs +++ b/e2e/src/framework/port_forward.rs @@ -18,7 +18,7 @@ use std::path::{Path, PathBuf}; use std::process::Child; use uuid::Uuid; -use crate::framework::{command::CommandSpec, config::E2eConfig, kubectl::Kubectl}; +use crate::framework::{command::CommandSpec, 
config::ClusterTestConfig, kubectl::Kubectl}; #[derive(Debug, Clone, PartialEq, Eq)] pub struct PortForwardSpec { @@ -96,17 +96,17 @@ impl PortForwardSpec { format!("http://127.0.0.1:{}", self.local_port) } - pub fn start_console(config: &E2eConfig) -> Result { + pub fn start_console(config: &ClusterTestConfig) -> Result { let kubectl = Kubectl::new(config); Self::console(&config.operator_namespace).start_with_temp_log(&kubectl) } - pub fn start_operator_sts(config: &E2eConfig) -> Result { + pub fn start_operator_sts(config: &ClusterTestConfig) -> Result { let kubectl = Kubectl::new(config); Self::operator_sts(&config.operator_namespace).start_with_temp_log(&kubectl) } - pub fn start_tenant_io(config: &E2eConfig) -> Result { + pub fn start_tenant_io(config: &ClusterTestConfig) -> Result { let kubectl = Kubectl::new(config); Self::tenant_io(&config.test_namespace, &config.tenant_name).start_with_temp_log(&kubectl) } diff --git a/e2e/src/framework/resources.rs b/e2e/src/framework/resources.rs index 2b0cefd..ff6bd0c 100644 --- a/e2e/src/framework/resources.rs +++ b/e2e/src/framework/resources.rs @@ -12,29 +12,33 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::{Context, Result, bail}; +use anyhow::{Context, Result, bail, ensure}; +use serde_json::Value; use std::thread::sleep; use std::time::{Duration, Instant}; use crate::framework::{ command::{CommandOutput, CommandSpec}, - config::E2eConfig, + config::ClusterTestConfig, kubectl::Kubectl, tenant_factory::TenantTemplate, }; use operator::types::v1alpha1::k8s::PodManagementPolicy; -const E2E_ACCESS_KEY: &str = "e2eaccess"; -const E2E_SECRET_KEY: &str = "e2esecret"; +const TEST_ACCESS_KEY: &str = "testaccess"; +const TEST_SECRET_KEY: &str = "testsecret"; const RESOURCE_RESET_TIMEOUT: Duration = Duration::from_secs(120); const RESOURCE_RESET_POLL_INTERVAL: Duration = Duration::from_secs(2); +const MANAGED_BY_LABEL: &str = "app.kubernetes.io/managed-by"; +const FAULT_TEST_MANAGER: &str = "rustfs-operator-fault-test"; +const FAULT_TEST_TENANT_ANNOTATION: &str = "rustfs.com/fault-test-tenant"; -pub fn credential_secret_name(config: &E2eConfig) -> String { +pub fn credential_secret_name(config: &ClusterTestConfig) -> String { format!("{}-credentials", config.tenant_name) } -pub fn e2e_credentials() -> (&'static str, &'static str) { - (E2E_ACCESS_KEY, E2E_SECRET_KEY) +pub fn test_credentials() -> (&'static str, &'static str) { + (TEST_ACCESS_KEY, TEST_SECRET_KEY) } pub fn namespace_manifest(namespace: &str) -> String { @@ -47,7 +51,26 @@ metadata: ) } -pub fn credential_secret_manifest(config: &E2eConfig) -> String { +pub fn fault_namespace_manifest(config: &ClusterTestConfig) -> String { + format!( + r#"apiVersion: v1 +kind: Namespace +metadata: + name: {namespace} + labels: + {managed_by_label}: {manager} + annotations: + {tenant_annotation}: {tenant_name} +"#, + namespace = config.test_namespace, + managed_by_label = MANAGED_BY_LABEL, + manager = FAULT_TEST_MANAGER, + tenant_annotation = FAULT_TEST_TENANT_ANNOTATION, + tenant_name = config.tenant_name, + ) +} + +pub fn credential_secret_manifest(config: &ClusterTestConfig) -> String { format!( r#"apiVersion: v1 
kind: Secret @@ -61,12 +84,12 @@ stringData: "#, secret_name = credential_secret_name(config), namespace = config.test_namespace, - access_key = E2E_ACCESS_KEY, - secret_key = E2E_SECRET_KEY + access_key = TEST_ACCESS_KEY, + secret_key = TEST_SECRET_KEY ) } -pub fn smoke_tenant_template(config: &E2eConfig) -> TenantTemplate { +pub fn smoke_tenant_template(config: &ClusterTestConfig) -> TenantTemplate { let mut template = TenantTemplate::kind_local( &config.test_namespace, &config.tenant_name, @@ -85,13 +108,24 @@ pub fn smoke_tenant_template(config: &E2eConfig) -> TenantTemplate { template } -pub fn smoke_tenant_manifest(config: &E2eConfig) -> Result { +pub fn smoke_tenant_manifest(config: &ClusterTestConfig) -> Result { Ok(serde_yaml_ng::to_string( &smoke_tenant_template(config).build(), )?) } -pub fn apply_smoke_tenant_resources(config: &E2eConfig) -> Result<()> { +pub fn fault_tenant_manifest(config: &ClusterTestConfig) -> Result { + let template = TenantTemplate::real_cluster( + &config.test_namespace, + &config.tenant_name, + &config.rustfs_image, + &config.storage_class, + credential_secret_name(config), + ); + Ok(serde_yaml_ng::to_string(&template.build())?) +} + +pub fn apply_smoke_tenant_resources(config: &ClusterTestConfig) -> Result<()> { let kubectl = Kubectl::new(config); kubectl .apply_yaml_command(namespace_manifest(&config.test_namespace)) @@ -105,12 +139,41 @@ pub fn apply_smoke_tenant_resources(config: &E2eConfig) -> Result<()> { Ok(()) } -pub fn reset_and_apply_smoke_tenant_resources(config: &E2eConfig) -> Result<()> { - reset_smoke_tenant_resources(config)?; +pub fn apply_fault_tenant_resources(config: &ClusterTestConfig) -> Result<()> { + let kubectl = Kubectl::new(config); + if !ensure_fault_namespace_owned_or_absent(config)? 
{ + kubectl + .create_yaml_command(fault_namespace_manifest(config)) + .run_checked() + .with_context(|| { + format!( + "create dedicated fault-test namespace {:?}", + config.test_namespace + ) + })?; + } + kubectl + .apply_yaml_command(credential_secret_manifest(config)) + .run_checked()?; + kubectl + .apply_yaml_command(fault_tenant_manifest(config)?) + .run_checked()?; + Ok(()) +} + +pub fn reset_fault_tenant_resources(config: &ClusterTestConfig) -> Result<()> { + if !ensure_fault_namespace_owned_or_absent(config)? { + return Ok(()); + } + reset_tenant_resources(config) +} + +pub fn reset_and_apply_smoke_tenant_resources(config: &ClusterTestConfig) -> Result<()> { + reset_tenant_resources(config)?; apply_smoke_tenant_resources(config) } -pub fn reset_smoke_tenant_resources(config: &E2eConfig) -> Result<()> { +pub fn reset_tenant_resources(config: &ClusterTestConfig) -> Result<()> { let kubectl = Kubectl::new(config); if !namespace_exists(&kubectl, &config.test_namespace)? { return Ok(()); @@ -173,7 +236,7 @@ pub fn reset_smoke_tenant_resources(config: &E2eConfig) -> Result<()> { Ok(()) } -pub fn cleanup_smoke_tenant_resources(config: &E2eConfig) -> Result<()> { +pub fn cleanup_tenant_resources(config: &ClusterTestConfig) -> Result<()> { let kubectl = Kubectl::new(config).namespaced(&config.test_namespace); let selector = format!("rustfs.tenant={}", config.tenant_name); @@ -223,6 +286,52 @@ fn namespace_exists(kubectl: &Kubectl, namespace: &str) -> Result { Ok(output.code == Some(0)) } +fn ensure_fault_namespace_owned_or_absent(config: &ClusterTestConfig) -> Result { + let output = Kubectl::new(config) + .command(["get", "namespace", &config.test_namespace, "-o", "json"]) + .run()?; + + match output.code { + Some(0) => { + validate_fault_namespace_ownership( + &output.stdout, + &config.test_namespace, + &config.tenant_name, + )?; + Ok(true) + } + _ if is_not_found(&output) => Ok(false), + _ => bail!( + "failed to inspect fault-test namespace {:?} before 
destructive operation\nexit: {:?}\nstdout:\n{}\nstderr:\n{}", + config.test_namespace, + output.code, + output.stdout, + output.stderr + ), + } +} + +fn validate_fault_namespace_ownership(raw: &str, namespace: &str, tenant_name: &str) -> Result<()> { + let value = serde_json::from_str::(raw) + .with_context(|| format!("parse namespace {namespace:?} json"))?; + let manager = value + .pointer("/metadata/labels/app.kubernetes.io~1managed-by") + .and_then(Value::as_str); + let owned_tenant = value + .pointer("/metadata/annotations/rustfs.com~1fault-test-tenant") + .and_then(Value::as_str); + + ensure!( + manager == Some(FAULT_TEST_MANAGER) && owned_tenant == Some(tenant_name), + "refusing destructive fault-test operation in namespace {namespace:?}: expected label \ + {MANAGED_BY_LABEL}={FAULT_TEST_MANAGER:?} and annotation \ + {FAULT_TEST_TENANT_ANNOTATION}={tenant_name:?}, got manager={manager:?}, \ + tenant={owned_tenant:?}; use a dedicated namespace or explicitly label and annotate it \ + only after verifying that it contains no non-test workloads" + ); + Ok(()) +} + fn run_delete(command: CommandSpec) -> Result<()> { command.run_checked()?; Ok(()) @@ -304,8 +413,12 @@ fn is_not_found(output: &CommandOutput) -> bool { #[cfg(test)] mod tests { - use super::{credential_secret_manifest, credential_secret_name, smoke_tenant_manifest}; + use super::{ + credential_secret_manifest, credential_secret_name, fault_namespace_manifest, + fault_tenant_manifest, smoke_tenant_manifest, validate_fault_namespace_ownership, + }; use crate::framework::config::E2eConfig; + use crate::framework::fault_config::FaultTestConfig; #[test] fn smoke_tenant_manifest_wires_secret_storage_and_image() { @@ -329,4 +442,54 @@ mod tests { assert!(manifest.contains("accesskey:")); assert!(manifest.contains("secretkey:")); } + + #[test] + fn fault_tenant_manifest_uses_real_cluster_defaults() { + let config = FaultTestConfig::for_test("real-cluster", "fast-csi"); + let manifest = 
fault_tenant_manifest(&config.cluster).expect("fault tenant manifest"); + + assert!(manifest.contains("namespace: rustfs-fault-test")); + assert!(manifest.contains("storageClassName: fast-csi")); + assert!(!manifest.contains("rustfs-storage")); + assert!(!manifest.contains("RUSTFS_UNSAFE_BYPASS_DISK_CHECK")); + } + + #[test] + fn fault_namespace_manifest_records_destructive_test_ownership() { + let config = FaultTestConfig::for_test("real-cluster", "fast-csi"); + let manifest = fault_namespace_manifest(&config.cluster); + + assert!(manifest.contains("name: rustfs-fault-test")); + assert!(manifest.contains("app.kubernetes.io/managed-by: rustfs-operator-fault-test")); + assert!(manifest.contains("rustfs.com/fault-test-tenant: fault-test-tenant")); + } + + #[test] + fn fault_namespace_ownership_requires_matching_manager_and_tenant() { + let owned = r#"{ + "metadata": { + "labels": { + "app.kubernetes.io/managed-by": "rustfs-operator-fault-test" + }, + "annotations": { + "rustfs.com/fault-test-tenant": "fault-test-tenant" + } + } + }"#; + assert!( + validate_fault_namespace_ownership(owned, "rustfs-fault-test", "fault-test-tenant") + .is_ok() + ); + + let unowned = r#"{"metadata":{"labels":{},"annotations":{}}}"#; + assert!( + validate_fault_namespace_ownership(unowned, "rustfs-fault-test", "fault-test-tenant") + .is_err() + ); + + assert!( + validate_fault_namespace_ownership(owned, "rustfs-fault-test", "another-tenant") + .is_err() + ); + } } diff --git a/e2e/src/framework/s3_workload.rs b/e2e/src/framework/s3_workload.rs index f7fe597..d1d3a20 100644 --- a/e2e/src/framework/s3_workload.rs +++ b/e2e/src/framework/s3_workload.rs @@ -45,7 +45,7 @@ pub struct GetObjectResult { impl ObjectSpec { pub fn deterministic(run_id: &str, index: usize, size_bytes: usize) -> Self { - let key = format!("fault-e2e/{run_id}/object-{index:06}"); + let key = format!("fault-test/{run_id}/object-{index:06}"); let body = deterministic_bytes(index, size_bytes); let sha256 = 
sha256_hex(&body); @@ -71,7 +71,7 @@ impl S3WorkloadClient { secret_key.into(), None, None, - "rustfs-e2e-static-credentials", + "rustfs-fault-test-static-credentials", ); let shared_config = aws_config::defaults(BehaviorVersion::latest()) .region(Region::new("us-east-1")) @@ -438,7 +438,7 @@ mod tests { let object = ObjectSpec::deterministic("run-1", 7, 4096); let same = ObjectSpec::deterministic("run-1", 7, 4096); - assert_eq!(object.key, "fault-e2e/run-1/object-000007"); + assert_eq!(object.key, "fault-test/run-1/object-000007"); assert_eq!(object.size_bytes, 4096); assert_eq!(object.sha256, same.sha256); assert_eq!(object.sha256, sha256_hex(&same.body)); diff --git a/e2e/src/framework/tenant_factory.rs b/e2e/src/framework/tenant_factory.rs index ca21fc2..3ff449a 100644 --- a/e2e/src/framework/tenant_factory.rs +++ b/e2e/src/framework/tenant_factory.rs @@ -34,6 +34,7 @@ pub struct TenantTemplate { pub volumes_per_server: i32, pub pod_management_policy: Option, pub unsafe_bypass_disk_check: bool, + pub node_selector: Option>, } impl TenantTemplate { @@ -54,6 +55,32 @@ impl TenantTemplate { volumes_per_server: 2, pod_management_policy: Some(PodManagementPolicy::Parallel), unsafe_bypass_disk_check: true, + node_selector: Some( + [("rustfs-storage".to_string(), "true".to_string())] + .into_iter() + .collect(), + ), + } + } + + pub fn real_cluster( + namespace: impl Into, + name: impl Into, + image: impl Into, + storage_class: impl Into, + credential_secret_name: impl Into, + ) -> Self { + Self { + namespace: namespace.into(), + name: name.into(), + image: image.into(), + storage_class: storage_class.into(), + credential_secret_name: credential_secret_name.into(), + servers: 4, + volumes_per_server: 2, + pod_management_policy: Some(PodManagementPolicy::Parallel), + unsafe_bypass_disk_check: false, + node_selector: None, } } @@ -79,11 +106,7 @@ impl TenantTemplate { ..PersistenceConfig::default() }, scheduling: SchedulingConfig { - node_selector: Some( - 
[("rustfs-storage".to_string(), "true".to_string())] - .into_iter() - .collect::>(), - ), + node_selector: self.node_selector.clone(), ..SchedulingConfig::default() }, }; @@ -162,5 +185,35 @@ mod tests { .any(|env| env.name == "RUSTFS_UNSAFE_BYPASS_DISK_CHECK" && env.value.as_deref() == Some("true")) ); + assert_eq!( + tenant.spec.pools[0] + .scheduling + .node_selector + .as_ref() + .and_then(|selector| selector.get("rustfs-storage")) + .map(String::as_str), + Some("true") + ); + } + + #[test] + fn real_cluster_tenant_uses_scheduler_defaults_and_disk_checks() { + let tenant = TenantTemplate::real_cluster( + "rustfs-fault-test", + "fault-test-tenant", + "rustfs/rustfs:latest", + "fast-csi", + "fault-test-tenant-credentials", + ) + .build(); + + assert!(tenant.spec.pools[0].scheduling.node_selector.is_none()); + assert!( + tenant + .spec + .env + .iter() + .all(|env| env.name != "RUSTFS_UNSAFE_BYPASS_DISK_CHECK") + ); } } diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index 88c0f4f..2e0c8a3 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -19,15 +19,16 @@ use rustfs_operator_e2e::framework::{ artifacts::ArtifactCollector, chaos_mesh::{self, IoChaosSpec}, checker, - config::E2eConfig, + config::ClusterTestConfig, + fault_config::FaultTestConfig, fault_scenarios::FaultScenario, history::OperationOutcome, history::Recorder, - kube_client, live, + kube_client, port_forward::{PortForwardGuard, PortForwardSpec}, resources, s3_workload::{ObjectSpec, S3WorkloadClient, wait_for_s3_endpoint}, - storage, wait, + wait, }; use serde::Serialize; use std::time::Duration; @@ -37,47 +38,31 @@ const IO_EIO_CASE: &str = "fault_io_eio_preserves_committed_objects"; const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; const SMALL_OBJECT_SIZE_BYTES: usize = 4 * 1024; -#[test] -fn faults_are_not_destructive_without_explicit_opt_in() { - let config = E2eConfig::defaults(); - - assert!(!config.destructive_enabled); - 
assert!(live::require_destructive_enabled(&config).is_err()); -} - -#[test] -#[ignore = "reserved for destructive fault scenarios; run through `make e2e-live-faults`"] -fn fault_live_suite_requires_explicit_destructive_opt_in() -> Result<()> { - let mut config = E2eConfig::from_env(); - - live::require_live_enabled(&config)?; - live::require_destructive_enabled(&config)?; - let context = live::use_current_context(&mut config)?; - eprintln!("confirmed destructive fault e2e context: {context}"); - - Ok(()) -} - #[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; run through `make e2e-live-faults`"] +#[ignore = "destructive RustFS workload fault scenario; run through `make fault-test`"] async fn fault_io_eio_preserves_committed_objects() -> Result<()> { - let mut config = E2eConfig::from_env(); - live::require_live_enabled(&config)?; - live::require_destructive_enabled(&config)?; - let context = live::use_current_context(&mut config)?; - eprintln!("running destructive RustFS fault e2e against current context: {context}"); + let config = FaultTestConfig::from_env()?; + config.require_destructive_enabled()?; + config.validate_cluster()?; + eprintln!( + "running destructive RustFS fault test against real Kubernetes context: {}", + config.cluster.context + ); - let collector = ArtifactCollector::new(&config.artifacts_dir); + let collector = ArtifactCollector::new(&config.cluster.artifacts_dir); let result = run_io_eio_case(&config, &collector).await; if let Err(error) = &result { - match collector.collect_kubernetes_snapshot(IO_EIO_CASE, &config) { + match collector.collect_kubernetes_snapshot(IO_EIO_CASE, &config.cluster) { Ok(report) => { - eprintln!("collected e2e artifacts under {}", report.dir.display()); + eprintln!( + "collected fault-test artifacts under {}", + report.dir.display() + ); eprintln!("{}", report.diagnosis); } Err(artifact_error) => { - eprintln!("failed to collect e2e artifacts after {error}: {artifact_error}"); + 
eprintln!("failed to collect fault-test artifacts after {error}: {artifact_error}"); } } } @@ -85,31 +70,33 @@ async fn fault_io_eio_preserves_committed_objects() -> Result<()> { result } -async fn run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> Result<()> { +async fn run_io_eio_case(config: &FaultTestConfig, collector: &ArtifactCollector) -> Result<()> { let scenario = FaultScenario::from_config(config)?; - chaos_mesh::require_iochaos_crd(config)?; - chaos_mesh::cleanup_managed_iochaos(config, &config.chaos_namespace)?; + let cluster = &config.cluster; + chaos_mesh::require_iochaos_crd(cluster)?; + chaos_mesh::cleanup_managed_iochaos(cluster, &config.chaos_namespace)?; - reset_io_eio_fixture(config)?; - wait_for_ready_tenant(config).await?; + reset_io_eio_fixture(cluster)?; + wait_for_ready_tenant(cluster).await?; let run_id = format!("run-{}", Uuid::new_v4()); let bucket = bucket_name(&run_id); let history_path = collector.case_dir(IO_EIO_CASE).join("history.jsonl"); let mut history = Recorder::create(history_path, &scenario.name, &run_id)?; - let port_forward_spec = PortForwardSpec::tenant_io(&config.test_namespace, &config.tenant_name); + let port_forward_spec = + PortForwardSpec::tenant_io(&cluster.test_namespace, &cluster.tenant_name); let endpoint = port_forward_spec.local_base_url(); - let mut port_forward = PortForwardSpec::start_tenant_io(config)?; - wait_for_tenant_s3(&mut port_forward, &endpoint, config.timeout).await?; + let mut port_forward = PortForwardSpec::start_tenant_io(cluster)?; + wait_for_tenant_s3(&mut port_forward, &endpoint, cluster.timeout).await?; - let (access_key, secret_key) = resources::e2e_credentials(); + let (access_key, secret_key) = resources::test_credentials(); let s3 = S3WorkloadClient::new( &endpoint, &bucket, access_key, secret_key, - config.fault_request_timeout, + config.request_timeout, ) .await?; let bucket_outcome = s3.create_bucket(&mut history).await?; @@ -120,7 +107,8 @@ async fn 
run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> R let prefilled = prefill_objects(&s3, &mut history, &run_id, scenario.prefill_count()).await?; let chaos = IoChaosSpec::eio_on_rustfs_volume( - config, + cluster, + &config.chaos_namespace, &run_id, &scenario.name, RUSTFS_DATA_VOLUME, @@ -128,7 +116,7 @@ async fn run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> R scenario.duration, )?; collector.write_text(IO_EIO_CASE, "chaos-manifest.yaml", &chaos.manifest())?; - let mut guard = chaos_mesh::apply_iochaos(config, &chaos)?; + let mut guard = chaos_mesh::apply_iochaos(cluster, &chaos)?; match guard.describe() { Ok(describe) => { collector.write_text(IO_EIO_CASE, "chaos-describe.txt", &describe)?; @@ -141,7 +129,7 @@ async fn run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> R )?; } } - if let Err(error) = guard.wait_active(config.timeout) { + if let Err(error) = guard.wait_active(cluster.timeout) { collect_active_chaos_artifacts(collector, &guard, "wait-active-failed")?; return Err(error); } @@ -167,9 +155,7 @@ async fn run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> R "workload-summary.json", &serde_json::to_string_pretty(&workload_summary)?, )?; - if let Err(error) = - workload_summary.require_fault_evidence(config.fault_require_client_disruption) - { + if let Err(error) = workload_summary.require_fault_evidence(config.require_client_disruption) { collect_active_chaos_artifacts(collector, &guard, "workload-no-fault-evidence")?; return Err(error); } @@ -183,7 +169,7 @@ async fn run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> R return Err(error); } - wait_for_ready_tenant(config).await?; + wait_for_ready_tenant(cluster).await?; let report = checker::check_s3_history(&s3, &mut history, true).await?; collector.write_text( IO_EIO_CASE, @@ -195,24 +181,12 @@ async fn run_io_eio_case(config: &E2eConfig, collector: &ArtifactCollector) -> R Ok(()) } -fn 
reset_io_eio_fixture(config: &E2eConfig) -> Result<()> { - resources::reset_smoke_tenant_resources(config)?; - if uses_kind_local_storage(config) { - storage::reset_default_local_storage(config)?; - } else { - eprintln!( - "skipping Kind local storage reset for context {}; using cluster storage class {}", - config.context, config.storage_class - ); - } - resources::apply_smoke_tenant_resources(config)?; +fn reset_io_eio_fixture(config: &ClusterTestConfig) -> Result<()> { + resources::reset_fault_tenant_resources(config)?; + resources::apply_fault_tenant_resources(config)?; Ok(()) } -fn uses_kind_local_storage(config: &E2eConfig) -> bool { - config.context.starts_with("kind-") -} - fn collect_active_chaos_artifacts( collector: &ArtifactCollector, guard: &chaos_mesh::ChaosGuard, @@ -235,7 +209,7 @@ fn collect_active_chaos_artifacts( Ok(()) } -async fn wait_for_ready_tenant(config: &E2eConfig) -> Result { +async fn wait_for_ready_tenant(config: &ClusterTestConfig) -> Result { let client = kube_client::default_client().await?; let tenants: Api = kube_client::tenant_api(client, &config.test_namespace); wait::wait_for_tenant_ready(tenants, &config.tenant_name, config.timeout).await @@ -332,7 +306,7 @@ impl WorkloadSummary { if require_client_disruption { ensure!( self.disrupted() > 0, - "IOChaos became active but the S3 workload observed no client-visible disrupted operation; increase RUSTFS_E2E_WORKLOAD_OBJECTS or RUSTFS_E2E_FAULT_PERCENT, or set RUSTFS_E2E_FAULT_REQUIRE_CLIENT_DISRUPTION=0 if this is expected" + "IOChaos became active but the S3 workload observed no client-visible disrupted operation; increase RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS or RUSTFS_FAULT_TEST_PERCENT, or set RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION=0 if this is expected" ); } else if self.disrupted() == 0 { eprintln!( From 0f4646589300bdaebaf6056114bda00404b6bdc4 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 00:23:55 +0800 Subject: 
[PATCH 04/20] fix(chaos): use one volume per fault pod --- e2e/src/framework/tenant_factory.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/e2e/src/framework/tenant_factory.rs b/e2e/src/framework/tenant_factory.rs index 3ff449a..b1ae9bf 100644 --- a/e2e/src/framework/tenant_factory.rs +++ b/e2e/src/framework/tenant_factory.rs @@ -77,7 +77,7 @@ impl TenantTemplate { storage_class: storage_class.into(), credential_secret_name: credential_secret_name.into(), servers: 4, - volumes_per_server: 2, + volumes_per_server: 1, pod_management_policy: Some(PodManagementPolicy::Parallel), unsafe_bypass_disk_check: false, node_selector: None, @@ -207,6 +207,7 @@ mod tests { ) .build(); + assert_eq!(tenant.spec.pools[0].persistence.volumes_per_server, 1); assert!(tenant.spec.pools[0].scheduling.node_selector.is_none()); assert!( tenant From d11994ff7ca2e411bfce1d09413095514cb36cc9 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 00:39:23 +0800 Subject: [PATCH 05/20] fix(chaos): align S3 checker key prefix --- e2e/src/framework/checker.rs | 4 ++-- e2e/src/framework/s3_workload.rs | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/e2e/src/framework/checker.rs b/e2e/src/framework/checker.rs index 140b15c..3ca58cf 100644 --- a/e2e/src/framework/checker.rs +++ b/e2e/src/framework/checker.rs @@ -18,7 +18,7 @@ use std::collections::{BTreeMap, BTreeSet}; use crate::framework::{ history::{OperationKind, OperationOutcome, OperationRecord, Recorder}, - s3_workload::{S3WorkloadClient, sha256_hex}, + s3_workload::{ObjectSpec, S3WorkloadClient, sha256_hex}, }; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -92,7 +92,7 @@ pub async fn check_s3_history( } } - let prefix = format!("fault-e2e/{}/", recorder.run_id()); + let prefix = ObjectSpec::key_prefix(recorder.run_id()); match s3.list_prefix(&prefix, recorder).await? 
{ Some(keys) => { let listed = keys.into_iter().collect::>(); diff --git a/e2e/src/framework/s3_workload.rs b/e2e/src/framework/s3_workload.rs index d1d3a20..36e9a57 100644 --- a/e2e/src/framework/s3_workload.rs +++ b/e2e/src/framework/s3_workload.rs @@ -44,8 +44,12 @@ pub struct GetObjectResult { } impl ObjectSpec { + pub fn key_prefix(run_id: &str) -> String { + format!("fault-test/{run_id}/") + } + pub fn deterministic(run_id: &str, index: usize, size_bytes: usize) -> Self { - let key = format!("fault-test/{run_id}/object-{index:06}"); + let key = format!("{}object-{index:06}", Self::key_prefix(run_id)); let body = deterministic_bytes(index, size_bytes); let sha256 = sha256_hex(&body); @@ -438,6 +442,7 @@ mod tests { let object = ObjectSpec::deterministic("run-1", 7, 4096); let same = ObjectSpec::deterministic("run-1", 7, 4096); + assert_eq!(ObjectSpec::key_prefix("run-1"), "fault-test/run-1/"); assert_eq!(object.key, "fault-test/run-1/object-000007"); assert_eq!(object.size_bytes, 4096); assert_eq!(object.sha256, same.sha256); From 7d4d0fa4deb0616335a12d5201682d70e0fba2aa Mon Sep 17 00:00:00 2001 From: GatewayJ <835269233@qq.com> Date: Fri, 19 Jun 2026 09:26:59 +0800 Subject: [PATCH 06/20] feat: add fault case --- FAULT_INJECTION_TEST_PLAN.md | 227 +++++----- e2e/README.md | 2 +- e2e/src/cases/mod.rs | 34 +- e2e/src/framework/chaos_mesh.rs | 346 ++++++++++++++- e2e/src/framework/fault_config.rs | 71 +++ e2e/src/framework/fault_scenarios.rs | 267 ++++++++++- e2e/src/framework/host_faults.rs | 440 ++++++++++++++++++ e2e/src/framework/mod.rs | 1 + e2e/tests/faults.rs | 637 ++++++++++++++++++++++++--- 9 files changed, 1819 insertions(+), 206 deletions(-) create mode 100644 e2e/src/framework/host_faults.rs diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md index 8430b70..4bc6fab 100644 --- a/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -83,7 +83,7 @@ limitations under the License. 
| 能力 | 当前位置 | 用途 | | --- | --- | --- | | destructive 入口 | `make fault-test` | 专门在真实 Kubernetes 测试集群运行破坏性故障测试。 | -| fault runner | `e2e/tests/faults.rs` | 真实集群故障测试入口,不属于 e2e case inventory。 | +| fault suite runners | `e2e/tests/faults.rs` | 真实集群 scenario-selected destructive runner,不属于 e2e case inventory。 | | fault config/context guard | `e2e/src/framework/fault_config.rs` | 读取独立 fault-test 配置、绑定当前 context,并拒绝 Kind。 | | Tenant/Secret 创建 | `e2e/src/framework/resources.rs` | 创建 fault-test namespace、凭据和真实集群 Tenant。 | | S3 port-forward | `e2e/src/framework/port_forward.rs` | 将 Tenant S3 服务暴露到本地。 | @@ -212,7 +212,7 @@ Jepsen-like 的含义是: 11. 测试失败时必须先收集 artifacts,再清理会影响诊断的信息。 12. destructive 场景保持 `#[ignore]`,只能通过显式 Make 目标执行。 -建议增加环境变量: +当前使用的故障测试环境变量: | 变量 | 默认值 | 作用 | | --- | --- | --- | @@ -225,6 +225,11 @@ Jepsen-like 的含义是: | `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `40` | 写入或校验对象数量。 | | `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `3` | 单次 S3 请求超时时间。 | | `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求故障期间至少出现一次客户端可见失败/超时/unknown。 | +| `RUSTFS_FAULT_TEST_DISK_FILL_MIB` | `12288` | `disk-full` 场景在 RustFS 数据路径写入的 filler 大小。 | +| `RUSTFS_FAULT_TEST_DM_NAME` | empty | `dm-flakey` 场景要切换的 device-mapper 设备名,必填。 | +| `RUSTFS_FAULT_TEST_DM_FAULT_TABLE` | empty | `dm-flakey` 场景注入故障时加载的 dmsetup table,必填。 | +| `RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE` | current table | `dm-flakey` 场景恢复时加载的 dmsetup table;不填则使用注入前 table。 | +| `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | `warp-under-chaos` 场景中 Warp mixed workload 的运行时间。 | | `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh 资源所在 namespace。 | ## 操作历史模型 @@ -521,31 +526,34 @@ spec: ## 测试流程 -第一版完整流程建议如下: +当前 runner 使用如下流程: ```text 1. 读取 FaultTestConfig 2. 检查 RUSTFS_FAULT_TEST_DESTRUCTIVE=1 3. 读取当前 kube context 并拒绝 kind-* context 4. 检查 RUSTFS_FAULT_TEST_STORAGE_CLASS 已配置 -5. 检查 Chaos Mesh CRD 存在 -6. 检查 fault-test namespace 不存在,或所有权标记与配置完全匹配 -7. reset 专用 fault-test Tenant/PVC -8. 
namespace 不存在时由 runner 使用 create 创建带所有权标记的 fault-test namespace;不得通过 apply 认领竞态中出现的同名 namespace -9. 创建真实集群 fault-test Tenant -10. 等待 Tenant Ready -11. 启动 Tenant S3 port-forward -12. 创建测试 bucket -13. 预写入一批对象,记录 key 和 sha256 -14. 启动后台 verifier 持续读取已提交对象 -15. apply Chaos Mesh 故障资源 -16. 故障期间继续执行混合 S3 workload -17. delete Chaos Mesh 故障资源 -18. 等待 Tenant 再次 Ready -19. 对所有成功 PUT 对象做最终 GET + sha256 校验 -20. 生成 checker report -21. 成功则清理测试资源 -22. 失败则收集 Kubernetes artifacts +5. 根据 RUSTFS_FAULT_TEST_SCENARIO 解析 FaultScenarioSpec +6. 按场景检查 Chaos Mesh CRD 或专用 host-side 工具配置 +7. 检查 fault-test namespace 不存在,或所有权标记与配置完全匹配 +8. reset 专用 fault-test Tenant/PVC +9. namespace 不存在时由 runner 使用 create 创建带所有权标记的 fault-test namespace;不得通过 apply 认领竞态中出现的同名 namespace +10. 创建真实集群 fault-test Tenant +11. 等待 Tenant Ready +12. 启动 Tenant S3 port-forward,等待 S3 endpoint 可用 +13. 创建 run-scoped bucket +14. prefill 一批对象,记录 key、size、sha256;prefill 必须成功 +15. apply 当前 scenario 的 Chaos Mesh 资源或 host-side fault +16. 对持续型 Chaos 等待 active +17. 故障期间执行 PUT/GET mixed workload,并输出 workload-summary.json +18. 如果要求 client-visible disruption,则确认 workload 观察到了失败、超时或 unknown +19. 确认持续型 Chaos 没有早于 workload 结束恢复 +20. 删除 Chaos、清理 filler 文件或恢复 dmsetup table +21. 等待 Tenant 再次 Ready +22. 对所有成功 PUT 对象做最终 GET + sha256 校验 +23. 执行 prefix LIST 并记录 warning +24. 写入 checker-report.json +25. 
失败时收集 Kubernetes artifacts 和故障资源 describe/yaml ``` 伪代码: @@ -615,31 +623,34 @@ async fn fault_io_eio_preserves_committed_objects() -> Result<()> { result } -``` - ## Chaos Mesh 模块设计 -`chaos_mesh.rs` 建议提供这些能力: +`chaos_mesh.rs` 当前提供这些能力: ```rust pub fn require_iochaos_crd(config: &ClusterTestConfig) -> Result<()>; pub fn require_podchaos_crd(config: &ClusterTestConfig) -> Result<()>; pub fn require_networkchaos_crd(config: &ClusterTestConfig) -> Result<()>; - -pub struct ChaosGuard { - name: String, - namespace: String, - kind: String, -} - -impl Drop for ChaosGuard { - fn drop(&mut self) { - // best-effort kubectl delete - } +pub fn cleanup_managed_iochaos(config: &ClusterTestConfig, namespace: &str) -> Result<()>; +pub fn cleanup_managed_podchaos(config: &ClusterTestConfig, namespace: &str) -> Result<()>; +pub fn cleanup_managed_networkchaos(config: &ClusterTestConfig, namespace: &str) -> Result<()>; +pub fn apply_iochaos(config: &ClusterTestConfig, spec: &IoChaosSpec) -> Result; +pub fn apply_podchaos(config: &ClusterTestConfig, spec: &PodChaosSpec) -> Result; +pub fn apply_networkchaos(config: &ClusterTestConfig, spec: &NetworkChaosSpec) -> Result; + +pub enum IoChaosAction { + Fault { errno: u8 }, + Mistake { + filling: String, + max_occurrences: u8, + max_length: usize, + }, } - pub struct IoChaosSpec { pub name: String, + pub namespace: String, + pub run_id: String, + pub scenario: String, pub target_namespace: String, pub tenant_name: String, pub container_name: String, @@ -655,34 +666,36 @@ pub struct IoChaosSpec { - 所有 `kubectl` 命令必须通过现有 `framework::kubectl` 和 `framework::command` 边界。 - apply 前检查 CRD 是否存在。 -- apply 后可以 `kubectl describe` 保存到 artifacts。 -- 删除时必须 best-effort,不应 panic。 +- apply 后保存 manifest;失败时可以 `kubectl describe/get yaml` 保存到 artifacts。 +- `ChaosGuard::delete()` 必须明确返回结果;`Drop` 只做 best-effort cleanup,不应 panic。 - 每个资源都带 `rustfs-fault-test/run-id` label。 +- 每个资源都带 `rustfs-fault-test/scenario` label。 +- 每个资源都带 
`app.kubernetes.io/managed-by=rustfs-operator-fault-test` label,便于按 suite 清理残留。 - 允许按 label 清理上一次异常残留。 ## S3 workload 模块设计 -`s3_workload.rs` 建议提供: +`s3_workload.rs` 当前提供: ```rust pub struct S3WorkloadClient { bucket: String, - endpoint: String, - timeout: Duration, + request_timeout: Duration, } pub struct ObjectSpec { - key: String, - size_bytes: usize, - sha256: String, + pub key: String, + pub size_bytes: usize, + pub sha256: String, } impl S3WorkloadClient { - pub async fn create_bucket(&self) -> Result<()>; - pub async fn put_object(&self, object: &ObjectSpec, history: &mut Recorder) -> Result<()>; - pub async fn get_object(&self, key: &str, history: &mut Recorder) -> Result>>; - pub async fn head_object(&self, key: &str, history: &mut Recorder) -> Result<()>; - pub async fn list_prefix(&self, prefix: &str, history: &mut Recorder) -> Result>; + pub async fn new(...) -> Result; + pub async fn create_bucket(&self, recorder: &mut Recorder) -> Result; + pub async fn put_object(&self, object: &ObjectSpec, recorder: &mut Recorder) -> Result; + pub async fn get_object_result(&self, key: &str, recorder: &mut Recorder) -> Result; + pub async fn head_object(&self, key: &str, recorder: &mut Recorder) -> Result; + pub async fn list_prefix(&self, prefix: &str, recorder: &mut Recorder) -> Result>>; } ``` @@ -769,16 +782,23 @@ RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test 该入口使用当前 `kubectl` context,拒绝 Kind,并使用 `RUSTFS_FAULT_TEST_STORAGE_CLASS` 指向的真实集群动态 StorageClass。 -后续可以增加聚焦入口,方便本地调试: +`e2e/tests/faults.rs` 中每个 destructive 场景都有同名 ignored runner。运行时通过 `RUSTFS_FAULT_TEST_SCENARIO` 选择一个真实执行的场景;未选中的 ignored runner 会快速返回,避免一次 `make fault-test` 串行跑完整个破坏性矩阵。 + +示例: -```makefile -fault-test-io: - RUSTFS_FAULT_TEST_DESTRUCTIVE=1 RUSTFS_FAULT_TEST_SCENARIO=io-eio \ - cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --nocapture +```bash +# 默认场景:io-eio;make fault-test 会注入 RUSTFS_FAULT_TEST_DESTRUCTIVE=1 +RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test 
-fault-test-pod: - RUSTFS_FAULT_TEST_DESTRUCTIVE=1 RUSTFS_FAULT_TEST_SCENARIO=pod-kill-one \ - cargo test --manifest-path $(E2E_MANIFEST) --test faults -- --ignored --nocapture +# 运行其他场景 +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=pod-kill-one make fault-test +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=network-partition-one make fault-test +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=io-read-mistake make fault-test +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=disk-full make fault-test +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=direct-pv-corruption make fault-test +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=worker-restart make fault-test +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=dm-flakey make fault-test +RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=warp-under-chaos make fault-test ``` 普通开发检查仍然使用: @@ -790,30 +810,38 @@ make pre-commit 不要把 destructive 场景混进普通 `make e2e-live-run`。 -## 第一版最小可交付范围 +## 当前可交付范围 -建议第一版只交付一个真实场景: +当前 fault suite 实现 9 个真实 runner: ```text fault_io_eio_preserves_committed_objects +fault_pod_kill_one_preserves_committed_objects +fault_network_partition_one_preserves_committed_objects +fault_io_read_mistake_rejects_corrupt_reads +fault_disk_full_preserves_committed_objects +fault_direct_pv_corruption_detects_or_repairs_bad_data +fault_worker_restart_preserves_committed_objects +fault_dm_flakey_preserves_committed_objects +fault_warp_under_chaos_reports_performance_separately ``` -它应该包含: +这些 runner 共享同一条 correctness 验证链路: 1. destructive/current real Kubernetes context guard。 -2. Chaos Mesh `IOChaos` CRD 检查。 -3. 启动前按 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` 清理上次异常残留的 `IOChaos`。 +2. 按场景检查 Chaos Mesh CRD 或专用 host-side 工具配置。 +3. 启动前按 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` 清理上次异常残留的 Chaos 资源。 4. reset 前验证 namespace 所有权标记;未标记或 Tenant 不匹配时 fail closed。 -5. 
`io-eio` case 前 reset Tenant/PVC;真实集群使用配置的动态 StorageClass。 +5. 每个 case 前 reset Tenant/PVC;真实集群使用配置的动态 StorageClass。 6. Tenant 创建和 Ready 等待。 7. S3 bucket 创建。 8. S3 prefill 对象并记录 hash;prefill 阶段必须明确成功,避免空用例通过。 -9. apply `IOChaos fault errno=5`。 -10. 等待 `IOChaos` 进入已选择目标且已注入状态,再开始故障 workload。 +9. apply 对应故障:Chaos Mesh `IOChaos` / `PodChaos` / `NetworkChaos`,或 host-side disk fill、direct PV corruption、Kind worker restart、dm-flakey、Warp under chaos。 +10. 对持续型 Chaos 资源等待进入 active,再开始故障 workload。 11. 故障期间持续读写并输出 `workload-summary.json`。 -12. workload 结束后确认 `IOChaos` 仍处于 active,避免 workload 跑出故障窗口。 -13. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 Chaos Mesh describe/yaml,再触发 cleanup。 -14. delete `IOChaos`。 +12. 对持续型故障确认 workload 没有跑出故障窗口。 +13. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 describe/yaml 或 host fault 输出,再触发 cleanup。 +14. 删除 Chaos 资源、清理 filler 文件或恢复 dmsetup table。 15. Tenant 恢复 Ready 等待。 16. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 17. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 @@ -823,57 +851,45 @@ fault_io_eio_preserves_committed_objects 这个版本已经能证明系统从“占位骨架”升级为“真实故障注入 + 数据正确性校验”。 -当前可执行用例只包含 `io-eio`。上面的故障矩阵保留为后续路线图,不表示 Phase 1 已经实现所有矩阵项。 +## 后续增强计划 -## 分阶段实施计划 +当前 9 个 runner 已经落到代码里。后续工作不再是补入口,而是提高故障强度、判定模型和长稳覆盖。 -### Phase 1:磁盘 EIO 基线 +### Phase 1:runner hardening -- 新增 `chaos_mesh`。 -- 新增 `history`。 -- 新增 `checker`。 -- 新增 `s3_workload`。 -- 实现 `io-eio`。 -- 使用唯一对象 key。 -- 默认小对象数、短持续时间、低故障比例。 +- 在测试环境逐个验证 9 个 scenario 的前置条件、故障注入、清理和 artifacts 输出。 +- 为 PodChaos、NetworkChaos、IOChaos mistake 补充更细的 CRD status 断言。 +- 将 host-side 故障的输出结构化,便于 CI artifact 聚合和历史对比。 +- 保持每个 scenario 独立选择执行,避免多个故障在同一次测试中相互污染。 验收: - `make e2e-check` 通过。 -- `RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test` 可在当前真实 Kubernetes 测试集群运行 `io-eio`,并拒绝 Kind。 +- `RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO= make fault-test` 可在当前真实 Kubernetes 测试集群逐个运行,并拒绝 Kind。 - 如果 committed object 丢失,测试失败。 - 如果 successful GET 返回错误字节,测试失败。 - 如果 workload 跑出 IOChaos active 窗口,测试失败。 - fault runner 不进入 Kind e2e case 
inventory;其边界是 `rustfs-workload/fault-injection`。 +- 每个 scenario 都能在失败时留下足够定位信息。 +- 每个 scenario 结束后能清理自己创建的 Chaos 资源、filler 文件或 dmsetup table。 -### Phase 2:进程和网络故障 - -- 新增 `pod-kill-one`。 -- 新增 `network-partition-one`。 -- 复用同一套 workload/history/checker。 - -验收: - -- Pod 死亡后 StatefulSet 能恢复。 -- 网络分区期间可以失败,但不能返回错误数据。 -- 网络恢复后 committed object 可读回。 - -### Phase 3:静默损坏 +### Phase 2:一致性模型增强 -- 新增 `io-read-mistake`。 -- 新增 direct local-PV corruption。 -- 强化 hash mismatch 和 repair behavior 报告。 +- 引入 same-key overwrite、delete、multipart、prefix/list 等更接近 Jepsen register/set 模型的 workload。 +- 将 operation history 扩展成可回放的事件日志,明确 invoke/ok/fail/info。 +- 在 checker 中区分 linearizable、eventual recovery、data corruption、availability degradation。 验收: -- RustFS 对错误字节返回错误或修复。 -- 不允许 `200 OK` 返回错误对象内容。 +- 成功写入的对象不得丢失。 +- 成功读取不得返回错误字节。 +- List 缺失、陈旧读、超时、服务错误分别记录,不能混成同一种 failure。 -### Phase 4:长稳和性能 +### Phase 3:长稳和性能 -- 增加随机组合故障。 -- 增加长时间 soak。 -- 可选接入 MinIO Warp 或 COSBench。 +- 增加长时间 soak runner。 +- 增加随机但可复现的故障调度。 +- 将 Warp 结果固定为性能/压力信号,不作为 correctness verdict。 注意: @@ -881,13 +897,16 @@ fault_io_eio_preserves_committed_objects - 压测失败不等于数据错误。 - 数据错误永远是 hard fail。 -### Phase 5:块设备级故障 +### Phase 4:块设备级故障实验室 - 研究 `dm-flakey`、`dm-error`、loop device-backed PV。 - 只在 Linux runner 或专用环境启用。 - 不进入默认 fault-test 流程。 +- 现有 dm-flakey runner 通过 `RUSTFS_FAULT_TEST_DM_*` 显式接入专用设备映射。 +- 后续可以在专用 Linux runner 上扩展 `dm-error`、loop device-backed PV 和更细粒度的 I/O 延迟/丢写模型。 +- 这些场景只进入明确标记的专用环境,不进入默认 fault-test 流程。 -这个阶段更接近真实磁盘坏块,但环境成本明显更高。 +这个方向更接近真实磁盘坏块,但环境成本明显更高,必须保持强隔离。 ## 与其他测试框架的关系 @@ -897,7 +916,7 @@ fault_io_eio_preserves_committed_objects | Chaos Mesh | Kubernetes-native nemesis,负责制造故障。 | | Jepsen-like checker | 判断对象存储 correctness,不制造故障。 | | MinIO Mint | 后续用于 S3 API 兼容性,不作为故障 checker。 | -| MinIO Warp | 后续用于故障期间性能压测,不作为 correctness verdict。 | +| MinIO Warp | 用于故障期间性能压测,不作为 correctness verdict。 | | COSBench | 后续用于大规模对象存储压测。 | | Ceph s3-tests | 后续用于 S3 行为兼容性参考。 | | Ceph Teuthology | 借鉴大规模编排思想,当前不直接引入。 | diff --git 
a/e2e/README.md b/e2e/README.md index 1d02891..70915f3 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -47,7 +47,7 @@ e2e/ smoke.rs ignored live smoke entrypoints operator.rs ignored live Operator assertion console.rs ignored live Console API assertion - faults.rs real-cluster destructive fault-test runner; not part of e2e case inventory + faults.rs real-cluster destructive fault-injection suite with scenario-selected runners; not part of e2e case inventory ``` ## Boundary rules diff --git a/e2e/src/cases/mod.rs b/e2e/src/cases/mod.rs index 04933f3..51a68dc 100644 --- a/e2e/src/cases/mod.rs +++ b/e2e/src/cases/mod.rs @@ -27,6 +27,12 @@ pub enum Suite { CertManagerTls, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CaseStatus { + Executable, + Planned, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct CaseSpec { pub suite: Suite, @@ -34,6 +40,7 @@ pub struct CaseSpec { pub description: &'static str, pub boundary: &'static str, pub ci_phase: &'static str, + pub status: CaseStatus, } impl CaseSpec { @@ -50,6 +57,24 @@ impl CaseSpec { description, boundary, ci_phase, + status: CaseStatus::Executable, + } + } + + pub const fn planned( + suite: Suite, + name: &'static str, + description: &'static str, + boundary: &'static str, + ci_phase: &'static str, + ) -> Self { + Self { + suite, + name, + description, + boundary, + ci_phase, + status: CaseStatus::Planned, } } } @@ -66,7 +91,7 @@ pub fn all_cases() -> Vec { #[cfg(test)] mod tests { - use super::{Suite, all_cases}; + use super::{CaseStatus, Suite, all_cases}; use std::collections::{HashMap, HashSet}; #[test] @@ -97,13 +122,15 @@ mod tests { fn cases_are_mapped_to_ci_phases_and_architecture_boundaries() { let missing = all_cases() .into_iter() - .filter(|case| case.boundary.is_empty() || case.ci_phase.is_empty()) + .filter(|case| { + case.description.is_empty() || case.boundary.is_empty() || case.ci_phase.is_empty() + }) .map(|case| case.name) .collect::>(); assert!( missing.is_empty(), - "cases 
missing boundary/ci phase: {missing:?}" + "cases missing description/boundary/ci phase: {missing:?}" ); } @@ -111,6 +138,7 @@ mod tests { fn executable_cases_are_present_for_each_suite() { let counts = all_cases() .into_iter() + .filter(|case| case.status == CaseStatus::Executable) .fold(HashMap::new(), |mut acc, case| { *acc.entry(case.suite).or_insert(0usize) += 1; acc diff --git a/e2e/src/framework/chaos_mesh.rs b/e2e/src/framework/chaos_mesh.rs index f2bacd9..0584dbc 100644 --- a/e2e/src/framework/chaos_mesh.rs +++ b/e2e/src/framework/chaos_mesh.rs @@ -20,11 +20,25 @@ use std::time::{Duration, Instant}; use crate::framework::{config::ClusterTestConfig, kubectl::Kubectl}; const IOCHAOS_CRD: &str = "iochaos.chaos-mesh.org"; +const PODCHAOS_CRD: &str = "podchaos.chaos-mesh.org"; +const NETWORKCHAOS_CRD: &str = "networkchaos.chaos-mesh.org"; const RUN_ID_LABEL: &str = "rustfs-fault-test/run-id"; const SCENARIO_LABEL: &str = "rustfs-fault-test/scenario"; const MANAGED_BY_LABEL: &str = "app.kubernetes.io/managed-by"; const MANAGED_BY_VALUE: &str = "rustfs-operator-fault-test"; +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum IoChaosAction { + Fault { + errno: u8, + }, + Mistake { + filling: String, + max_occurrences: u8, + max_length: usize, + }, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct IoChaosSpec { pub name: String, @@ -36,11 +50,32 @@ pub struct IoChaosSpec { pub container_name: String, pub volume_path: String, pub methods: Vec, - pub errno: u8, + pub action: IoChaosAction, pub percent: u8, pub duration: Duration, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PodChaosSpec { + pub name: String, + pub namespace: String, + pub run_id: String, + pub scenario: String, + pub target_namespace: String, + pub tenant_name: String, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct NetworkChaosSpec { + pub name: String, + pub namespace: String, + pub run_id: String, + pub scenario: String, + pub target_namespace: String, + pub tenant_name: 
String, + pub duration: Duration, +} + #[derive(Debug, Clone)] pub struct ChaosGuard { config: ClusterTestConfig, @@ -83,7 +118,49 @@ impl IoChaosSpec { container_name: "rustfs".to_string(), volume_path: volume_path.into(), methods: vec!["READ".to_string(), "WRITE".to_string()], - errno: 5, + action: IoChaosAction::Fault { errno: 5 }, + percent, + duration, + }) + } + + pub fn read_mistake_on_rustfs_volume( + config: &ClusterTestConfig, + chaos_namespace: impl Into, + run_id: impl Into, + scenario: impl Into, + volume_path: impl Into, + percent: u8, + duration: Duration, + ) -> Result { + ensure!( + (1..=100).contains(&percent), + "IOChaos percent must be in 1..=100, got {percent}" + ); + ensure!( + duration > Duration::ZERO, + "IOChaos duration must be positive" + ); + + let run_id = run_id.into(); + let short_run_id = run_id.chars().take(12).collect::(); + let scenario = scenario.into(); + + Ok(Self { + name: format!("rustfs-fault-io-mistake-{short_run_id}"), + namespace: chaos_namespace.into(), + run_id, + scenario, + target_namespace: config.test_namespace.clone(), + tenant_name: config.tenant_name.clone(), + container_name: "rustfs".to_string(), + volume_path: volume_path.into(), + methods: vec!["READ".to_string()], + action: IoChaosAction::Mistake { + filling: "random".to_string(), + max_occurrences: 1, + max_length: 4096, + }, percent, duration, }) @@ -97,6 +174,7 @@ impl IoChaosSpec { .collect::>() .join("\n"); let seconds = self.duration.as_secs(); + let action = self.action_manifest(); format!( r#"apiVersion: chaos-mesh.org/v1alpha1 @@ -109,7 +187,7 @@ metadata: {scenario_label}: "{scenario}" {managed_by_label}: {managed_by_value} spec: - action: fault +{action} mode: one selector: namespaces: @@ -122,7 +200,6 @@ spec: path: {volume_path}/**/* methods: {methods} - errno: {errno} percent: {percent} duration: "{seconds}s" "#, @@ -139,19 +216,171 @@ spec: container_name = self.container_name, volume_path = self.volume_path, methods = methods, - errno = 
self.errno, percent = self.percent, + action = action, + ) + } + + fn action_manifest(&self) -> String { + match &self.action { + IoChaosAction::Fault { errno } => { + format!(" action: fault\n errno: {errno}") + } + IoChaosAction::Mistake { + filling, + max_occurrences, + max_length, + } => format!( + r#" action: mistake + mistake: + filling: {filling} + maxOccurrences: {max_occurrences} + maxLength: {max_length}"# + ), + } + } +} + +impl PodChaosSpec { + pub fn kill_one_rustfs_pod( + config: &ClusterTestConfig, + chaos_namespace: impl Into, + run_id: impl Into, + scenario: impl Into, + ) -> Self { + let run_id = run_id.into(); + let short_run_id = run_id.chars().take(12).collect::(); + Self { + name: format!("rustfs-fault-pod-kill-{short_run_id}"), + namespace: chaos_namespace.into(), + run_id, + scenario: scenario.into(), + target_namespace: config.test_namespace.clone(), + tenant_name: config.tenant_name.clone(), + } + } + + pub fn manifest(&self) -> String { + format!( + r#"apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: {name} + namespace: {namespace} + labels: + {run_id_label}: "{run_id}" + {scenario_label}: "{scenario}" + {managed_by_label}: {managed_by_value} +spec: + action: pod-kill + mode: one + selector: + namespaces: + - {target_namespace} + labelSelectors: + rustfs.tenant: {tenant_name} +"#, + name = self.name, + namespace = self.namespace, + run_id_label = RUN_ID_LABEL, + run_id = self.run_id, + scenario_label = SCENARIO_LABEL, + scenario = self.scenario, + managed_by_label = MANAGED_BY_LABEL, + managed_by_value = MANAGED_BY_VALUE, + target_namespace = self.target_namespace, + tenant_name = self.tenant_name, + ) + } +} + +impl NetworkChaosSpec { + pub fn partition_one_rustfs_pod( + config: &ClusterTestConfig, + chaos_namespace: impl Into, + run_id: impl Into, + scenario: impl Into, + duration: Duration, + ) -> Result { + ensure!( + duration > Duration::ZERO, + "NetworkChaos duration must be positive" + ); + + let run_id = 
run_id.into(); + let short_run_id = run_id.chars().take(12).collect::(); + Ok(Self { + name: format!("rustfs-fault-net-partition-{short_run_id}"), + namespace: chaos_namespace.into(), + run_id, + scenario: scenario.into(), + target_namespace: config.test_namespace.clone(), + tenant_name: config.tenant_name.clone(), + duration, + }) + } + + pub fn manifest(&self) -> String { + let seconds = self.duration.as_secs(); + format!( + r#"apiVersion: chaos-mesh.org/v1alpha1 +kind: NetworkChaos +metadata: + name: {name} + namespace: {namespace} + labels: + {run_id_label}: "{run_id}" + {scenario_label}: "{scenario}" + {managed_by_label}: {managed_by_value} +spec: + action: partition + mode: one + selector: + namespaces: + - {target_namespace} + labelSelectors: + rustfs.tenant: {tenant_name} + direction: both + target: + mode: all + selector: + namespaces: + - {target_namespace} + labelSelectors: + rustfs.tenant: {tenant_name} + duration: "{seconds}s" +"#, + name = self.name, + namespace = self.namespace, + run_id_label = RUN_ID_LABEL, + run_id = self.run_id, + scenario_label = SCENARIO_LABEL, + scenario = self.scenario, + managed_by_label = MANAGED_BY_LABEL, + managed_by_value = MANAGED_BY_VALUE, + target_namespace = self.target_namespace, + tenant_name = self.tenant_name, ) } } pub fn require_iochaos_crd(config: &ClusterTestConfig) -> Result<()> { - let output = Kubectl::new(config) - .command(["get", "crd", IOCHAOS_CRD]) - .run()?; + require_crd(config, IOCHAOS_CRD, "Chaos Mesh IOChaos") +} + +pub fn require_podchaos_crd(config: &ClusterTestConfig) -> Result<()> { + require_crd(config, PODCHAOS_CRD, "Chaos Mesh PodChaos") +} + +pub fn require_networkchaos_crd(config: &ClusterTestConfig) -> Result<()> { + require_crd(config, NETWORKCHAOS_CRD, "Chaos Mesh NetworkChaos") +} + +fn require_crd(config: &ClusterTestConfig, crd: &str, description: &str) -> Result<()> { + let output = Kubectl::new(config).command(["get", "crd", crd]).run()?; ensure!( output.code == Some(0), - "Chaos 
Mesh IOChaos CRD {IOCHAOS_CRD} is required for fault tests; install Chaos Mesh before running faults\nstdout:\n{}\nstderr:\n{}", + "{description} CRD {crd} is required for fault tests; install Chaos Mesh before running faults\nstdout:\n{}\nstderr:\n{}", output.stdout, output.stderr ); @@ -159,25 +388,53 @@ pub fn require_iochaos_crd(config: &ClusterTestConfig) -> Result<()> { } pub fn cleanup_run(config: &ClusterTestConfig, namespace: &str, run_id: &str) -> Result<()> { + let selector = format!("{RUN_ID_LABEL}={run_id}"); + for kind in ["iochaos", "podchaos", "networkchaos"] { + Kubectl::new(config) + .namespaced(namespace) + .command(["delete", kind, "-l", &selector, "--ignore-not-found"]) + .run_checked()?; + } + Ok(()) +} + +pub fn cleanup_run_kind( + config: &ClusterTestConfig, + namespace: &str, + run_id: &str, + kind: &str, +) -> Result<()> { let selector = format!("{RUN_ID_LABEL}={run_id}"); Kubectl::new(config) .namespaced(namespace) - .command(["delete", "iochaos", "-l", &selector, "--ignore-not-found"]) + .command(["delete", kind, "-l", &selector, "--ignore-not-found"]) .run_checked()?; Ok(()) } pub fn cleanup_managed_iochaos(config: &ClusterTestConfig, namespace: &str) -> Result<()> { + cleanup_managed_kind(config, namespace, "iochaos") +} + +pub fn cleanup_managed_podchaos(config: &ClusterTestConfig, namespace: &str) -> Result<()> { + cleanup_managed_kind(config, namespace, "podchaos") +} + +pub fn cleanup_managed_networkchaos(config: &ClusterTestConfig, namespace: &str) -> Result<()> { + cleanup_managed_kind(config, namespace, "networkchaos") +} + +fn cleanup_managed_kind(config: &ClusterTestConfig, namespace: &str, kind: &str) -> Result<()> { let selector = format!("{MANAGED_BY_LABEL}={MANAGED_BY_VALUE}"); Kubectl::new(config) .namespaced(namespace) - .command(["delete", "iochaos", "-l", &selector, "--ignore-not-found"]) + .command(["delete", kind, "-l", &selector, "--ignore-not-found"]) .run_checked()?; Ok(()) } pub fn apply_iochaos(config: 
&ClusterTestConfig, spec: &IoChaosSpec) -> Result { - cleanup_run(config, &spec.namespace, &spec.run_id)?; + cleanup_run_kind(config, &spec.namespace, &spec.run_id, "iochaos")?; Kubectl::new(config) .namespaced(&spec.namespace) .apply_yaml_command(spec.manifest()) @@ -192,6 +449,41 @@ pub fn apply_iochaos(config: &ClusterTestConfig, spec: &IoChaosSpec) -> Result Result { + cleanup_run_kind(config, &spec.namespace, &spec.run_id, "podchaos")?; + Kubectl::new(config) + .namespaced(&spec.namespace) + .apply_yaml_command(spec.manifest()) + .run_checked()?; + + Ok(ChaosGuard { + config: config.clone(), + kind: "podchaos", + namespace: spec.namespace.clone(), + name: spec.name.clone(), + deleted: false, + }) +} + +pub fn apply_networkchaos( + config: &ClusterTestConfig, + spec: &NetworkChaosSpec, +) -> Result { + cleanup_run_kind(config, &spec.namespace, &spec.run_id, "networkchaos")?; + Kubectl::new(config) + .namespaced(&spec.namespace) + .apply_yaml_command(spec.manifest()) + .run_checked()?; + + Ok(ChaosGuard { + config: config.clone(), + kind: "networkchaos", + namespace: spec.namespace.clone(), + name: spec.name.clone(), + deleted: false, + }) +} + impl ChaosGuard { pub fn wait_active(&self, timeout: Duration) -> Result<()> { let deadline = Instant::now() + timeout; @@ -199,18 +491,24 @@ impl ChaosGuard { loop { let status_snapshot = match self.json() { Ok(status) => { - if iochaos_is_active(&status)? { + if chaos_experiment_is_active(&status)? 
{ return Ok(()); } status } - Err(error) => format!("failed to read IOChaos status: {error}"), + Err(error) => { + format!("failed to read {kind} status: {error}", kind = self.kind) + } }; if Instant::now() >= deadline { - let describe = self - .describe() - .unwrap_or_else(|error| format!("failed to describe IOChaos: {error}")); + let describe = self.describe().unwrap_or_else(|error| { + format!( + "failed to describe {kind}/{name}: {error}", + kind = self.kind, + name = self.name + ) + }); bail!( "timed out waiting for {kind}/{name} to become active after {timeout:?}\nlast status:\n{status_snapshot}\n\ndescribe:\n{describe}", kind = self.kind, @@ -225,7 +523,7 @@ impl ChaosGuard { pub fn ensure_active(&self, stage: &str) -> Result<()> { let status = self.json()?; ensure!( - iochaos_is_active(&status)?, + chaos_experiment_is_active(&status)?, "{kind}/{name} is not active at {stage}; status:\n{status}", kind = self.kind, name = self.name @@ -272,8 +570,8 @@ impl ChaosGuard { } } -fn iochaos_is_active(raw: &str) -> Result { - let value = serde_json::from_str::(raw).context("parse IOChaos status json")?; +fn chaos_experiment_is_active(raw: &str) -> Result { + let value = serde_json::from_str::(raw).context("parse Chaos Mesh status json")?; let selected = condition_status(&value, "Selected").is_some_and(|status| status == "True"); let injected = condition_status(&value, "AllInjected") .or_else(|| condition_status(&value, "Injected")) @@ -304,7 +602,7 @@ impl Drop for ChaosGuard { #[cfg(test)] mod tests { - use super::{IoChaosSpec, iochaos_is_active}; + use super::{IoChaosSpec, chaos_experiment_is_active}; use crate::framework::fault_config::FaultTestConfig; use std::time::Duration; @@ -346,11 +644,11 @@ mod tests { } }"#; - assert!(iochaos_is_active(status).expect("valid status")); + assert!(chaos_experiment_is_active(status).expect("valid status")); } #[test] - fn iochaos_active_rejects_unselected_experiment() { + fn 
chaos_experiment_active_rejects_unselected_experiment() { let status = r#"{ "status": { "conditions": [ @@ -360,6 +658,6 @@ mod tests { } }"#; - assert!(!iochaos_is_active(status).expect("valid status")); + assert!(!chaos_experiment_is_active(status).expect("valid status")); } } diff --git a/e2e/src/framework/fault_config.rs b/e2e/src/framework/fault_config.rs index 5f79b8e..e39f92c 100644 --- a/e2e/src/framework/fault_config.rs +++ b/e2e/src/framework/fault_config.rs @@ -29,6 +29,11 @@ pub struct FaultTestConfig { pub workload_objects: usize, pub request_timeout: Duration, pub require_client_disruption: bool, + pub disk_fill_mib: u64, + pub dm_name: Option, + pub dm_fault_table: Option, + pub dm_recovery_table: Option, + pub warp_duration: Duration, pub chaos_namespace: String, } @@ -98,6 +103,15 @@ impl FaultTestConfig { &get_env, "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION", ), + disk_fill_mib: env_u64(&get_env, "RUSTFS_FAULT_TEST_DISK_FILL_MIB", 12 * 1024), + dm_name: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_NAME"), + dm_fault_table: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_FAULT_TABLE"), + dm_recovery_table: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE"), + warp_duration: Duration::from_secs(env_u64( + &get_env, + "RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS", + 60, + )), chaos_namespace: env_or(&get_env, "RUSTFS_FAULT_TEST_CHAOS_NAMESPACE", "chaos-mesh"), }) } @@ -187,6 +201,13 @@ where get_env(name).unwrap_or_else(|| default.to_string()) } +fn env_optional(get_env: &F, name: &str) -> Option +where + F: Fn(&str) -> Option, +{ + get_env(name).filter(|value| !value.trim().is_empty()) +} + fn env_bool(get_env: &F, name: &str) -> bool where F: Fn(&str) -> Option, @@ -246,10 +267,60 @@ mod tests { config.cluster.artifacts_dir, std::path::PathBuf::from("target/fault-tests/artifacts") ); + assert_eq!(config.scenario, "io-eio"); + assert_eq!(config.duration, std::time::Duration::from_secs(180)); + assert_eq!(config.percent, 20); + 
assert_eq!(config.workload_objects, 40); + assert_eq!(config.request_timeout, std::time::Duration::from_secs(3)); + assert_eq!(config.disk_fill_mib, 12 * 1024); + assert!(config.dm_name.is_none()); + assert!(config.dm_fault_table.is_none()); + assert!(config.dm_recovery_table.is_none()); + assert_eq!(config.warp_duration, std::time::Duration::from_secs(60)); assert!(!config.destructive_enabled); assert!(config.require_destructive_enabled().is_err()); } + #[test] + fn fault_scenario_env_overrides_are_parsed() { + let config = FaultTestConfig::from_env_with( + |name| match name { + "RUSTFS_FAULT_TEST_STORAGE_CLASS" => Some("fast-csi".to_string()), + "RUSTFS_FAULT_TEST_SCENARIO" => Some("dm-flakey".to_string()), + "RUSTFS_FAULT_TEST_DURATION_SECONDS" => Some("45".to_string()), + "RUSTFS_FAULT_TEST_PERCENT" => Some("35".to_string()), + "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS" => Some("64".to_string()), + "RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS" => Some("7".to_string()), + "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION" => Some("true".to_string()), + "RUSTFS_FAULT_TEST_DISK_FILL_MIB" => Some("1024".to_string()), + "RUSTFS_FAULT_TEST_DM_NAME" => Some("rustfs-test".to_string()), + "RUSTFS_FAULT_TEST_DM_FAULT_TABLE" => Some("0 1024 error".to_string()), + "RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE" => { + Some("0 1024 linear /dev/loop0 0".to_string()) + } + "RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS" => Some("30".to_string()), + _ => None, + }, + "production-test-cluster".to_string(), + ) + .expect("fault config"); + + assert_eq!(config.scenario, "dm-flakey"); + assert_eq!(config.duration, std::time::Duration::from_secs(45)); + assert_eq!(config.percent, 35); + assert_eq!(config.workload_objects, 64); + assert_eq!(config.request_timeout, std::time::Duration::from_secs(7)); + assert!(config.require_client_disruption); + assert_eq!(config.disk_fill_mib, 1024); + assert_eq!(config.dm_name.as_deref(), Some("rustfs-test")); + assert_eq!(config.dm_fault_table.as_deref(), Some("0 1024 
error")); + assert_eq!( + config.dm_recovery_table.as_deref(), + Some("0 1024 linear /dev/loop0 0") + ); + assert_eq!(config.warp_duration, std::time::Duration::from_secs(30)); + } + #[test] fn kind_context_is_rejected_for_fault_tests() { let result = FaultTestConfig::from_env_with( diff --git a/e2e/src/framework/fault_scenarios.rs b/e2e/src/framework/fault_scenarios.rs index e17a957..d4def0d 100644 --- a/e2e/src/framework/fault_scenarios.rs +++ b/e2e/src/framework/fault_scenarios.rs @@ -18,10 +18,209 @@ use std::time::Duration; use crate::framework::fault_config::FaultTestConfig; pub const IO_EIO_SCENARIO: &str = "io-eio"; +pub const POD_KILL_ONE_SCENARIO: &str = "pod-kill-one"; +pub const NETWORK_PARTITION_ONE_SCENARIO: &str = "network-partition-one"; +pub const IO_READ_MISTAKE_SCENARIO: &str = "io-read-mistake"; +pub const DISK_FULL_SCENARIO: &str = "disk-full"; +pub const DIRECT_PV_CORRUPTION_SCENARIO: &str = "direct-pv-corruption"; +pub const WORKER_RESTART_SCENARIO: &str = "worker-restart"; +pub const DM_FLAKEY_SCENARIO: &str = "dm-flakey"; +pub const WARP_UNDER_CHAOS_SCENARIO: &str = "warp-under-chaos"; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FaultScenarioStatus { + Executable, + Planned, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FaultPriority { + P0, + P1, + P2, + P3, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FaultBackend { + ChaosMeshIoChaos, + ChaosMeshPodChaos, + ChaosMeshNetworkChaos, + LocalPvFill, + KindWorkerFileCorruption, + KindWorkerRestart, + DeviceMapper, + MinioWarpWithChaos, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FaultIsolation { + FreshTenant, + ReusableTenant, + DedicatedKindWorker, + DedicatedLinuxBlockDevice, + PerformanceOnly, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct FaultScenarioSpec { + pub scenario: &'static str, + pub case_name: &'static str, + pub description: &'static str, + pub priority: FaultPriority, + pub backend: FaultBackend, + 
pub status: FaultScenarioStatus, + pub isolation: FaultIsolation, + pub boundary: &'static str, + pub ci_phase: &'static str, + pub target: &'static str, + pub validation: &'static str, + pub observability: &'static str, + pub conflict_domain: &'static str, +} + +pub const FAULT_SCENARIO_CATALOG: &[FaultScenarioSpec] = &[ + FaultScenarioSpec { + scenario: IO_EIO_SCENARIO, + case_name: "fault_io_eio_preserves_committed_objects", + description: "Inject Chaos Mesh IOChaos EIO into one RustFS data volume and verify committed S3 objects remain readable with matching hashes after recovery.", + priority: FaultPriority::P0, + backend: FaultBackend::ChaosMeshIoChaos, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::FreshTenant, + boundary: "rustfs-workload/fault-injection", + ci_phase: "faults", + target: "one RustFS container data volume selected by tenant label and /data/rustfs0 path", + validation: "prefill succeeds before injection, mixed PUT/GET workload runs while IOChaos is active, committed PUTs are GET+sha256 verified after recovery, and successful GETs cannot return corrupt bytes", + observability: "history.jsonl, workload-summary.json, checker-report.json, chaos-manifest.yaml, chaos-describe*.txt, Kubernetes snapshot artifacts", + conflict_domain: "fresh Tenant/PVC/PV fixture and run-scoped IOChaos cleanup", + }, + FaultScenarioSpec { + scenario: POD_KILL_ONE_SCENARIO, + case_name: "fault_pod_kill_one_preserves_committed_objects", + description: "Inject Chaos Mesh PodChaos against one RustFS Pod and verify StatefulSet recovery preserves committed S3 objects.", + priority: FaultPriority::P0, + backend: FaultBackend::ChaosMeshPodChaos, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::ReusableTenant, + boundary: "rustfs-workload/pod-recovery", + ci_phase: "faults", + target: "one RustFS Pod selected by tenant label", + validation: "the killed Pod is recreated, Tenant returns Ready, committed PUTs remain readable with 
matching hashes, and failed or unknown operations are recorded without becoming correctness failures", + observability: "history.jsonl, workload-summary.json, checker-report.json, podchaos manifest/describe/yaml, Pod restart counts, current and previous RustFS logs", + conflict_domain: "run-scoped PodChaos resource and one target Pod; can reuse a ready Tenant after the prior scenario has cleaned up", + }, + FaultScenarioSpec { + scenario: NETWORK_PARTITION_ONE_SCENARIO, + case_name: "fault_network_partition_one_preserves_committed_objects", + description: "Inject Chaos Mesh NetworkChaos that partitions one RustFS Pod from its peers and verify recovery does not lose or corrupt committed objects.", + priority: FaultPriority::P1, + backend: FaultBackend::ChaosMeshNetworkChaos, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::ReusableTenant, + boundary: "rustfs-workload/network-partition", + ci_phase: "faults", + target: "one RustFS Pod selected by tenant label with peer traffic disrupted inside the e2e namespace", + validation: "network disruption is active during workload, successful reads never return wrong hashes, committed PUTs remain readable after heal, and Tenant recovers Ready", + observability: "history.jsonl, workload-summary.json, checker-report.json, networkchaos manifest/describe/yaml, endpoints, events, and RustFS logs", + conflict_domain: "run-scoped NetworkChaos resource; must not overlap with PodChaos or IOChaos in the same Tenant", + }, + FaultScenarioSpec { + scenario: IO_READ_MISTAKE_SCENARIO, + case_name: "fault_io_read_mistake_rejects_corrupt_reads", + description: "Inject Chaos Mesh IOChaos mistake on RustFS read paths and verify RustFS never returns corrupt object bytes as successful S3 reads.", + priority: FaultPriority::P1, + backend: FaultBackend::ChaosMeshIoChaos, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::FreshTenant, + boundary: "rustfs-workload/data-integrity", + ci_phase: "faults", + 
target: "one RustFS data volume read path selected by tenant label and /data/rustfs0 path", + validation: "successful GET responses must match the committed hash; RustFS may fail or repair reads but must not return wrong bytes with a successful status", + observability: "history.jsonl, checker-report.json with successful_corrupted_reads, iochaos manifest/describe/yaml, RustFS logs, events", + conflict_domain: "fresh Tenant/PVC/PV fixture and run-scoped IOChaos mistake resource", + }, + FaultScenarioSpec { + scenario: DISK_FULL_SCENARIO, + case_name: "fault_disk_full_preserves_committed_objects", + description: "Fill one RustFS local PV or equivalent data path and verify committed objects survive ENOSPC-style pressure and recovery.", + priority: FaultPriority::P1, + backend: FaultBackend::LocalPvFill, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::FreshTenant, + boundary: "rustfs-workload/storage-pressure", + ci_phase: "faults", + target: "one RustFS data volume with bounded filler data in the e2e-owned storage path", + validation: "new writes may fail under space pressure, but previously committed PUTs remain readable after filler cleanup and Tenant recovery", + observability: "history.jsonl, checker-report.json, filler file path and size, df output before/during/after, events, RustFS logs", + conflict_domain: "fresh Tenant/PVC/PV fixture and a uniquely named filler file cleaned before any subsequent case", + }, + FaultScenarioSpec { + scenario: DIRECT_PV_CORRUPTION_SCENARIO, + case_name: "fault_direct_pv_corruption_detects_or_repairs_bad_data", + description: "Corrupt one e2e-owned Kind local PV file after data is committed and verify RustFS detects, repairs, or fails instead of returning bad bytes.", + priority: FaultPriority::P2, + backend: FaultBackend::KindWorkerFileCorruption, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::FreshTenant, + boundary: "rustfs-workload/data-integrity", + ci_phase: "faults", + 
target: "one file under an e2e-owned Kind worker local PV backing a RustFS data volume", + validation: "after direct corruption, successful GET responses must match committed hashes; missing or failed reads are reported separately from corrupt success", + observability: "history.jsonl, checker-report.json, selected PV path, before/after file hash, docker exec command display, events, RustFS logs", + conflict_domain: "dedicated Kind worker storage and fresh fixture because the case intentionally mutates persisted data", + }, + FaultScenarioSpec { + scenario: WORKER_RESTART_SCENARIO, + case_name: "fault_worker_restart_preserves_committed_objects", + description: "Restart one Kind worker that hosts RustFS data and verify Kubernetes and RustFS recovery preserve committed objects.", + priority: FaultPriority::P2, + backend: FaultBackend::KindWorkerRestart, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::DedicatedKindWorker, + boundary: "rustfs-workload/node-recovery", + ci_phase: "faults", + target: "one e2e Kind worker hosting RustFS Pods or local PVs", + validation: "affected Pods reschedule or recover, Tenant returns Ready, and committed PUTs remain readable with matching hashes", + observability: "history.jsonl, checker-report.json, docker restart timing, node conditions, Pod placement before/after, events, RustFS logs", + conflict_domain: "dedicated Kind cluster case; must not run concurrently with other live suites", + }, + FaultScenarioSpec { + scenario: DM_FLAKEY_SCENARIO, + case_name: "fault_dm_flakey_preserves_committed_objects", + description: "Use a device-mapper flakey or error target for a dedicated test volume and verify RustFS handles block-device instability without data corruption.", + priority: FaultPriority::P3, + backend: FaultBackend::DeviceMapper, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::DedicatedLinuxBlockDevice, + boundary: "rustfs-workload/block-device-fault", + ci_phase: "faults", + 
target: "one dedicated Linux block-device-backed PV used only by the e2e Tenant", + validation: "committed objects remain readable after the device fault is removed, and successful reads never return corrupt bytes", + observability: "history.jsonl, checker-report.json, dmsetup table/status, kernel logs, PV mapping, events, RustFS logs", + conflict_domain: "dedicated Linux runner or lab host; never part of the default Kind flow", + }, + FaultScenarioSpec { + scenario: WARP_UNDER_CHAOS_SCENARIO, + case_name: "fault_warp_under_chaos_reports_performance_separately", + description: "Run MinIO Warp during a selected chaos scenario while keeping performance output separate from the correctness verdict.", + priority: FaultPriority::P3, + backend: FaultBackend::MinioWarpWithChaos, + status: FaultScenarioStatus::Executable, + isolation: FaultIsolation::PerformanceOnly, + boundary: "rustfs-workload/performance-under-chaos", + ci_phase: "faults", + target: "RustFS S3 endpoint under an explicitly selected fault backend", + validation: "Warp throughput or latency changes are reported separately; correctness still comes only from history and checker reports", + observability: "warp report, history.jsonl, checker-report.json, selected chaos manifest/describe/yaml, RustFS logs", + conflict_domain: "performance-only run with isolated bucket prefix and no shared correctness threshold", + }, +]; #[derive(Debug, Clone, PartialEq, Eq)] pub struct FaultScenario { pub name: String, + pub case_name: &'static str, pub duration: Duration, pub percent: u8, pub object_count: usize, @@ -29,10 +228,15 @@ pub struct FaultScenario { impl FaultScenario { pub fn from_config(config: &FaultTestConfig) -> Result { + let spec = scenario_spec(&config.scenario)?; ensure!( - config.scenario == IO_EIO_SCENARIO, - "unsupported fault scenario {:?}; first implementation supports only {IO_EIO_SCENARIO:?}", - config.scenario + spec.status == FaultScenarioStatus::Executable, + "fault scenario {:?} is cataloged as 
{:?} but is not executable yet; case {}, backend {:?}, validation: {}", + config.scenario, + spec.status, + spec.case_name, + spec.backend, + spec.validation ); ensure!( (1..=100).contains(&config.percent), @@ -49,7 +253,8 @@ impl FaultScenario { ); Ok(Self { - name: config.scenario.clone(), + name: spec.scenario.to_string(), + case_name: spec.case_name, duration: config.duration, percent: config.percent, object_count: config.workload_objects, @@ -65,9 +270,27 @@ impl FaultScenario { } } +pub fn scenario_catalog() -> &'static [FaultScenarioSpec] { + FAULT_SCENARIO_CATALOG +} + +pub fn scenario_spec(name: &str) -> Result<&'static FaultScenarioSpec> { + FAULT_SCENARIO_CATALOG + .iter() + .find(|scenario| scenario.scenario == name) + .ok_or_else(|| { + let supported = FAULT_SCENARIO_CATALOG + .iter() + .map(|scenario| scenario.scenario) + .collect::>() + .join(", "); + anyhow::anyhow!("unsupported fault scenario {name:?}; catalog contains: {supported}") + }) +} + #[cfg(test)] mod tests { - use super::{FaultScenario, IO_EIO_SCENARIO}; + use super::{FaultScenario, FaultScenarioStatus, IO_EIO_SCENARIO, scenario_catalog}; use crate::framework::fault_config::FaultTestConfig; use std::time::Duration; @@ -77,6 +300,10 @@ mod tests { let scenario = FaultScenario::from_config(&config).expect("valid scenario"); assert_eq!(scenario.name, IO_EIO_SCENARIO); + assert_eq!( + scenario.case_name, + "fault_io_eio_preserves_committed_objects" + ); assert_eq!(scenario.duration, Duration::from_secs(180)); assert_eq!(scenario.percent, 20); assert_eq!(scenario.prefill_count(), 20); @@ -90,4 +317,34 @@ mod tests { assert!(FaultScenario::from_config(&config).is_err()); } + + #[test] + fn all_cataloged_fault_scenarios_are_executable() { + let mut config = FaultTestConfig::for_test("real-cluster", "fast-csi"); + + for spec in scenario_catalog() { + config.scenario = spec.scenario.to_string(); + + assert_eq!(spec.status, FaultScenarioStatus::Executable); + 
assert!(FaultScenario::from_config(&config).is_ok()); + } + } + + #[test] + fn fault_scenario_catalog_has_unique_clear_and_observable_cases() { + let mut names = std::collections::HashSet::new(); + let mut case_names = std::collections::HashSet::new(); + + for scenario in scenario_catalog() { + assert!(names.insert(scenario.scenario)); + assert!(case_names.insert(scenario.case_name)); + assert!(!scenario.description.is_empty()); + assert!(!scenario.boundary.is_empty()); + assert!(!scenario.ci_phase.is_empty()); + assert!(!scenario.target.is_empty()); + assert!(!scenario.validation.is_empty()); + assert!(!scenario.observability.is_empty()); + assert!(!scenario.conflict_domain.is_empty()); + } + } } diff --git a/e2e/src/framework/host_faults.rs b/e2e/src/framework/host_faults.rs new file mode 100644 index 0000000..ac8244c --- /dev/null +++ b/e2e/src/framework/host_faults.rs @@ -0,0 +1,440 @@ +// Copyright 2025 RustFS Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use anyhow::{Context, Result, bail, ensure}; +use std::time::Duration; + +use crate::framework::{ + artifacts::ArtifactCollector, + command::CommandSpec, + config::{ClusterTestConfig, KIND_WORKER_COUNT}, + kubectl::Kubectl, +}; + +const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; + +#[derive(Debug, Clone)] +pub struct DiskFillGuard { + config: ClusterTestConfig, + pod: String, + filler_path: String, + deleted: bool, +} + +#[derive(Debug, Clone)] +pub struct DmFlakeyGuard { + name: String, + recovery_table: String, + restored: bool, +} + +pub fn fill_rustfs_data_volume( + config: &ClusterTestConfig, + fill_mib: u64, + collector: &ArtifactCollector, + case_name: &str, + run_id: &str, +) -> Result { + let pod = first_rustfs_pod(config)?; + let filler_path = format!("{RUSTFS_DATA_VOLUME}/.rustfs-e2e-disk-full-{run_id}"); + let fill_mib = fill_mib.to_string(); + let script = r#"set -eu +filler="$1" +fill_mib="$2" +dir="$(dirname "$filler")" +rm -f "$filler" +echo "before:" +df -k "$dir" +set +e +dd if=/dev/zero of="$filler" bs=1M count="$fill_mib" oflag=sync +dd_code=$? 
+set -e +sync +echo "after:" +df -k "$dir" +echo "dd_exit=$dd_code" +used_percent="$(df -k "$dir" | awk 'NR==2 {gsub("%", "", $5); print $5}')" +case "$used_percent" in + ''|*[!0-9]*) + echo "unable to parse disk usage percent from df output" >&2 + exit 3 + ;; +esac +if [ "$used_percent" -lt 95 ]; then + echo "disk fill did not create ENOSPC-grade pressure: used=${used_percent}% dd_exit=$dd_code" >&2 + exit 3 +fi +"#; + let output = rustfs_pod_shell( + config, + &pod, + script, + [filler_path.as_str(), fill_mib.as_str()], + ) + .run()?; + collector.write_text( + case_name, + "disk-fill.txt", + &format!( + "pod: {pod}\nfiller: {filler_path}\ncommand output:\nstdout:\n{}\nstderr:\n{}", + output.stdout, output.stderr + ), + )?; + ensure!( + output.code == Some(0), + "disk fill fault did not create observable space pressure; exit {:?}", + output.code + ); + + Ok(DiskFillGuard { + config: config.clone(), + pod, + filler_path, + deleted: false, + }) +} + +pub fn corrupt_one_kind_local_pv_file( + config: &ClusterTestConfig, + collector: &ArtifactCollector, + case_name: &str, +) -> Result<()> { + ensure!( + config.context.starts_with("kind-"), + "direct PV corruption requires a dedicated Kind context, got {}", + config.context + ); + + let script = r#"set -eu +root="$1" +file="$(find "$root" -type f -size +4096c ! -name '.rustfs-e2e-*' | head -n 1)" +if [ -z "$file" ]; then + echo "no candidate file under $root" >&2 + exit 2 +fi +before="$(sha256sum "$file" | awk '{print $1}')" +dd if=/dev/urandom of="$file" bs=4096 count=1 seek=1 conv=notrunc status=none +sync +after="$(sha256sum "$file" | awk '{print $1}')" +printf 'file=%s\nbefore=%s\nafter=%s\n' "$file" "$before" "$after" +"#; + + let mut attempts = String::new(); + for node in kind_worker_node_names(config)? 
{ + let command = CommandSpec::new("docker").args([ + "exec".to_string(), + node.clone(), + "sh".to_string(), + "-c".to_string(), + script.to_string(), + "sh".to_string(), + "/mnt/data".to_string(), + ]); + let output = command.run()?; + attempts.push_str(&format!( + "$ {}\nexit: {:?}\nstdout:\n{}\nstderr:\n{}\n\n", + command.display(), + output.code, + output.stdout, + output.stderr + )); + if output.code == Some(0) { + collector.write_text(case_name, "direct-pv-corruption.txt", &attempts)?; + return Ok(()); + } + } + + collector.write_text(case_name, "direct-pv-corruption-failed.txt", &attempts)?; + bail!("failed to find and corrupt a candidate local PV file on any Kind worker") +} + +pub fn restart_one_kind_worker( + config: &ClusterTestConfig, + collector: &ArtifactCollector, + case_name: &str, +) -> Result<()> { + ensure!( + config.context.starts_with("kind-"), + "worker restart fault requires a dedicated Kind context, got {}", + config.context + ); + + let node = kind_worker_node_names(config)? 
+ .into_iter() + .next() + .context("no Kind worker nodes configured")?; + let before = Kubectl::new(config) + .command(["get", "pods", "-A", "-o", "wide"]) + .run_checked()?; + let restart = CommandSpec::new("docker") + .args(["restart".to_string(), node.clone()]) + .run_checked()?; + let wait = Kubectl::new(config) + .command(vec![ + "wait".to_string(), + "--for=condition=Ready".to_string(), + format!("node/{node}"), + "--timeout=300s".to_string(), + ]) + .run_checked()?; + let after = Kubectl::new(config) + .command(["get", "pods", "-A", "-o", "wide"]) + .run_checked()?; + + collector.write_text( + case_name, + "worker-restart.txt", + &format!( + "node: {node}\n\nbefore pods:\n{}\nrestart stdout:\n{}\nrestart stderr:\n{}\nwait stdout:\n{}\nwait stderr:\n{}\nafter pods:\n{}", + before.stdout, restart.stdout, restart.stderr, wait.stdout, wait.stderr, after.stdout + ), + )?; + Ok(()) +} + +pub fn apply_dm_flakey( + name: &str, + fault_table: &str, + recovery_table: Option<&str>, + collector: &ArtifactCollector, + case_name: &str, +) -> Result { + let original = CommandSpec::new("dmsetup") + .args(["table".to_string(), name.to_string()]) + .run_checked()? 
+ .stdout; + let recovery_table = recovery_table + .map(str::to_string) + .unwrap_or_else(|| original.trim().to_string()); + + dmsetup_load_table(name, fault_table)?; + collector.write_text( + case_name, + "dm-flakey.txt", + &format!( + "target: {name}\noriginal table:\n{original}\nfault table:\n{fault_table}\nrecovery table:\n{recovery_table}\n" + ), + )?; + + Ok(DmFlakeyGuard { + name: name.to_string(), + recovery_table, + restored: false, + }) +} + +pub fn run_warp_mixed( + duration: Duration, + collector: &ArtifactCollector, + case_name: &str, + endpoint: &str, + bucket: &str, + access_key: &str, + secret_key: &str, +) -> Result<()> { + let host = endpoint + .strip_prefix("http://") + .or_else(|| endpoint.strip_prefix("https://")) + .unwrap_or(endpoint); + let duration = format!("{}s", duration.as_secs()); + let command = CommandSpec::new("warp").args([ + "mixed".to_string(), + format!("--host={host}"), + format!("--access-key={access_key}"), + format!("--secret-key={secret_key}"), + format!("--bucket={bucket}"), + format!("--duration={duration}"), + "--obj.size=4KiB".to_string(), + "--tls=false".to_string(), + "--autoterm".to_string(), + ]); + let output = command.run()?; + let display = command.display().replace( + &format!("--secret-key={secret_key}"), + "--secret-key=", + ); + collector.write_text( + case_name, + "warp-mixed.txt", + &format!( + "$ {}\nexit: {:?}\nstdout:\n{}\nstderr:\n{}", + display, output.code, output.stdout, output.stderr + ), + )?; + ensure!( + output.code == Some(0), + "warp mixed command failed with exit {:?}", + output.code + ); + Ok(()) +} + +impl DiskFillGuard { + pub fn delete(&mut self) -> Result<()> { + self.delete_inner()?; + self.deleted = true; + Ok(()) + } + + fn delete_inner(&self) -> Result<()> { + let pods = rustfs_pod_names(&self.config).unwrap_or_else(|_| vec![self.pod.clone()]); + let mut attempts = String::new(); + for pod in pods { + let command = rustfs_pod_shell( + &self.config, + &pod, + "rm -f \"$1\" && sync", + 
[self.filler_path.as_str()], + ); + let output = command.run()?; + attempts.push_str(&format!( + "$ {}\nexit: {:?}\nstdout:\n{}\nstderr:\n{}\n\n", + command.display(), + output.code, + output.stdout, + output.stderr + )); + if output.code == Some(0) { + return Ok(()); + } + } + bail!( + "failed to remove disk fill artifact {} from RustFS pods\n{}", + self.filler_path, + attempts + ) + } +} + +impl Drop for DiskFillGuard { + fn drop(&mut self) { + if !self.deleted { + let _ = self.delete_inner(); + } + } +} + +impl DmFlakeyGuard { + pub fn restore(&mut self) -> Result<()> { + dmsetup_load_table(&self.name, &self.recovery_table)?; + self.restored = true; + Ok(()) + } +} + +impl Drop for DmFlakeyGuard { + fn drop(&mut self) { + if !self.restored { + let _ = dmsetup_load_table(&self.name, &self.recovery_table); + } + } +} + +fn first_rustfs_pod(config: &ClusterTestConfig) -> Result { + rustfs_pod_names(config)? + .into_iter() + .next() + .context("no RustFS pods returned") +} + +fn rustfs_pod_names(config: &ClusterTestConfig) -> Result> { + let selector = format!("rustfs.tenant={}", config.tenant_name); + let output = Kubectl::new(config) + .namespaced(&config.test_namespace) + .command([ + "get", + "pod", + "-l", + &selector, + "-o", + r#"jsonpath={range .items[*]}{.metadata.name}{"\n"}{end}"#, + ]) + .run_checked()?; + let pods = output + .stdout + .lines() + .map(str::trim) + .filter(|pod| !pod.is_empty()) + .map(str::to_string) + .collect::>(); + ensure!( + !pods.is_empty(), + "no RustFS pod found for selector {selector} in namespace {}", + config.test_namespace + ); + Ok(pods) +} + +fn kind_worker_node_names(config: &ClusterTestConfig) -> Result> { + let cluster_name = config.context.strip_prefix("kind-").with_context(|| { + format!( + "Kind worker fault requires a Kind context, got {}", + config.context + ) + })?; + + Ok((1..=KIND_WORKER_COUNT) + .map(|index| match index { + 1 => format!("{cluster_name}-worker"), + _ => format!("{cluster_name}-worker{index}"), + 
}) + .collect()) +} + +fn rustfs_pod_shell<'a, I>( + config: &ClusterTestConfig, + pod: &str, + script: &str, + args: I, +) -> CommandSpec +where + I: IntoIterator, +{ + let mut command_args = vec![ + "exec".to_string(), + pod.to_string(), + "-c".to_string(), + "rustfs".to_string(), + "--".to_string(), + "sh".to_string(), + "-c".to_string(), + script.to_string(), + "sh".to_string(), + ]; + command_args.extend(args.into_iter().map(str::to_string)); + Kubectl::new(config) + .namespaced(&config.test_namespace) + .command(command_args) +} + +fn dmsetup_load_table(name: &str, table: &str) -> Result<()> { + CommandSpec::new("dmsetup") + .args(["suspend".to_string(), name.to_string()]) + .run_checked()?; + let load = CommandSpec::new("dmsetup") + .args([ + "load".to_string(), + name.to_string(), + "--table".to_string(), + table.to_string(), + ]) + .run_checked(); + let resume = CommandSpec::new("dmsetup") + .args(["resume".to_string(), name.to_string()]) + .run_checked(); + + load?; + resume?; + Ok(()) +} diff --git a/e2e/src/framework/mod.rs b/e2e/src/framework/mod.rs index 893dbdf..de2f612 100644 --- a/e2e/src/framework/mod.rs +++ b/e2e/src/framework/mod.rs @@ -24,6 +24,7 @@ pub mod deploy; pub mod fault_config; pub mod fault_scenarios; pub mod history; +pub mod host_faults; pub mod images; pub mod kind; pub mod kube_client; diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index 2e0c8a3..6d47df8 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -12,18 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::{Context, Result, ensure}; +use anyhow::{Context, Result, bail, ensure}; use kube::Api; use operator::types::v1alpha1::tenant::Tenant; use rustfs_operator_e2e::framework::{ artifacts::ArtifactCollector, - chaos_mesh::{self, IoChaosSpec}, + chaos_mesh::{self, ChaosGuard, IoChaosSpec, NetworkChaosSpec, PodChaosSpec}, checker, + command::CommandSpec, config::ClusterTestConfig, fault_config::FaultTestConfig, - fault_scenarios::FaultScenario, + fault_scenarios::{ + self, DIRECT_PV_CORRUPTION_SCENARIO, DISK_FULL_SCENARIO, DM_FLAKEY_SCENARIO, FaultBackend, + FaultScenario, IO_EIO_SCENARIO, IO_READ_MISTAKE_SCENARIO, NETWORK_PARTITION_ONE_SCENARIO, + POD_KILL_ONE_SCENARIO, WARP_UNDER_CHAOS_SCENARIO, WORKER_RESTART_SCENARIO, + }, history::OperationOutcome, history::Recorder, + host_faults::{self, DiskFillGuard, DmFlakeyGuard}, kube_client, port_forward::{PortForwardGuard, PortForwardSpec}, resources, @@ -31,29 +37,91 @@ use rustfs_operator_e2e::framework::{ wait, }; use serde::Serialize; -use std::time::Duration; +use std::collections::BTreeSet; +use std::thread::sleep; +use std::time::{Duration, Instant}; use uuid::Uuid; -const IO_EIO_CASE: &str = "fault_io_eio_preserves_committed_objects"; const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; const SMALL_OBJECT_SIZE_BYTES: usize = 4 * 1024; #[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; run through `make fault-test`"] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=io-eio"] async fn fault_io_eio_preserves_committed_objects() -> Result<()> { + run_selected_fault_case(IO_EIO_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=pod-kill-one"] +async fn fault_pod_kill_one_preserves_committed_objects() -> Result<()> { + run_selected_fault_case(POD_KILL_ONE_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with 
RUSTFS_FAULT_TEST_SCENARIO=network-partition-one"] +async fn fault_network_partition_one_preserves_committed_objects() -> Result<()> { + run_selected_fault_case(NETWORK_PARTITION_ONE_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=io-read-mistake"] +async fn fault_io_read_mistake_rejects_corrupt_reads() -> Result<()> { + run_selected_fault_case(IO_READ_MISTAKE_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=disk-full"] +async fn fault_disk_full_preserves_committed_objects() -> Result<()> { + run_selected_fault_case(DISK_FULL_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=direct-pv-corruption"] +async fn fault_direct_pv_corruption_detects_or_repairs_bad_data() -> Result<()> { + run_selected_fault_case(DIRECT_PV_CORRUPTION_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=worker-restart"] +async fn fault_worker_restart_preserves_committed_objects() -> Result<()> { + run_selected_fault_case(WORKER_RESTART_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=dm-flakey"] +async fn fault_dm_flakey_preserves_committed_objects() -> Result<()> { + run_selected_fault_case(DM_FLAKEY_SCENARIO).await +} + +#[tokio::test] +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=warp-under-chaos"] +async fn fault_warp_under_chaos_reports_performance_separately() -> Result<()> { + run_selected_fault_case(WARP_UNDER_CHAOS_SCENARIO).await +} + +async fn run_selected_fault_case(expected_scenario: &str) -> Result<()> { let config = FaultTestConfig::from_env()?; + let scenario = FaultScenario::from_config(&config)?; + if scenario.name 
!= expected_scenario { + eprintln!( + "skipping fault scenario {expected_scenario}; selected scenario is {}", + scenario.name + ); + return Ok(()); + } + config.require_destructive_enabled()?; config.validate_cluster()?; eprintln!( - "running destructive RustFS fault test against real Kubernetes context: {}", - config.cluster.context + "running destructive RustFS fault scenario {} against real Kubernetes context: {}", + scenario.name, config.cluster.context ); let collector = ArtifactCollector::new(&config.cluster.artifacts_dir); - let result = run_io_eio_case(&config, &collector).await; + let result = run_fault_case(&config, &collector, &scenario).await; if let Err(error) = &result { - match collector.collect_kubernetes_snapshot(IO_EIO_CASE, &config.cluster) { + match collector.collect_kubernetes_snapshot(scenario.case_name, &config.cluster) { Ok(report) => { eprintln!( "collected fault-test artifacts under {}", @@ -70,20 +138,24 @@ async fn fault_io_eio_preserves_committed_objects() -> Result<()> { result } -async fn run_io_eio_case(config: &FaultTestConfig, collector: &ArtifactCollector) -> Result<()> { - let scenario = FaultScenario::from_config(config)?; - let cluster = &config.cluster; - chaos_mesh::require_iochaos_crd(cluster)?; - chaos_mesh::cleanup_managed_iochaos(cluster, &config.chaos_namespace)?; +async fn run_fault_case( + config: &FaultTestConfig, + collector: &ArtifactCollector, + scenario: &FaultScenario, +) -> Result<()> { + let spec = fault_scenarios::scenario_spec(&scenario.name)?; + require_fault_backend(config, spec.backend)?; + cleanup_fault_backend(config, spec.backend)?; - reset_io_eio_fixture(cluster)?; - wait_for_ready_tenant(cluster).await?; + reset_fault_fixture(&config.cluster)?; + wait_for_ready_tenant(&config.cluster).await?; let run_id = format!("run-{}", Uuid::new_v4()); let bucket = bucket_name(&run_id); - let history_path = collector.case_dir(IO_EIO_CASE).join("history.jsonl"); + let history_path = 
collector.case_dir(scenario.case_name).join("history.jsonl"); let mut history = Recorder::create(history_path, &scenario.name, &run_id)?; + let cluster = &config.cluster; let port_forward_spec = PortForwardSpec::tenant_io(&cluster.test_namespace, &cluster.tenant_name); let endpoint = port_forward_spec.local_base_url(); @@ -106,34 +178,25 @@ async fn run_io_eio_case(config: &FaultTestConfig, collector: &ArtifactCollector ); let prefilled = prefill_objects(&s3, &mut history, &run_id, scenario.prefill_count()).await?; - let chaos = IoChaosSpec::eio_on_rustfs_volume( - cluster, - &config.chaos_namespace, + let mut fault = AppliedFault::apply( + config, + collector, + scenario, + spec.backend, &run_id, - &scenario.name, - RUSTFS_DATA_VOLUME, - scenario.percent, - scenario.duration, + &endpoint, + &bucket, + access_key, + secret_key, )?; - collector.write_text(IO_EIO_CASE, "chaos-manifest.yaml", &chaos.manifest())?; - let mut guard = chaos_mesh::apply_iochaos(cluster, &chaos)?; - match guard.describe() { - Ok(describe) => { - collector.write_text(IO_EIO_CASE, "chaos-describe.txt", &describe)?; - } - Err(error) => { - collector.write_text( - IO_EIO_CASE, - "chaos-describe.txt", - &format!("failed to describe IOChaos: {error}"), - )?; - } - } - if let Err(error) = guard.wait_active(cluster.timeout) { - collect_active_chaos_artifacts(collector, &guard, "wait-active-failed")?; + + if let Err(error) = fault.wait_active(cluster.timeout) { + collect_fault_artifacts(collector, scenario.case_name, &fault, "wait-active-failed")?; return Err(error); } + ensure_port_forward(&mut port_forward, cluster, &endpoint).await?; + let workload_summary = match run_mixed_workload( &s3, &mut history, @@ -146,33 +209,46 @@ async fn run_io_eio_case(config: &FaultTestConfig, collector: &ArtifactCollector { Ok(summary) => summary, Err(error) => { - collect_active_chaos_artifacts(collector, &guard, "workload-failed")?; + collect_fault_artifacts(collector, scenario.case_name, &fault, 
"workload-failed")?; return Err(error); } }; collector.write_text( - IO_EIO_CASE, + scenario.case_name, "workload-summary.json", &serde_json::to_string_pretty(&workload_summary)?, )?; - if let Err(error) = workload_summary.require_fault_evidence(config.require_client_disruption) { - collect_active_chaos_artifacts(collector, &guard, "workload-no-fault-evidence")?; + if let Err(error) = + workload_summary.require_fault_evidence(config.require_client_disruption) + { + collect_fault_artifacts( + collector, + scenario.case_name, + &fault, + "workload-no-fault-evidence", + )?; return Err(error); } - if let Err(error) = guard.ensure_active("after fault workload") { - collect_active_chaos_artifacts(collector, &guard, "workload-outlived-chaos")?; + if let Err(error) = fault.ensure_active("after fault workload") { + collect_fault_artifacts( + collector, + scenario.case_name, + &fault, + "workload-outlived-fault", + )?; return Err(error); } - if let Err(error) = guard.delete() { - collect_active_chaos_artifacts(collector, &guard, "delete-failed")?; + if let Err(error) = fault.delete() { + collect_fault_artifacts(collector, scenario.case_name, &fault, "delete-failed")?; return Err(error); } wait_for_ready_tenant(cluster).await?; + ensure_port_forward(&mut port_forward, cluster, &endpoint).await?; let report = checker::check_s3_history(&s3, &mut history, true).await?; collector.write_text( - IO_EIO_CASE, + scenario.case_name, "checker-report.json", &serde_json::to_string_pretty(&report)?, )?; @@ -181,40 +257,434 @@ async fn run_io_eio_case(config: &FaultTestConfig, collector: &ArtifactCollector Ok(()) } -fn reset_io_eio_fixture(config: &ClusterTestConfig) -> Result<()> { +fn require_fault_backend(config: &FaultTestConfig, backend: FaultBackend) -> Result<()> { + let cluster = &config.cluster; + match backend { + FaultBackend::ChaosMeshIoChaos => chaos_mesh::require_iochaos_crd(cluster), + FaultBackend::MinioWarpWithChaos => { + chaos_mesh::require_iochaos_crd(cluster)?; + 
require_tool("warp", ["--help"]) + } + FaultBackend::ChaosMeshPodChaos => chaos_mesh::require_podchaos_crd(cluster), + FaultBackend::ChaosMeshNetworkChaos => chaos_mesh::require_networkchaos_crd(cluster), + FaultBackend::LocalPvFill => Ok(()), + FaultBackend::KindWorkerFileCorruption | FaultBackend::KindWorkerRestart => { + require_tool("docker", ["version"]) + } + FaultBackend::DeviceMapper => require_dm_flakey_preflight(config), + } +} + +fn require_tool(program: &'static str, args: I) -> Result<()> +where + I: IntoIterator, + S: Into, +{ + CommandSpec::new(program) + .args(args) + .run_checked() + .with_context(|| format!("{program} is required for the selected fault scenario"))?; + Ok(()) +} + +fn require_dm_flakey_preflight(config: &FaultTestConfig) -> Result<()> { + let name = config + .dm_name + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_NAME is required for dm-flakey")?; + config + .dm_fault_table + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_FAULT_TABLE is required for dm-flakey")?; + + require_tool("dmsetup", ["version"])?; + CommandSpec::new("dmsetup") + .args(["table", name]) + .run_checked() + .with_context(|| format!("dm-flakey target {name:?} must exist before fixture reset"))?; + Ok(()) +} + +fn cleanup_fault_backend(config: &FaultTestConfig, backend: FaultBackend) -> Result<()> { + match backend { + FaultBackend::ChaosMeshIoChaos | FaultBackend::MinioWarpWithChaos => { + chaos_mesh::cleanup_managed_iochaos(&config.cluster, &config.chaos_namespace) + } + FaultBackend::ChaosMeshPodChaos => { + chaos_mesh::cleanup_managed_podchaos(&config.cluster, &config.chaos_namespace) + } + FaultBackend::ChaosMeshNetworkChaos => { + chaos_mesh::cleanup_managed_networkchaos(&config.cluster, &config.chaos_namespace) + } + FaultBackend::LocalPvFill + | FaultBackend::KindWorkerFileCorruption + | FaultBackend::KindWorkerRestart + | FaultBackend::DeviceMapper => Ok(()), + } +} + +fn reset_fault_fixture(config: &ClusterTestConfig) -> Result<()> { 
resources::reset_fault_tenant_resources(config)?; resources::apply_fault_tenant_resources(config)?; Ok(()) } -fn collect_active_chaos_artifacts( +enum AppliedFault { + Chaos { + guard: Box, + active_required: bool, + }, + PodKill { + guard: Box, + before_pods: Vec, + config: Box, + }, + DiskFill(Box), + DmFlakey(Box), + Completed, +} + +impl AppliedFault { + #[allow(clippy::too_many_arguments)] + fn apply( + config: &FaultTestConfig, + collector: &ArtifactCollector, + scenario: &FaultScenario, + backend: FaultBackend, + run_id: &str, + endpoint: &str, + bucket: &str, + access_key: &str, + secret_key: &str, + ) -> Result { + let cluster = &config.cluster; + match backend { + FaultBackend::ChaosMeshIoChaos if scenario.name == IO_READ_MISTAKE_SCENARIO => { + let chaos = IoChaosSpec::read_mistake_on_rustfs_volume( + cluster, + &config.chaos_namespace, + run_id, + &scenario.name, + RUSTFS_DATA_VOLUME, + scenario.percent, + scenario.duration, + )?; + collector.write_text( + scenario.case_name, + "chaos-manifest.yaml", + &chaos.manifest(), + )?; + Ok(Self::Chaos { + guard: Box::new(chaos_mesh::apply_iochaos(cluster, &chaos)?), + active_required: true, + }) + } + FaultBackend::ChaosMeshIoChaos => { + let chaos = IoChaosSpec::eio_on_rustfs_volume( + cluster, + &config.chaos_namespace, + run_id, + &scenario.name, + RUSTFS_DATA_VOLUME, + scenario.percent, + scenario.duration, + )?; + collector.write_text( + scenario.case_name, + "chaos-manifest.yaml", + &chaos.manifest(), + )?; + Ok(Self::Chaos { + guard: Box::new(chaos_mesh::apply_iochaos(cluster, &chaos)?), + active_required: true, + }) + } + FaultBackend::ChaosMeshPodChaos => { + let before_pods = rustfs_pod_identities(cluster)?; + let chaos = PodChaosSpec::kill_one_rustfs_pod( + cluster, + &config.chaos_namespace, + run_id, + &scenario.name, + ); + collector.write_text( + scenario.case_name, + "chaos-manifest.yaml", + &chaos.manifest(), + )?; + Ok(Self::PodKill { + guard: Box::new(chaos_mesh::apply_podchaos(cluster, 
&chaos)?), + before_pods, + config: Box::new(cluster.clone()), + }) + } + FaultBackend::ChaosMeshNetworkChaos => { + let chaos = NetworkChaosSpec::partition_one_rustfs_pod( + cluster, + &config.chaos_namespace, + run_id, + &scenario.name, + scenario.duration, + )?; + collector.write_text( + scenario.case_name, + "chaos-manifest.yaml", + &chaos.manifest(), + )?; + Ok(Self::Chaos { + guard: Box::new(chaos_mesh::apply_networkchaos(cluster, &chaos)?), + active_required: true, + }) + } + FaultBackend::LocalPvFill => Ok(Self::DiskFill(Box::new( + host_faults::fill_rustfs_data_volume( + cluster, + config.disk_fill_mib, + collector, + scenario.case_name, + run_id, + )?, + ))), + FaultBackend::KindWorkerFileCorruption => { + host_faults::corrupt_one_kind_local_pv_file( + cluster, + collector, + scenario.case_name, + )?; + Ok(Self::Completed) + } + FaultBackend::KindWorkerRestart => { + host_faults::restart_one_kind_worker(cluster, collector, scenario.case_name)?; + Ok(Self::Completed) + } + FaultBackend::DeviceMapper => { + let name = config + .dm_name + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_NAME is required for dm-flakey")?; + let fault_table = config + .dm_fault_table + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_FAULT_TABLE is required for dm-flakey")?; + Ok(Self::DmFlakey(Box::new(host_faults::apply_dm_flakey( + name, + fault_table, + config.dm_recovery_table.as_deref(), + collector, + scenario.case_name, + )?))) + } + FaultBackend::MinioWarpWithChaos => { + let chaos = IoChaosSpec::eio_on_rustfs_volume( + cluster, + &config.chaos_namespace, + run_id, + &scenario.name, + RUSTFS_DATA_VOLUME, + scenario.percent, + scenario.duration, + )?; + collector.write_text( + scenario.case_name, + "chaos-manifest.yaml", + &chaos.manifest(), + )?; + let guard = chaos_mesh::apply_iochaos(cluster, &chaos)?; + guard.wait_active(cluster.timeout)?; + host_faults::run_warp_mixed( + config.warp_duration, + collector, + scenario.case_name, + endpoint, + bucket, + access_key, + 
secret_key, + )?; + Ok(Self::Chaos { + guard: Box::new(guard), + active_required: true, + }) + } + } + } + + fn wait_active(&self, timeout: Duration) -> Result<()> { + match self { + Self::Chaos { + guard, + active_required, + } if *active_required => guard.wait_active(timeout), + Self::PodKill { + before_pods, + config, + .. + } => wait_for_rustfs_pod_replacement(config, before_pods, timeout), + Self::Chaos { .. } | Self::DiskFill(_) | Self::DmFlakey(_) | Self::Completed => Ok(()), + } + } + + fn ensure_active(&self, stage: &str) -> Result<()> { + match self { + Self::Chaos { + guard, + active_required, + } if *active_required => guard.ensure_active(stage), + Self::PodKill { .. } + | Self::Chaos { .. } + | Self::DiskFill(_) + | Self::DmFlakey(_) + | Self::Completed => Ok(()), + } + } + + fn delete(&mut self) -> Result<()> { + match self { + Self::Chaos { guard, .. } => guard.delete(), + Self::PodKill { guard, .. } => guard.delete(), + Self::DiskFill(guard) => guard.delete(), + Self::DmFlakey(guard) => guard.restore(), + Self::Completed => Ok(()), + } + } + + fn chaos_guard(&self) -> Option<&ChaosGuard> { + match self { + Self::Chaos { guard, .. } | Self::PodKill { guard, .. 
} => Some(guard.as_ref()), + Self::DiskFill(_) | Self::DmFlakey(_) | Self::Completed => None, + } + } +} + +fn collect_fault_artifacts( collector: &ArtifactCollector, - guard: &chaos_mesh::ChaosGuard, + case_name: &str, + fault: &AppliedFault, suffix: &str, ) -> Result<()> { - let describe = guard - .describe() - .unwrap_or_else(|error| format!("failed to describe IOChaos before cleanup: {error}")); - collector.write_text( - IO_EIO_CASE, - &format!("chaos-describe-{suffix}.txt"), - &describe, - )?; - - let yaml = guard - .yaml() - .unwrap_or_else(|error| format!("failed to get IOChaos yaml before cleanup: {error}")); - collector.write_text(IO_EIO_CASE, &format!("chaos-{suffix}.yaml"), &yaml)?; + if let Some(guard) = fault.chaos_guard() { + let describe = guard + .describe() + .unwrap_or_else(|error| format!("failed to describe chaos before cleanup: {error}")); + collector.write_text( + case_name, + &format!("chaos-describe-{suffix}.txt"), + &describe, + )?; + + let yaml = guard + .yaml() + .unwrap_or_else(|error| format!("failed to get chaos yaml before cleanup: {error}")); + collector.write_text(case_name, &format!("chaos-{suffix}.yaml"), &yaml)?; + } Ok(()) } +#[derive(Debug, Clone, PartialEq, Eq)] +struct PodIdentity { + name: String, + uid: String, +} + +fn rustfs_pod_identities(config: &ClusterTestConfig) -> Result> { + let selector = format!("rustfs.tenant={}", config.tenant_name); + let output = rustfs_operator_e2e::framework::kubectl::Kubectl::new(config) + .namespaced(&config.test_namespace) + .command(["get", "pod", "-l", &selector, "-o", "json"]) + .run_checked()?; + let value = serde_json::from_str::(&output.stdout) + .context("parse RustFS pod list json")?; + let items = value + .pointer("/items") + .and_then(serde_json::Value::as_array) + .context("RustFS pod list did not contain an items array")?; + let pods = items + .iter() + .filter_map(|item| { + let metadata = item.get("metadata")?; + Some(PodIdentity { + name: 
metadata.get("name")?.as_str()?.to_string(), + uid: metadata.get("uid")?.as_str()?.to_string(), + }) + }) + .collect::>(); + ensure!( + !pods.is_empty(), + "no RustFS pods found for selector {selector} in namespace {}", + config.test_namespace + ); + Ok(pods) +} + +fn wait_for_rustfs_pod_replacement( + config: &ClusterTestConfig, + before: &[PodIdentity], + timeout: Duration, +) -> Result<()> { + let deadline = Instant::now() + timeout; + let mut last_snapshot = Vec::new(); + let mut last_error = "not checked yet".to_string(); + + loop { + if Instant::now() >= deadline { + bail!( + "timed out waiting for PodChaos to replace a RustFS pod after {timeout:?}\nbefore: {before:?}\nlast: {last_snapshot:?}\nlast error: {last_error}", + ); + } + + match rustfs_pod_identities(config) { + Ok(current) => { + if pod_replacement_observed(before, ¤t) { + return Ok(()); + } + last_snapshot = current; + last_error = "none".to_string(); + } + Err(error) => { + last_error = error.to_string(); + } + } + + sleep(Duration::from_secs(1)); + } +} + +fn pod_replacement_observed(before: &[PodIdentity], current: &[PodIdentity]) -> bool { + if before.is_empty() || current.is_empty() { + return false; + } + + let before_uids = before + .iter() + .map(|pod| pod.uid.as_str()) + .collect::>(); + let current_uids = current + .iter() + .map(|pod| pod.uid.as_str()) + .collect::>(); + let old_uid_removed = before_uids.iter().any(|uid| !current_uids.contains(uid)); + let new_uid_added = current_uids.iter().any(|uid| !before_uids.contains(uid)); + + old_uid_removed && new_uid_added +} + async fn wait_for_ready_tenant(config: &ClusterTestConfig) -> Result { let client = kube_client::default_client().await?; let tenants: Api = kube_client::tenant_api(client, &config.test_namespace); wait::wait_for_tenant_ready(tenants, &config.tenant_name, config.timeout).await } +async fn ensure_port_forward( + port_forward: &mut PortForwardGuard, + config: &ClusterTestConfig, + endpoint: &str, +) -> Result<()> { + if 
port_forward.ensure_running().is_err() { + *port_forward = PortForwardSpec::start_tenant_io(config)?; + } + wait_for_tenant_s3(port_forward, endpoint, config.timeout).await +} + async fn wait_for_tenant_s3( port_forward: &mut PortForwardGuard, endpoint: &str, @@ -306,11 +776,11 @@ impl WorkloadSummary { if require_client_disruption { ensure!( self.disrupted() > 0, - "IOChaos became active but the S3 workload observed no client-visible disrupted operation; increase RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS or RUSTFS_FAULT_TEST_PERCENT, or set RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION=0 if this is expected" + "fault was applied but the S3 workload observed no client-visible disrupted operation; increase RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS or RUSTFS_FAULT_TEST_PERCENT, or set RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION=0 if this is expected" ); } else if self.disrupted() == 0 { eprintln!( - "IOChaos was active, but the S3 workload observed no client-visible disrupted operation" + "fault was applied, but the S3 workload observed no client-visible disrupted operation" ); } Ok(()) @@ -360,7 +830,9 @@ fn bucket_name(run_id: &str) -> String { #[cfg(test)] mod tests { - use super::{OutcomeCounts, WorkloadSummary, bucket_name}; + use super::{ + OutcomeCounts, PodIdentity, WorkloadSummary, bucket_name, pod_replacement_observed, + }; use rustfs_operator_e2e::framework::history::OperationOutcome; #[test] @@ -400,4 +872,31 @@ mod tests { assert!(summary.require_fault_evidence(false).is_ok()); assert!(summary.require_fault_evidence(true).is_err()); } + + #[test] + fn pod_replacement_requires_old_uid_removed_and_new_uid_added() { + let before = vec![ + PodIdentity { + name: "rustfs-0".to_string(), + uid: "uid-a".to_string(), + }, + PodIdentity { + name: "rustfs-1".to_string(), + uid: "uid-b".to_string(), + }, + ]; + + assert!(!pod_replacement_observed(&before, &before)); + assert!(!pod_replacement_observed(&before, &before[..1])); + assert!(pod_replacement_observed( + &before, + &[ + 
PodIdentity { + name: "rustfs-0".to_string(), + uid: "uid-c".to_string(), + }, + before[1].clone(), + ], + )); + } } From 49205634fa0c47cde4f2c573b361f2f7de0e25d9 Mon Sep 17 00:00:00 2001 From: GatewayJ <835269233@qq.com> Date: Fri, 19 Jun 2026 11:13:15 +0800 Subject: [PATCH 07/20] test: restrict fault suite to real clusters --- FAULT_INJECTION_TEST_PLAN.md | 16 ++- e2e/src/framework/fault_scenarios.rs | 46 ++------- e2e/src/framework/host_faults.rs | 147 +++------------------------ e2e/tests/faults.rs | 113 ++++++++------------ 4 files changed, 70 insertions(+), 252 deletions(-) diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md index 4bc6fab..3b6c709 100644 --- a/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -782,7 +782,7 @@ RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test 该入口使用当前 `kubectl` context,拒绝 Kind,并使用 `RUSTFS_FAULT_TEST_STORAGE_CLASS` 指向的真实集群动态 StorageClass。 -`e2e/tests/faults.rs` 中每个 destructive 场景都有同名 ignored runner。运行时通过 `RUSTFS_FAULT_TEST_SCENARIO` 选择一个真实执行的场景;未选中的 ignored runner 会快速返回,避免一次 `make fault-test` 串行跑完整个破坏性矩阵。 +`e2e/tests/faults.rs` 中每个 destructive 场景都有同名 ignored runner。运行时通过 `RUSTFS_FAULT_TEST_SCENARIO` 选择一个真实执行的场景;未选中的 ignored runner 会快速返回,避免一次 `make fault-test` 串行跑完整个破坏性矩阵。故障测试只面向真实 Kubernetes 测试集群,不保留 Kind 后端;Kind e2e 生命周期测试是独立部分。 示例: @@ -795,8 +795,6 @@ RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=pod-k RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=network-partition-one make fault-test RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=io-read-mistake make fault-test RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=disk-full make fault-test -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=direct-pv-corruption make fault-test -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=worker-restart make fault-test RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=dm-flakey make fault-test RUSTFS_FAULT_TEST_STORAGE_CLASS= 
RUSTFS_FAULT_TEST_SCENARIO=warp-under-chaos make fault-test ``` @@ -812,7 +810,7 @@ make pre-commit ## 当前可交付范围 -当前 fault suite 实现 9 个真实 runner: +当前 fault suite 实现 7 个真实集群 runner: ```text fault_io_eio_preserves_committed_objects @@ -820,8 +818,6 @@ fault_pod_kill_one_preserves_committed_objects fault_network_partition_one_preserves_committed_objects fault_io_read_mistake_rejects_corrupt_reads fault_disk_full_preserves_committed_objects -fault_direct_pv_corruption_detects_or_repairs_bad_data -fault_worker_restart_preserves_committed_objects fault_dm_flakey_preserves_committed_objects fault_warp_under_chaos_reports_performance_separately ``` @@ -836,7 +832,7 @@ fault_warp_under_chaos_reports_performance_separately 6. Tenant 创建和 Ready 等待。 7. S3 bucket 创建。 8. S3 prefill 对象并记录 hash;prefill 阶段必须明确成功,避免空用例通过。 -9. apply 对应故障:Chaos Mesh `IOChaos` / `PodChaos` / `NetworkChaos`,或 host-side disk fill、direct PV corruption、Kind worker restart、dm-flakey、Warp under chaos。 +9. apply 对应故障:Chaos Mesh `IOChaos` / `PodChaos` / `NetworkChaos`,或 host-side disk fill、dm-flakey、Warp under chaos。 10. 对持续型 Chaos 资源等待进入 active,再开始故障 workload。 11. 故障期间持续读写并输出 `workload-summary.json`。 12. 
对持续型故障确认 workload 没有跑出故障窗口。 @@ -853,11 +849,11 @@ fault_warp_under_chaos_reports_performance_separately ## 后续增强计划 -当前 9 个 runner 已经落到代码里。后续工作不再是补入口,而是提高故障强度、判定模型和长稳覆盖。 +当前 7 个 real-cluster runner 已经落到代码里。后续工作不再是补这些入口,而是提高故障强度、判定模型和长稳覆盖。 ### Phase 1:runner hardening -- 在测试环境逐个验证 9 个 scenario 的前置条件、故障注入、清理和 artifacts 输出。 +- 在测试环境逐个验证 7 个 executable scenario 的前置条件、故障注入、清理和 artifacts 输出。 - 为 PodChaos、NetworkChaos、IOChaos mistake 补充更细的 CRD status 断言。 - 将 host-side 故障的输出结构化,便于 CI artifact 聚合和历史对比。 - 保持每个 scenario 独立选择执行,避免多个故障在同一次测试中相互污染。 @@ -865,7 +861,7 @@ fault_warp_under_chaos_reports_performance_separately 验收: - `make e2e-check` 通过。 -- `RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO= make fault-test` 可在当前真实 Kubernetes 测试集群逐个运行,并拒绝 Kind。 +- `RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO= make fault-test` 可在当前真实 Kubernetes 测试集群逐个运行 scenario,并拒绝 Kind。 - 如果 committed object 丢失,测试失败。 - 如果 successful GET 返回错误字节,测试失败。 - 如果 workload 跑出 IOChaos active 窗口,测试失败。 diff --git a/e2e/src/framework/fault_scenarios.rs b/e2e/src/framework/fault_scenarios.rs index d4def0d..df73c55 100644 --- a/e2e/src/framework/fault_scenarios.rs +++ b/e2e/src/framework/fault_scenarios.rs @@ -22,15 +22,12 @@ pub const POD_KILL_ONE_SCENARIO: &str = "pod-kill-one"; pub const NETWORK_PARTITION_ONE_SCENARIO: &str = "network-partition-one"; pub const IO_READ_MISTAKE_SCENARIO: &str = "io-read-mistake"; pub const DISK_FULL_SCENARIO: &str = "disk-full"; -pub const DIRECT_PV_CORRUPTION_SCENARIO: &str = "direct-pv-corruption"; -pub const WORKER_RESTART_SCENARIO: &str = "worker-restart"; pub const DM_FLAKEY_SCENARIO: &str = "dm-flakey"; pub const WARP_UNDER_CHAOS_SCENARIO: &str = "warp-under-chaos"; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum FaultScenarioStatus { Executable, - Planned, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -47,8 +44,6 @@ pub enum FaultBackend { ChaosMeshPodChaos, ChaosMeshNetworkChaos, LocalPvFill, - KindWorkerFileCorruption, - KindWorkerRestart, 
DeviceMapper, MinioWarpWithChaos, } @@ -57,7 +52,6 @@ pub enum FaultBackend { pub enum FaultIsolation { FreshTenant, ReusableTenant, - DedicatedKindWorker, DedicatedLinuxBlockDevice, PerformanceOnly, } @@ -155,36 +149,6 @@ pub const FAULT_SCENARIO_CATALOG: &[FaultScenarioSpec] = &[ observability: "history.jsonl, checker-report.json, filler file path and size, df output before/during/after, events, RustFS logs", conflict_domain: "fresh Tenant/PVC/PV fixture and a uniquely named filler file cleaned before any subsequent case", }, - FaultScenarioSpec { - scenario: DIRECT_PV_CORRUPTION_SCENARIO, - case_name: "fault_direct_pv_corruption_detects_or_repairs_bad_data", - description: "Corrupt one e2e-owned Kind local PV file after data is committed and verify RustFS detects, repairs, or fails instead of returning bad bytes.", - priority: FaultPriority::P2, - backend: FaultBackend::KindWorkerFileCorruption, - status: FaultScenarioStatus::Executable, - isolation: FaultIsolation::FreshTenant, - boundary: "rustfs-workload/data-integrity", - ci_phase: "faults", - target: "one file under an e2e-owned Kind worker local PV backing a RustFS data volume", - validation: "after direct corruption, successful GET responses must match committed hashes; missing or failed reads are reported separately from corrupt success", - observability: "history.jsonl, checker-report.json, selected PV path, before/after file hash, docker exec command display, events, RustFS logs", - conflict_domain: "dedicated Kind worker storage and fresh fixture because the case intentionally mutates persisted data", - }, - FaultScenarioSpec { - scenario: WORKER_RESTART_SCENARIO, - case_name: "fault_worker_restart_preserves_committed_objects", - description: "Restart one Kind worker that hosts RustFS data and verify Kubernetes and RustFS recovery preserve committed objects.", - priority: FaultPriority::P2, - backend: FaultBackend::KindWorkerRestart, - status: FaultScenarioStatus::Executable, - isolation: 
FaultIsolation::DedicatedKindWorker, - boundary: "rustfs-workload/node-recovery", - ci_phase: "faults", - target: "one e2e Kind worker hosting RustFS Pods or local PVs", - validation: "affected Pods reschedule or recover, Tenant returns Ready, and committed PUTs remain readable with matching hashes", - observability: "history.jsonl, checker-report.json, docker restart timing, node conditions, Pod placement before/after, events, RustFS logs", - conflict_domain: "dedicated Kind cluster case; must not run concurrently with other live suites", - }, FaultScenarioSpec { scenario: DM_FLAKEY_SCENARIO, case_name: "fault_dm_flakey_preserves_committed_objects", @@ -198,7 +162,7 @@ pub const FAULT_SCENARIO_CATALOG: &[FaultScenarioSpec] = &[ target: "one dedicated Linux block-device-backed PV used only by the e2e Tenant", validation: "committed objects remain readable after the device fault is removed, and successful reads never return corrupt bytes", observability: "history.jsonl, checker-report.json, dmsetup table/status, kernel logs, PV mapping, events, RustFS logs", - conflict_domain: "dedicated Linux runner or lab host; never part of the default Kind flow", + conflict_domain: "dedicated Linux runner or lab host with an explicitly assigned block device; never part of shared test storage", }, FaultScenarioSpec { scenario: WARP_UNDER_CHAOS_SCENARIO, @@ -326,8 +290,14 @@ mod tests { config.scenario = spec.scenario.to_string(); assert_eq!(spec.status, FaultScenarioStatus::Executable); - assert!(FaultScenario::from_config(&config).is_ok()); + assert!( + FaultScenario::from_config(&config).is_ok(), + "{} should be selectable through the real-cluster fault-test entrypoint", + spec.scenario + ); } + + assert_eq!(scenario_catalog().len(), 7); } #[test] diff --git a/e2e/src/framework/host_faults.rs b/e2e/src/framework/host_faults.rs index ac8244c..fa46425 100644 --- a/e2e/src/framework/host_faults.rs +++ b/e2e/src/framework/host_faults.rs @@ -16,10 +16,7 @@ use anyhow::{Context, 
Result, bail, ensure}; use std::time::Duration; use crate::framework::{ - artifacts::ArtifactCollector, - command::CommandSpec, - config::{ClusterTestConfig, KIND_WORKER_COUNT}, - kubectl::Kubectl, + artifacts::ArtifactCollector, command::CommandSpec, config::ClusterTestConfig, kubectl::Kubectl, }; const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; @@ -49,6 +46,12 @@ pub fn fill_rustfs_data_volume( let pod = first_rustfs_pod(config)?; let filler_path = format!("{RUSTFS_DATA_VOLUME}/.rustfs-e2e-disk-full-{run_id}"); let fill_mib = fill_mib.to_string(); + let guard = DiskFillGuard { + config: config.clone(), + pod: pod.clone(), + filler_path, + deleted: false, + }; let script = r#"set -eu filler="$1" fill_mib="$2" @@ -80,15 +83,15 @@ fi config, &pod, script, - [filler_path.as_str(), fill_mib.as_str()], + [guard.filler_path.as_str(), fill_mib.as_str()], ) .run()?; collector.write_text( case_name, "disk-fill.txt", &format!( - "pod: {pod}\nfiller: {filler_path}\ncommand output:\nstdout:\n{}\nstderr:\n{}", - output.stdout, output.stderr + "pod: {pod}\nfiller: {}\ncommand output:\nstdout:\n{}\nstderr:\n{}", + guard.filler_path, output.stdout, output.stderr ), )?; ensure!( @@ -97,110 +100,7 @@ fi output.code ); - Ok(DiskFillGuard { - config: config.clone(), - pod, - filler_path, - deleted: false, - }) -} - -pub fn corrupt_one_kind_local_pv_file( - config: &ClusterTestConfig, - collector: &ArtifactCollector, - case_name: &str, -) -> Result<()> { - ensure!( - config.context.starts_with("kind-"), - "direct PV corruption requires a dedicated Kind context, got {}", - config.context - ); - - let script = r#"set -eu -root="$1" -file="$(find "$root" -type f -size +4096c ! 
-name '.rustfs-e2e-*' | head -n 1)" -if [ -z "$file" ]; then - echo "no candidate file under $root" >&2 - exit 2 -fi -before="$(sha256sum "$file" | awk '{print $1}')" -dd if=/dev/urandom of="$file" bs=4096 count=1 seek=1 conv=notrunc status=none -sync -after="$(sha256sum "$file" | awk '{print $1}')" -printf 'file=%s\nbefore=%s\nafter=%s\n' "$file" "$before" "$after" -"#; - - let mut attempts = String::new(); - for node in kind_worker_node_names(config)? { - let command = CommandSpec::new("docker").args([ - "exec".to_string(), - node.clone(), - "sh".to_string(), - "-c".to_string(), - script.to_string(), - "sh".to_string(), - "/mnt/data".to_string(), - ]); - let output = command.run()?; - attempts.push_str(&format!( - "$ {}\nexit: {:?}\nstdout:\n{}\nstderr:\n{}\n\n", - command.display(), - output.code, - output.stdout, - output.stderr - )); - if output.code == Some(0) { - collector.write_text(case_name, "direct-pv-corruption.txt", &attempts)?; - return Ok(()); - } - } - - collector.write_text(case_name, "direct-pv-corruption-failed.txt", &attempts)?; - bail!("failed to find and corrupt a candidate local PV file on any Kind worker") -} - -pub fn restart_one_kind_worker( - config: &ClusterTestConfig, - collector: &ArtifactCollector, - case_name: &str, -) -> Result<()> { - ensure!( - config.context.starts_with("kind-"), - "worker restart fault requires a dedicated Kind context, got {}", - config.context - ); - - let node = kind_worker_node_names(config)? 
- .into_iter() - .next() - .context("no Kind worker nodes configured")?; - let before = Kubectl::new(config) - .command(["get", "pods", "-A", "-o", "wide"]) - .run_checked()?; - let restart = CommandSpec::new("docker") - .args(["restart".to_string(), node.clone()]) - .run_checked()?; - let wait = Kubectl::new(config) - .command(vec![ - "wait".to_string(), - "--for=condition=Ready".to_string(), - format!("node/{node}"), - "--timeout=300s".to_string(), - ]) - .run_checked()?; - let after = Kubectl::new(config) - .command(["get", "pods", "-A", "-o", "wide"]) - .run_checked()?; - - collector.write_text( - case_name, - "worker-restart.txt", - &format!( - "node: {node}\n\nbefore pods:\n{}\nrestart stdout:\n{}\nrestart stderr:\n{}\nwait stdout:\n{}\nwait stderr:\n{}\nafter pods:\n{}", - before.stdout, restart.stdout, restart.stderr, wait.stdout, wait.stderr, after.stdout - ), - )?; - Ok(()) + Ok(guard) } pub fn apply_dm_flakey( @@ -218,7 +118,6 @@ pub fn apply_dm_flakey( .map(str::to_string) .unwrap_or_else(|| original.trim().to_string()); - dmsetup_load_table(name, fault_table)?; collector.write_text( case_name, "dm-flakey.txt", @@ -226,12 +125,14 @@ pub fn apply_dm_flakey( "target: {name}\noriginal table:\n{original}\nfault table:\n{fault_table}\nrecovery table:\n{recovery_table}\n" ), )?; - - Ok(DmFlakeyGuard { + let guard = DmFlakeyGuard { name: name.to_string(), recovery_table, restored: false, - }) + }; + dmsetup_load_table(name, fault_table)?; + + Ok(guard) } pub fn run_warp_mixed( @@ -376,22 +277,6 @@ fn rustfs_pod_names(config: &ClusterTestConfig) -> Result> { Ok(pods) } -fn kind_worker_node_names(config: &ClusterTestConfig) -> Result> { - let cluster_name = config.context.strip_prefix("kind-").with_context(|| { - format!( - "Kind worker fault requires a Kind context, got {}", - config.context - ) - })?; - - Ok((1..=KIND_WORKER_COUNT) - .map(|index| match index { - 1 => format!("{cluster_name}-worker"), - _ => format!("{cluster_name}-worker{index}"), - }) - 
.collect()) -} - fn rustfs_pod_shell<'a, I>( config: &ClusterTestConfig, pod: &str, diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index 6d47df8..a8ae565 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -23,9 +23,9 @@ use rustfs_operator_e2e::framework::{ config::ClusterTestConfig, fault_config::FaultTestConfig, fault_scenarios::{ - self, DIRECT_PV_CORRUPTION_SCENARIO, DISK_FULL_SCENARIO, DM_FLAKEY_SCENARIO, FaultBackend, - FaultScenario, IO_EIO_SCENARIO, IO_READ_MISTAKE_SCENARIO, NETWORK_PARTITION_ONE_SCENARIO, - POD_KILL_ONE_SCENARIO, WARP_UNDER_CHAOS_SCENARIO, WORKER_RESTART_SCENARIO, + self, DISK_FULL_SCENARIO, DM_FLAKEY_SCENARIO, FaultBackend, FaultScenario, IO_EIO_SCENARIO, + IO_READ_MISTAKE_SCENARIO, NETWORK_PARTITION_ONE_SCENARIO, POD_KILL_ONE_SCENARIO, + WARP_UNDER_CHAOS_SCENARIO, }, history::OperationOutcome, history::Recorder, @@ -75,18 +75,6 @@ async fn fault_disk_full_preserves_committed_objects() -> Result<()> { run_selected_fault_case(DISK_FULL_SCENARIO).await } -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=direct-pv-corruption"] -async fn fault_direct_pv_corruption_detects_or_repairs_bad_data() -> Result<()> { - run_selected_fault_case(DIRECT_PV_CORRUPTION_SCENARIO).await -} - -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=worker-restart"] -async fn fault_worker_restart_preserves_committed_objects() -> Result<()> { - run_selected_fault_case(WORKER_RESTART_SCENARIO).await -} - #[tokio::test] #[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=dm-flakey"] async fn fault_dm_flakey_preserves_committed_objects() -> Result<()> { @@ -178,24 +166,42 @@ async fn run_fault_case( ); let prefilled = prefill_objects(&s3, &mut history, &run_id, scenario.prefill_count()).await?; - let mut fault = AppliedFault::apply( - config, - collector, - scenario, - spec.backend, - 
&run_id, - &endpoint, - &bucket, - access_key, - secret_key, - )?; + let mut fault = AppliedFault::apply(config, collector, scenario, spec.backend, &run_id)?; if let Err(error) = fault.wait_active(cluster.timeout) { collect_fault_artifacts(collector, scenario.case_name, &fault, "wait-active-failed")?; return Err(error); } - ensure_port_forward(&mut port_forward, cluster, &endpoint).await?; + if let Err(error) = ensure_port_forward(&mut port_forward, cluster, &endpoint).await { + collect_fault_artifacts(collector, scenario.case_name, &fault, "port-forward-failed")?; + return Err(error); + } + + if spec.backend == FaultBackend::MinioWarpWithChaos { + if let Err(error) = host_faults::run_warp_mixed( + config.warp_duration, + collector, + scenario.case_name, + &endpoint, + &bucket, + access_key, + secret_key, + ) { + collect_fault_artifacts(collector, scenario.case_name, &fault, "warp-failed")?; + return Err(error); + } + + if let Err(error) = ensure_port_forward(&mut port_forward, cluster, &endpoint).await { + collect_fault_artifacts( + collector, + scenario.case_name, + &fault, + "post-warp-port-forward-failed", + )?; + return Err(error); + } + } let workload_summary = match run_mixed_workload( &s3, @@ -218,9 +224,7 @@ async fn run_fault_case( "workload-summary.json", &serde_json::to_string_pretty(&workload_summary)?, )?; - if let Err(error) = - workload_summary.require_fault_evidence(config.require_client_disruption) - { + if let Err(error) = workload_summary.require_fault_evidence(config.require_client_disruption) { collect_fault_artifacts( collector, scenario.case_name, @@ -268,9 +272,6 @@ fn require_fault_backend(config: &FaultTestConfig, backend: FaultBackend) -> Res FaultBackend::ChaosMeshPodChaos => chaos_mesh::require_podchaos_crd(cluster), FaultBackend::ChaosMeshNetworkChaos => chaos_mesh::require_networkchaos_crd(cluster), FaultBackend::LocalPvFill => Ok(()), - FaultBackend::KindWorkerFileCorruption | FaultBackend::KindWorkerRestart => { - 
require_tool("docker", ["version"]) - } FaultBackend::DeviceMapper => require_dm_flakey_preflight(config), } } @@ -316,10 +317,7 @@ fn cleanup_fault_backend(config: &FaultTestConfig, backend: FaultBackend) -> Res FaultBackend::ChaosMeshNetworkChaos => { chaos_mesh::cleanup_managed_networkchaos(&config.cluster, &config.chaos_namespace) } - FaultBackend::LocalPvFill - | FaultBackend::KindWorkerFileCorruption - | FaultBackend::KindWorkerRestart - | FaultBackend::DeviceMapper => Ok(()), + FaultBackend::LocalPvFill | FaultBackend::DeviceMapper => Ok(()), } } @@ -341,21 +339,15 @@ enum AppliedFault { }, DiskFill(Box), DmFlakey(Box), - Completed, } impl AppliedFault { - #[allow(clippy::too_many_arguments)] fn apply( config: &FaultTestConfig, collector: &ArtifactCollector, scenario: &FaultScenario, backend: FaultBackend, run_id: &str, - endpoint: &str, - bucket: &str, - access_key: &str, - secret_key: &str, ) -> Result { let cluster = &config.cluster; match backend { @@ -445,18 +437,6 @@ impl AppliedFault { run_id, )?, ))), - FaultBackend::KindWorkerFileCorruption => { - host_faults::corrupt_one_kind_local_pv_file( - cluster, - collector, - scenario.case_name, - )?; - Ok(Self::Completed) - } - FaultBackend::KindWorkerRestart => { - host_faults::restart_one_kind_worker(cluster, collector, scenario.case_name)?; - Ok(Self::Completed) - } FaultBackend::DeviceMapper => { let name = config .dm_name @@ -490,16 +470,6 @@ impl AppliedFault { &chaos.manifest(), )?; let guard = chaos_mesh::apply_iochaos(cluster, &chaos)?; - guard.wait_active(cluster.timeout)?; - host_faults::run_warp_mixed( - config.warp_duration, - collector, - scenario.case_name, - endpoint, - bucket, - access_key, - secret_key, - )?; Ok(Self::Chaos { guard: Box::new(guard), active_required: true, @@ -519,7 +489,7 @@ impl AppliedFault { config, .. } => wait_for_rustfs_pod_replacement(config, before_pods, timeout), - Self::Chaos { .. 
} | Self::DiskFill(_) | Self::DmFlakey(_) | Self::Completed => Ok(()), + Self::Chaos { .. } | Self::DiskFill(_) | Self::DmFlakey(_) => Ok(()), } } @@ -529,11 +499,9 @@ impl AppliedFault { guard, active_required, } if *active_required => guard.ensure_active(stage), - Self::PodKill { .. } - | Self::Chaos { .. } - | Self::DiskFill(_) - | Self::DmFlakey(_) - | Self::Completed => Ok(()), + Self::PodKill { .. } | Self::Chaos { .. } | Self::DiskFill(_) | Self::DmFlakey(_) => { + Ok(()) + } } } @@ -543,14 +511,13 @@ impl AppliedFault { Self::PodKill { guard, .. } => guard.delete(), Self::DiskFill(guard) => guard.delete(), Self::DmFlakey(guard) => guard.restore(), - Self::Completed => Ok(()), } } fn chaos_guard(&self) -> Option<&ChaosGuard> { match self { Self::Chaos { guard, .. } | Self::PodKill { guard, .. } => Some(guard.as_ref()), - Self::DiskFill(_) | Self::DmFlakey(_) | Self::Completed => None, + Self::DiskFill(_) | Self::DmFlakey(_) => None, } } } From 9e062f36965b854804a65889c0cf86c57df81470 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 11:55:43 +0800 Subject: [PATCH 08/20] test(chaos): harden fault scenario execution --- FAULT_INJECTION_TEST_PLAN.md | 42 +- e2e/src/framework/chaos_mesh.rs | 69 ++- e2e/src/framework/fault_config.rs | 72 ++- e2e/src/framework/fault_scenarios.rs | 16 +- e2e/src/framework/host_faults.rs | 651 ++++++++++++++++++--------- e2e/tests/faults.rs | 299 ++++++++---- 6 files changed, 809 insertions(+), 340 deletions(-) diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md index 3b6c709..4af8e43 100644 --- a/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -94,7 +94,7 @@ limitations under the License. 
- RustFS Pod selector 可使用 `rustfs.tenant=`。 - RustFS 容器名是 `rustfs`。 - RustFS 数据卷路径遵循 `/data/rustfs0`、`/data/rustfs1`。 -- 故障测试要求真实集群提供动态 StorageClass,不操作 Kind hostPath 或 local PV。 +- 常规场景要求真实集群提供动态 StorageClass;`dm-flakey` 只允许使用显式配置的专用静态 Local PV。 因此推荐方案是: @@ -113,7 +113,7 @@ limitations under the License. make fault-test -> e2e/tests/faults.rs | +-- 环境保护:destructive opt-in / current real Kubernetes context / required StorageClass - +-- 环境准备:强故障 case reset;真实集群使用配置的动态 StorageClass + +-- 环境准备:按 isolation reset 或复用 Tenant;DM 场景验证专用 PV 拓扑 +-- S3 workload:持续读写对象 +-- history recorder:记录每次操作的开始、结束、结果、hash +-- nemesis:通过 Chaos Mesh 对 RustFS workload 注入故障 @@ -201,7 +201,7 @@ Jepsen-like 的含义是: 1. 必须设置 `RUSTFS_FAULT_TEST_DESTRUCTIVE=1`;`make fault-test` 会显式设置。 2. fault runner 使用当前 `kubectl config current-context`,并拒绝 `kind-*` context。 -3. 必须显式提供 `RUSTFS_FAULT_TEST_STORAGE_CLASS`,目标 StorageClass 应支持动态供给。 +3. 必须显式提供 `RUSTFS_FAULT_TEST_STORAGE_CLASS`;除 `dm-flakey` 的专用静态 Local PV 外,目标 StorageClass 必须支持动态供给。 4. 目标 namespace 必须来自 fault-test 配置,默认 `rustfs-fault-test`;runner 创建 namespace 时必须写入 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` label 和匹配 Tenant 的 `rustfs.com/fault-test-tenant` annotation。 5. 已存在 namespace 只有在上述所有权标记完全匹配时才允许 reset;runner 不得自动认领未标记 namespace。 6. 
所有故障资源必须带唯一 run id label。 @@ -216,19 +216,21 @@ Jepsen-like 的含义是: | 变量 | 默认值 | 作用 | | --- | --- | --- | -| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | required | 真实集群动态 StorageClass。 | +| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | required | 常规场景使用动态 StorageClass;`dm-flakey` 使用专用静态 Local PV StorageClass。 | | `RUSTFS_FAULT_TEST_NAMESPACE` | `rustfs-fault-test` | 专用测试 namespace。 | | `RUSTFS_FAULT_TEST_TENANT` | `fault-test-tenant` | 专用测试 Tenant。 | | `RUSTFS_FAULT_TEST_SCENARIO` | `io-eio` | 选择故障场景。 | | `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `180` | 故障持续时间,默认覆盖串行小对象 workload。 | -| `RUSTFS_FAULT_TEST_PERCENT` | `20` | 支持百分比注入的场景使用。 | +| `RUSTFS_FAULT_TEST_PERCENT` | `20`;`disk-full` 为 `100` | 支持百分比注入的场景使用。 | | `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `40` | 写入或校验对象数量。 | | `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `3` | 单次 S3 请求超时时间。 | | `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求故障期间至少出现一次客户端可见失败/超时/unknown。 | -| `RUSTFS_FAULT_TEST_DISK_FILL_MIB` | `12288` | `disk-full` 场景在 RustFS 数据路径写入的 filler 大小。 | | `RUSTFS_FAULT_TEST_DM_NAME` | empty | `dm-flakey` 场景要切换的 device-mapper 设备名,必填。 | +| `RUSTFS_FAULT_TEST_DM_NODE` | empty | device-mapper 设备与目标 Local PV 所在 Kubernetes 节点,必填。 | +| `RUSTFS_FAULT_TEST_DM_MOUNT_PATH` | empty | 目标 PV 在节点上的 Local PV 挂载路径,必填。 | | `RUSTFS_FAULT_TEST_DM_FAULT_TABLE` | empty | `dm-flakey` 场景注入故障时加载的 dmsetup table,必填。 | | `RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE` | current table | `dm-flakey` 场景恢复时加载的 dmsetup table;不填则使用注入前 table。 | +| `RUSTFS_FAULT_TEST_DM_HELPER_IMAGE` | `rancher/mirrored-library-busybox:1.37.0` | 目标节点 privileged helper Pod 镜像。 | | `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | `warp-under-chaos` 场景中 Warp mixed workload 的运行时间。 | | `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh 资源所在 namespace。 | @@ -400,7 +402,7 @@ fault-test//large/ | P0 | `pod-kill-one` | Chaos Mesh `PodChaos` | 模拟一个 RustFS Pod 死亡和 StatefulSet 恢复。 | | P1 | `network-partition-one` | Chaos Mesh `NetworkChaos` | 模拟一个 RustFS Pod 与集群网络分区。 | | P1 
| `io-read-mistake` | Chaos Mesh `IOChaos` | 模拟读路径返回错误字节,即静默坏块。 | -| P1 | `disk-full` | IOChaos 或 CSI 后端专用工具 | 验证单盘空间耗尽行为。 | +| P1 | `disk-full` | Chaos Mesh `IOChaos` errno 28 | 在不消耗节点磁盘的情况下验证 ENOSPC 行为。 | | P2 | `direct-volume-corruption` | 存储后端专用测试环境 | 模拟已经落盘的数据被破坏。 | | P2 | `node-restart` | 集群节点运维接口 | 模拟节点重启。 | | P3 | `dm-flakey` | device mapper / loop device | 更接近真实块设备故障。 | @@ -536,7 +538,7 @@ spec: 5. 根据 RUSTFS_FAULT_TEST_SCENARIO 解析 FaultScenarioSpec 6. 按场景检查 Chaos Mesh CRD 或专用 host-side 工具配置 7. 检查 fault-test namespace 不存在,或所有权标记与配置完全匹配 -8. reset 专用 fault-test Tenant/PVC +8. 根据 `FaultIsolation` reset 或复用专用 fault-test Tenant/PVC 9. namespace 不存在时由 runner 使用 create 创建带所有权标记的 fault-test namespace;不得通过 apply 认领竞态中出现的同名 namespace 10. 创建真实集群 fault-test Tenant 11. 等待 Tenant Ready @@ -548,12 +550,12 @@ spec: 17. 故障期间执行 PUT/GET mixed workload,并输出 workload-summary.json 18. 如果要求 client-visible disruption,则确认 workload 观察到了失败、超时或 unknown 19. 确认持续型 Chaos 没有早于 workload 结束恢复 -20. 删除 Chaos、清理 filler 文件或恢复 dmsetup table +20. 删除 Chaos 或通过目标节点 helper Pod 恢复 dmsetup table 21. 等待 Tenant 再次 Ready 22. 对所有成功 PUT 对象做最终 GET + sha256 校验 23. 执行 prefix LIST 并记录 warning -24. 写入 checker-report.json -25. 失败时收集 Kubernetes artifacts 和故障资源 describe/yaml +24. 写入 checker-report.json 和 fault-evidence.json +25. 
失败时收集 Kubernetes artifacts、故障状态和故障资源 describe/yaml ``` 伪代码: @@ -780,9 +782,9 @@ pods-describe.txt RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test ``` -该入口使用当前 `kubectl` context,拒绝 Kind,并使用 `RUSTFS_FAULT_TEST_STORAGE_CLASS` 指向的真实集群动态 StorageClass。 +该入口使用当前 `kubectl` context,拒绝 Kind,并使用 `RUSTFS_FAULT_TEST_STORAGE_CLASS` 指向的真实集群测试存储。 -`e2e/tests/faults.rs` 中每个 destructive 场景都有同名 ignored runner。运行时通过 `RUSTFS_FAULT_TEST_SCENARIO` 选择一个真实执行的场景;未选中的 ignored runner 会快速返回,避免一次 `make fault-test` 串行跑完整个破坏性矩阵。故障测试只面向真实 Kubernetes 测试集群,不保留 Kind 后端;Kind e2e 生命周期测试是独立部分。 +`e2e/tests/faults.rs` 只有一个 ignored dispatcher。运行时通过 `RUSTFS_FAULT_TEST_SCENARIO` 从 7 个 catalog 场景中选择并执行一个,因此测试结果不会把未选中的场景计为通过。故障测试只面向真实 Kubernetes 测试集群,不保留 Kind 后端;Kind e2e 生命周期测试是独立部分。 示例: @@ -828,34 +830,34 @@ fault_warp_under_chaos_reports_performance_separately 2. 按场景检查 Chaos Mesh CRD 或专用 host-side 工具配置。 3. 启动前按 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` 清理上次异常残留的 Chaos 资源。 4. reset 前验证 namespace 所有权标记;未标记或 Tenant 不匹配时 fail closed。 -5. 每个 case 前 reset Tenant/PVC;真实集群使用配置的动态 StorageClass。 +5. Fresh/Dedicated 场景 reset Tenant/PVC;Pod Kill 和网络分区可复用已验证所有权的 Tenant。 6. Tenant 创建和 Ready 等待。 7. S3 bucket 创建。 8. S3 prefill 对象并记录 hash;prefill 阶段必须明确成功,避免空用例通过。 -9. apply 对应故障:Chaos Mesh `IOChaos` / `PodChaos` / `NetworkChaos`,或 host-side disk fill、dm-flakey、Warp under chaos。 +9. apply 对应故障:Chaos Mesh `IOChaos` / `PodChaos` / `NetworkChaos`,或目标节点 helper Pod 执行 dm-flakey、Warp under chaos。 10. 对持续型 Chaos 资源等待进入 active,再开始故障 workload。 11. 故障期间持续读写并输出 `workload-summary.json`。 12. 对持续型故障确认 workload 没有跑出故障窗口。 13. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 describe/yaml 或 host fault 输出,再触发 cleanup。 -14. 删除 Chaos 资源、清理 filler 文件或恢复 dmsetup table。 +14. 删除 Chaos 资源,或恢复 dmsetup table 并删除 helper Pod。 15. Tenant 恢复 Ready 等待。 16. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 17. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 17. AWS SDK error 按 service failure / timeout / dispatch-response unknown 分类写入 history。 -18. 
history、workload summary 和 checker report 输出。 +18. history、workload summary、fault evidence 和 checker report 输出。 19. 失败时 artifacts 收集。 这个版本已经能证明系统从“占位骨架”升级为“真实故障注入 + 数据正确性校验”。 ## 后续增强计划 -当前 7 个 real-cluster runner 已经落到代码里。后续工作不再是补这些入口,而是提高故障强度、判定模型和长稳覆盖。 +当前 catalog 包含 7 个 real-cluster scenario,由一个 dispatcher 精确选择执行。后续工作重点是提高故障强度、判定模型和长稳覆盖。 ### Phase 1:runner hardening - 在测试环境逐个验证 7 个 executable scenario 的前置条件、故障注入、清理和 artifacts 输出。 - 为 PodChaos、NetworkChaos、IOChaos mistake 补充更细的 CRD status 断言。 -- 将 host-side 故障的输出结构化,便于 CI artifact 聚合和历史对比。 +- 保持 `fault-evidence.json` 的后端状态结构稳定,便于 CI artifact 聚合和历史对比。 - 保持每个 scenario 独立选择执行,避免多个故障在同一次测试中相互污染。 验收: @@ -867,7 +869,7 @@ fault_warp_under_chaos_reports_performance_separately - 如果 workload 跑出 IOChaos active 窗口,测试失败。 - fault runner 不进入 Kind e2e case inventory;其边界是 `rustfs-workload/fault-injection`。 - 每个 scenario 都能在失败时留下足够定位信息。 -- 每个 scenario 结束后能清理自己创建的 Chaos 资源、filler 文件或 dmsetup table。 +- 每个 scenario 结束后能清理自己创建的 Chaos 资源、helper Pod 或恢复 dmsetup table。 ### Phase 2:一致性模型增强 diff --git a/e2e/src/framework/chaos_mesh.rs b/e2e/src/framework/chaos_mesh.rs index 0584dbc..a0c1083 100644 --- a/e2e/src/framework/chaos_mesh.rs +++ b/e2e/src/framework/chaos_mesh.rs @@ -166,6 +166,44 @@ impl IoChaosSpec { }) } + pub fn enospc_on_rustfs_volume( + config: &ClusterTestConfig, + chaos_namespace: impl Into, + run_id: impl Into, + scenario: impl Into, + volume_path: impl Into, + percent: u8, + duration: Duration, + ) -> Result { + ensure!( + (1..=100).contains(&percent), + "IOChaos percent must be in 1..=100, got {percent}" + ); + ensure!( + duration > Duration::ZERO, + "IOChaos duration must be positive" + ); + + let run_id = run_id.into(); + let short_run_id = run_id.chars().take(12).collect::(); + let scenario = scenario.into(); + + Ok(Self { + name: format!("rustfs-fault-enospc-{short_run_id}"), + namespace: chaos_namespace.into(), + run_id, + scenario, + target_namespace: config.test_namespace.clone(), + tenant_name: 
config.tenant_name.clone(), + container_name: "rustfs".to_string(), + volume_path: volume_path.into(), + methods: vec!["WRITE".to_string()], + action: IoChaosAction::Fault { errno: 28 }, + percent, + duration, + }) + } + pub fn manifest(&self) -> String { let methods = self .methods @@ -485,6 +523,14 @@ pub fn apply_networkchaos( } impl ChaosGuard { + pub fn kind(&self) -> &'static str { + self.kind + } + + pub fn name(&self) -> &str { + &self.name + } + pub fn wait_active(&self, timeout: Duration) -> Result<()> { let deadline = Instant::now() + timeout; @@ -553,7 +599,7 @@ impl ChaosGuard { Ok(()) } - fn json(&self) -> Result { + pub fn json(&self) -> Result { let output = Kubectl::new(&self.config) .namespaced(&self.namespace) .command(["get", self.kind, &self.name, "-o", "json"]) @@ -632,6 +678,27 @@ mod tests { assert!(manifest.contains("percent: 20")); } + #[test] + fn enospc_manifest_targets_only_volume_writes() { + let config = FaultTestConfig::for_test("real-cluster", "fast-csi"); + let spec = IoChaosSpec::enospc_on_rustfs_volume( + &config.cluster, + "chaos-mesh", + "run-1234567890", + "disk-full", + "/data/rustfs0", + 100, + Duration::from_secs(60), + ) + .expect("valid enospc chaos"); + let manifest = spec.manifest(); + + assert!(manifest.contains("errno: 28")); + assert!(manifest.contains("methods:\n - WRITE")); + assert!(manifest.contains("percent: 100")); + assert!(!manifest.contains(" - READ")); + } + #[test] fn iochaos_active_requires_selected_and_injected_not_recovered() { let status = r#"{ diff --git a/e2e/src/framework/fault_config.rs b/e2e/src/framework/fault_config.rs index e39f92c..7fa6c2e 100644 --- a/e2e/src/framework/fault_config.rs +++ b/e2e/src/framework/fault_config.rs @@ -29,10 +29,12 @@ pub struct FaultTestConfig { pub workload_objects: usize, pub request_timeout: Duration, pub require_client_disruption: bool, - pub disk_fill_mib: u64, pub dm_name: Option, + pub dm_node: Option, + pub dm_mount_path: Option, pub dm_fault_table: Option, 
pub dm_recovery_table: Option, + pub dm_helper_image: String, pub warp_duration: Duration, pub chaos_namespace: String, } @@ -54,6 +56,8 @@ impl FaultTestConfig { let storage_class = required_env(&get_env, "RUSTFS_FAULT_TEST_STORAGE_CLASS")?; let namespace = env_or(&get_env, "RUSTFS_FAULT_TEST_NAMESPACE", "rustfs-fault-test"); + let scenario = env_or(&get_env, "RUSTFS_FAULT_TEST_SCENARIO", "io-eio"); + let default_percent = if scenario == "disk-full" { 100 } else { 20 }; let cluster = ClusterTestConfig { context, operator_namespace: env_or( @@ -86,13 +90,13 @@ impl FaultTestConfig { Ok(Self { cluster, destructive_enabled: env_bool(&get_env, "RUSTFS_FAULT_TEST_DESTRUCTIVE"), - scenario: env_or(&get_env, "RUSTFS_FAULT_TEST_SCENARIO", "io-eio"), + scenario, duration: Duration::from_secs(env_u64( &get_env, "RUSTFS_FAULT_TEST_DURATION_SECONDS", 180, )), - percent: env_u8(&get_env, "RUSTFS_FAULT_TEST_PERCENT", 20), + percent: env_u8(&get_env, "RUSTFS_FAULT_TEST_PERCENT", default_percent), workload_objects: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS", 40), request_timeout: Duration::from_secs(env_u64( &get_env, @@ -103,10 +107,16 @@ impl FaultTestConfig { &get_env, "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION", ), - disk_fill_mib: env_u64(&get_env, "RUSTFS_FAULT_TEST_DISK_FILL_MIB", 12 * 1024), dm_name: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_NAME"), + dm_node: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_NODE"), + dm_mount_path: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_MOUNT_PATH"), dm_fault_table: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_FAULT_TABLE"), dm_recovery_table: env_optional(&get_env, "RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE"), + dm_helper_image: env_or( + &get_env, + "RUSTFS_FAULT_TEST_DM_HELPER_IMAGE", + "rancher/mirrored-library-busybox:1.37.0", + ), warp_duration: Duration::from_secs(env_u64( &get_env, "RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS", @@ -124,7 +134,7 @@ impl FaultTestConfig { Ok(()) } - pub fn validate_cluster(&self) -> 
Result<()> { + pub fn validate_cluster(&self, allow_static_storage: bool) -> Result<()> { Kubectl::new(&self.cluster) .command(["get", "crd", "tenants.rustfs.com"]) .run_checked() @@ -145,7 +155,7 @@ impl FaultTestConfig { self.cluster.storage_class ) })?; - validate_storage_class(&output.stdout) + validate_storage_class(&output.stdout, allow_static_storage) } #[cfg(test)] @@ -161,7 +171,7 @@ impl FaultTestConfig { } } -fn validate_storage_class(raw: &str) -> Result<()> { +fn validate_storage_class(raw: &str, allow_static: bool) -> Result<()> { let value = serde_json::from_str::(raw).context("parse StorageClass json")?; let provisioner = value .get("provisioner") @@ -172,8 +182,8 @@ fn validate_storage_class(raw: &str) -> Result<()> { "StorageClass provisioner is missing" ); ensure!( - provisioner != "kubernetes.io/no-provisioner", - "fault tests require a dynamically provisioned StorageClass, got {provisioner}" + allow_static || provisioner != "kubernetes.io/no-provisioner", + "fault tests require a dynamically provisioned StorageClass unless the selected scenario explicitly requires dedicated static local PVs, got {provisioner}" ); Ok(()) } @@ -272,10 +282,15 @@ mod tests { assert_eq!(config.percent, 20); assert_eq!(config.workload_objects, 40); assert_eq!(config.request_timeout, std::time::Duration::from_secs(3)); - assert_eq!(config.disk_fill_mib, 12 * 1024); assert!(config.dm_name.is_none()); + assert!(config.dm_node.is_none()); + assert!(config.dm_mount_path.is_none()); assert!(config.dm_fault_table.is_none()); assert!(config.dm_recovery_table.is_none()); + assert_eq!( + config.dm_helper_image, + "rancher/mirrored-library-busybox:1.37.0" + ); assert_eq!(config.warp_duration, std::time::Duration::from_secs(60)); assert!(!config.destructive_enabled); assert!(config.require_destructive_enabled().is_err()); @@ -292,13 +307,17 @@ mod tests { "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS" => Some("64".to_string()), "RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS" => 
Some("7".to_string()), "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION" => Some("true".to_string()), - "RUSTFS_FAULT_TEST_DISK_FILL_MIB" => Some("1024".to_string()), "RUSTFS_FAULT_TEST_DM_NAME" => Some("rustfs-test".to_string()), + "RUSTFS_FAULT_TEST_DM_NODE" => Some("worker-a".to_string()), + "RUSTFS_FAULT_TEST_DM_MOUNT_PATH" => { + Some("/data/rustfs-fault/dm-volume".to_string()) + } "RUSTFS_FAULT_TEST_DM_FAULT_TABLE" => Some("0 1024 error".to_string()), "RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE" => { Some("0 1024 linear /dev/loop0 0".to_string()) } "RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS" => Some("30".to_string()), + "RUSTFS_FAULT_TEST_DM_HELPER_IMAGE" => Some("busybox:test".to_string()), _ => None, }, "production-test-cluster".to_string(), @@ -311,14 +330,19 @@ mod tests { assert_eq!(config.workload_objects, 64); assert_eq!(config.request_timeout, std::time::Duration::from_secs(7)); assert!(config.require_client_disruption); - assert_eq!(config.disk_fill_mib, 1024); assert_eq!(config.dm_name.as_deref(), Some("rustfs-test")); + assert_eq!(config.dm_node.as_deref(), Some("worker-a")); + assert_eq!( + config.dm_mount_path.as_deref(), + Some("/data/rustfs-fault/dm-volume") + ); assert_eq!(config.dm_fault_table.as_deref(), Some("0 1024 error")); assert_eq!( config.dm_recovery_table.as_deref(), Some("0 1024 linear /dev/loop0 0") ); assert_eq!(config.warp_duration, std::time::Duration::from_secs(30)); + assert_eq!(config.dm_helper_image, "busybox:test"); } #[test] @@ -336,9 +360,29 @@ mod tests { #[test] fn dynamic_storage_class_is_required() { - assert!(validate_storage_class(r#"{"provisioner":"ebs.csi.aws.com"}"#).is_ok()); + assert!(validate_storage_class(r#"{"provisioner":"ebs.csi.aws.com"}"#, false).is_ok()); + assert!( + validate_storage_class(r#"{"provisioner":"kubernetes.io/no-provisioner"}"#, false) + .is_err() + ); assert!( - validate_storage_class(r#"{"provisioner":"kubernetes.io/no-provisioner"}"#).is_err() + 
validate_storage_class(r#"{"provisioner":"kubernetes.io/no-provisioner"}"#, true) + .is_ok() ); } + + #[test] + fn disk_full_defaults_to_full_enospc_injection() { + let config = FaultTestConfig::from_env_with( + |name| match name { + "RUSTFS_FAULT_TEST_STORAGE_CLASS" => Some("fast-csi".to_string()), + "RUSTFS_FAULT_TEST_SCENARIO" => Some("disk-full".to_string()), + _ => None, + }, + "production-test-cluster".to_string(), + ) + .expect("fault config"); + + assert_eq!(config.percent, 100); + } } diff --git a/e2e/src/framework/fault_scenarios.rs b/e2e/src/framework/fault_scenarios.rs index df73c55..05ac026 100644 --- a/e2e/src/framework/fault_scenarios.rs +++ b/e2e/src/framework/fault_scenarios.rs @@ -43,7 +43,6 @@ pub enum FaultBackend { ChaosMeshIoChaos, ChaosMeshPodChaos, ChaosMeshNetworkChaos, - LocalPvFill, DeviceMapper, MinioWarpWithChaos, } @@ -53,7 +52,6 @@ pub enum FaultIsolation { FreshTenant, ReusableTenant, DedicatedLinuxBlockDevice, - PerformanceOnly, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -137,17 +135,17 @@ pub const FAULT_SCENARIO_CATALOG: &[FaultScenarioSpec] = &[ FaultScenarioSpec { scenario: DISK_FULL_SCENARIO, case_name: "fault_disk_full_preserves_committed_objects", - description: "Fill one RustFS local PV or equivalent data path and verify committed objects survive ENOSPC-style pressure and recovery.", + description: "Inject ENOSPC on writes to one RustFS data volume and verify committed objects survive storage pressure and recovery.", priority: FaultPriority::P1, - backend: FaultBackend::LocalPvFill, + backend: FaultBackend::ChaosMeshIoChaos, status: FaultScenarioStatus::Executable, isolation: FaultIsolation::FreshTenant, boundary: "rustfs-workload/storage-pressure", ci_phase: "faults", - target: "one RustFS data volume with bounded filler data in the e2e-owned storage path", - validation: "new writes may fail under space pressure, but previously committed PUTs remain readable after filler cleanup and Tenant recovery", - observability: 
"history.jsonl, checker-report.json, filler file path and size, df output before/during/after, events, RustFS logs", - conflict_domain: "fresh Tenant/PVC/PV fixture and a uniquely named filler file cleaned before any subsequent case", + target: "one RustFS data volume selected by tenant label with WRITE operations returning ENOSPC", + validation: "new writes may fail with ENOSPC, but previously committed PUTs remain readable after IOChaos recovery", + observability: "history.jsonl, checker-report.json, fault-evidence.json, IOChaos manifest/status, events, RustFS logs", + conflict_domain: "fresh Tenant/PVC/PV fixture and run-scoped IOChaos cleanup without consuming node disk capacity", }, FaultScenarioSpec { scenario: DM_FLAKEY_SCENARIO, @@ -171,7 +169,7 @@ pub const FAULT_SCENARIO_CATALOG: &[FaultScenarioSpec] = &[ priority: FaultPriority::P3, backend: FaultBackend::MinioWarpWithChaos, status: FaultScenarioStatus::Executable, - isolation: FaultIsolation::PerformanceOnly, + isolation: FaultIsolation::FreshTenant, boundary: "rustfs-workload/performance-under-chaos", ci_phase: "faults", target: "RustFS S3 endpoint under an explicitly selected fault backend", diff --git a/e2e/src/framework/host_faults.rs b/e2e/src/framework/host_faults.rs index fa46425..455a518 100644 --- a/e2e/src/framework/host_faults.rs +++ b/e2e/src/framework/host_faults.rs @@ -12,125 +12,121 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use anyhow::{Context, Result, bail, ensure}; +use anyhow::{Context, Result, ensure}; +use serde::Serialize; +use serde_json::Value; use std::time::Duration; use crate::framework::{ - artifacts::ArtifactCollector, command::CommandSpec, config::ClusterTestConfig, kubectl::Kubectl, + artifacts::ArtifactCollector, command::CommandOutput, command::CommandSpec, + config::ClusterTestConfig, kubectl::Kubectl, }; -const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; +const MANAGED_BY_LABEL: &str = "app.kubernetes.io/managed-by"; +const MANAGED_BY_VALUE: &str = "rustfs-operator-fault-test"; -#[derive(Debug, Clone)] -pub struct DiskFillGuard { - config: ClusterTestConfig, - pod: String, - filler_path: String, - deleted: bool, +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct DmVolumeMapping { + pub node: String, + pub pod: String, + pub pvc: String, + pub pv: String, + pub mount_path: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct DmStatusSnapshot { + pub stage: String, + pub helper_pod: String, + pub mapping: DmVolumeMapping, + pub table: String, + pub status: String, } -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct DmFlakeyGuard { - name: String, + config: ClusterTestConfig, + helper_pod: String, + dm_name: String, + fault_table: String, recovery_table: String, + mapping: DmVolumeMapping, + recovery_snapshot: Option, restored: bool, } -pub fn fill_rustfs_data_volume( - config: &ClusterTestConfig, - fill_mib: u64, - collector: &ArtifactCollector, - case_name: &str, - run_id: &str, -) -> Result { - let pod = first_rustfs_pod(config)?; - let filler_path = format!("{RUSTFS_DATA_VOLUME}/.rustfs-e2e-disk-full-{run_id}"); - let fill_mib = fill_mib.to_string(); - let guard = DiskFillGuard { - config: config.clone(), - pod: pod.clone(), - filler_path, - deleted: false, - }; - let script = r#"set -eu -filler="$1" -fill_mib="$2" -dir="$(dirname "$filler")" -rm -f "$filler" -echo "before:" -df -k "$dir" -set +e -dd if=/dev/zero 
of="$filler" bs=1M count="$fill_mib" oflag=sync -dd_code=$? -set -e -sync -echo "after:" -df -k "$dir" -echo "dd_exit=$dd_code" -used_percent="$(df -k "$dir" | awk 'NR==2 {gsub("%", "", $5); print $5}')" -case "$used_percent" in - ''|*[!0-9]*) - echo "unable to parse disk usage percent from df output" >&2 - exit 3 - ;; -esac -if [ "$used_percent" -lt 95 ]; then - echo "disk fill did not create ENOSPC-grade pressure: used=${used_percent}% dd_exit=$dd_code" >&2 - exit 3 -fi -"#; - let output = rustfs_pod_shell( - config, - &pod, - script, - [guard.filler_path.as_str(), fill_mib.as_str()], - ) - .run()?; - collector.write_text( - case_name, - "disk-fill.txt", - &format!( - "pod: {pod}\nfiller: {}\ncommand output:\nstdout:\n{}\nstderr:\n{}", - guard.filler_path, output.stdout, output.stderr - ), - )?; - ensure!( - output.code == Some(0), - "disk fill fault did not create observable space pressure; exit {:?}", - output.code - ); - - Ok(guard) +#[derive(Debug)] +pub struct DmFlakeySpec<'a> { + pub node: &'a str, + pub mount_path: &'a str, + pub helper_image: &'a str, + pub name: &'a str, + pub fault_table: &'a str, + pub recovery_table: Option<&'a str>, + pub run_id: &'a str, } pub fn apply_dm_flakey( - name: &str, - fault_table: &str, - recovery_table: Option<&str>, + config: &ClusterTestConfig, + spec: &DmFlakeySpec<'_>, collector: &ArtifactCollector, case_name: &str, ) -> Result { - let original = CommandSpec::new("dmsetup") - .args(["table".to_string(), name.to_string()]) - .run_checked()? 
- .stdout; - let recovery_table = recovery_table + validate_dm_spec(spec)?; + let mapping = verify_dm_volume_mapping(config, spec.node, spec.mount_path)?; + let helper_pod = helper_pod_name(spec.run_id); + let manifest = dm_helper_manifest(config, &helper_pod, spec.node, spec.helper_image); + collector.write_text(case_name, "dm-helper-manifest.yaml", &manifest)?; + + let kubectl = Kubectl::new(config).namespaced(&config.test_namespace); + kubectl + .command([ + "delete", + "pod", + &helper_pod, + "--ignore-not-found", + "--wait=true", + ]) + .run_checked()?; + kubectl.create_yaml_command(manifest).run_checked()?; + + let mut guard = DmFlakeyGuard { + config: config.clone(), + helper_pod, + dm_name: spec.name.to_string(), + fault_table: spec.fault_table.to_string(), + recovery_table: String::new(), + mapping, + recovery_snapshot: None, + restored: false, + }; + guard.wait_helper_ready()?; + guard.verify_mount_source()?; + + let original_table = guard.dmsetup(["table", spec.name])?.stdout; + guard.recovery_table = spec + .recovery_table .map(str::to_string) - .unwrap_or_else(|| original.trim().to_string()); + .unwrap_or_else(|| original_table.trim().to_string()); + ensure!( + !guard.recovery_table.trim().is_empty(), + "dmsetup returned an empty recovery table for {:?}", + spec.name + ); + guard.load_table(spec.fault_table)?; + let active = guard.snapshot("active")?; + ensure!( + active.table.split_whitespace().nth(2) == spec.fault_table.split_whitespace().nth(2), + "device-mapper target did not switch to the requested fault table; requested {:?}, active {:?}", + spec.fault_table, + active.table + ); collector.write_text( case_name, - "dm-flakey.txt", - &format!( - "target: {name}\noriginal table:\n{original}\nfault table:\n{fault_table}\nrecovery table:\n{recovery_table}\n" - ), + "dm-flakey-active.json", + &serde_json::to_string_pretty(&active)?, )?; - let guard = DmFlakeyGuard { - name: name.to_string(), - recovery_table, - restored: false, - }; - 
dmsetup_load_table(name, fault_table)?; Ok(guard) } @@ -181,55 +177,138 @@ pub fn run_warp_mixed( Ok(()) } -impl DiskFillGuard { - pub fn delete(&mut self) -> Result<()> { - self.delete_inner()?; - self.deleted = true; +impl DmFlakeyGuard { + pub fn ensure_active(&self, stage: &str) -> Result { + let snapshot = self.snapshot(stage)?; + ensure!( + snapshot.table.split_whitespace().nth(2) == self.fault_table.split_whitespace().nth(2), + "device-mapper target {:?} is no longer using the requested fault table at {stage}; expected {:?}, active {:?}", + self.dm_name, + self.fault_table, + snapshot.table + ); + Ok(snapshot) + } + + pub fn snapshot(&self, stage: &str) -> Result { + Ok(DmStatusSnapshot { + stage: stage.to_string(), + helper_pod: self.helper_pod.clone(), + mapping: self.mapping.clone(), + table: self.dmsetup(["table", self.dm_name.as_str()])?.stdout, + status: self.dmsetup(["status", self.dm_name.as_str()])?.stdout, + }) + } + + pub fn restore(&mut self) -> Result<()> { + let recovery_table = self.recovery_table.clone(); + self.load_table(&recovery_table)?; + self.recovery_snapshot = Some(self.snapshot("recovered")?); + self.delete_helper()?; + self.restored = true; Ok(()) } - fn delete_inner(&self) -> Result<()> { - let pods = rustfs_pod_names(&self.config).unwrap_or_else(|_| vec![self.pod.clone()]); - let mut attempts = String::new(); - for pod in pods { - let command = rustfs_pod_shell( - &self.config, - &pod, - "rm -f \"$1\" && sync", - [self.filler_path.as_str()], - ); - let output = command.run()?; - attempts.push_str(&format!( - "$ {}\nexit: {:?}\nstdout:\n{}\nstderr:\n{}\n\n", - command.display(), - output.code, - output.stdout, - output.stderr - )); - if output.code == Some(0) { - return Ok(()); - } - } - bail!( - "failed to remove disk fill artifact {} from RustFS pods\n{}", - self.filler_path, - attempts - ) + pub fn recovery_snapshot(&self) -> Option<&DmStatusSnapshot> { + self.recovery_snapshot.as_ref() } -} -impl Drop for DiskFillGuard { - fn 
drop(&mut self) { - if !self.deleted { - let _ = self.delete_inner(); - } + fn wait_helper_ready(&self) -> Result<()> { + Kubectl::new(&self.config) + .namespaced(&self.config.test_namespace) + .command([ + "wait", + "--for=condition=Ready", + "pod", + &self.helper_pod, + "--timeout=60s", + ]) + .run_checked()?; + Ok(()) } -} -impl DmFlakeyGuard { - pub fn restore(&mut self) -> Result<()> { - dmsetup_load_table(&self.name, &self.recovery_table)?; - self.restored = true; + fn verify_mount_source(&self) -> Result<()> { + let source = self + .host_command([ + "/usr/bin/findmnt", + "-n", + "-o", + "SOURCE", + "--target", + self.mapping.mount_path.as_str(), + ])? + .stdout; + let mapper = self + .host_command([ + "/usr/bin/readlink", + "-f", + &format!("/dev/mapper/{}", self.dm_name), + ])? + .stdout; + let source = source.trim(); + let canonical_source = self + .host_command(["/usr/bin/readlink", "-f", source])? + .stdout; + ensure!( + canonical_source.trim() == mapper.trim(), + "fault-test PV mount {:?} on node {:?} is backed by {:?}, not device-mapper target {:?}", + self.mapping.mount_path, + self.mapping.node, + source, + self.dm_name + ); + Ok(()) + } + + fn load_table(&self, table: &str) -> Result<()> { + self.dmsetup(["suspend", self.dm_name.as_str()])?; + let load = self.dmsetup(["load", self.dm_name.as_str(), "--table", table]); + let resume = self.dmsetup(["resume", self.dm_name.as_str()]); + load?; + resume?; + Ok(()) + } + + fn dmsetup(&self, args: I) -> Result + where + I: IntoIterator, + S: Into, + { + let mut command = vec!["/usr/sbin/dmsetup".to_string()]; + command.extend(args.into_iter().map(Into::into)); + self.host_command(command) + } + + fn host_command(&self, args: I) -> Result + where + I: IntoIterator, + S: Into, + { + let mut command = vec![ + "exec".to_string(), + self.helper_pod.clone(), + "--".to_string(), + "chroot".to_string(), + "/host".to_string(), + ]; + command.extend(args.into_iter().map(Into::into)); + Kubectl::new(&self.config) + 
.namespaced(&self.config.test_namespace) + .command(command) + .run_checked() + } + + fn delete_helper(&self) -> Result<()> { + Kubectl::new(&self.config) + .namespaced(&self.config.test_namespace) + .command([ + "delete", + "pod", + &self.helper_pod, + "--ignore-not-found", + "--wait=true", + ]) + .run_checked()?; Ok(()) } } @@ -237,89 +316,249 @@ impl DmFlakeyGuard { impl Drop for DmFlakeyGuard { fn drop(&mut self) { if !self.restored { - let _ = dmsetup_load_table(&self.name, &self.recovery_table); + let recovery_table = self.recovery_table.clone(); + if !recovery_table.is_empty() { + let _ = self.load_table(&recovery_table); + } + let _ = self.delete_helper(); } } } -fn first_rustfs_pod(config: &ClusterTestConfig) -> Result { - rustfs_pod_names(config)? - .into_iter() - .next() - .context("no RustFS pods returned") +fn validate_dm_spec(spec: &DmFlakeySpec<'_>) -> Result<()> { + ensure!( + !spec.node.is_empty() + && spec + .node + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '.' | '-')), + "RUSTFS_FAULT_TEST_DM_NODE must be a valid node name" + ); + ensure!( + spec.mount_path.starts_with('/') && spec.mount_path != "/", + "RUSTFS_FAULT_TEST_DM_MOUNT_PATH must be an absolute non-root path" + ); + ensure!( + !spec.mount_path.contains(['\n', '\r']), + "RUSTFS_FAULT_TEST_DM_MOUNT_PATH must not contain newlines" + ); + ensure!( + !spec.name.is_empty() + && spec + .name + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '.' 
| '_' | '-' | '+')), + "RUSTFS_FAULT_TEST_DM_NAME contains unsupported characters" + ); + ensure!( + !spec.fault_table.trim().is_empty(), + "RUSTFS_FAULT_TEST_DM_FAULT_TABLE is required" + ); + ensure!( + !spec.helper_image.trim().is_empty() + && !spec.helper_image.contains(['\n', '\r', ' ', '\t']), + "RUSTFS_FAULT_TEST_DM_HELPER_IMAGE must be a non-empty image reference" + ); + Ok(()) } -fn rustfs_pod_names(config: &ClusterTestConfig) -> Result> { +fn verify_dm_volume_mapping( + config: &ClusterTestConfig, + node: &str, + expected_mount_path: &str, +) -> Result { let selector = format!("rustfs.tenant={}", config.tenant_name); - let output = Kubectl::new(config) + let pods = Kubectl::new(config) .namespaced(&config.test_namespace) - .command([ - "get", - "pod", - "-l", - &selector, - "-o", - r#"jsonpath={range .items[*]}{.metadata.name}{"\n"}{end}"#, - ]) + .command(["get", "pod", "-l", &selector, "-o", "json"]) .run_checked()?; - let pods = output - .stdout - .lines() - .map(str::trim) - .filter(|pod| !pod.is_empty()) - .map(str::to_string) - .collect::>(); + let pods = serde_json::from_str::(&pods.stdout).context("parse RustFS pod list")?; + let pod = pods + .pointer("/items") + .and_then(Value::as_array) + .and_then(|items| { + items + .iter() + .find(|item| item.pointer("/spec/nodeName").and_then(Value::as_str) == Some(node)) + }) + .with_context(|| format!("no RustFS fault-test Pod is scheduled on DM node {node:?}"))?; + let pod_name = pod + .pointer("/metadata/name") + .and_then(Value::as_str) + .context("DM target Pod is missing metadata.name")?; + let pvc = pod + .pointer("/spec/volumes") + .and_then(Value::as_array) + .and_then(|volumes| { + volumes.iter().find_map(|volume| { + volume + .pointer("/persistentVolumeClaim/claimName") + .and_then(Value::as_str) + }) + }) + .context("DM target Pod does not mount a PVC")?; + + let pvc_json = Kubectl::new(config) + .namespaced(&config.test_namespace) + .command(["get", "pvc", pvc, "-o", "json"]) + 
.run_checked()?; + let pvc_json = + serde_json::from_str::(&pvc_json.stdout).context("parse DM target PVC")?; + let pv = pvc_json + .pointer("/spec/volumeName") + .and_then(Value::as_str) + .context("DM target PVC is not bound")?; + + let pv_json = Kubectl::new(config) + .command(["get", "pv", pv, "-o", "json"]) + .run_checked()?; + let pv_json = serde_json::from_str::(&pv_json.stdout).context("parse DM target PV")?; + let local_path = pv_json + .pointer("/spec/local/path") + .and_then(Value::as_str) + .context("DM target PV is not a local PV")?; + ensure!( + local_path == expected_mount_path, + "DM target PV {pv:?} uses local path {local_path:?}, expected {expected_mount_path:?}" + ); ensure!( - !pods.is_empty(), - "no RustFS pod found for selector {selector} in namespace {}", - config.test_namespace + pv_targets_node(&pv_json, node), + "DM target PV {pv:?} node affinity does not target {node:?}" ); - Ok(pods) + + Ok(DmVolumeMapping { + node: node.to_string(), + pod: pod_name.to_string(), + pvc: pvc.to_string(), + pv: pv.to_string(), + mount_path: local_path.to_string(), + }) } -fn rustfs_pod_shell<'a, I>( - config: &ClusterTestConfig, - pod: &str, - script: &str, - args: I, -) -> CommandSpec -where - I: IntoIterator, -{ - let mut command_args = vec![ - "exec".to_string(), - pod.to_string(), - "-c".to_string(), - "rustfs".to_string(), - "--".to_string(), - "sh".to_string(), - "-c".to_string(), - script.to_string(), - "sh".to_string(), - ]; - command_args.extend(args.into_iter().map(str::to_string)); - Kubectl::new(config) - .namespaced(&config.test_namespace) - .command(command_args) +fn pv_targets_node(pv: &Value, node: &str) -> bool { + pv.pointer("/spec/nodeAffinity/required/nodeSelectorTerms") + .and_then(Value::as_array) + .into_iter() + .flatten() + .filter_map(|term| term.get("matchExpressions").and_then(Value::as_array)) + .flatten() + .any(|expression| { + expression.get("key").and_then(Value::as_str) == Some("kubernetes.io/hostname") + && 
expression.get("operator").and_then(Value::as_str) == Some("In") + && expression + .get("values") + .and_then(Value::as_array) + .is_some_and(|values| values.iter().any(|value| value.as_str() == Some(node))) + }) } -fn dmsetup_load_table(name: &str, table: &str) -> Result<()> { - CommandSpec::new("dmsetup") - .args(["suspend".to_string(), name.to_string()]) - .run_checked()?; - let load = CommandSpec::new("dmsetup") - .args([ - "load".to_string(), - name.to_string(), - "--table".to_string(), - table.to_string(), - ]) - .run_checked(); - let resume = CommandSpec::new("dmsetup") - .args(["resume".to_string(), name.to_string()]) - .run_checked(); +fn helper_pod_name(run_id: &str) -> String { + let suffix = run_id + .chars() + .filter(|ch| ch.is_ascii_alphanumeric()) + .take(12) + .collect::() + .to_ascii_lowercase(); + format!("rustfs-fault-dm-helper-{suffix}") +} - load?; - resume?; - Ok(()) +fn dm_helper_manifest(config: &ClusterTestConfig, name: &str, node: &str, image: &str) -> String { + format!( + r#"apiVersion: v1 +kind: Pod +metadata: + name: {name} + namespace: {namespace} + labels: + {managed_by_label}: {managed_by_value} +spec: + nodeName: {node} + hostPID: true + restartPolicy: Never + containers: + - name: host-tools + image: {image} + imagePullPolicy: IfNotPresent + command: ["sh", "-c", "trap : TERM INT; sleep 3600 & wait"] + securityContext: + privileged: true + volumeMounts: + - name: host-root + mountPath: /host + mountPropagation: HostToContainer + volumes: + - name: host-root + hostPath: + path: / + type: Directory +"#, + namespace = config.test_namespace, + managed_by_label = MANAGED_BY_LABEL, + managed_by_value = MANAGED_BY_VALUE, + ) +} + +#[cfg(test)] +mod tests { + use super::{ + DmFlakeySpec, dm_helper_manifest, helper_pod_name, pv_targets_node, validate_dm_spec, + }; + use crate::framework::fault_config::FaultTestConfig; + + #[test] + fn dm_helper_is_pinned_to_one_node_and_host_root() { + let config = 
FaultTestConfig::for_test("real-cluster", "fast-csi"); + let manifest = dm_helper_manifest( + &config.cluster, + "rustfs-fault-dm-helper-run123", + "worker-a", + "busybox:test", + ); + + assert!(manifest.contains("nodeName: worker-a")); + assert!(manifest.contains("privileged: true")); + assert!(manifest.contains("mountPath: /host")); + assert!(manifest.contains("path: /")); + assert!(manifest.contains("rustfs-operator-fault-test")); + } + + #[test] + fn dm_spec_rejects_unbounded_or_unsafe_targets() { + let valid = DmFlakeySpec { + node: "worker-a", + mount_path: "/data/rustfs-fault/dm-volume", + helper_image: "busybox:test", + name: "rustfs-fault-dm", + fault_table: "0 1024 flakey /dev/loop0 0 1 15", + recovery_table: None, + run_id: "run-123", + }; + assert!(validate_dm_spec(&valid).is_ok()); + + let root = DmFlakeySpec { + mount_path: "/", + ..valid + }; + assert!(validate_dm_spec(&root).is_err()); + } + + #[test] + fn dm_pv_affinity_must_match_target_node() { + let pv = serde_json::json!({ + "spec": {"nodeAffinity": {"required": {"nodeSelectorTerms": [{ + "matchExpressions": [{ + "key": "kubernetes.io/hostname", + "operator": "In", + "values": ["worker-a"] + }] + }]}}} + }); + + assert!(pv_targets_node(&pv, "worker-a")); + assert!(!pv_targets_node(&pv, "worker-b")); + assert_eq!( + helper_pod_name("run-ABC-123"), + "rustfs-fault-dm-helper-runabc123" + ); + } } diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index a8ae565..b9335da 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -23,13 +23,12 @@ use rustfs_operator_e2e::framework::{ config::ClusterTestConfig, fault_config::FaultTestConfig, fault_scenarios::{ - self, DISK_FULL_SCENARIO, DM_FLAKEY_SCENARIO, FaultBackend, FaultScenario, IO_EIO_SCENARIO, - IO_READ_MISTAKE_SCENARIO, NETWORK_PARTITION_ONE_SCENARIO, POD_KILL_ONE_SCENARIO, - WARP_UNDER_CHAOS_SCENARIO, + self, DISK_FULL_SCENARIO, FaultBackend, FaultIsolation, FaultScenario, + IO_READ_MISTAKE_SCENARIO, }, history::OperationOutcome, 
history::Recorder, - host_faults::{self, DiskFillGuard, DmFlakeyGuard}, + host_faults::{self, DmFlakeyGuard, DmFlakeySpec, DmStatusSnapshot}, kube_client, port_forward::{PortForwardGuard, PortForwardSpec}, resources, @@ -46,60 +45,14 @@ const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; const SMALL_OBJECT_SIZE_BYTES: usize = 4 * 1024; #[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=io-eio"] -async fn fault_io_eio_preserves_committed_objects() -> Result<()> { - run_selected_fault_case(IO_EIO_SCENARIO).await -} - -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=pod-kill-one"] -async fn fault_pod_kill_one_preserves_committed_objects() -> Result<()> { - run_selected_fault_case(POD_KILL_ONE_SCENARIO).await -} - -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=network-partition-one"] -async fn fault_network_partition_one_preserves_committed_objects() -> Result<()> { - run_selected_fault_case(NETWORK_PARTITION_ONE_SCENARIO).await -} - -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=io-read-mistake"] -async fn fault_io_read_mistake_rejects_corrupt_reads() -> Result<()> { - run_selected_fault_case(IO_READ_MISTAKE_SCENARIO).await -} - -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=disk-full"] -async fn fault_disk_full_preserves_committed_objects() -> Result<()> { - run_selected_fault_case(DISK_FULL_SCENARIO).await -} - -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO=dm-flakey"] -async fn fault_dm_flakey_preserves_committed_objects() -> Result<()> { - run_selected_fault_case(DM_FLAKEY_SCENARIO).await -} - -#[tokio::test] -#[ignore = "destructive RustFS workload fault scenario; select with 
RUSTFS_FAULT_TEST_SCENARIO=warp-under-chaos"] -async fn fault_warp_under_chaos_reports_performance_separately() -> Result<()> { - run_selected_fault_case(WARP_UNDER_CHAOS_SCENARIO).await -} - -async fn run_selected_fault_case(expected_scenario: &str) -> Result<()> { +#[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO"] +async fn fault_selected_scenario() -> Result<()> { let config = FaultTestConfig::from_env()?; let scenario = FaultScenario::from_config(&config)?; - if scenario.name != expected_scenario { - eprintln!( - "skipping fault scenario {expected_scenario}; selected scenario is {}", - scenario.name - ); - return Ok(()); - } + let spec = fault_scenarios::scenario_spec(&scenario.name)?; config.require_destructive_enabled()?; - config.validate_cluster()?; + config.validate_cluster(spec.backend == FaultBackend::DeviceMapper)?; eprintln!( "running destructive RustFS fault scenario {} against real Kubernetes context: {}", scenario.name, config.cluster.context @@ -135,7 +88,7 @@ async fn run_fault_case( require_fault_backend(config, spec.backend)?; cleanup_fault_backend(config, spec.backend)?; - reset_fault_fixture(&config.cluster)?; + prepare_fault_fixture(&config.cluster, spec.isolation)?; wait_for_ready_tenant(&config.cluster).await?; let run_id = format!("run-{}", Uuid::new_v4()); @@ -166,12 +119,14 @@ async fn run_fault_case( ); let prefilled = prefill_objects(&s3, &mut history, &run_id, scenario.prefill_count()).await?; + let pods_before = rustfs_pod_identities(cluster)?; let mut fault = AppliedFault::apply(config, collector, scenario, spec.backend, &run_id)?; if let Err(error) = fault.wait_active(cluster.timeout) { collect_fault_artifacts(collector, scenario.case_name, &fault, "wait-active-failed")?; return Err(error); } + let active_snapshot = fault.snapshot("active")?; if let Err(error) = ensure_port_forward(&mut port_forward, cluster, &endpoint).await { collect_fault_artifacts(collector, scenario.case_name, 
&fault, "port-forward-failed")?; @@ -242,13 +197,15 @@ async fn run_fault_case( )?; return Err(error); } + let workload_snapshot = fault.snapshot("after-workload")?; - if let Err(error) = fault.delete() { + if let Err(error) = fault.delete(cluster.timeout) { collect_fault_artifacts(collector, scenario.case_name, &fault, "delete-failed")?; return Err(error); } wait_for_ready_tenant(cluster).await?; + let pods_after = rustfs_pod_identities(cluster)?; ensure_port_forward(&mut port_forward, cluster, &endpoint).await?; let report = checker::check_s3_history(&s3, &mut history, true).await?; collector.write_text( @@ -256,6 +213,25 @@ async fn run_fault_case( "checker-report.json", &serde_json::to_string_pretty(&report)?, )?; + let evidence = FaultEvidence { + scenario: scenario.name.clone(), + backend: format!("{:?}", spec.backend), + target: spec.target.to_string(), + injected: true, + active_during_workload: true, + recovered: report.tenant_recovered, + client_disruptions: workload_summary.disrupted(), + pods_before, + pods_after, + active_snapshot, + workload_snapshot, + dm_recovery_snapshot: fault.recovery_dm_snapshot(), + }; + collector.write_text( + scenario.case_name, + "fault-evidence.json", + &serde_json::to_string_pretty(&evidence)?, + )?; report.require_success()?; Ok(()) @@ -271,7 +247,6 @@ fn require_fault_backend(config: &FaultTestConfig, backend: FaultBackend) -> Res } FaultBackend::ChaosMeshPodChaos => chaos_mesh::require_podchaos_crd(cluster), FaultBackend::ChaosMeshNetworkChaos => chaos_mesh::require_networkchaos_crd(cluster), - FaultBackend::LocalPvFill => Ok(()), FaultBackend::DeviceMapper => require_dm_flakey_preflight(config), } } @@ -289,20 +264,22 @@ where } fn require_dm_flakey_preflight(config: &FaultTestConfig) -> Result<()> { - let name = config + config .dm_name .as_deref() .context("RUSTFS_FAULT_TEST_DM_NAME is required for dm-flakey")?; + config + .dm_node + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_NODE is required for dm-flakey")?; + 
config + .dm_mount_path + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_MOUNT_PATH is required for dm-flakey")?; config .dm_fault_table .as_deref() .context("RUSTFS_FAULT_TEST_DM_FAULT_TABLE is required for dm-flakey")?; - - require_tool("dmsetup", ["version"])?; - CommandSpec::new("dmsetup") - .args(["table", name]) - .run_checked() - .with_context(|| format!("dm-flakey target {name:?} must exist before fixture reset"))?; Ok(()) } @@ -317,13 +294,18 @@ fn cleanup_fault_backend(config: &FaultTestConfig, backend: FaultBackend) -> Res FaultBackend::ChaosMeshNetworkChaos => { chaos_mesh::cleanup_managed_networkchaos(&config.cluster, &config.chaos_namespace) } - FaultBackend::LocalPvFill | FaultBackend::DeviceMapper => Ok(()), + FaultBackend::DeviceMapper => Ok(()), } } -fn reset_fault_fixture(config: &ClusterTestConfig) -> Result<()> { - resources::reset_fault_tenant_resources(config)?; - resources::apply_fault_tenant_resources(config)?; +fn prepare_fault_fixture(config: &ClusterTestConfig, isolation: FaultIsolation) -> Result<()> { + match isolation { + FaultIsolation::ReusableTenant => resources::apply_fault_tenant_resources(config)?, + FaultIsolation::FreshTenant | FaultIsolation::DedicatedLinuxBlockDevice => { + resources::reset_fault_tenant_resources(config)?; + resources::apply_fault_tenant_resources(config)?; + } + } Ok(()) } @@ -337,7 +319,6 @@ enum AppliedFault { before_pods: Vec, config: Box, }, - DiskFill(Box), DmFlakey(Box), } @@ -351,6 +332,26 @@ impl AppliedFault { ) -> Result { let cluster = &config.cluster; match backend { + FaultBackend::ChaosMeshIoChaos if scenario.name == DISK_FULL_SCENARIO => { + let chaos = IoChaosSpec::enospc_on_rustfs_volume( + cluster, + &config.chaos_namespace, + run_id, + &scenario.name, + RUSTFS_DATA_VOLUME, + scenario.percent, + scenario.duration, + )?; + collector.write_text( + scenario.case_name, + "chaos-manifest.yaml", + &chaos.manifest(), + )?; + Ok(Self::Chaos { + guard: Box::new(chaos_mesh::apply_iochaos(cluster, 
&chaos)?), + active_required: true, + }) + } FaultBackend::ChaosMeshIoChaos if scenario.name == IO_READ_MISTAKE_SCENARIO => { let chaos = IoChaosSpec::read_mistake_on_rustfs_volume( cluster, @@ -428,15 +429,6 @@ impl AppliedFault { active_required: true, }) } - FaultBackend::LocalPvFill => Ok(Self::DiskFill(Box::new( - host_faults::fill_rustfs_data_volume( - cluster, - config.disk_fill_mib, - collector, - scenario.case_name, - run_id, - )?, - ))), FaultBackend::DeviceMapper => { let name = config .dm_name @@ -446,10 +438,25 @@ impl AppliedFault { .dm_fault_table .as_deref() .context("RUSTFS_FAULT_TEST_DM_FAULT_TABLE is required for dm-flakey")?; + let node = config + .dm_node + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_NODE is required for dm-flakey")?; + let mount_path = config + .dm_mount_path + .as_deref() + .context("RUSTFS_FAULT_TEST_DM_MOUNT_PATH is required for dm-flakey")?; Ok(Self::DmFlakey(Box::new(host_faults::apply_dm_flakey( - name, - fault_table, - config.dm_recovery_table.as_deref(), + cluster, + &DmFlakeySpec { + node, + mount_path, + helper_image: &config.dm_helper_image, + name, + fault_table, + recovery_table: config.dm_recovery_table.as_deref(), + run_id, + }, collector, scenario.case_name, )?))) @@ -488,8 +495,8 @@ impl AppliedFault { before_pods, config, .. - } => wait_for_rustfs_pod_replacement(config, before_pods, timeout), - Self::Chaos { .. } | Self::DiskFill(_) | Self::DmFlakey(_) => Ok(()), + } => wait_for_rustfs_pod_deletion(config, before_pods, timeout), + Self::Chaos { .. } | Self::DmFlakey(_) => Ok(()), } } @@ -499,17 +506,25 @@ impl AppliedFault { guard, active_required, } if *active_required => guard.ensure_active(stage), - Self::PodKill { .. } | Self::Chaos { .. } | Self::DiskFill(_) | Self::DmFlakey(_) => { + Self::PodKill { .. } | Self::Chaos { .. 
} => Ok(()), + Self::DmFlakey(guard) => { + guard.ensure_active("after fault workload")?; Ok(()) } } } - fn delete(&mut self) -> Result<()> { + fn delete(&mut self, timeout: Duration) -> Result<()> { match self { Self::Chaos { guard, .. } => guard.delete(), - Self::PodKill { guard, .. } => guard.delete(), - Self::DiskFill(guard) => guard.delete(), + Self::PodKill { + guard, + before_pods, + config, + } => { + guard.delete()?; + wait_for_rustfs_pod_replacement(config, before_pods, timeout) + } Self::DmFlakey(guard) => guard.restore(), } } @@ -517,9 +532,60 @@ impl AppliedFault { fn chaos_guard(&self) -> Option<&ChaosGuard> { match self { Self::Chaos { guard, .. } | Self::PodKill { guard, .. } => Some(guard.as_ref()), - Self::DiskFill(_) | Self::DmFlakey(_) => None, + Self::DmFlakey(_) => None, } } + + fn snapshot(&self, stage: &str) -> Result { + match self { + Self::Chaos { guard, .. } | Self::PodKill { guard, .. } => Ok(FaultStatusSnapshot { + stage: stage.to_string(), + resource_kind: Some(guard.kind().to_string()), + resource_name: Some(guard.name().to_string()), + chaos_status: Some(serde_json::from_str(&guard.json()?)?), + dm_status: None, + }), + Self::DmFlakey(guard) => Ok(FaultStatusSnapshot { + stage: stage.to_string(), + resource_kind: Some("device-mapper".to_string()), + resource_name: None, + chaos_status: None, + dm_status: Some(guard.snapshot(stage)?), + }), + } + } + + fn recovery_dm_snapshot(&self) -> Option { + match self { + Self::DmFlakey(guard) => guard.recovery_snapshot().cloned(), + Self::Chaos { .. } | Self::PodKill { .. 
} => None, + } + } +} + +#[derive(Debug, Clone, Serialize)] +struct FaultStatusSnapshot { + stage: String, + resource_kind: Option, + resource_name: Option, + chaos_status: Option, + dm_status: Option, +} + +#[derive(Debug, Clone, Serialize)] +struct FaultEvidence { + scenario: String, + backend: String, + target: String, + injected: bool, + active_during_workload: bool, + recovered: bool, + client_disruptions: usize, + pods_before: Vec, + pods_after: Vec, + active_snapshot: FaultStatusSnapshot, + workload_snapshot: FaultStatusSnapshot, + dm_recovery_snapshot: Option, } fn collect_fault_artifacts( @@ -528,6 +594,12 @@ fn collect_fault_artifacts( fault: &AppliedFault, suffix: &str, ) -> Result<()> { + let status = fault + .snapshot(suffix) + .and_then(|snapshot| serde_json::to_string_pretty(&snapshot).map_err(Into::into)) + .unwrap_or_else(|error| format!("failed to collect fault status: {error}")); + collector.write_text(case_name, &format!("fault-status-{suffix}.json"), &status)?; + if let Some(guard) = fault.chaos_guard() { let describe = guard .describe() @@ -547,7 +619,7 @@ fn collect_fault_artifacts( Ok(()) } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] struct PodIdentity { name: String, uid: String, @@ -616,6 +688,50 @@ fn wait_for_rustfs_pod_replacement( } } +fn wait_for_rustfs_pod_deletion( + config: &ClusterTestConfig, + before: &[PodIdentity], + timeout: Duration, +) -> Result<()> { + let deadline = Instant::now() + timeout; + let mut last_snapshot = Vec::new(); + let mut last_error = "not checked yet".to_string(); + + loop { + if Instant::now() >= deadline { + bail!( + "timed out waiting for PodChaos to delete a RustFS pod after {timeout:?}\nbefore: {before:?}\nlast: {last_snapshot:?}\nlast error: {last_error}", + ); + } + + match rustfs_pod_identities(config) { + Ok(current) => { + if pod_deletion_observed(before, ¤t) { + return Ok(()); + } + last_snapshot = current; + last_error = "none".to_string(); + } + 
Err(error) => { + last_error = error.to_string(); + } + } + + sleep(Duration::from_millis(250)); + } +} + +fn pod_deletion_observed(before: &[PodIdentity], current: &[PodIdentity]) -> bool { + let current_uids = current + .iter() + .map(|pod| pod.uid.as_str()) + .collect::>(); + !before.is_empty() + && before + .iter() + .any(|pod| !current_uids.contains(pod.uid.as_str())) +} + fn pod_replacement_observed(before: &[PodIdentity], current: &[PodIdentity]) -> bool { if before.is_empty() || current.is_empty() { return false; @@ -798,7 +914,8 @@ fn bucket_name(run_id: &str) -> String { #[cfg(test)] mod tests { use super::{ - OutcomeCounts, PodIdentity, WorkloadSummary, bucket_name, pod_replacement_observed, + OutcomeCounts, PodIdentity, WorkloadSummary, bucket_name, pod_deletion_observed, + pod_replacement_observed, }; use rustfs_operator_e2e::framework::history::OperationOutcome; @@ -855,6 +972,8 @@ mod tests { assert!(!pod_replacement_observed(&before, &before)); assert!(!pod_replacement_observed(&before, &before[..1])); + assert!(!pod_deletion_observed(&before, &before)); + assert!(pod_deletion_observed(&before, &before[..1])); assert!(pod_replacement_observed( &before, &[ From 8abf1e5b7d2cbc9a21856b9ba0ce7726924cf12e Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:08:47 +0800 Subject: [PATCH 09/20] fix(chaos): recommit unconfirmed fault writes --- FAULT_INJECTION_TEST_PLAN.md | 20 ++++++------ e2e/tests/faults.rs | 62 ++++++++++++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md index 4af8e43..3025cbb 100644 --- a/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -552,10 +552,11 @@ spec: 19. 确认持续型 Chaos 没有早于 workload 结束恢复 20. 删除 Chaos 或通过目标节点 helper Pod 恢复 dmsetup table 21. 等待 Tenant 再次 Ready -22. 对所有成功 PUT 对象做最终 GET + sha256 校验 -23. 执行 prefix LIST 并记录 warning -24. 
写入 checker-report.json 和 fault-evidence.json -25. 失败时收集 Kubernetes artifacts、故障状态和故障资源 describe/yaml +22. 对故障期间失败、超时或 unknown 的 PUT 使用相同 key 和内容做幂等重提交 +23. 对全部预期 committed PUT 对象做最终 GET + sha256 校验 +24. 执行 prefix LIST 并记录 warning +25. 写入 checker-report.json 和 fault-evidence.json +26. 失败时收集 Kubernetes artifacts、故障状态和故障资源 describe/yaml ``` 伪代码: @@ -841,11 +842,12 @@ fault_warp_under_chaos_reports_performance_separately 13. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 describe/yaml 或 host fault 输出,再触发 cleanup。 14. 删除 Chaos 资源,或恢复 dmsetup table 并删除 helper Pod。 15. Tenant 恢复 Ready 等待。 -16. 所有成功 `PUT` 对象最终 `GET + sha256` 校验。 -17. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 -17. AWS SDK error 按 service failure / timeout / dispatch-response unknown 分类写入 history。 -18. history、workload summary、fault evidence 和 checker report 输出。 -19. 失败时 artifacts 收集。 +16. 恢复后幂等重提交未确认 PUT,并要求全部预期对象进入 committed 集合。 +17. 所有 committed `PUT` 对象最终 `GET + sha256` 校验。 +18. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 +19. AWS SDK error 按 service failure / timeout / dispatch-response unknown 分类写入 history。 +20. history、workload summary、fault evidence 和 checker report 输出。 +21. 
失败时 artifacts 收集。 这个版本已经能证明系统从“占位骨架”升级为“真实故障注入 + 数据正确性校验”。 diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index b9335da..a925b82 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -158,7 +158,7 @@ async fn run_fault_case( } } - let workload_summary = match run_mixed_workload( + let mut workload = match run_mixed_workload( &s3, &mut history, &run_id, @@ -168,7 +168,7 @@ async fn run_fault_case( ) .await { - Ok(summary) => summary, + Ok(workload) => workload, Err(error) => { collect_fault_artifacts(collector, scenario.case_name, &fault, "workload-failed")?; return Err(error); @@ -177,9 +177,12 @@ async fn run_fault_case( collector.write_text( scenario.case_name, "workload-summary.json", - &serde_json::to_string_pretty(&workload_summary)?, + &serde_json::to_string_pretty(&workload.summary)?, )?; - if let Err(error) = workload_summary.require_fault_evidence(config.require_client_disruption) { + if let Err(error) = workload + .summary + .require_fault_evidence(config.require_client_disruption) + { collect_fault_artifacts( collector, scenario.case_name, @@ -207,6 +210,13 @@ async fn run_fault_case( wait_for_ready_tenant(cluster).await?; let pods_after = rustfs_pod_identities(cluster)?; ensure_port_forward(&mut port_forward, cluster, &endpoint).await?; + workload.summary.recommitted_after_recovery = + recommit_unconfirmed_objects(&s3, &mut history, &workload.unconfirmed_puts).await?; + collector.write_text( + scenario.case_name, + "workload-summary.json", + &serde_json::to_string_pretty(&workload.summary)?, + )?; let report = checker::check_s3_history(&s3, &mut history, true).await?; collector.write_text( scenario.case_name, @@ -220,7 +230,7 @@ async fn run_fault_case( injected: true, active_during_workload: true, recovered: report.tenant_recovered, - client_disruptions: workload_summary.disrupted(), + client_disruptions: workload.summary.disrupted(), pods_before, pods_after, active_snapshot, @@ -232,6 +242,13 @@ async fn run_fault_case( 
"fault-evidence.json", &serde_json::to_string_pretty(&evidence)?, )?; + ensure!( + report.committed_puts == scenario.object_count, + "fault scenario {} expected {} committed objects after recovery reconciliation, got {}", + scenario.name, + scenario.object_count, + report.committed_puts + ); report.require_success()?; Ok(()) @@ -821,14 +838,18 @@ async fn run_mixed_workload( prefilled: &[ObjectSpec], start_index: usize, count: usize, -) -> Result { +) -> Result { let mut summary = WorkloadSummary::default(); + let mut unconfirmed_puts = Vec::new(); for offset in 0..count { let object = ObjectSpec::deterministic(run_id, start_index + offset, SMALL_OBJECT_SIZE_BYTES); let put_outcome = s3.put_object(&object, history).await?; summary.puts.record(put_outcome); + if put_outcome != OperationOutcome::Ok { + unconfirmed_puts.push(object.clone()); + } if let Some(existing) = prefilled.get(offset % prefilled.len()) { let get_result = s3.get_object_result(&existing.key, history).await?; @@ -837,13 +858,39 @@ async fn run_mixed_workload( } summary.require_exercised()?; - Ok(summary) + Ok(MixedWorkloadResult { + summary, + unconfirmed_puts, + }) +} + +async fn recommit_unconfirmed_objects( + s3: &S3WorkloadClient, + history: &mut Recorder, + objects: &[ObjectSpec], +) -> Result { + for object in objects { + let outcome = s3.put_object(object, history).await?; + ensure!( + outcome == OperationOutcome::Ok, + "PUT for previously unconfirmed object {} did not commit after recovery: {outcome:?}", + object.key + ); + } + Ok(objects.len()) +} + +#[derive(Debug)] +struct MixedWorkloadResult { + summary: WorkloadSummary, + unconfirmed_puts: Vec, } #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize)] struct WorkloadSummary { puts: OutcomeCounts, gets: OutcomeCounts, + recommitted_after_recovery: usize, } impl WorkloadSummary { @@ -951,6 +998,7 @@ mod tests { ok: 1, ..OutcomeCounts::default() }, + recommitted_after_recovery: 0, }; 
assert!(summary.require_fault_evidence(false).is_ok()); From e233e8b5d88b7fd7e4c6a6e292cfced8c2ab5de4 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:20:23 +0800 Subject: [PATCH 10/20] fix(chaos): isolate Warp benchmark bucket --- FAULT_INJECTION_TEST_PLAN.md | 2 +- e2e/tests/faults.rs | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md index 3025cbb..79a94e5 100644 --- a/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -406,7 +406,7 @@ fault-test//large/ | P2 | `direct-volume-corruption` | 存储后端专用测试环境 | 模拟已经落盘的数据被破坏。 | | P2 | `node-restart` | 集群节点运维接口 | 模拟节点重启。 | | P3 | `dm-flakey` | device mapper / loop device | 更接近真实块设备故障。 | -| P3 | `warp-under-chaos` | MinIO Warp + chaos | 故障期间性能退化分析。 | +| P3 | `warp-under-chaos` | MinIO Warp + chaos | 使用独立 benchmark bucket 分析故障期间性能,避免影响 correctness 对象。 | `operator-restart` 可以作为独立 Operator 控制面韧性测试,但不放入本方案第一阶段的 RustFS workload fault matrix,避免混淆测试对象。 diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index a925b82..aa26e1a 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -134,12 +134,13 @@ async fn run_fault_case( } if spec.backend == FaultBackend::MinioWarpWithChaos { + let warp_bucket = warp_bucket_name(&run_id); if let Err(error) = host_faults::run_warp_mixed( config.warp_duration, collector, scenario.case_name, &endpoint, - &bucket, + &warp_bucket, access_key, secret_key, ) { @@ -958,11 +959,15 @@ fn bucket_name(run_id: &str) -> String { format!("rustfs-fault-{suffix}") } +fn warp_bucket_name(run_id: &str) -> String { + format!("{}-warp", bucket_name(run_id)) +} + #[cfg(test)] mod tests { use super::{ OutcomeCounts, PodIdentity, WorkloadSummary, bucket_name, pod_deletion_observed, - pod_replacement_observed, + pod_replacement_observed, warp_bucket_name, }; use rustfs_operator_e2e::framework::history::OperationOutcome; @@ -972,6 +977,10 @@ mod 
tests { bucket_name("run-12345678-abcd-efgh"), "rustfs-fault-run12345678abcde" ); + assert_eq!( + warp_bucket_name("run-12345678-abcd-efgh"), + "rustfs-fault-run12345678abcde-warp" + ); } #[test] From 2a0128f6da6ab0e8a8c0d81017801952e75f5763 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:51:42 +0800 Subject: [PATCH 11/20] fix(chaos): avoid dm resume udev deadlock --- e2e/src/framework/host_faults.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/e2e/src/framework/host_faults.rs b/e2e/src/framework/host_faults.rs index 455a518..77f069b 100644 --- a/e2e/src/framework/host_faults.rs +++ b/e2e/src/framework/host_faults.rs @@ -263,7 +263,7 @@ impl DmFlakeyGuard { fn load_table(&self, table: &str) -> Result<()> { self.dmsetup(["suspend", self.dm_name.as_str()])?; let load = self.dmsetup(["load", self.dm_name.as_str(), "--table", table]); - let resume = self.dmsetup(["resume", self.dm_name.as_str()]); + let resume = self.dmsetup(dm_resume_args(&self.dm_name)); load?; resume?; Ok(()) @@ -362,6 +362,10 @@ fn validate_dm_spec(spec: &DmFlakeySpec<'_>) -> Result<()> { Ok(()) } +fn dm_resume_args(name: &str) -> [&str; 3] { + ["resume", "--noudevsync", name] +} + fn verify_dm_volume_mapping( config: &ClusterTestConfig, node: &str, @@ -501,7 +505,8 @@ spec: #[cfg(test)] mod tests { use super::{ - DmFlakeySpec, dm_helper_manifest, helper_pod_name, pv_targets_node, validate_dm_spec, + DmFlakeySpec, dm_helper_manifest, dm_resume_args, helper_pod_name, pv_targets_node, + validate_dm_spec, }; use crate::framework::fault_config::FaultTestConfig; @@ -522,6 +527,14 @@ mod tests { assert!(manifest.contains("rustfs-operator-fault-test")); } + #[test] + fn dm_resume_disables_udev_synchronization() { + assert_eq!( + dm_resume_args("rustfs-fault-dm"), + ["resume", "--noudevsync", "rustfs-fault-dm"] + ); + } + #[test] fn dm_spec_rejects_unbounded_or_unsafe_targets() { let valid = DmFlakeySpec 
{ From a45162511b93a2193b8849950de1f4238e6a50f0 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:58:32 +0800 Subject: [PATCH 12/20] fix(chaos): recover dm target without flushing --- e2e/src/framework/host_faults.rs | 34 +++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/e2e/src/framework/host_faults.rs b/e2e/src/framework/host_faults.rs index 77f069b..641420d 100644 --- a/e2e/src/framework/host_faults.rs +++ b/e2e/src/framework/host_faults.rs @@ -114,7 +114,7 @@ pub fn apply_dm_flakey( spec.name ); - guard.load_table(spec.fault_table)?; + guard.load_table(spec.fault_table, false)?; let active = guard.snapshot("active")?; ensure!( active.table.split_whitespace().nth(2) == spec.fault_table.split_whitespace().nth(2), @@ -202,7 +202,7 @@ impl DmFlakeyGuard { pub fn restore(&mut self) -> Result<()> { let recovery_table = self.recovery_table.clone(); - self.load_table(&recovery_table)?; + self.load_table(&recovery_table, true)?; self.recovery_snapshot = Some(self.snapshot("recovered")?); self.delete_helper()?; self.restored = true; @@ -260,8 +260,8 @@ impl DmFlakeyGuard { Ok(()) } - fn load_table(&self, table: &str) -> Result<()> { - self.dmsetup(["suspend", self.dm_name.as_str()])?; + fn load_table(&self, table: &str, noflush: bool) -> Result<()> { + self.dmsetup(dm_suspend_args(&self.dm_name, noflush))?; let load = self.dmsetup(["load", self.dm_name.as_str(), "--table", table]); let resume = self.dmsetup(dm_resume_args(&self.dm_name)); load?; @@ -318,7 +318,7 @@ impl Drop for DmFlakeyGuard { if !self.restored { let recovery_table = self.recovery_table.clone(); if !recovery_table.is_empty() { - let _ = self.load_table(&recovery_table); + let _ = self.load_table(&recovery_table, true); } let _ = self.delete_helper(); } @@ -366,6 +366,14 @@ fn dm_resume_args(name: &str) -> [&str; 3] { ["resume", "--noudevsync", name] } +fn dm_suspend_args(name: &str, noflush: bool) 
-> Vec<&str> { + if noflush { + vec!["suspend", "--noflush", name] + } else { + vec!["suspend", name] + } +} + fn verify_dm_volume_mapping( config: &ClusterTestConfig, node: &str, @@ -505,8 +513,8 @@ spec: #[cfg(test)] mod tests { use super::{ - DmFlakeySpec, dm_helper_manifest, dm_resume_args, helper_pod_name, pv_targets_node, - validate_dm_spec, + DmFlakeySpec, dm_helper_manifest, dm_resume_args, dm_suspend_args, helper_pod_name, + pv_targets_node, validate_dm_spec, }; use crate::framework::fault_config::FaultTestConfig; @@ -535,6 +543,18 @@ mod tests { ); } + #[test] + fn dm_recovery_suspend_does_not_flush_faulting_io() { + assert_eq!( + dm_suspend_args("rustfs-fault-dm", true), + ["suspend", "--noflush", "rustfs-fault-dm"] + ); + assert_eq!( + dm_suspend_args("rustfs-fault-dm", false), + ["suspend", "rustfs-fault-dm"] + ); + } + #[test] fn dm_spec_rejects_unbounded_or_unsafe_targets() { let valid = DmFlakeySpec { From f293a0988f809701e5a555ca71d798793d003eb1 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 21:56:47 +0800 Subject: [PATCH 13/20] test(chaos): add seeded concurrent fault workload --- FAULT_INJECTION_TEST_PLAN.md | 1469 ++++++++++++-------------- README.md | 8 +- e2e/README.md | 16 +- e2e/src/framework/checker.rs | 54 +- e2e/src/framework/fault_config.rs | 49 +- e2e/src/framework/fault_scenarios.rs | 20 +- e2e/src/framework/history.rs | 109 +- e2e/src/framework/resources.rs | 1 + e2e/src/framework/s3_workload.rs | 289 +++-- e2e/src/framework/tenant_factory.rs | 72 +- e2e/tests/faults.rs | 217 +++- 11 files changed, 1359 insertions(+), 945 deletions(-) diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md index 79a94e5..b1d9505 100644 --- a/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -14,940 +14,873 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -# RustFS 故障注入测试方案 +# RustFS Fault Injection Operations Manual / RustFS 故障注入测试操作手册 -本文档描述如何复用 RustFS Operator 测试基础设施,在真实 Kubernetes 测试集群中运行可执行、可诊断、可逐步增强的故障注入测试体系。故障测试不属于 Kind e2e suite。 +- [中文操作手册](#中文操作手册) +- [English Operations Manual](#english-operations-manual) -核心原则: +## 中文操作手册 -- **Operator 负责测试环境编排**:创建 Tenant、准备本地 PV、暴露 RustFS S3 服务、等待状态、收集诊断现场。 -- **故障注入器负责制造故障**:优先使用 Kubernetes-native 的 Chaos Mesh。 -- **S3 workload 负责产生真实对象访问流量**:持续执行 `PUT`、`GET`、`HEAD`、`LIST` 等操作。 -- **Jepsen-like checker 负责判断正确性**:它不制造故障,只基于操作历史和最终读取结果判断 RustFS 是否丢数据、读错数据或返回假成功。 +### 1. 目的与范围 -也就是说,这套测试不是单纯验证 Operator 是否能拉起 StatefulSet,而是通过 Operator 部署出来的 RustFS 集群来验证 RustFS 在故障下的数据正确性。 +本手册用于在专用的真实 Kubernetes 测试集群中运行 RustFS 故障注入测试。测试对象是由 RustFS Operator 创建的测试 Tenant,不是现有业务 Tenant,也不是生产 Operator 控制面。 -## 边界澄清 +每次执行 `make fault-test` 只运行 `RUSTFS_FAULT_TEST_SCENARIO` 选择的一个场景,并只报告一个真实的 destructive test。七个场景必须串行执行。 -这套故障测试的测试对象是 **Operator 编排出的 RustFS workload**,不是 Operator 控制面自身。 +测试分为两类: -边界如下: +1. 六个 Kubernetes-native 场景,使用 Chaos Mesh 和动态 StorageClass。 +2. 一个 `dm-flakey` 场景,使用专用静态 Local PV、Linux Device Mapper 和 privileged helper Pod。 -- Operator 只负责把 RustFS Tenant、Service、PVC、Secret 等测试环境编排出来。 -- 故障注入作用于 RustFS Pod、RustFS 容器、RustFS 数据卷和 RustFS 服务路径。 -- checker 判断的是 RustFS 对象读写正确性:已经确认成功写入的数据不能丢,成功读取不能返回错误内容。 -- Operator 状态只作为恢复观察信号,例如故障解除后 Tenant 是否重新回到 Ready;它不是第一阶段 correctness verdict 的主体。 -- 不在 Tenant Console 或生产 Operator Console 中提供 destructive fault test 入口。 -- Chaos Mesh Dashboard 可以作为观察 Chaos 资源的外部工具,但 fault-test runner 的权威输出是 `history.jsonl`、`checker-report.json` 和 Kubernetes artifacts。 +执行 `dm-flakey` 前不需要重装 Kubernetes、RustFS Operator、Chaos Mesh 或 Rust 工具链;只需要把 fault-test Tenant 的存储 fixture 切换为专用静态 Local PV。 -## 目标 +### 2. 安全要求 -故障注入测试需要回答这些问题: +必须满足以下要求: -1. RustFS 在 Pod、节点、网络、磁盘 I/O 故障下,已经成功写入的数据是否仍然存在。 -2. RustFS 是否会在磁盘损坏或网络分区后,把错误对象内容以 `200 OK` 返回给客户端。 -3. RustFS 在请求超时、连接中断、部分失败后,是否存在“客户端认为失败但服务端实际写入”的未知状态。 -4. 
Operator 编排出的 Tenant 是否能在故障解除后回到 Ready,作为 RustFS workload 恢复观察信号。 -5. 当测试失败时,fault-test runner 是否能留下足够的日志、事件、历史记录和 checker 报告用于定位。 +- 只能在专用测试集群执行,禁止在生产或共享开发集群执行。 +- 当前 context 不能以 `kind-` 开头。 +- 不得把 `RUSTFS_FAULT_TEST_NAMESPACE` 或 `RUSTFS_FAULT_TEST_TENANT` 指向现有业务资源。 +- 常规场景必须使用支持动态供给的 StorageClass。 +- `dm-flakey` 只能使用专用的 `kubernetes.io/no-provisioner` StorageClass 和专用块设备或 loop 文件。 +- DM Local PV 路径不得复用现有 RustFS 数据目录。 +- 所有场景必须串行运行;默认本地 port-forward 端口为 `19000`。 +- 失败时先保存 artifacts,再清理故障资源和测试 namespace。 -最重要的判定不是“故障期间所有请求都成功”,而是: +测试 runner 默认创建: ```text -可以失败,但不能假成功。 -可以超时,但不能返回错误数据。 -故障恢复后,已经确认成功的数据必须一致。 +namespace: rustfs-fault-test +tenant: fault-test-tenant ``` -## 非目标 +如果 namespace 已存在,必须同时具备: -第一阶段不做这些事: +```text +app.kubernetes.io/managed-by=rustfs-operator-fault-test +rustfs.com/fault-test-tenant=fault-test-tenant +``` + +runner 不会自动认领未标记的 namespace,也不会删除不属于它的 namespace。 -- 不替代 RustFS 自身的单元测试、集成测试或存储引擎内部测试。 -- 不直接引入完整 Clojure Jepsen 测试套件。 -- 不在共享开发集群或生产集群上运行 destructive 测试;真实 Kubernetes 集群也必须使用专用测试 namespace、Tenant 和 StorageClass。 -- 不把性能压测结果当成 correctness 结论。 -- 不在第一版验证所有 S3 线性一致性细节。 -- 不默认测试多 Tenant、跨集群、真实块设备故障。 -- 不把故障测试放进 Tenant Console。 -- 不在生产 Operator Console 提供运行 destructive 测试的入口。 -- 不把 Operator 控制面重启、升级、Leader Election 等作为第一阶段核心验证对象。 +### 3. 
场景目录 -第一阶段的目标是补齐当前最大缺口:**真实故障注入 + 对象内容正确性检查**。 +| 场景 | 后端 | 隔离方式 | 主要验证 | +| --- | --- | --- | --- | +| `io-eio` | Chaos Mesh IOChaos | 新 Tenant/PVC | 一个数据卷发生 EIO 后,已提交对象不丢失、不损坏。 | +| `pod-kill-one` | Chaos Mesh PodChaos | 可复用 Ready Tenant | 删除一个 RustFS Pod 后,替代 Pod 出现且对象保持正确。 | +| `network-partition-one` | Chaos Mesh NetworkChaos | 可复用 Ready Tenant | 一个 Pod 与同 Tenant peers 分区后,恢复时对象保持正确。 | +| `io-read-mistake` | Chaos Mesh IOChaos | 新 Tenant/PVC | 读路径被篡改时,成功 GET 不能返回错误内容。 | +| `disk-full` | Chaos Mesh IOChaos | 新 Tenant/PVC | 写操作返回 ENOSPC 后,已提交对象保持正确。 | +| `warp-under-chaos` | Warp + IOChaos | 新 Tenant/PVC | 记录故障下性能,正确性仍由 history/checker 判断。 | +| `dm-flakey` | Linux Device Mapper | 专用静态 Local PV | 底层块设备间歇性 EIO 后,恢复时对象保持正确。 | -## 可复用的测试基础设施 +默认 workload 写入或确认 4000 个对象,并使用 50 并发。尺寸计划先按固定比例生成,再由 seed 确定性打乱:4KiB 85%(3400 个)、16KiB 10%(400 个)、8MiB 4%(160 个)、16MiB 1%(40 个)。每个场景的逻辑 payload 为 2,033,745,920 bytes,约 1.89GiB。 -当前项目已经有适合故障测试的底层模块,不需要复制 kubectl、S3、history 和 checker 实现。但故障测试拥有独立配置、命令和安全边界,不属于 Kind e2e case inventory。 +对象内容由同一个 seed 和对象索引通过 `splitmix64-v1` 确定性生成。`workload-plan.json` 记录 seed、生成器版本、并发、尺寸分布和总 payload;`history.jsonl` 记录每个 key 的 size、SHA-256 和结果。设置 `RUSTFS_FAULT_TEST_SEED=` 可以重放相同尺寸顺序和对象内容。 -已有能力: +客户端没有看到错误不代表故障未生效;权威故障证据来自 Chaos 状态或 DM table/status,以及 `fault-evidence.json`。 -| 能力 | 当前位置 | 用途 | -| --- | --- | --- | -| destructive 入口 | `make fault-test` | 专门在真实 Kubernetes 测试集群运行破坏性故障测试。 | -| fault suite runners | `e2e/tests/faults.rs` | 真实集群 scenario-selected destructive runner,不属于 e2e case inventory。 | -| fault config/context guard | `e2e/src/framework/fault_config.rs` | 读取独立 fault-test 配置、绑定当前 context,并拒绝 Kind。 | -| Tenant/Secret 创建 | `e2e/src/framework/resources.rs` | 创建 fault-test namespace、凭据和真实集群 Tenant。 | -| S3 port-forward | `e2e/src/framework/port_forward.rs` | 将 Tenant S3 服务暴露到本地。 | -| artifact collector | `e2e/src/framework/artifacts.rs` | 测试失败后收集 Kubernetes 现场。 | +`RUSTFS_FAULT_TEST_PERCENT=20` 表示 Chaos Mesh 对匹配 I/O 操作的注入概率,不表示预先固定选择 20% 
的对象。 -关键约定: +### 4. 测试机要求 -- RustFS Pod selector 可使用 `rustfs.tenant=`。 -- RustFS 容器名是 `rustfs`。 -- RustFS 数据卷路径遵循 `/data/rustfs0`、`/data/rustfs1`。 -- 常规场景要求真实集群提供动态 StorageClass;`dm-flakey` 只允许使用显式配置的专用静态 Local PV。 +运行测试的主机需要: -因此推荐方案是: +- `kubectl` +- Rust stable 和 Cargo,支持 Rust edition 2024 +- GNU Make +- 可访问 Kubernetes API 的 kubeconfig +- `warp` v1.3.1,仅 `warp-under-chaos` 需要 +- 足够空间保存 `target/fault-tests` artifacts -```text -复用当前测试基础设施 - + 独立 FaultTestConfig 与 Make 入口 - + 新增 Chaos Mesh 故障注入模块 - + 新增 S3 workload - + 新增 operation history - + 新增对象存储 checker +建议测试账户在专用测试集群使用 cluster-admin。最小权限至少需要: + +- 读取 CRD、Node 和 StorageClass +- 创建、读取、更新和删除 namespace、Secret、Pod、Service、PVC、StatefulSet 和 Tenant +- 在 Chaos Mesh namespace 管理 IOChaos、PodChaos 和 NetworkChaos +- 读取 Pod 日志、events,并执行 `kubectl exec` +- `dm-flakey` 允许创建 privileged、`hostPID`、`hostPath: /` 的 helper Pod + +代码检查: + +```bash +rustc --version +cargo --version +kubectl version --client +make e2e-check ``` -## 总体架构 +### 5. Kubernetes 和 RustFS 前置检查 -```text -make fault-test -> e2e/tests/faults.rs - | - +-- 环境保护:destructive opt-in / current real Kubernetes context / required StorageClass - +-- 环境准备:按 isolation reset 或复用 Tenant;DM 场景验证专用 PV 拓扑 - +-- S3 workload:持续读写对象 - +-- history recorder:记录每次操作的开始、结束、结果、hash - +-- nemesis:通过 Chaos Mesh 对 RustFS workload 注入故障 - +-- checker:基于 history 和最终读回结果判断 RustFS 对象正确性 - +-- artifact collector:失败时收集诊断现场 +切换并记录目标 context: + +```bash +kubectl config use-context +kubectl config current-context +kubectl get nodes ``` -建议新增模块: +确认 RustFS Operator、Tenant CRD 和 StorageClass: -```text -e2e/src/framework/chaos_mesh.rs -e2e/src/framework/fault_config.rs -e2e/src/framework/fault_scenarios.rs -e2e/src/framework/s3_workload.rs -e2e/src/framework/history.rs -e2e/src/framework/checker.rs +```bash +kubectl get crd tenants.rustfs.com +kubectl -n rustfs-system get deployment +kubectl get storageclass ``` -模块职责: +常规场景需要至少四个可调度节点和四个 `80Gi` RWO PVC。fault Tenant 使用 required pod anti-affinity,把四个 
RustFS Pod 分散到不同的 `kubernetes.io/hostname`。StorageClass 必须支持动态供给,不能是 `kubernetes.io/no-provisioner`。每个承载 fault-test PVC 的节点应至少有 100Gi 可用空间;执行前必须按实际 StorageClass 拓扑核对容量。 -| 模块 | 职责 | -| --- | --- | -| `chaos_mesh` | 生成、apply、describe、delete Chaos Mesh 资源。 | -| `fault_scenarios` | 定义故障场景名称、默认参数、目标对象和执行顺序。 | -| `s3_workload` | 对 RustFS Tenant S3 endpoint 执行对象读写流量。 | -| `history` | 将每个 S3 操作记录成 JSON Lines。 | -| `checker` | 基于 history 和最终读回结果验证 RustFS 对象存储不变量。 | -| `faults.rs` | 编排完整测试流程,不承载底层实现细节。 | +不能只看 PVC 显示的 capacity。hostPath/local-path provisioner 通常不执行容量配额,必须检查它的实际 node path 和对应文件系统: -## 为什么优先用 Chaos Mesh +```bash +kubectl -n kube-system get configmap local-path-config -o yaml +kubectl get pv -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.hostPath.path}{"\n"}{end}' +df -h +``` -当前场景是在 Kubernetes 中通过 Operator 部署 RustFS,因此故障注入也应该尽量 Kubernetes-native。 +K3s 默认 `/var/lib/rancher/k3s/storage` 经常位于较小的系统盘。若该文件系统不足 100Gi,不得用于本测试;应部署专用的动态 provisioner/StorageClass,把新 fault-test PVC 放到 `/data/rustfs/rustfs-fault-local-path` 之类的独立数据盘目录。不要修改或迁移现有业务 PVC。 -Chaos Mesh 适合第一阶段,原因: +建议固定已验证的 RustFS image digest,避免 `latest` 漂移: -- 可以通过 namespace 和 label 精准选择 RustFS Pod。 -- 可以指定容器名,避免影响非目标 sidecar 或其他组件。 -- 支持 `PodChaos`、`NetworkChaos`、`IOChaos`。 -- `IOChaos` 能对指定挂载路径返回 `EIO`,适合模拟磁盘坏块或磁盘 I/O 错误。 -- `IOChaos mistake` 能模拟读写返回错误字节,适合模拟 bit rot / 静默损坏。 -- 以 CRD 形式管理故障,方便 fault-test runner apply/delete/describe/collect。 +```bash +export RUSTFS_IMAGE='docker.io/rustfs/rustfs@sha256:' +``` + +### 6. 
安装和验证 Chaos Mesh -第一阶段建议只要求: +以下示例使用已验证的 Chaos Mesh v2.8.3: + +```bash +helm repo add chaos-mesh https://charts.chaos-mesh.org +helm repo update + +helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh \ + -n chaos-mesh --create-namespace \ + --version 2.8.3 \ + --set chaosDaemon.runtime=containerd \ + --set chaosDaemon.socketPath=/run/containerd/containerd.sock \ + --set dashboard.create=false \ + --wait --timeout 10m +``` + +K3s 使用: ```text -Chaos Mesh 已安装 -iochaos.chaos-mesh.org CRD 存在 -podchaos.chaos-mesh.org CRD 存在 -networkchaos.chaos-mesh.org CRD 存在 +/run/k3s/containerd/containerd.sock ``` -如果 CRD 不存在,测试应明确失败并给出提示,而不是静默跳过。 +其他发行版必须根据实际容器运行时修改 `chaosDaemon.runtime` 和 `chaosDaemon.socketPath`。 + +验证: -## 为什么不是直接上完整 Jepsen +```bash +kubectl -n chaos-mesh get deployment,daemonset +kubectl get crd \ + iochaos.chaos-mesh.org \ + podchaos.chaos-mesh.org \ + networkchaos.chaos-mesh.org +``` -完整 Jepsen 很强,但第一阶段不建议直接引入,原因: +要求 controller-manager 全部 Ready,chaos-daemon 在所有目标节点 Ready。 -- 当前项目 e2e 是 Rust-native,直接接入 Clojure Jepsen 成本高。 -- 当前最大的缺口是“没有真实故障注入”和“没有对象内容正确性 checker”。 -- 对象存储第一阶段最关键的不变量可以用更轻量的 checker 覆盖。 -- 先把 `PUT/GET/hash` 这条基本正确性链路跑通,收益更高。 +### 7. 
运行普通测试 -因此建议路线是: +先设置公共参数: -```text -先做 Jepsen-like checker -后续再逐步增强为更完整的并发历史模型 +```bash +export RUSTFS_FAULT_TEST_STORAGE_CLASS= +export RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" +export RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE=rustfs-system +export RUSTFS_FAULT_TEST_NAMESPACE=rustfs-fault-test +export RUSTFS_FAULT_TEST_TENANT=fault-test-tenant +export RUSTFS_FAULT_TEST_CHAOS_NAMESPACE=chaos-mesh +export RUN_ROOT="target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" ``` -Jepsen-like 的含义是: +运行一个场景: -- 有 workload。 -- 有 nemesis。 -- 有 operation history。 -- 有明确 correctness model。 -- 有自动 checker。 +```bash +RUSTFS_FAULT_TEST_SCENARIO=io-eio \ +RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/io-eio" \ +make fault-test +``` -它不是简单 chaos smoke test。 +`make fault-test` 会在内部设置 `RUSTFS_FAULT_TEST_DESTRUCTIVE=1`。不要直接绕过 Make 入口运行 destructive test。 -## 安全模型 +测试期间持续观察节点、现有业务 Tenant 和 fault-test Tenant。任一非目标资源变为非 Ready 时,应立即删除当前 managed Chaos resource、停止后续场景并收集现场。 -故障测试必须默认安全,只能面向当前真实 Kubernetes 测试集群,不能运行在 Kind、共享开发集群或生产集群。 +按推荐顺序运行六个普通场景,并在首个失败后停止: -必须保留并强化这些保护: +```bash +for scenario in \ + io-eio \ + pod-kill-one \ + network-partition-one \ + io-read-mistake \ + disk-full \ + warp-under-chaos +do + RUSTFS_FAULT_TEST_SCENARIO="$scenario" \ + RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/$scenario" \ + make fault-test || break +done +``` -1. 必须设置 `RUSTFS_FAULT_TEST_DESTRUCTIVE=1`;`make fault-test` 会显式设置。 -2. fault runner 使用当前 `kubectl config current-context`,并拒绝 `kind-*` context。 -3. 必须显式提供 `RUSTFS_FAULT_TEST_STORAGE_CLASS`;除 `dm-flakey` 的专用静态 Local PV 外,目标 StorageClass 必须支持动态供给。 -4. 目标 namespace 必须来自 fault-test 配置,默认 `rustfs-fault-test`;runner 创建 namespace 时必须写入 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` label 和匹配 Tenant 的 `rustfs.com/fault-test-tenant` annotation。 -5. 已存在 namespace 只有在上述所有权标记完全匹配时才允许 reset;runner 不得自动认领未标记 namespace。 -6. 所有故障资源必须带唯一 run id label。 -7. 每个 Chaos 资源必须有 RAII-style cleanup guard。 -8. 正常结束和异常失败都必须 best-effort 删除故障资源。 -9. 
`io-eio` 这类存储破坏/强干扰 case 必须在 case 前 reset Tenant/PVC/PV;后续 pod kill、network delay、短暂 disconnect 可以按场景复用 Tenant。 -10. 默认故障持续时间要覆盖 workload 窗口,默认故障比例要小。 -11. 测试失败时必须先收集 artifacts,再清理会影响诊断的信息。 -12. destructive 场景保持 `#[ignore]`,只能通过显式 Make 目标执行。 +`warp-under-chaos` 执行前验证: -当前使用的故障测试环境变量: +```bash +warp --version +``` -| 变量 | 默认值 | 作用 | -| --- | --- | --- | -| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | required | 常规场景使用动态 StorageClass;`dm-flakey` 使用专用静态 Local PV StorageClass。 | -| `RUSTFS_FAULT_TEST_NAMESPACE` | `rustfs-fault-test` | 专用测试 namespace。 | -| `RUSTFS_FAULT_TEST_TENANT` | `fault-test-tenant` | 专用测试 Tenant。 | -| `RUSTFS_FAULT_TEST_SCENARIO` | `io-eio` | 选择故障场景。 | -| `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `180` | 故障持续时间,默认覆盖串行小对象 workload。 | -| `RUSTFS_FAULT_TEST_PERCENT` | `20`;`disk-full` 为 `100` | 支持百分比注入的场景使用。 | -| `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `40` | 写入或校验对象数量。 | -| `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `3` | 单次 S3 请求超时时间。 | -| `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求故障期间至少出现一次客户端可见失败/超时/unknown。 | -| `RUSTFS_FAULT_TEST_DM_NAME` | empty | `dm-flakey` 场景要切换的 device-mapper 设备名,必填。 | -| `RUSTFS_FAULT_TEST_DM_NODE` | empty | device-mapper 设备与目标 Local PV 所在 Kubernetes 节点,必填。 | -| `RUSTFS_FAULT_TEST_DM_MOUNT_PATH` | empty | 目标 PV 在节点上的 Local PV 挂载路径,必填。 | -| `RUSTFS_FAULT_TEST_DM_FAULT_TABLE` | empty | `dm-flakey` 场景注入故障时加载的 dmsetup table,必填。 | -| `RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE` | current table | `dm-flakey` 场景恢复时加载的 dmsetup table;不填则使用注入前 table。 | -| `RUSTFS_FAULT_TEST_DM_HELPER_IMAGE` | `rancher/mirrored-library-busybox:1.37.0` | 目标节点 privileged helper Pod 镜像。 | -| `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | `warp-under-chaos` 场景中 Warp mixed workload 的运行时间。 | -| `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh 资源所在 namespace。 | - -## 操作历史模型 - -每个客户端可见的 S3 操作都应记录一条 JSON Lines。 +Warp 性能数据不参与 correctness verdict。 -示例: +### 8. 
`dm-flakey` 专用操作 -```json -{ - "id": "op-000001", - "scenario": "io-eio", - "kind": "put", - "bucket": "rustfs-fault-run123", - "key": "fault-test/run-123/object-1", - "value_sha256": "abc123", - "size_bytes": 1048576, - "started_at_ms": 1710000000000, - "ended_at_ms": 1710000001234, - "outcome": "ok", - "http_status": 200, - "error": null -} -``` - -`outcome` 建议只保留四类,语义必须清晰: - -| outcome | 含义 | checker 处理 | -| --- | --- | --- | -| `ok` | 客户端收到明确成功响应。 | 作为强正确性输入。 | -| `failed` | 客户端收到明确失败响应。 | 不要求最终存在。 | -| `timeout` | 客户端超时,不知道服务端是否完成。 | 作为 unknown 处理。 | -| `unknown` | 连接中断、body 未读完、port-forward 中断等。 | 作为 unknown 处理。 | +#### 8.1 不需要重装集群 -第一版 checker 只对 `ok` 的 `PUT` 做强校验。 +如果前六个场景已经执行,只需: -对于 `timeout` 和 `unknown` 的写入: +1. 保留 Kubernetes、Operator、Chaos Mesh 和 Rust 工具链。 +2. 停止其他 fault-test 进程。 +3. 为四个测试 Pod 准备四个专用静态 Local PV。 +4. 其中一个 PV 必须由 Device Mapper 设备提供。 +5. 使用新的静态 StorageClass 运行 `dm-flakey`。 -- 最终存在可以接受。 -- 最终不存在也可以接受。 -- 需要在 report 中单独列出,方便后续分析。 +runner 会 reset fault-test Tenant/PVC,但不会创建主机块设备、静态 PV 或 StorageClass。 -这样可以避免把网络中断导致的“未知成功”误判为 RustFS 数据错误。 +#### 8.2 允许 privileged helper -## Checker 不变量 +如果 fault-test namespace 已存在: -### 不变量 1:成功写入的数据不能丢 +```bash +kubectl label namespace rustfs-fault-test \ + pod-security.kubernetes.io/enforce=privileged \ + --overwrite +``` -如果客户端收到了成功写入: +如果要在第一次运行前预创建 namespace: -```text -PUT key value_hash=H -> ok +```bash +kubectl create namespace rustfs-fault-test +kubectl label namespace rustfs-fault-test \ + app.kubernetes.io/managed-by=rustfs-operator-fault-test \ + pod-security.kubernetes.io/enforce=privileged +kubectl annotate namespace rustfs-fault-test \ + rustfs.com/fault-test-tenant=fault-test-tenant ``` -故障解除并等待 Tenant 恢复后,必须满足: +#### 8.3 准备四个专用卷 -```text -GET key -> 200 -sha256(body) == H +推荐使用四个真实专用测试块设备。loop 文件仅适用于实验室环境。每个 backing filesystem 建议至少 `90Gi`,静态 PV capacity 固定为 `80Gi`。 + +目标 DM 节点的实验室 loop 示例;使用真实专用块设备时跳过 `truncate` 和 `losetup`: + +```bash +export LAB=/data/rustfs/rustfs-fault-lab +export 
DM_NAME=rustfs-fault-dm + +mkdir -p "$LAB/volume" +truncate -s 90G "$LAB/disk.img" +BACKING=$(losetup --find --show "$LAB/disk.img") +SECTORS=$(blockdev --getsz "$BACKING") +dmsetup create "$DM_NAME" --table "0 $SECTORS linear $BACKING 0" +mkfs.ext4 -F "/dev/mapper/$DM_NAME" +mount "/dev/mapper/$DM_NAME" "$LAB/volume" +``` + +其他三个节点把各自专用块设备直接格式化并挂载到同一路径: + +```bash +mkdir -p /data/rustfs/rustfs-fault-lab/volume +mkfs.ext4 -F +mount /data/rustfs/rustfs-fault-lab/volume ``` -否则 hard fail。 +不得格式化或挂载现有 RustFS 数据盘。 -### 不变量 2:成功读取不能返回错误内容 +#### 8.4 创建静态 StorageClass 和 Local PV -任何一次 `GET` 只要返回 `200 OK`,并且该 key 有已知成功写入值,则: +StorageClass: -```text -sha256(body) == expected_hash +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: rustfs-fault-dm + labels: + app.kubernetes.io/managed-by: rustfs-operator-fault-test +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Retain ``` -如果 `GET` 返回 `200` 但 hash 不一致,这是最高优先级失败。 +为四个节点分别创建一个 PV。每个 PV 使用唯一名称和对应 node affinity: -这比“请求是否成功”更重要,因为对象存储最危险的问题不是失败,而是**成功返回错误数据**。 +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: rustfs-fault-dm- + labels: + app.kubernetes.io/managed-by: rustfs-operator-fault-test +spec: + capacity: + storage: 80Gi + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: rustfs-fault-dm + local: + path: /data/rustfs/rustfs-fault-lab/volume + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - +``` -### 不变量 3:明确失败的写入不要求存在 +验证四个 PV 均为 `Available`: -如果 `PUT` 返回明确失败: +```bash +kubectl get storageclass rustfs-fault-dm +kubectl get pv -l app.kubernetes.io/managed-by=rustfs-operator-fault-test -o wide +``` -```text -PUT key -> failed +#### 8.5 运行 `dm-flakey` + +目标节点名必须是 Kubernetes `metadata.name`,挂载路径必须与目标 PV 的 `spec.local.path` 完全一致。 + +先在目标节点执行 `blockdev --getsz `,再把结果设置为测试机上的 `SECTORS`。 + 
+```bash +export DM_NODE= +export DM_MOUNT_PATH=/data/rustfs/rustfs-fault-lab/volume +export BACKING_DEVICE= +export SECTORS= + +RUSTFS_FAULT_TEST_SCENARIO=dm-flakey \ +RUSTFS_FAULT_TEST_STORAGE_CLASS=rustfs-fault-dm \ +RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" \ +RUSTFS_FAULT_TEST_DM_NAME=rustfs-fault-dm \ +RUSTFS_FAULT_TEST_DM_NODE="$DM_NODE" \ +RUSTFS_FAULT_TEST_DM_MOUNT_PATH="$DM_MOUNT_PATH" \ +RUSTFS_FAULT_TEST_DM_FAULT_TABLE="0 $SECTORS flakey $BACKING_DEVICE 0 1 15" \ +RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/dm-flakey" \ +make fault-test ``` -那么最终这个 key 存在或不存在,都不作为第一版 hard fail。 +该 table 表示底层设备正常 1 秒、故障 15 秒并循环。helper 会验证 Pod、PVC、PV、节点、Local PV 路径和 Device Mapper mount source 的关系,然后加载 fault table。恢复时使用注入前的 linear table。 -### 不变量 4:未知结果单独记录 +#### 8.6 DM 紧急恢复 -如果 `PUT` 是: +如果测试进程异常退出且设备仍为 flakey,立即在目标节点执行: -```text -timeout -unknown +```bash +dmsetup suspend --noflush rustfs-fault-dm +dmsetup load rustfs-fault-dm \ + --table "0 $SECTORS linear $BACKING_DEVICE 0" +dmsetup resume --noudevsync rustfs-fault-dm +dmsetup table rustfs-fault-dm ``` -则 checker 记录它最终是否 materialized,但不作为第一版 hard fail。 +确认 table 已恢复为 `linear` 后再删除测试 Pod、PVC 或卸载文件系统。 + +### 9. 验收标准 + +每个场景必须满足: + +- `make fault-test` 退出码为 0。 +- `fault-evidence.json` 中 `injected=true`、`active_during_workload=true`、`recovered=true`。 +- `checker-report.json` 中 `committed_puts=4000`。 +- `missing_committed_objects` 为空。 +- `hash_mismatches` 为空。 +- `successful_corrupted_reads` 为空。 +- `list_warnings` 为空。 +- fault-test Tenant 恢复 Ready。 -### 不变量 5:恢复后的 LIST 先作为 warning +`RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` 默认是 `false`。因此客户端没有失败或超时可以接受,只要故障后端明确证明故障已选中并注入。 -故障解除并等待 Tenant Ready 后: +主要 artifacts: ```text -LIST prefix +history.jsonl +workload-plan.json +workload-summary.json +checker-report.json +fault-evidence.json +chaos-manifest.yaml +dm-flakey-active.json +Kubernetes logs/events/snapshots ``` -理论上应包含所有成功 `PUT` 且未成功 `DELETE` 的 key。 +### 10. 
清理 + +先确认 namespace 所有权: + +```bash +kubectl get namespace rustfs-fault-test --show-labels +kubectl get namespace rustfs-fault-test \ + -o jsonpath='{.metadata.annotations.rustfs\.com/fault-test-tenant}{"\n"}' +``` -第一版可以将 LIST 缺失作为 warning,而不是 hard fail。等 RustFS 对 LIST 一致性的目标语义确认后,再升级为 hard fail。 +清理测试资源: -## S3 workload 设计 +```bash +kubectl delete namespace rustfs-fault-test --wait=true +kubectl delete iochaos,podchaos,networkchaos \ + -n chaos-mesh \ + -l app.kubernetes.io/managed-by=rustfs-operator-fault-test \ + --ignore-not-found +``` -第一阶段建议使用 Rust 代码实现 S3 workload,而不是依赖外部 `aws` 或 `mc` CLI。 +动态 PV 是否自动删除取决于 StorageClass reclaim policy。`Retain` PV 必须由运维手动删除并清理后端数据。 -原因: +DM 场景额外清理: -- 操作历史更容易结构化记录。 -- 请求 timeout、transport error、body error 更容易准确分类。 -- 对象 hash 和操作结果可以在同一进程中关联。 -- CI 和本地依赖更少。 -- 后续可以扩展为并发 workload 和 checker replay。 +1. 删除 fault-test namespace,等待 Pod/PVC 消失。 +2. 删除四个静态 PV 和 `rustfs-fault-dm` StorageClass。 +3. 在目标节点确认 DM table 为 `linear`。 +4. 卸载四个实验卷。 +5. 删除 DM mapping。 +6. detach loop 设备并删除专用实验目录。 -建议在 `e2e/Cargo.toml` 后续增加: +示例: -```text -aws-sdk-s3 -aws-config -aws-credential-types -sha2 -rand -hex +```bash +umount /data/rustfs/rustfs-fault-lab/volume +dmsetup remove rustfs-fault-dm +losetup -d # 仅 loop 实验环境 +rm -rf /data/rustfs/rustfs-fault-lab ``` -第一版 workload 操作: +最后确认: -```text -CreateBucket -PutObject -GetObject -HeadObject -ListObjectsV2 +```bash +kubectl get nodes +kubectl -n rustfs-system get deployment +kubectl -n chaos-mesh get deployment,daemonset +kubectl get pv +kubectl get iochaos,podchaos,networkchaos -A ``` -第一版建议使用唯一 key,不要并发覆盖同一个 key。 +### 11. 
常用环境变量 -key 格式: +| 变量 | 默认值 | 说明 | +| --- | --- | --- | +| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | 必填 | 常规动态 StorageClass 或 DM 专用静态 StorageClass。 | +| `RUSTFS_FAULT_TEST_DESTRUCTIVE` | 由 Make 设置 | destructive opt-in,不应手动绕过 Make 入口。 | +| `RUSTFS_FAULT_TEST_SCENARIO` | `io-eio` | 选择七个场景之一。 | +| `RUSTFS_FAULT_TEST_NAMESPACE` | `rustfs-fault-test` | 专用测试 namespace。 | +| `RUSTFS_FAULT_TEST_TENANT` | `fault-test-tenant` | 专用测试 Tenant。 | +| `RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE` | `rustfs-system` | Operator namespace。 | +| `RUSTFS_FAULT_TEST_SERVER_IMAGE` | `rustfs/rustfs:latest` | 建议设置为已验证 digest。 | +| `RUSTFS_FAULT_TEST_ARTIFACTS` | `target/fault-tests/artifacts` | 当前场景 artifacts 目录。 | +| `RUSTFS_FAULT_TEST_TIMEOUT_SECONDS` | `300` | Kubernetes/Tenant 等待超时。 | +| `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `900` | Chaos 故障持续时间。 | +| `RUSTFS_FAULT_TEST_PERCENT` | `20`;`disk-full` 为 `100` | 支持百分比的故障注入比例。 | +| `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `4000` | workload 对象数量。 | +| `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `50` | prefill、故障 workload、恢复重写和 checker 的最大并发。 | +| `RUSTFS_FAULT_TEST_SEED` | 随机生成 | 可选 u64 seed;设置后可重放尺寸顺序和对象内容。 | +| `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | 单个 S3 操作超时。 | +| `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否强制要求客户端看到故障。 | +| `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh resource namespace。 | +| `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | Warp mixed workload 时间。 | +| `RUSTFS_FAULT_TEST_DM_NAME` | 无 | DM mapping 名称,DM 场景必填。 | +| `RUSTFS_FAULT_TEST_DM_NODE` | 无 | DM 目标 Kubernetes 节点,DM 场景必填。 | +| `RUSTFS_FAULT_TEST_DM_MOUNT_PATH` | 无 | DM Local PV 路径,DM 场景必填。 | +| `RUSTFS_FAULT_TEST_DM_FAULT_TABLE` | 无 | 注入时的 dmsetup table,DM 场景必填。 | +| `RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE` | 注入前 table | 可选恢复 table。 | +| `RUSTFS_FAULT_TEST_DM_HELPER_IMAGE` | `rancher/mirrored-library-busybox:1.37.0` | privileged helper image。 | + +## English Operations Manual + +### 1. 
Purpose and scope + +This manual describes how to run RustFS fault-injection tests in a dedicated, real Kubernetes test cluster. The target is the test Tenant created by the RustFS Operator, not an existing application Tenant or the production Operator control plane. + +Each `make fault-test` invocation runs exactly one destructive test selected by `RUSTFS_FAULT_TEST_SCENARIO`. Run all seven scenarios serially. + +The suite has two operational groups: + +1. Six Kubernetes-native scenarios using Chaos Mesh and a dynamic StorageClass. +2. One `dm-flakey` scenario using dedicated static Local PVs, Linux Device Mapper, and a privileged helper Pod. + +Running `dm-flakey` does not require reinstalling Kubernetes, the RustFS Operator, Chaos Mesh, or the Rust toolchain. Only the fault-test Tenant storage fixture must be replaced with dedicated static Local PVs. + +### 2. Safety requirements + +- Run only in a dedicated test cluster; never use a production or shared development cluster. +- The current context must not start with `kind-`. +- Never point the configured namespace or Tenant at existing application resources. +- Use a dynamically provisioned StorageClass for regular scenarios. +- Use a dedicated `kubernetes.io/no-provisioner` StorageClass and dedicated devices or loop files for `dm-flakey`. +- Never reuse an existing RustFS data directory for a DM Local PV. +- Run scenarios serially because the default namespace, Tenant, and local port `19000` are shared. +- On failure, preserve artifacts before removing the fault and test resources. 
+ +The default test resources are: ```text -fault-test//small/ -fault-test//medium/ -fault-test//large/ +namespace: rustfs-fault-test +tenant: fault-test-tenant ``` -对象大小建议: +An existing namespace must contain both ownership markers: -| 类型 | 大小 | -| --- | --- | -| small | 4 KiB | -| medium | 64 KiB | -| large | 1 MiB | -| xlarge | 8 MiB | +```text +app.kubernetes.io/managed-by=rustfs-operator-fault-test +rustfs.com/fault-test-tenant=fault-test-tenant +``` -第一版不建议默认使用太大对象,避免故障测试运行过慢。 +The runner never claims an unmarked namespace. -## 初始故障场景优先级 +### 3. Scenario catalog -| 优先级 | 场景 | 后端 | 目的 | +| Scenario | Backend | Isolation | Main validation | | --- | --- | --- | --- | -| P0 | `io-eio` | Chaos Mesh `IOChaos` | 模拟单个 RustFS 数据卷读写返回 `EIO`。 | -| P0 | `pod-kill-one` | Chaos Mesh `PodChaos` | 模拟一个 RustFS Pod 死亡和 StatefulSet 恢复。 | -| P1 | `network-partition-one` | Chaos Mesh `NetworkChaos` | 模拟一个 RustFS Pod 与集群网络分区。 | -| P1 | `io-read-mistake` | Chaos Mesh `IOChaos` | 模拟读路径返回错误字节,即静默坏块。 | -| P1 | `disk-full` | Chaos Mesh `IOChaos` errno 28 | 在不消耗节点磁盘的情况下验证 ENOSPC 行为。 | -| P2 | `direct-volume-corruption` | 存储后端专用测试环境 | 模拟已经落盘的数据被破坏。 | -| P2 | `node-restart` | 集群节点运维接口 | 模拟节点重启。 | -| P3 | `dm-flakey` | device mapper / loop device | 更接近真实块设备故障。 | -| P3 | `warp-under-chaos` | MinIO Warp + chaos | 使用独立 benchmark bucket 分析故障期间性能,避免影响 correctness 对象。 | +| `io-eio` | Chaos Mesh IOChaos | Fresh Tenant/PVC | Committed objects survive EIO on one data volume. | +| `pod-kill-one` | Chaos Mesh PodChaos | Reusable Ready Tenant | A killed Pod is replaced without losing committed objects. | +| `network-partition-one` | Chaos Mesh NetworkChaos | Reusable Ready Tenant | Objects remain correct after one Pod is partitioned from its peers. | +| `io-read-mistake` | Chaos Mesh IOChaos | Fresh Tenant/PVC | A successful GET never returns altered bytes. | +| `disk-full` | Chaos Mesh IOChaos | Fresh Tenant/PVC | Committed objects survive injected ENOSPC write failures. 
| +| `warp-under-chaos` | Warp + IOChaos | Fresh Tenant/PVC | Performance is reported separately from correctness. | +| `dm-flakey` | Linux Device Mapper | Dedicated static Local PV | Objects remain correct after intermittent block-device EIO. | -`operator-restart` 可以作为独立 Operator 控制面韧性测试,但不放入本方案第一阶段的 RustFS workload fault matrix,避免混淆测试对象。 +The default workload commits or reconciles 4000 objects with concurrency 50. The size plan is generated with fixed weights and then deterministically shuffled by the seed: 4KiB 85% (3400 objects), 16KiB 10% (400), 8MiB 4% (160), and 16MiB 1% (40). The logical payload per scenario is 2,033,745,920 bytes, approximately 1.89GiB. -## P0 场景:磁盘 EIO +Object content is deterministically generated from the same seed and object index by `splitmix64-v1`. `workload-plan.json` records the seed, generator version, concurrency, size distribution, and total payload. `history.jsonl` records each key's size, SHA-256, and outcome. Set `RUSTFS_FAULT_TEST_SEED=` to replay the same size order and object content. -这是建议最先实现的场景。 +A lack of client-visible errors does not mean that injection failed. Backend state and `fault-evidence.json` are the authoritative fault evidence. -它能直接验证 RustFS 在真实集群 CSI 数据卷发生读写错误时,是否会丢失已提交对象。 +`RUSTFS_FAULT_TEST_PERCENT=20` is an injection probability for matching I/O operations, not a fixed selection of 20 percent of the objects. -目标: +### 4. Runner requirements -```text -让某一个 RustFS Pod 的某一块数据卷,在部分 READ/WRITE 调用上返回 EIO。 +The runner host needs: + +- `kubectl` +- Rust stable and Cargo with Rust edition 2024 support +- GNU Make +- A kubeconfig that can reach the target Kubernetes API +- `warp` v1.3.1 for `warp-under-chaos` +- Sufficient space for `target/fault-tests` artifacts + +Cluster-admin is recommended in a dedicated test cluster. At minimum, the account needs CRUD access to the fault-test Kubernetes resources and Chaos CRs, Pod logs/events/exec access, and permission to create the privileged DM helper Pod. 
+ +Validate the code and tools: + +```bash +rustc --version +cargo --version +kubectl version --client +make e2e-check ``` -Chaos Mesh `IOChaos` 示例: +### 5. Kubernetes and RustFS preflight -```yaml -apiVersion: chaos-mesh.org/v1alpha1 -kind: IOChaos -metadata: - name: rustfs-fault-io-eio - namespace: chaos-mesh - labels: - rustfs-fault-test/run-id: "" -spec: - action: fault - mode: one - selector: - namespaces: - - rustfs-fault-test - labelSelectors: - rustfs.tenant: fault-test-tenant - containerNames: - - rustfs - volumePath: /data/rustfs0 - path: /data/rustfs0/**/* - methods: - - READ - - WRITE - errno: 5 - percent: 20 - duration: "60s" -``` - -关键点: - -- `volumePath` 是 RustFS 容器内的 CSI 数据卷挂载路径。 -- `errno: 5` 对应 Linux `EIO`。 -- `mode: one` 表示只选择一个匹配 Pod,避免第一版故障面过大。 -- `percent: 20` 表示只影响部分 I/O 调用,避免全量不可用。 - -预期行为: - -- 故障期间 S3 请求可以失败、超时或返回 5xx。 -- RustFS 不能把错误数据作为成功响应返回。 -- 已经成功 `PUT` 的对象,在故障解除后必须 hash 一致。 -- Tenant 可以短暂 Degraded,但最终应回到 Ready。 -- Chaos 资源必须被删除。 - -## P1 场景:静默坏块 / bit rot - -EIO 是显式错误,比较容易处理;更危险的是静默损坏。 - -静默坏块的模拟方式: +```bash +kubectl config use-context +kubectl config current-context +kubectl get nodes +kubectl get crd tenants.rustfs.com +kubectl -n rustfs-system get deployment +kubectl get storageclass +``` -```text -磁盘读操作看起来成功,但返回的字节是错的。 +Regular scenarios require four schedulable nodes and four `80Gi` RWO PVCs. The fault Tenant uses required Pod anti-affinity to spread the four RustFS Pods across distinct `kubernetes.io/hostname` values. The selected StorageClass must support dynamic provisioning and must not use `kubernetes.io/no-provisioner`. Each node that hosts a fault-test PVC should have at least 100Gi available; verify capacity against the actual StorageClass topology before running. + +Do not trust the capacity displayed on a PVC alone. hostPath/local-path provisioners commonly do not enforce capacity. 
Inspect the actual node path and its backing filesystem: + +```bash +kubectl -n kube-system get configmap local-path-config -o yaml +kubectl get pv -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.hostPath.path}{"\n"}{end}' +df -h ``` -Chaos Mesh `IOChaos mistake` 示例: +The K3s default `/var/lib/rancher/k3s/storage` is often on a smaller system disk. If that filesystem has less than 100Gi available, do not use it for this suite. Deploy a dedicated dynamic provisioner/StorageClass that places new fault-test PVCs under an isolated data-disk path such as `/data/rustfs/rustfs-fault-local-path`. Do not modify or migrate existing application PVCs. -```yaml -apiVersion: chaos-mesh.org/v1alpha1 -kind: IOChaos -metadata: - name: rustfs-fault-io-read-mistake - namespace: chaos-mesh -spec: - action: mistake - mode: one - selector: - namespaces: - - rustfs-fault-test - labelSelectors: - rustfs.tenant: fault-test-tenant - containerNames: - - rustfs - volumePath: /data/rustfs0 - path: /data/rustfs0/**/* - methods: - - READ - mistake: - filling: random - maxOccurrences: 1 - maxLength: 4096 - percent: 5 - duration: "60s" +Pin a validated RustFS image digest instead of using `latest`: + +```bash +export RUSTFS_IMAGE='docker.io/rustfs/rustfs@sha256:' ``` -预期行为: +### 6. Install and validate Chaos Mesh -- RustFS 可以返回错误。 -- RustFS 可以从健康 shard 修复或读取。 -- RustFS 不能返回 `200 OK` 且 body hash 错误。 +The following example uses the validated Chaos Mesh v2.8.3 release: -这个场景是对象存储非常关键的测试,因为它验证的是“不要静默返回坏数据”。 +```bash +helm repo add chaos-mesh https://charts.chaos-mesh.org +helm repo update + +helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh \ + -n chaos-mesh --create-namespace \ + --version 2.8.3 \ + --set chaosDaemon.runtime=containerd \ + --set chaosDaemon.socketPath=/run/containerd/containerd.sock \ + --set dashboard.create=false \ + --wait --timeout 10m +``` -## P2 场景:存储后端级数据破坏 +K3s uses `/run/k3s/containerd/containerd.sock`. 
Adjust the runtime and socket path for other distributions. -真实集群不能假设能够直接访问宿主机或 CSI 后端文件。该场景必须在专用存储测试环境中,通过存储后端提供的故障工具、快照克隆或块设备测试接口实现。 +```bash +kubectl -n chaos-mesh get deployment,daemonset +kubectl get crd \ + iochaos.chaos-mesh.org \ + podchaos.chaos-mesh.org \ + networkchaos.chaos-mesh.org +``` -这个场景比 `IOChaos mistake` 更接近真实“落盘数据已经损坏”,但也更危险: +All controller-manager replicas and all target-node chaos-daemon Pods must be Ready. -- 可能破坏 RustFS 元数据。 -- 可能导致恢复语义更复杂。 -- 需要更明确的预期结果。 -- 适合作为 P2,不适合作为第一版。 +### 7. Run the regular scenarios -## 测试流程 +Set common parameters: -当前 runner 使用如下流程: +```bash +export RUSTFS_FAULT_TEST_STORAGE_CLASS= +export RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" +export RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE=rustfs-system +export RUSTFS_FAULT_TEST_NAMESPACE=rustfs-fault-test +export RUSTFS_FAULT_TEST_TENANT=fault-test-tenant +export RUSTFS_FAULT_TEST_CHAOS_NAMESPACE=chaos-mesh +export RUN_ROOT="target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" +``` -```text -1. 读取 FaultTestConfig -2. 检查 RUSTFS_FAULT_TEST_DESTRUCTIVE=1 -3. 读取当前 kube context 并拒绝 kind-* context -4. 检查 RUSTFS_FAULT_TEST_STORAGE_CLASS 已配置 -5. 根据 RUSTFS_FAULT_TEST_SCENARIO 解析 FaultScenarioSpec -6. 按场景检查 Chaos Mesh CRD 或专用 host-side 工具配置 -7. 检查 fault-test namespace 不存在,或所有权标记与配置完全匹配 -8. 根据 `FaultIsolation` reset 或复用专用 fault-test Tenant/PVC -9. namespace 不存在时由 runner 使用 create 创建带所有权标记的 fault-test namespace;不得通过 apply 认领竞态中出现的同名 namespace -10. 创建真实集群 fault-test Tenant -11. 等待 Tenant Ready -12. 启动 Tenant S3 port-forward,等待 S3 endpoint 可用 -13. 创建 run-scoped bucket -14. prefill 一批对象,记录 key、size、sha256;prefill 必须成功 -15. apply 当前 scenario 的 Chaos Mesh 资源或 host-side fault -16. 对持续型 Chaos 等待 active -17. 故障期间执行 PUT/GET mixed workload,并输出 workload-summary.json -18. 如果要求 client-visible disruption,则确认 workload 观察到了失败、超时或 unknown -19. 确认持续型 Chaos 没有早于 workload 结束恢复 -20. 删除 Chaos 或通过目标节点 helper Pod 恢复 dmsetup table -21. 等待 Tenant 再次 Ready -22. 对故障期间失败、超时或 unknown 的 PUT 使用相同 key 和内容做幂等重提交 -23. 
对全部预期 committed PUT 对象做最终 GET + sha256 校验 -24. 执行 prefix LIST 并记录 warning -25. 写入 checker-report.json 和 fault-evidence.json -26. 失败时收集 Kubernetes artifacts、故障状态和故障资源 describe/yaml -``` - -伪代码: - -```rust -#[tokio::test] -#[ignore = "destructive fault scenario; run through `make fault-test`"] -async fn fault_io_eio_preserves_committed_objects() -> Result<()> { - let config = FaultTestConfig::from_env()?; - - config.require_destructive_enabled()?; - chaos_mesh::require_iochaos_crd(&config.cluster)?; - - let result = async { - resources::reset_fault_tenant_resources(&config.cluster)?; - resources::apply_fault_tenant_resources(&config.cluster)?; - - let client = kube_client::default_client().await?; - let tenants = kube_client::tenant_api(client.clone(), &config.cluster.test_namespace); - wait::wait_for_tenant_ready( - tenants, - &config.cluster.tenant_name, - config.cluster.timeout, - ) - .await?; - - let mut port_forward = PortForwardSpec::start_tenant_io(&config.cluster)?; - let s3 = s3_workload::Client::from_tenant_port_forward( - &config.cluster, - &mut port_forward, - ) - .await?; - - let mut history = history::Recorder::new("io-eio")?; - s3.create_bucket().await?; - s3.prefill_objects(&mut history).await?; - - let chaos = chaos_mesh::IoChaos::eio_on_rustfs_volume( - &config.cluster, - "/data/rustfs0", - 20, - Duration::from_secs(60), - ); - - let guard = chaos.apply()?; - s3.run_mixed_workload(&mut history).await?; - drop(guard); - - wait::wait_for_tenant_ready( - kube_client::tenant_api(client, &config.cluster.test_namespace), - &config.cluster.tenant_name, - config.cluster.timeout, - ) - .await?; - - let report = checker::check_s3_history(&s3, &history).await?; - report.require_success()?; - - Ok(()) - } - .await; - - if result.is_err() { - ArtifactCollector::new(&config.artifacts_dir) - .collect_kubernetes_snapshot("fault_io_eio_preserves_committed_objects", &config)?; - } - - result -} -## Chaos Mesh 模块设计 - -`chaos_mesh.rs` 当前提供这些能力: - -```rust -pub fn 
require_iochaos_crd(config: &ClusterTestConfig) -> Result<()>; -pub fn require_podchaos_crd(config: &ClusterTestConfig) -> Result<()>; -pub fn require_networkchaos_crd(config: &ClusterTestConfig) -> Result<()>; -pub fn cleanup_managed_iochaos(config: &ClusterTestConfig, namespace: &str) -> Result<()>; -pub fn cleanup_managed_podchaos(config: &ClusterTestConfig, namespace: &str) -> Result<()>; -pub fn cleanup_managed_networkchaos(config: &ClusterTestConfig, namespace: &str) -> Result<()>; -pub fn apply_iochaos(config: &ClusterTestConfig, spec: &IoChaosSpec) -> Result; -pub fn apply_podchaos(config: &ClusterTestConfig, spec: &PodChaosSpec) -> Result; -pub fn apply_networkchaos(config: &ClusterTestConfig, spec: &NetworkChaosSpec) -> Result; - -pub enum IoChaosAction { - Fault { errno: u8 }, - Mistake { - filling: String, - max_occurrences: u8, - max_length: usize, - }, -} -pub struct IoChaosSpec { - pub name: String, - pub namespace: String, - pub run_id: String, - pub scenario: String, - pub target_namespace: String, - pub tenant_name: String, - pub container_name: String, - pub volume_path: String, - pub methods: Vec, - pub action: IoChaosAction, - pub percent: u8, - pub duration: Duration, -} -``` - -实现要求: - -- 所有 `kubectl` 命令必须通过现有 `framework::kubectl` 和 `framework::command` 边界。 -- apply 前检查 CRD 是否存在。 -- apply 后保存 manifest;失败时可以 `kubectl describe/get yaml` 保存到 artifacts。 -- `ChaosGuard::delete()` 必须明确返回结果;`Drop` 只做 best-effort cleanup,不应 panic。 -- 每个资源都带 `rustfs-fault-test/run-id` label。 -- 每个资源都带 `rustfs-fault-test/scenario` label。 -- 每个资源都带 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` label,便于按 suite 清理残留。 -- 允许按 label 清理上一次异常残留。 - -## S3 workload 模块设计 - -`s3_workload.rs` 当前提供: - -```rust -pub struct S3WorkloadClient { - bucket: String, - request_timeout: Duration, -} - -pub struct ObjectSpec { - pub key: String, - pub size_bytes: usize, - pub sha256: String, -} - -impl S3WorkloadClient { - pub async fn new(...) 
-> Result; - pub async fn create_bucket(&self, recorder: &mut Recorder) -> Result; - pub async fn put_object(&self, object: &ObjectSpec, recorder: &mut Recorder) -> Result; - pub async fn get_object_result(&self, key: &str, recorder: &mut Recorder) -> Result; - pub async fn head_object(&self, key: &str, recorder: &mut Recorder) -> Result; - pub async fn list_prefix(&self, prefix: &str, recorder: &mut Recorder) -> Result>>; -} -``` - -注意点: - -- 每个请求必须有明确 timeout。 -- 不要在 workload 层做无限 retry。 -- 如果要 retry,必须记录每次尝试,而不是只记录最终结果。 -- body 读取失败不能记为 `failed`,应记为 `unknown`。 -- `PUT` 返回成功后才进入 committed set。 - -## Checker report 设计 - -最终 report 建议保存为 JSON: - -```json -{ - "scenario": "io-eio", - "run_id": "run-123", - "committed_puts": 200, - "missing_committed_objects": [], - "hash_mismatches": [], - "successful_corrupted_reads": [], - "unknown_writes_materialized": [], - "list_warnings": [], - "tenant_recovered": true, - "passed": true -} -``` - -hard fail 条件: - -1. 成功 `PUT` 的对象最终 `GET` 不到。 -2. 成功 `PUT` 的对象最终 `GET` hash 不一致。 -3. 任意成功 `GET` 返回的 body hash 与预期不一致。 -4. 故障解除后 Tenant 在 timeout 内没有回到 Ready。 -5. Chaos 资源删除失败并仍然残留。 -6. RustFS Pod 进入不可恢复 CrashLoopBackOff。 - -允许出现: - -1. 故障期间 S3 请求失败。 -2. 故障期间 S3 请求 timeout。 -3. 故障期间 port-forward 连接中断。 -4. Tenant 短暂 Degraded。 -5. unknown write 最终存在或不存在。 -6. 故障期间 LIST 不完整。 - -## artifacts 设计 - -每次 fault run 至少应该保存: +Run one scenario: -```text -history.jsonl -checker-report.json -chaos-manifest.yaml -chaos-describe.txt -chaos-describe-.txt -chaos-.yaml -events.yaml -pv-paths.txt -rustfs-pods-current.log -rustfs-pods-previous.log -tenant-describe.txt -pods-describe.txt +```bash +RUSTFS_FAULT_TEST_SCENARIO=io-eio \ +RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/io-eio" \ +make fault-test +``` + +`make fault-test` sets `RUSTFS_FAULT_TEST_DESTRUCTIVE=1` internally. Do not bypass the Make entry point to invoke the destructive test directly. + +Continuously monitor nodes, any existing application Tenant, and the fault-test Tenant. 
If a non-target resource becomes non-Ready, remove the current managed Chaos resource, stop subsequent scenarios, and collect evidence. + +Run all six regular scenarios in the recommended order and stop after the first failure: + +```bash +for scenario in \ + io-eio \ + pod-kill-one \ + network-partition-one \ + io-read-mistake \ + disk-full \ + warp-under-chaos +do + RUSTFS_FAULT_TEST_SCENARIO="$scenario" \ + RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/$scenario" \ + make fault-test || break +done +``` + +Run `warp --version` before `warp-under-chaos`. Warp output is performance evidence and does not determine the correctness verdict. + +### 8. Dedicated `dm-flakey` procedure + +#### 8.1 No cluster reinstall is required + +After running the six regular scenarios, keep Kubernetes, the Operator, Chaos Mesh, and the Rust toolchain. Stop other fault-test processes, prepare four dedicated static Local PVs, put one PV behind Device Mapper, and run the scenario with the static StorageClass. + +The runner resets the fault-test Tenant and PVCs, but it does not create host block devices, static PVs, or the StorageClass. 
+ +#### 8.2 Allow the privileged helper + +For an existing namespace: + +```bash +kubectl label namespace rustfs-fault-test \ + pod-security.kubernetes.io/enforce=privileged \ + --overwrite ``` -其中最关键的是: +To pre-create the namespace before the first run: + +```bash +kubectl create namespace rustfs-fault-test +kubectl label namespace rustfs-fault-test \ + app.kubernetes.io/managed-by=rustfs-operator-fault-test \ + pod-security.kubernetes.io/enforce=privileged +kubectl annotate namespace rustfs-fault-test \ + rustfs.com/fault-test-tenant=fault-test-tenant +``` -- `history.jsonl`:复盘客户端看到的世界。 -- `checker-report.json`:复盘 correctness verdict。 -- `chaos-describe-.txt` / `chaos-.yaml`:在故障资源被清理前保留 Chaos Mesh 现场。 -- `rustfs-pods-current.log`:定位 RustFS 如何处理故障。 -- `events.yaml`:定位 Kubernetes 层是否出现调度、挂载、重启问题。 -- `pv-paths.txt`:定位具体 PVC/PV、StorageClass 和节点映射。 +#### 8.3 Prepare four dedicated volumes -## Makefile 入口 +Prefer four dedicated test block devices. Loop files are acceptable only in a lab. Each backing filesystem should be at least `90Gi`, while static PV capacity is fixed at `80Gi`. -使用独立入口: +Lab loop example on the target DM node; skip `truncate` and `losetup` when using a real dedicated block device: ```bash -RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test +export LAB=/data/rustfs/rustfs-fault-lab +export DM_NAME=rustfs-fault-dm + +mkdir -p "$LAB/volume" +truncate -s 90G "$LAB/disk.img" +BACKING=$(losetup --find --show "$LAB/disk.img") +SECTORS=$(blockdev --getsz "$BACKING") +dmsetup create "$DM_NAME" --table "0 $SECTORS linear $BACKING 0" +mkfs.ext4 -F "/dev/mapper/$DM_NAME" +mount "/dev/mapper/$DM_NAME" "$LAB/volume" ``` -该入口使用当前 `kubectl` context,拒绝 Kind,并使用 `RUSTFS_FAULT_TEST_STORAGE_CLASS` 指向的真实集群测试存储。 +On each of the other three nodes, format and mount its dedicated device directly at `/data/rustfs/rustfs-fault-lab/volume`. Never format an existing RustFS data device. 
-`e2e/tests/faults.rs` 只有一个 ignored dispatcher。运行时通过 `RUSTFS_FAULT_TEST_SCENARIO` 从 7 个 catalog 场景中选择并执行一个,因此测试结果不会把未选中的场景计为通过。故障测试只面向真实 Kubernetes 测试集群,不保留 Kind 后端;Kind e2e 生命周期测试是独立部分。 +#### 8.4 Create the static StorageClass and Local PVs -示例: +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: rustfs-fault-dm + labels: + app.kubernetes.io/managed-by: rustfs-operator-fault-test +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Retain +``` + +Create four copies of this PV template, with a unique name and the corresponding node affinity: + +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: rustfs-fault-dm- + labels: + app.kubernetes.io/managed-by: rustfs-operator-fault-test +spec: + capacity: + storage: 80Gi + volumeMode: Filesystem + accessModes: [ReadWriteOnce] + persistentVolumeReclaimPolicy: Retain + storageClassName: rustfs-fault-dm + local: + path: /data/rustfs/rustfs-fault-lab/volume + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: [] +``` + +Verify that all four PVs are `Available` before running the test. + +#### 8.5 Run `dm-flakey` + +The configured node must match Kubernetes `metadata.name`, and the mount path must exactly match the target PV `spec.local.path`. + +Run `blockdev --getsz ` on the target node first, then set that value as `SECTORS` on the runner host. 
```bash -# 默认场景:io-eio;make fault-test 会注入 RUSTFS_FAULT_TEST_DESTRUCTIVE=1 -RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test +export DM_NODE= +export DM_MOUNT_PATH=/data/rustfs/rustfs-fault-lab/volume +export BACKING_DEVICE= +export SECTORS= + +RUSTFS_FAULT_TEST_SCENARIO=dm-flakey \ +RUSTFS_FAULT_TEST_STORAGE_CLASS=rustfs-fault-dm \ +RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" \ +RUSTFS_FAULT_TEST_DM_NAME=rustfs-fault-dm \ +RUSTFS_FAULT_TEST_DM_NODE="$DM_NODE" \ +RUSTFS_FAULT_TEST_DM_MOUNT_PATH="$DM_MOUNT_PATH" \ +RUSTFS_FAULT_TEST_DM_FAULT_TABLE="0 $SECTORS flakey $BACKING_DEVICE 0 1 15" \ +RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/dm-flakey" \ +make fault-test +``` + +The fault table alternates between one second up and fifteen seconds down. The helper verifies the Pod-to-PVC-to-PV-to-node-to-mount relationship before loading the table, and restores the original linear table afterward. -# 运行其他场景 -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=pod-kill-one make fault-test -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=network-partition-one make fault-test -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=io-read-mistake make fault-test -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=disk-full make fault-test -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=dm-flakey make fault-test -RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO=warp-under-chaos make fault-test +#### 8.6 Emergency DM recovery + +If the test process exits while the target is still flakey, restore it immediately on the target node: + +```bash +dmsetup suspend --noflush rustfs-fault-dm +dmsetup load rustfs-fault-dm \ + --table "0 $SECTORS linear $BACKING_DEVICE 0" +dmsetup resume --noudevsync rustfs-fault-dm +dmsetup table rustfs-fault-dm ``` -普通开发检查仍然使用: +Confirm that the table is `linear` before deleting Pods/PVCs or unmounting the filesystem. + +### 9. 
Acceptance criteria + +For every scenario: + +- `make fault-test` exits with status 0. +- `fault-evidence.json` reports `injected=true`, `active_during_workload=true`, and `recovered=true`. +- `checker-report.json` reports `committed_puts=4000`. +- `missing_committed_objects`, `hash_mismatches`, `successful_corrupted_reads`, and `list_warnings` are empty. +- The fault-test Tenant returns to Ready. + +`RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` defaults to `false`. A run with no client-visible failures is still acceptable when the backend evidence proves that the fault was selected and injected. + +Key artifacts are `workload-plan.json`, `history.jsonl`, `workload-summary.json`, `checker-report.json`, `fault-evidence.json`, Chaos manifests/status, DM snapshots, and Kubernetes logs/events. + +### 10. Cleanup + +Verify namespace ownership before deletion, then remove the test namespace and any managed Chaos resources: ```bash -make e2e-check -make pre-commit +kubectl get namespace rustfs-fault-test --show-labels +kubectl delete namespace rustfs-fault-test --wait=true +kubectl delete iochaos,podchaos,networkchaos \ + -n chaos-mesh \ + -l app.kubernetes.io/managed-by=rustfs-operator-fault-test \ + --ignore-not-found ``` -不要把 destructive 场景混进普通 `make e2e-live-run`。 +Dynamic PV deletion depends on the StorageClass reclaim policy. Retained PVs and backend data require manual cleanup. -## 当前可交付范围 +For `dm-flakey`, delete the namespace first, then the four static PVs and StorageClass. Confirm a linear DM table, unmount all four lab filesystems, remove the DM mapping, detach any loop devices, and delete only the dedicated lab directory.
-当前 fault suite 实现 7 个真实集群 runner: +```bash +umount /data/rustfs/rustfs-fault-lab/volume +dmsetup remove rustfs-fault-dm +losetup -d # lab loop setup only +rm -rf /data/rustfs/rustfs-fault-lab +``` -```text -fault_io_eio_preserves_committed_objects -fault_pod_kill_one_preserves_committed_objects -fault_network_partition_one_preserves_committed_objects -fault_io_read_mistake_rejects_corrupt_reads -fault_disk_full_preserves_committed_objects -fault_dm_flakey_preserves_committed_objects -fault_warp_under_chaos_reports_performance_separately -``` - -这些 runner 共享同一条 correctness 验证链路: - -1. destructive/current real Kubernetes context guard。 -2. 按场景检查 Chaos Mesh CRD 或专用 host-side 工具配置。 -3. 启动前按 `app.kubernetes.io/managed-by=rustfs-operator-fault-test` 清理上次异常残留的 Chaos 资源。 -4. reset 前验证 namespace 所有权标记;未标记或 Tenant 不匹配时 fail closed。 -5. Fresh/Dedicated 场景 reset Tenant/PVC;Pod Kill 和网络分区可复用已验证所有权的 Tenant。 -6. Tenant 创建和 Ready 等待。 -7. S3 bucket 创建。 -8. S3 prefill 对象并记录 hash;prefill 阶段必须明确成功,避免空用例通过。 -9. apply 对应故障:Chaos Mesh `IOChaos` / `PodChaos` / `NetworkChaos`,或目标节点 helper Pod 执行 dm-flakey、Warp under chaos。 -10. 对持续型 Chaos 资源等待进入 active,再开始故障 workload。 -11. 故障期间持续读写并输出 `workload-summary.json`。 -12. 对持续型故障确认 workload 没有跑出故障窗口。 -13. 故障 workload 失败、故障证据不足或 Chaos 删除失败时,先保存 describe/yaml 或 host fault 输出,再触发 cleanup。 -14. 删除 Chaos 资源,或恢复 dmsetup table 并删除 helper Pod。 -15. Tenant 恢复 Ready 等待。 -16. 恢复后幂等重提交未确认 PUT,并要求全部预期对象进入 committed 集合。 -17. 所有 committed `PUT` 对象最终 `GET + sha256` 校验。 -18. 恢复后执行 `LIST prefix`,缺失项先作为 warning。 -19. AWS SDK error 按 service failure / timeout / dispatch-response unknown 分类写入 history。 -20. history、workload summary、fault evidence 和 checker report 输出。 -21. 
失败时 artifacts 收集。 - -这个版本已经能证明系统从“占位骨架”升级为“真实故障注入 + 数据正确性校验”。 - -## 后续增强计划 - -当前 catalog 包含 7 个 real-cluster scenario,由一个 dispatcher 精确选择执行。后续工作重点是提高故障强度、判定模型和长稳覆盖。 - -### Phase 1:runner hardening - -- 在测试环境逐个验证 7 个 executable scenario 的前置条件、故障注入、清理和 artifacts 输出。 -- 为 PodChaos、NetworkChaos、IOChaos mistake 补充更细的 CRD status 断言。 -- 保持 `fault-evidence.json` 的后端状态结构稳定,便于 CI artifact 聚合和历史对比。 -- 保持每个 scenario 独立选择执行,避免多个故障在同一次测试中相互污染。 - -验收: - -- `make e2e-check` 通过。 -- `RUSTFS_FAULT_TEST_STORAGE_CLASS= RUSTFS_FAULT_TEST_SCENARIO= make fault-test` 可在当前真实 Kubernetes 测试集群逐个运行 scenario,并拒绝 Kind。 -- 如果 committed object 丢失,测试失败。 -- 如果 successful GET 返回错误字节,测试失败。 -- 如果 workload 跑出 IOChaos active 窗口,测试失败。 -- fault runner 不进入 Kind e2e case inventory;其边界是 `rustfs-workload/fault-injection`。 -- 每个 scenario 都能在失败时留下足够定位信息。 -- 每个 scenario 结束后能清理自己创建的 Chaos 资源、helper Pod 或恢复 dmsetup table。 - -### Phase 2:一致性模型增强 - -- 引入 same-key overwrite、delete、multipart、prefix/list 等更接近 Jepsen register/set 模型的 workload。 -- 将 operation history 扩展成可回放的事件日志,明确 invoke/ok/fail/info。 -- 在 checker 中区分 linearizable、eventual recovery、data corruption、availability degradation。 - -验收: - -- 成功写入的对象不得丢失。 -- 成功读取不得返回错误字节。 -- List 缺失、陈旧读、超时、服务错误分别记录,不能混成同一种 failure。 - -### Phase 3:长稳和性能 - -- 增加长时间 soak runner。 -- 增加随机但可复现的故障调度。 -- 将 Warp 结果固定为性能/压力信号,不作为 correctness verdict。 - -注意: - -- 性能结果和 correctness verdict 必须分离。 -- 压测失败不等于数据错误。 -- 数据错误永远是 hard fail。 - -### Phase 4:块设备级故障实验室 - -- 研究 `dm-flakey`、`dm-error`、loop device-backed PV。 -- 只在 Linux runner 或专用环境启用。 -- 不进入默认 fault-test 流程。 -- 现有 dm-flakey runner 通过 `RUSTFS_FAULT_TEST_DM_*` 显式接入专用设备映射。 -- 后续可以在专用 Linux runner 上扩展 `dm-error`、loop device-backed PV 和更细粒度的 I/O 延迟/丢写模型。 -- 这些场景只进入明确标记的专用环境,不进入默认 fault-test 流程。 - -这个方向更接近真实磁盘坏块,但环境成本明显更高,必须保持强隔离。 - -## 与其他测试框架的关系 - -| 框架或工具 | 当前项目定位 | -| --- | --- | -| 共享测试基础设施 | Operator 编排、Tenant 生命周期、artifacts 收集。 | -| Chaos Mesh | Kubernetes-native nemesis,负责制造故障。 | -| Jepsen-like checker | 判断对象存储 correctness,不制造故障。 | -| 
MinIO Mint | 后续用于 S3 API 兼容性,不作为故障 checker。 | -| MinIO Warp | 用于故障期间性能压测,不作为 correctness verdict。 | -| COSBench | 后续用于大规模对象存储压测。 | -| Ceph s3-tests | 后续用于 S3 行为兼容性参考。 | -| Ceph Teuthology | 借鉴大规模编排思想,当前不直接引入。 | -| Ozone fault injection | 借鉴 FUSE/agent 精细磁盘故障思想,作为后续增强。 | +Finally verify nodes, the Operator, Chaos Mesh, PVs, and remaining Chaos resources. -当前最优组合: +### 11. Environment variables -```text -RustFS real-cluster fault-test runner - + Chaos Mesh - + Rust-native S3 workload - + Jepsen-like object checker -``` - -## 实现注意事项 - -- 所有外部调用必须有 timeout。 -- workload 不要无限 retry。 -- retry 必须记录每次尝试。 -- 不要把 transport unknown 错误归类为 definite failed。 -- 不要把 performance degradation 误判为 correctness failure。 -- 故障资源必须总是 best-effort cleanup。 -- artifacts 中不要记录密钥明文。 -- 第一版避免覆盖同一个 key,降低 checker 复杂度。 -- 后续再逐步加入 same-key overwrite、delete、multipart、LIST consistency。 - -## 参考资料 - -- [Chaos Mesh IOChaos](https://chaos-mesh.org/docs/simulate-io-chaos-on-kubernetes/) -- [Chaos Mesh Documentation](https://chaos-mesh.org/docs/) -- [Jepsen](https://jepsen.io/) -- [MinIO Warp](https://docs.min.io/warp/) -- [COSBench](https://github.com/intel-cloud/cosbench) -- [Ceph s3-tests](https://github.com/ceph/s3-tests) +| Variable | Default | Purpose | +| --- | --- | --- | +| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | required | Dynamic class for regular scenarios or dedicated static class for DM. | +| `RUSTFS_FAULT_TEST_DESTRUCTIVE` | set by Make | Destructive opt-in; do not bypass the Make entry point. | +| `RUSTFS_FAULT_TEST_SCENARIO` | `io-eio` | Selects one of the seven scenarios. | +| `RUSTFS_FAULT_TEST_NAMESPACE` | `rustfs-fault-test` | Dedicated test namespace. | +| `RUSTFS_FAULT_TEST_TENANT` | `fault-test-tenant` | Dedicated test Tenant. | +| `RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE` | `rustfs-system` | Operator namespace. | +| `RUSTFS_FAULT_TEST_SERVER_IMAGE` | `rustfs/rustfs:latest` | Pin a validated digest in real runs. 
| +| `RUSTFS_FAULT_TEST_ARTIFACTS` | `target/fault-tests/artifacts` | Current scenario artifact directory. | +| `RUSTFS_FAULT_TEST_TIMEOUT_SECONDS` | `300` | Kubernetes/Tenant wait timeout. | +| `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `900` | Chaos duration. | +| `RUSTFS_FAULT_TEST_PERCENT` | `20`; `100` for `disk-full` | Injection percentage where supported. | +| `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `4000` | Workload object count. | +| `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `50` | Maximum concurrency for prefill, fault workload, recovery writes, and checker reads. | +| `RUSTFS_FAULT_TEST_SEED` | generated randomly | Optional u64 seed for replaying the size order and object content. | +| `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | S3 operation timeout. | +| `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | Require client-visible disruption when enabled. | +| `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Namespace for Chaos resources. | +| `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | Warp mixed workload duration. | +| `RUSTFS_FAULT_TEST_DM_NAME` | unset | DM mapping name; required for DM. | +| `RUSTFS_FAULT_TEST_DM_NODE` | unset | Target Kubernetes node; required for DM. | +| `RUSTFS_FAULT_TEST_DM_MOUNT_PATH` | unset | Target Local PV path; required for DM. | +| `RUSTFS_FAULT_TEST_DM_FAULT_TABLE` | unset | Fault dmsetup table; required for DM. | +| `RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE` | original table | Optional explicit recovery table. | +| `RUSTFS_FAULT_TEST_DM_HELPER_IMAGE` | `rancher/mirrored-library-busybox:1.37.0` | Privileged helper image. | diff --git a/README.md b/README.md index 4b18a6a..93baa12 100755 --- a/README.md +++ b/README.md @@ -81,14 +81,16 @@ CI (`.github/workflows/ci.yml`) runs Rust tests (including `nextest`), `cargo fm ### Run fault tests on a real Kubernetes cluster -Fault tests are separate from the Kind e2e workflow. 
They use the current kubectl context, reject `kind-*` contexts, reset a dedicated fault-test Tenant, and require Chaos Mesh plus a dynamic StorageClass: +Fault tests are separate from the Kind e2e workflow. They use the current kubectl context, reject `kind-*` contexts, and run one explicitly selected destructive scenario at a time: ```bash kubectl config use-context -RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test +RUSTFS_FAULT_TEST_SCENARIO=io-eio \ +RUSTFS_FAULT_TEST_STORAGE_CLASS= \ +make fault-test ``` -The test runner creates the default `rustfs-fault-test` namespace with ownership metadata before creating the credential Secret and Tenant. Override it only with another dedicated test namespace using `RUSTFS_FAULT_TEST_NAMESPACE`. If the namespace already exists, destructive reset is allowed only when its `app.kubernetes.io/managed-by` label and `rustfs.com/fault-test-tenant` annotation match the configured fault-test Tenant. The runner never adds these ownership markers to an existing namespace. +See the bilingual [Fault Injection Operations Manual](FAULT_INJECTION_TEST_PLAN.md) for cluster preparation, Chaos Mesh installation, all seven scenarios, the dedicated `dm-flakey` Local PV procedure, acceptance criteria, emergency recovery, and cleanup. Contribution workflow, commit style, and PR expectations: [`CONTRIBUTING.md`](CONTRIBUTING.md). diff --git a/e2e/README.md b/e2e/README.md index 70915f3..1ffce49 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -88,7 +88,7 @@ make e2e-live-run The harness refuses to run live tests unless the active Kubernetes context matches the configured dedicated Kind context. 
-Fault tests have separate safety defaults and environment variables: +Fault tests have separate safety defaults and run exactly one selected scenario per invocation: ```text context: current non-Kind kubectl context @@ -96,20 +96,20 @@ test namespace: rustfs-fault-test tenant name: fault-test-tenant storage class: required via RUSTFS_FAULT_TEST_STORAGE_CLASS artifacts: target/fault-tests/artifacts +PVCs: 4 × 80Gi +objects: 4000 with seeded weighted sizes +concurrency: 50 ``` Run them independently from the Kind lifecycle: ```bash -RUSTFS_FAULT_TEST_STORAGE_CLASS= make fault-test +RUSTFS_FAULT_TEST_SCENARIO=io-eio \ +RUSTFS_FAULT_TEST_STORAGE_CLASS= \ +make fault-test ``` -The runner creates an absent namespace through `kubectl create` before applying the credential Secret and Tenant. It refuses to reset or claim an existing namespace unless these values already match: - -```text -app.kubernetes.io/managed-by=rustfs-operator-fault-test -rustfs.com/fault-test-tenant= -``` +The runner creates an absent namespace with ownership metadata and refuses to reset or claim an existing namespace unless its ownership markers match. See the bilingual [Fault Injection Operations Manual](../FAULT_INJECTION_TEST_PLAN.md) for prerequisites, all seven scenarios, the dedicated `dm-flakey` storage procedure, validation, recovery, and cleanup. ## Non-live validation diff --git a/e2e/src/framework/checker.rs b/e2e/src/framework/checker.rs index 3ca58cf..7f72b5f 100644 --- a/e2e/src/framework/checker.rs +++ b/e2e/src/framework/checker.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use anyhow::{Result, ensure}; +use futures::{StreamExt, stream}; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet}; @@ -50,15 +51,16 @@ impl CheckerReport { pub async fn check_s3_history( s3: &S3WorkloadClient, - recorder: &mut Recorder, + recorder: &Recorder, tenant_recovered: bool, + concurrency: usize, ) -> Result { - let initial_records = recorder.records().to_vec(); + let initial_records = recorder.records(); let committed = committed_puts(&initial_records); let unknown_writes = unknown_puts(&initial_records); let mut report = CheckerReport { - scenario: recorder.scenario().to_string(), - run_id: recorder.run_id().to_string(), + scenario: recorder.scenario(), + run_id: recorder.run_id(), committed_puts: committed.len(), missing_committed_objects: Vec::new(), hash_mismatches: Vec::new(), @@ -69,22 +71,44 @@ pub async fn check_s3_history( passed: false, }; - for (key, expected_hash) in &committed { - match s3.get_object(key, recorder).await? { + let mut committed_results = + stream::iter(committed.clone().into_iter().map(|(key, expected_hash)| { + let s3 = s3.clone(); + let recorder = recorder.clone(); + async move { + let body = s3.get_object(&key, &recorder).await?; + Ok::<_, anyhow::Error>((key, expected_hash, body)) + } + })) + .buffer_unordered(concurrency); + while let Some(result) = committed_results.next().await { + let (key, expected_hash, body) = result?; + match body { Some(body) => { let actual_hash = sha256_hex(&body); - if actual_hash != *expected_hash { + if actual_hash != expected_hash { report.hash_mismatches.push(format!( "{key}: expected {expected_hash}, got {actual_hash}" )); } } - None => report.missing_committed_objects.push(key.clone()), + None => report.missing_committed_objects.push(key), } } - for (key, attempted_hash) in &unknown_writes { - if let Some(body) = s3.get_object(key, recorder).await? 
{ + let mut unknown_results = + stream::iter(unknown_writes.into_iter().map(|(key, attempted_hash)| { + let s3 = s3.clone(); + let recorder = recorder.clone(); + async move { + let body = s3.get_object(&key, &recorder).await?; + Ok::<_, anyhow::Error>((key, attempted_hash, body)) + } + })) + .buffer_unordered(concurrency); + while let Some(result) = unknown_results.next().await { + let (key, attempted_hash, body) = result?; + if let Some(body) = body { let actual_hash = sha256_hex(&body); report.unknown_writes_materialized.push(format!( "{key}: attempted {attempted_hash}, got {actual_hash}" @@ -92,7 +116,8 @@ pub async fn check_s3_history( } } - let prefix = ObjectSpec::key_prefix(recorder.run_id()); + let run_id = recorder.run_id(); + let prefix = ObjectSpec::key_prefix(&run_id); match s3.list_prefix(&prefix, recorder).await? { Some(keys) => { let listed = keys.into_iter().collect::>(); @@ -109,10 +134,15 @@ pub async fn check_s3_history( .push(format!("LIST prefix {prefix} did not complete")), } + report.missing_committed_objects.sort(); + report.hash_mismatches.sort(); + report.unknown_writes_materialized.sort(); + report.list_warnings.sort(); report.passed = report.tenant_recovered && report.missing_committed_objects.is_empty() && report.hash_mismatches.is_empty() - && report.successful_corrupted_reads.is_empty(); + && report.successful_corrupted_reads.is_empty() + && report.list_warnings.is_empty(); Ok(report) } diff --git a/e2e/src/framework/fault_config.rs b/e2e/src/framework/fault_config.rs index 7fa6c2e..49022d0 100644 --- a/e2e/src/framework/fault_config.rs +++ b/e2e/src/framework/fault_config.rs @@ -27,6 +27,8 @@ pub struct FaultTestConfig { pub duration: Duration, pub percent: u8, pub workload_objects: usize, + pub workload_concurrency: usize, + pub workload_seed: Option, pub request_timeout: Duration, pub require_client_disruption: bool, pub dm_name: Option, @@ -94,14 +96,16 @@ impl FaultTestConfig { duration: Duration::from_secs(env_u64( &get_env, 
"RUSTFS_FAULT_TEST_DURATION_SECONDS", - 180, + 900, )), percent: env_u8(&get_env, "RUSTFS_FAULT_TEST_PERCENT", default_percent), - workload_objects: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS", 40), + workload_objects: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS", 4000), + workload_concurrency: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY", 50), + workload_seed: env_optional_u64(&get_env, "RUSTFS_FAULT_TEST_SEED")?, request_timeout: Duration::from_secs(env_u64( &get_env, "RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS", - 3, + 30, )), require_client_disruption: env_bool( &get_env, @@ -236,6 +240,19 @@ where .unwrap_or(default) } +fn env_optional_u64(get_env: &F, name: &str) -> Result> +where + F: Fn(&str) -> Option, +{ + get_env(name) + .map(|value| { + value + .parse::() + .with_context(|| format!("{name} must be an unsigned 64-bit integer")) + }) + .transpose() +} + fn env_usize(get_env: &F, name: &str, default: usize) -> usize where F: Fn(&str) -> Option, @@ -278,10 +295,12 @@ mod tests { std::path::PathBuf::from("target/fault-tests/artifacts") ); assert_eq!(config.scenario, "io-eio"); - assert_eq!(config.duration, std::time::Duration::from_secs(180)); + assert_eq!(config.duration, std::time::Duration::from_secs(900)); assert_eq!(config.percent, 20); - assert_eq!(config.workload_objects, 40); - assert_eq!(config.request_timeout, std::time::Duration::from_secs(3)); + assert_eq!(config.workload_objects, 4000); + assert_eq!(config.workload_concurrency, 50); + assert_eq!(config.workload_seed, None); + assert_eq!(config.request_timeout, std::time::Duration::from_secs(30)); assert!(config.dm_name.is_none()); assert!(config.dm_node.is_none()); assert!(config.dm_mount_path.is_none()); @@ -305,6 +324,8 @@ mod tests { "RUSTFS_FAULT_TEST_DURATION_SECONDS" => Some("45".to_string()), "RUSTFS_FAULT_TEST_PERCENT" => Some("35".to_string()), "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS" => Some("64".to_string()), + 
"RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY" => Some("8".to_string()), + "RUSTFS_FAULT_TEST_SEED" => Some("4242".to_string()), "RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS" => Some("7".to_string()), "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION" => Some("true".to_string()), "RUSTFS_FAULT_TEST_DM_NAME" => Some("rustfs-test".to_string()), @@ -328,6 +349,8 @@ mod tests { assert_eq!(config.duration, std::time::Duration::from_secs(45)); assert_eq!(config.percent, 35); assert_eq!(config.workload_objects, 64); + assert_eq!(config.workload_concurrency, 8); + assert_eq!(config.workload_seed, Some(4242)); assert_eq!(config.request_timeout, std::time::Duration::from_secs(7)); assert!(config.require_client_disruption); assert_eq!(config.dm_name.as_deref(), Some("rustfs-test")); @@ -358,6 +381,20 @@ mod tests { assert!(result.is_err()); } + #[test] + fn invalid_workload_seed_is_rejected() { + let result = FaultTestConfig::from_env_with( + |name| match name { + "RUSTFS_FAULT_TEST_STORAGE_CLASS" => Some("fast-csi".to_string()), + "RUSTFS_FAULT_TEST_SEED" => Some("not-a-number".to_string()), + _ => None, + }, + "production-test-cluster".to_string(), + ); + + assert!(result.is_err()); + } + #[test] fn dynamic_storage_class_is_required() { assert!(validate_storage_class(r#"{"provisioner":"ebs.csi.aws.com"}"#, false).is_ok()); diff --git a/e2e/src/framework/fault_scenarios.rs b/e2e/src/framework/fault_scenarios.rs index 05ac026..7827f5c 100644 --- a/e2e/src/framework/fault_scenarios.rs +++ b/e2e/src/framework/fault_scenarios.rs @@ -213,6 +213,11 @@ impl FaultScenario { config.workload_objects >= 4, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS must be at least 4" ); + ensure!( + (1..=config.workload_objects).contains(&config.workload_concurrency), + "RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY must be between 1 and RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS ({})", + config.workload_objects + ); Ok(Self { name: spec.scenario.to_string(), @@ -266,10 +271,10 @@ mod tests { scenario.case_name, 
"fault_io_eio_preserves_committed_objects" ); - assert_eq!(scenario.duration, Duration::from_secs(180)); + assert_eq!(scenario.duration, Duration::from_secs(900)); assert_eq!(scenario.percent, 20); - assert_eq!(scenario.prefill_count(), 20); - assert_eq!(scenario.mixed_workload_count(), 20); + assert_eq!(scenario.prefill_count(), 2000); + assert_eq!(scenario.mixed_workload_count(), 2000); } #[test] @@ -280,6 +285,15 @@ mod tests { assert!(FaultScenario::from_config(&config).is_err()); } + #[test] + fn workload_concurrency_must_fit_the_object_count() { + let mut config = FaultTestConfig::for_test("real-cluster", "fast-csi"); + config.workload_objects = 4; + config.workload_concurrency = 5; + + assert!(FaultScenario::from_config(&config).is_err()); + } + #[test] fn all_cataloged_fault_scenarios_are_executable() { let mut config = FaultTestConfig::for_test("real-cluster", "fast-csi"); diff --git a/e2e/src/framework/history.rs b/e2e/src/framework/history.rs index 22468b2..99dc105 100644 --- a/e2e/src/framework/history.rs +++ b/e2e/src/framework/history.rs @@ -16,7 +16,8 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; use std::fs::{self, File}; use std::io::{BufWriter, Write}; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; +use std::sync::{Arc, Mutex, MutexGuard}; use std::time::{SystemTime, UNIX_EPOCH}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -55,8 +56,13 @@ pub struct OperationRecord { pub error: Option, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Recorder { + inner: Arc>, +} + +#[derive(Debug)] +struct RecorderState { path: PathBuf, scenario: String, run_id: String, @@ -77,30 +83,33 @@ impl Recorder { } let writer = BufWriter::new(File::create(&path)?); Ok(Self { - path, - scenario: scenario.into(), - run_id: run_id.into(), - next_id: 1, - records: Vec::new(), - writer, + inner: Arc::new(Mutex::new(RecorderState { + path, + scenario: scenario.into(), + run_id: run_id.into(), + next_id: 1, + records: 
Vec::new(), + writer, + })), }) } pub fn begin( - &mut self, + &self, kind: OperationKind, bucket: impl Into, key: Option, value_sha256: Option, size_bytes: Option, ) -> OperationRecord { - let id = format!("op-{:06}", self.next_id); - self.next_id += 1; + let mut state = self.state(); + let id = format!("op-{:06}", state.next_id); + state.next_id += 1; let started_at_ms = now_ms(); OperationRecord { id, - scenario: self.scenario.clone(), + scenario: state.scenario.clone(), kind, bucket: bucket.into(), key, @@ -115,7 +124,7 @@ impl Recorder { } pub fn finish( - &mut self, + &self, mut record: OperationRecord, outcome: OperationOutcome, http_status: Option, @@ -126,27 +135,34 @@ impl Recorder { record.http_status = http_status; record.error = error.map(|message| truncate_error(&message)); - serde_json::to_writer(&mut self.writer, &record)?; - self.writer.write_all(b"\n")?; - self.writer.flush()?; - self.records.push(record); + let mut state = self.state(); + serde_json::to_writer(&mut state.writer, &record)?; + state.writer.write_all(b"\n")?; + state.writer.flush()?; + state.records.push(record); Ok(()) } - pub fn records(&self) -> &[OperationRecord] { - &self.records + pub fn records(&self) -> Vec { + self.state().records.clone() + } + + pub fn scenario(&self) -> String { + self.state().scenario.clone() } - pub fn scenario(&self) -> &str { - &self.scenario + pub fn run_id(&self) -> String { + self.state().run_id.clone() } - pub fn run_id(&self) -> &str { - &self.run_id + pub fn path(&self) -> PathBuf { + self.state().path.clone() } - pub fn path(&self) -> &Path { - &self.path + fn state(&self) -> MutexGuard<'_, RecorderState> { + self.inner + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) } } @@ -169,12 +185,13 @@ fn truncate_error(message: &str) -> String { #[cfg(test)] mod tests { use super::{OperationKind, OperationOutcome, Recorder}; + use std::collections::BTreeSet; #[test] fn recorder_writes_jsonl_records() { let dir = 
tempfile::tempdir().expect("tempdir"); let path = dir.path().join("history.jsonl"); - let mut recorder = Recorder::create(&path, "io-eio", "run-1").expect("recorder"); + let recorder = Recorder::create(&path, "io-eio", "run-1").expect("recorder"); let record = recorder.begin( OperationKind::Put, "bucket", @@ -187,9 +204,47 @@ mod tests { .finish(record, OperationOutcome::Ok, Some(200), None) .expect("finish"); - let content = std::fs::read_to_string(path).expect("history"); + let content = std::fs::read_to_string(&path).expect("history"); assert!(content.contains("\"scenario\":\"io-eio\"")); assert!(content.contains("\"kind\":\"put\"")); assert_eq!(recorder.records().len(), 1); + assert_eq!(recorder.path(), path); + } + + #[test] + fn recorder_assigns_unique_ids_across_concurrent_writers() { + let dir = tempfile::tempdir().expect("tempdir"); + let recorder = Recorder::create(dir.path().join("history.jsonl"), "io-eio", "run-1") + .expect("recorder"); + let writers = (0..8) + .map(|writer| { + let recorder = recorder.clone(); + std::thread::spawn(move || { + for operation in 0..25 { + let record = recorder.begin( + OperationKind::Put, + "bucket", + Some(format!("{writer}-{operation}")), + Some("hash".to_string()), + Some(4), + ); + recorder + .finish(record, OperationOutcome::Ok, Some(200), None) + .expect("finish"); + } + }) + }) + .collect::>(); + for writer in writers { + writer.join().expect("writer thread"); + } + + let records = recorder.records(); + let ids = records + .iter() + .map(|record| record.id.as_str()) + .collect::>(); + assert_eq!(records.len(), 200); + assert_eq!(ids.len(), 200); } } diff --git a/e2e/src/framework/resources.rs b/e2e/src/framework/resources.rs index ff6bd0c..a1309db 100644 --- a/e2e/src/framework/resources.rs +++ b/e2e/src/framework/resources.rs @@ -450,6 +450,7 @@ mod tests { assert!(manifest.contains("namespace: rustfs-fault-test")); assert!(manifest.contains("storageClassName: fast-csi")); + assert!(manifest.contains("storage: 
80Gi")); assert!(!manifest.contains("rustfs-storage")); assert!(!manifest.contains("RUSTFS_UNSAFE_BYPASS_DISK_CHECK")); } diff --git a/e2e/src/framework/s3_workload.rs b/e2e/src/framework/s3_workload.rs index 36e9a57..7b84c5e 100644 --- a/e2e/src/framework/s3_workload.rs +++ b/e2e/src/framework/s3_workload.rs @@ -16,6 +16,7 @@ use anyhow::{Context, Result}; use aws_config::BehaviorVersion; use aws_credential_types::Credentials; use aws_sdk_s3::{Client, config::Region, error::SdkError, primitives::ByteStream}; +use serde::Serialize; use sha2::{Digest, Sha256}; use std::time::Duration; use tokio::time::timeout; @@ -27,9 +28,34 @@ pub struct ObjectSpec { pub key: String, pub size_bytes: usize, pub sha256: String, + seed: u64, + index: usize, +} + +#[derive(Debug)] +pub struct PreparedObject { + pub spec: ObjectSpec, body: Vec, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct WorkloadSizeClass { + pub size_bytes: usize, + pub object_count: usize, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct WorkloadPlan { + pub seed: u64, + pub generator: &'static str, + pub object_count: usize, + pub concurrency: usize, + pub total_payload_bytes: u64, + pub size_distribution: Vec, + #[serde(skip)] + sizes: Vec, +} + #[derive(Clone)] pub struct S3WorkloadClient { client: Client, @@ -48,20 +74,82 @@ impl ObjectSpec { format!("fault-test/{run_id}/") } - pub fn deterministic(run_id: &str, index: usize, size_bytes: usize) -> Self { + pub fn prepare_seeded( + run_id: &str, + index: usize, + size_bytes: usize, + seed: u64, + ) -> PreparedObject { let key = format!("{}object-{index:06}", Self::key_prefix(run_id)); - let body = deterministic_bytes(index, size_bytes); + let body = seeded_bytes(seed, index, size_bytes); let sha256 = sha256_hex(&body); - Self { - key, - size_bytes, - sha256, + PreparedObject { + spec: Self { + key, + size_bytes, + sha256, + seed, + index, + }, + body, + } + } + + pub fn prepare(&self) -> PreparedObject { + let body = 
seeded_bytes(self.seed, self.index, self.size_bytes); + debug_assert_eq!(sha256_hex(&body), self.sha256); + PreparedObject { + spec: self.clone(), body, } } } +impl WorkloadPlan { + pub fn seeded(seed: u64, object_count: usize, concurrency: usize) -> Self { + const SIZE_CLASSES: &[(usize, usize)] = &[ + (4 * 1024, 85), + (16 * 1024, 10), + (8 * 1024 * 1024, 4), + (16 * 1024 * 1024, 1), + ]; + + let mut sizes = Vec::with_capacity(object_count); + let mut size_distribution = Vec::with_capacity(SIZE_CLASSES.len()); + let mut assigned = 0; + for (position, (size_bytes, weight)) in SIZE_CLASSES.iter().copied().enumerate() { + let count = if position + 1 == SIZE_CLASSES.len() { + object_count.saturating_sub(assigned) + } else { + object_count.saturating_mul(weight) / 100 + }; + sizes.extend(std::iter::repeat_n(size_bytes, count)); + size_distribution.push(WorkloadSizeClass { + size_bytes, + object_count: count, + }); + assigned += count; + } + + shuffle_sizes(&mut sizes, seed); + let total_payload_bytes = sizes.iter().map(|size| *size as u64).sum(); + Self { + seed, + generator: "splitmix64-v1", + object_count, + concurrency, + total_payload_bytes, + size_distribution, + sizes, + } + } + + pub fn size_at(&self, index: usize) -> usize { + self.sizes[index] + } +} + impl S3WorkloadClient { pub async fn new( endpoint: impl Into, @@ -94,7 +182,7 @@ impl S3WorkloadClient { }) } - pub async fn create_bucket(&self, recorder: &mut Recorder) -> Result { + pub async fn create_bucket(&self, recorder: &Recorder) -> Result { let record = recorder.begin( OperationKind::CreateBucket, self.bucket.clone(), @@ -137,22 +225,23 @@ impl S3WorkloadClient { pub async fn put_object( &self, - object: &ObjectSpec, - recorder: &mut Recorder, + object: &PreparedObject, + recorder: &Recorder, ) -> Result { + let spec = &object.spec; let record = recorder.begin( OperationKind::Put, self.bucket.clone(), - Some(object.key.clone()), - Some(object.sha256.clone()), - Some(object.size_bytes), + 
Some(spec.key.clone()), + Some(spec.sha256.clone()), + Some(spec.size_bytes), ); let result = timeout( self.request_timeout, self.client .put_object() .bucket(&self.bucket) - .key(&object.key) + .key(&spec.key) .body(ByteStream::from(object.body.clone())) .send(), ) @@ -185,14 +274,14 @@ impl S3WorkloadClient { } } - pub async fn get_object(&self, key: &str, recorder: &mut Recorder) -> Result>> { + pub async fn get_object(&self, key: &str, recorder: &Recorder) -> Result>> { Ok(self.get_object_result(key, recorder).await?.body) } pub async fn get_object_result( &self, key: &str, - recorder: &mut Recorder, + recorder: &Recorder, ) -> Result { let record = recorder.begin( OperationKind::Get, @@ -280,11 +369,7 @@ impl S3WorkloadClient { } } - pub async fn head_object( - &self, - key: &str, - recorder: &mut Recorder, - ) -> Result { + pub async fn head_object(&self, key: &str, recorder: &Recorder) -> Result { let record = recorder.begin( OperationKind::Head, self.bucket.clone(), @@ -332,7 +417,7 @@ impl S3WorkloadClient { pub async fn list_prefix( &self, prefix: &str, - recorder: &mut Recorder, + recorder: &Recorder, ) -> Result>> { let record = recorder.begin( OperationKind::List, @@ -341,48 +426,65 @@ impl S3WorkloadClient { None, None, ); - let response = timeout( - self.request_timeout, - self.client + let mut keys = Vec::new(); + let mut continuation_token = None; + loop { + let mut request = self + .client .list_objects_v2() .bucket(&self.bucket) - .prefix(prefix) - .send(), - ) - .await; - - match response { - Ok(Ok(output)) => { - let keys = output + .prefix(prefix); + if let Some(token) = continuation_token.as_deref() { + request = request.continuation_token(token); + } + let response = timeout(self.request_timeout, request.send()).await; + let output = match response { + Ok(Ok(output)) => output, + Ok(Err(error)) => { + let outcome = classify_sdk_error(&error); + recorder.finish( + record, + outcome, + sdk_error_status(&error), + Some(format!("list prefix 
failed: {error}")), + )?; + return Ok(None); + } + Err(_) => { + recorder.finish( + record, + OperationOutcome::Timeout, + None, + Some("list prefix timed out".to_string()), + )?; + return Ok(None); + } + }; + keys.extend( + output .contents() .iter() - .filter_map(|object| object.key().map(str::to_string)) - .collect::>(); - let mut record = record; - record.size_bytes = Some(keys.len()); - recorder.finish(record, OperationOutcome::Ok, Some(200), None)?; - Ok(Some(keys)) + .filter_map(|object| object.key().map(str::to_string)), + ); + if !output.is_truncated().unwrap_or(false) { + break; } - Ok(Err(error)) => { - let outcome = classify_sdk_error(&error); + continuation_token = output.next_continuation_token().map(str::to_string); + if continuation_token.is_none() { recorder.finish( record, - outcome, - sdk_error_status(&error), - Some(format!("list prefix failed: {error}")), - )?; - Ok(None) - } - Err(_) => { - recorder.finish( - record, - OperationOutcome::Timeout, - None, - Some("list prefix timed out".to_string()), + OperationOutcome::Unknown, + Some(200), + Some("truncated LIST response omitted continuation token".to_string()), )?; - Ok(None) + return Ok(None); } } + + let mut record = record; + record.size_bytes = Some(keys.len()); + recorder.finish(record, OperationOutcome::Ok, Some(200), None)?; + Ok(Some(keys)) } } @@ -410,10 +512,40 @@ pub async fn wait_for_s3_endpoint(endpoint: &str, timeout_duration: Duration) -> } } -fn deterministic_bytes(index: usize, size_bytes: usize) -> Vec { - (0..size_bytes) - .map(|offset| ((offset + index * 31) % 251) as u8) - .collect() +fn seeded_bytes(seed: u64, index: usize, size_bytes: usize) -> Vec { + let mut generator = SplitMix64::new(seed ^ (index as u64).wrapping_mul(0xD6E8_FEB8_6659_FD93)); + let mut body = vec![0; size_bytes]; + for chunk in body.chunks_mut(8) { + let bytes = generator.next_u64().to_le_bytes(); + chunk.copy_from_slice(&bytes[..chunk.len()]); + } + body +} + +fn shuffle_sizes(sizes: &mut [usize], 
seed: u64) { + let mut generator = SplitMix64::new(seed ^ 0xA076_1D64_78BD_642F); + for index in (1..sizes.len()).rev() { + let swap_with = (generator.next_u64() % (index as u64 + 1)) as usize; + sizes.swap(index, swap_with); + } +} + +struct SplitMix64 { + state: u64, +} + +impl SplitMix64 { + fn new(seed: u64) -> Self { + Self { state: seed } + } + + fn next_u64(&mut self) -> u64 { + self.state = self.state.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut value = self.state; + value = (value ^ (value >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + value = (value ^ (value >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + value ^ (value >> 31) + } } fn classify_sdk_error(error: &SdkError) -> OperationOutcome { @@ -435,17 +567,44 @@ fn sdk_error_status(error: &SdkError) -> Option { #[cfg(test)] mod tests { - use super::{ObjectSpec, sha256_hex}; + use super::{ObjectSpec, WorkloadPlan, sha256_hex}; #[test] - fn deterministic_objects_have_stable_keys_sizes_and_hashes() { - let object = ObjectSpec::deterministic("run-1", 7, 4096); - let same = ObjectSpec::deterministic("run-1", 7, 4096); + fn seeded_objects_have_stable_keys_sizes_and_hashes() { + let object = ObjectSpec::prepare_seeded("run-1", 7, 4096, 42); + let same = ObjectSpec::prepare_seeded("run-1", 7, 4096, 42); assert_eq!(ObjectSpec::key_prefix("run-1"), "fault-test/run-1/"); - assert_eq!(object.key, "fault-test/run-1/object-000007"); - assert_eq!(object.size_bytes, 4096); - assert_eq!(object.sha256, same.sha256); - assert_eq!(object.sha256, sha256_hex(&same.body)); + assert_eq!(object.spec.key, "fault-test/run-1/object-000007"); + assert_eq!(object.spec.size_bytes, 4096); + assert_eq!(object.spec.sha256, same.spec.sha256); + assert_eq!(object.spec.sha256, sha256_hex(&same.body)); + assert_ne!( + object.spec.sha256, + ObjectSpec::prepare_seeded("run-1", 7, 4096, 43).spec.sha256 + ); + } + + #[test] + fn workload_plan_is_weighted_shuffled_and_reproducible() { + let plan = WorkloadPlan::seeded(42, 4000, 50); + let same 
= WorkloadPlan::seeded(42, 4000, 50); + let different = WorkloadPlan::seeded(43, 4000, 50); + + assert_eq!(plan, same); + assert_ne!(plan.sizes, different.sizes); + assert_eq!( + plan.size_distribution + .iter() + .map(|class| (class.size_bytes, class.object_count)) + .collect::>(), + vec![ + (4 * 1024, 3400), + (16 * 1024, 400), + (8 * 1024 * 1024, 160), + (16 * 1024 * 1024, 40), + ] + ); + assert_eq!(plan.total_payload_bytes, 2_033_745_920); } } diff --git a/e2e/src/framework/tenant_factory.rs b/e2e/src/framework/tenant_factory.rs index b1ae9bf..c12afa0 100644 --- a/e2e/src/framework/tenant_factory.rs +++ b/e2e/src/framework/tenant_factory.rs @@ -13,9 +13,11 @@ // limitations under the License. use k8s_openapi::api::core::v1::{ - EnvVar, LocalObjectReference, PersistentVolumeClaimSpec, VolumeResourceRequirements, + Affinity, EnvVar, LocalObjectReference, PersistentVolumeClaimSpec, PodAffinityTerm, + PodAntiAffinity, VolumeResourceRequirements, }; use k8s_openapi::apimachinery::pkg::api::resource::Quantity; +use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector; use operator::types::v1alpha1::k8s::ImagePullPolicy; use operator::types::v1alpha1::k8s::PodManagementPolicy; use operator::types::v1alpha1::persistence::PersistenceConfig; @@ -32,9 +34,11 @@ pub struct TenantTemplate { pub credential_secret_name: String, pub servers: i32, pub volumes_per_server: i32, + pub storage_request: String, pub pod_management_policy: Option, pub unsafe_bypass_disk_check: bool, pub node_selector: Option>, + pub affinity: Option, } impl TenantTemplate { @@ -53,6 +57,7 @@ impl TenantTemplate { credential_secret_name: credential_secret_name.into(), servers: 4, volumes_per_server: 2, + storage_request: "10Gi".to_string(), pod_management_policy: Some(PodManagementPolicy::Parallel), unsafe_bypass_disk_check: true, node_selector: Some( @@ -60,6 +65,7 @@ impl TenantTemplate { .into_iter() .collect(), ), + affinity: None, } } @@ -70,17 +76,20 @@ impl TenantTemplate { 
storage_class: impl Into, credential_secret_name: impl Into, ) -> Self { + let name = name.into(); Self { namespace: namespace.into(), - name: name.into(), + name: name.clone(), image: image.into(), storage_class: storage_class.into(), credential_secret_name: credential_secret_name.into(), servers: 4, volumes_per_server: 1, + storage_request: "80Gi".to_string(), pod_management_policy: Some(PodManagementPolicy::Parallel), unsafe_bypass_disk_check: false, node_selector: None, + affinity: Some(fault_tenant_pod_anti_affinity(&name)), } } @@ -94,9 +103,12 @@ impl TenantTemplate { access_modes: Some(vec!["ReadWriteOnce".to_string()]), resources: Some(VolumeResourceRequirements { requests: Some( - [("storage".to_string(), Quantity("10Gi".to_string()))] - .into_iter() - .collect(), + [( + "storage".to_string(), + Quantity(self.storage_request.clone()), + )] + .into_iter() + .collect(), ), ..Default::default() }), @@ -107,6 +119,7 @@ impl TenantTemplate { }, scheduling: SchedulingConfig { node_selector: self.node_selector.clone(), + affinity: self.affinity.clone(), ..SchedulingConfig::default() }, }; @@ -143,6 +156,27 @@ impl TenantTemplate { } } +fn fault_tenant_pod_anti_affinity(tenant_name: &str) -> Affinity { + Affinity { + pod_anti_affinity: Some(PodAntiAffinity { + required_during_scheduling_ignored_during_execution: Some(vec![PodAffinityTerm { + label_selector: Some(LabelSelector { + match_labels: Some( + [("rustfs.tenant".to_string(), tenant_name.to_string())] + .into_iter() + .collect(), + ), + ..LabelSelector::default() + }), + topology_key: "kubernetes.io/hostname".to_string(), + ..PodAffinityTerm::default() + }]), + ..PodAntiAffinity::default() + }), + ..Affinity::default() + } +} + #[cfg(test)] mod tests { use super::TenantTemplate; @@ -197,7 +231,7 @@ mod tests { } #[test] - fn real_cluster_tenant_uses_scheduler_defaults_and_disk_checks() { + fn real_cluster_tenant_uses_fault_storage_spread_and_disk_checks() { let tenant = TenantTemplate::real_cluster( 
"rustfs-fault-test", "fault-test-tenant", @@ -208,6 +242,32 @@ mod tests { .build(); assert_eq!(tenant.spec.pools[0].persistence.volumes_per_server, 1); + assert_eq!( + tenant.spec.pools[0] + .scheduling + .affinity + .as_ref() + .and_then(|affinity| affinity.pod_anti_affinity.as_ref()) + .and_then(|anti_affinity| { + anti_affinity + .required_during_scheduling_ignored_during_execution + .as_ref() + }) + .and_then(|terms| terms.first()) + .map(|term| term.topology_key.as_str()), + Some("kubernetes.io/hostname") + ); + assert_eq!( + tenant.spec.pools[0] + .persistence + .volume_claim_template + .as_ref() + .and_then(|claim| claim.resources.as_ref()) + .and_then(|resources| resources.requests.as_ref()) + .and_then(|requests| requests.get("storage")) + .map(|quantity| quantity.0.as_str()), + Some("80Gi") + ); assert!(tenant.spec.pools[0].scheduling.node_selector.is_none()); assert!( tenant diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index aa26e1a..76fab74 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use anyhow::{Context, Result, bail, ensure}; +use futures::{StreamExt, stream}; use kube::Api; use operator::types::v1alpha1::tenant::Tenant; use rustfs_operator_e2e::framework::{ @@ -32,7 +33,7 @@ use rustfs_operator_e2e::framework::{ kube_client, port_forward::{PortForwardGuard, PortForwardSpec}, resources, - s3_workload::{ObjectSpec, S3WorkloadClient, wait_for_s3_endpoint}, + s3_workload::{ObjectSpec, S3WorkloadClient, WorkloadPlan, wait_for_s3_endpoint}, wait, }; use serde::Serialize; @@ -42,7 +43,6 @@ use std::time::{Duration, Instant}; use uuid::Uuid; const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; -const SMALL_OBJECT_SIZE_BYTES: usize = 4 * 1024; #[tokio::test] #[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO"] @@ -92,9 +92,27 @@ async fn run_fault_case( wait_for_ready_tenant(&config.cluster).await?; let run_id = format!("run-{}", Uuid::new_v4()); + let workload_seed = config.workload_seed.unwrap_or_else(generated_seed); + let workload_plan = WorkloadPlan::seeded( + workload_seed, + scenario.object_count, + config.workload_concurrency, + ); let bucket = bucket_name(&run_id); let history_path = collector.case_dir(scenario.case_name).join("history.jsonl"); - let mut history = Recorder::create(history_path, &scenario.name, &run_id)?; + let history = Recorder::create(history_path, &scenario.name, &run_id)?; + collector.write_text( + scenario.case_name, + "workload-plan.json", + &serde_json::to_string_pretty(&workload_plan)?, + )?; + eprintln!( + "fault workload seed={} objects={} concurrency={} payload_bytes={}", + workload_plan.seed, + workload_plan.object_count, + workload_plan.concurrency, + workload_plan.total_payload_bytes + ); let cluster = &config.cluster; let port_forward_spec = @@ -112,13 +130,20 @@ async fn run_fault_case( config.request_timeout, ) .await?; - let bucket_outcome = s3.create_bucket(&mut history).await?; + let bucket_outcome = s3.create_bucket(&history).await?; ensure!( bucket_outcome == 
OperationOutcome::Ok, "fault workload bucket creation did not succeed: {bucket_outcome:?}" ); - let prefilled = prefill_objects(&s3, &mut history, &run_id, scenario.prefill_count()).await?; + let prefilled = prefill_objects( + &s3, + &history, + &run_id, + &workload_plan, + scenario.prefill_count(), + ) + .await?; let pods_before = rustfs_pod_identities(cluster)?; let mut fault = AppliedFault::apply(config, collector, scenario, spec.backend, &run_id)?; @@ -161,8 +186,9 @@ async fn run_fault_case( let mut workload = match run_mixed_workload( &s3, - &mut history, + &history, &run_id, + &workload_plan, &prefilled, scenario.prefill_count(), scenario.mixed_workload_count(), @@ -211,14 +237,19 @@ async fn run_fault_case( wait_for_ready_tenant(cluster).await?; let pods_after = rustfs_pod_identities(cluster)?; ensure_port_forward(&mut port_forward, cluster, &endpoint).await?; - workload.summary.recommitted_after_recovery = - recommit_unconfirmed_objects(&s3, &mut history, &workload.unconfirmed_puts).await?; + workload.summary.recommitted_after_recovery = recommit_unconfirmed_objects( + &s3, + &history, + &workload.unconfirmed_puts, + workload_plan.concurrency, + ) + .await?; collector.write_text( scenario.case_name, "workload-summary.json", &serde_json::to_string_pretty(&workload.summary)?, )?; - let report = checker::check_s3_history(&s3, &mut history, true).await?; + let report = checker::check_s3_history(&s3, &history, true, workload_plan.concurrency).await?; collector.write_text( scenario.case_name, "checker-report.json", @@ -232,6 +263,7 @@ async fn run_fault_case( active_during_workload: true, recovered: report.tenant_recovered, client_disruptions: workload.summary.disrupted(), + workload_plan, pods_before, pods_after, active_snapshot, @@ -599,6 +631,7 @@ struct FaultEvidence { active_during_workload: bool, recovered: bool, client_disruptions: usize, + workload_plan: WorkloadPlan, pods_before: Vec, pods_after: Vec, active_snapshot: FaultStatusSnapshot, @@ -806,55 
+839,95 @@ async fn wait_for_tenant_s3( async fn prefill_objects( s3: &S3WorkloadClient, - history: &mut Recorder, + history: &Recorder, run_id: &str, + plan: &WorkloadPlan, count: usize, ) -> Result> { + let tasks = (0..count).map(|index| { + let s3 = s3.clone(); + let history = history.clone(); + let run_id = run_id.to_string(); + let size_bytes = plan.size_at(index); + let seed = plan.seed; + async move { + let object = ObjectSpec::prepare_seeded(&run_id, index, size_bytes, seed); + let spec = object.spec.clone(); + let put_outcome = s3.put_object(&object, &history).await?; + ensure!( + put_outcome == OperationOutcome::Ok, + "prefill PUT failed before fault injection for key {}: {put_outcome:?}", + spec.key + ); + let head_outcome = s3.head_object(&spec.key, &history).await?; + ensure!( + head_outcome == OperationOutcome::Ok, + "prefill HEAD failed before fault injection for key {}: {head_outcome:?}", + spec.key + ); + Ok::<_, anyhow::Error>((index, spec)) + } + }); + let results = stream::iter(tasks) + .buffer_unordered(plan.concurrency) + .collect::>() + .await; let mut objects = Vec::with_capacity(count); - - for index in 0..count { - let object = ObjectSpec::deterministic(run_id, index, SMALL_OBJECT_SIZE_BYTES); - let put_outcome = s3.put_object(&object, history).await?; - ensure!( - put_outcome == OperationOutcome::Ok, - "prefill PUT failed before fault injection for key {}: {put_outcome:?}", - object.key - ); - let head_outcome = s3.head_object(&object.key, history).await?; - ensure!( - head_outcome == OperationOutcome::Ok, - "prefill HEAD failed before fault injection for key {}: {head_outcome:?}", - object.key - ); - objects.push(object); + for result in results { + objects.push(result?); } + objects.sort_by_key(|(index, _)| *index); - Ok(objects) + Ok(objects.into_iter().map(|(_, object)| object).collect()) } async fn run_mixed_workload( s3: &S3WorkloadClient, - history: &mut Recorder, + history: &Recorder, run_id: &str, + plan: &WorkloadPlan, 
prefilled: &[ObjectSpec], start_index: usize, count: usize, ) -> Result { - let mut summary = WorkloadSummary::default(); - let mut unconfirmed_puts = Vec::new(); - - for offset in 0..count { - let object = - ObjectSpec::deterministic(run_id, start_index + offset, SMALL_OBJECT_SIZE_BYTES); - let put_outcome = s3.put_object(&object, history).await?; - summary.puts.record(put_outcome); - if put_outcome != OperationOutcome::Ok { - unconfirmed_puts.push(object.clone()); + let tasks = (0..count).map(|offset| { + let s3 = s3.clone(); + let history = history.clone(); + let run_id = run_id.to_string(); + let index = start_index + offset; + let size_bytes = plan.size_at(index); + let seed = plan.seed; + let existing = prefilled[offset % prefilled.len()].clone(); + async move { + let object = ObjectSpec::prepare_seeded(&run_id, index, size_bytes, seed); + let spec = object.spec.clone(); + let put_outcome = s3.put_object(&object, &history).await?; + let get_outcome = s3.get_object_result(&existing.key, &history).await?.outcome; + Ok::<_, anyhow::Error>(MixedTaskResult { + index, + object: spec, + put_outcome, + get_outcome, + }) } + }); + let results = stream::iter(tasks) + .buffer_unordered(plan.concurrency) + .collect::>() + .await; + let mut completed = Vec::with_capacity(count); + for result in results { + completed.push(result?); + } + completed.sort_by_key(|result| result.index); - if let Some(existing) = prefilled.get(offset % prefilled.len()) { - let get_result = s3.get_object_result(&existing.key, history).await?; - summary.gets.record(get_result.outcome); + let mut summary = WorkloadSummary::new(plan); + let mut unconfirmed_puts = Vec::new(); + for result in completed { + summary.puts.record(result.put_outcome); + summary.gets.record(result.get_outcome); + if result.put_outcome != OperationOutcome::Ok { + unconfirmed_puts.push(result.object); } } @@ -867,34 +940,72 @@ async fn run_mixed_workload( async fn recommit_unconfirmed_objects( s3: &S3WorkloadClient, - 
history: &mut Recorder, + history: &Recorder, objects: &[ObjectSpec], + concurrency: usize, ) -> Result { - for object in objects { - let outcome = s3.put_object(object, history).await?; + let tasks = objects.iter().cloned().map(|object| { + let s3 = s3.clone(); + let history = history.clone(); + async move { + let prepared = object.prepare(); + let outcome = s3.put_object(&prepared, &history).await?; + Ok::<_, anyhow::Error>((object.key, outcome)) + } + }); + let results = stream::iter(tasks) + .buffer_unordered(concurrency) + .collect::>() + .await; + for result in results { + let (key, outcome) = result?; ensure!( outcome == OperationOutcome::Ok, "PUT for previously unconfirmed object {} did not commit after recovery: {outcome:?}", - object.key + key ); } Ok(objects.len()) } +#[derive(Debug)] +struct MixedTaskResult { + index: usize, + object: ObjectSpec, + put_outcome: OperationOutcome, + get_outcome: OperationOutcome, +} + #[derive(Debug)] struct MixedWorkloadResult { summary: WorkloadSummary, unconfirmed_puts: Vec, } -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] struct WorkloadSummary { + seed: u64, + object_count: usize, + concurrency: usize, + total_payload_bytes: u64, puts: OutcomeCounts, gets: OutcomeCounts, recommitted_after_recovery: usize, } impl WorkloadSummary { + fn new(plan: &WorkloadPlan) -> Self { + Self { + seed: plan.seed, + object_count: plan.object_count, + concurrency: plan.concurrency, + total_payload_bytes: plan.total_payload_bytes, + puts: OutcomeCounts::default(), + gets: OutcomeCounts::default(), + recommitted_after_recovery: 0, + } + } + fn require_exercised(&self) -> Result<()> { ensure!( self.puts.total() > 0 && self.gets.total() > 0, @@ -959,6 +1070,13 @@ fn bucket_name(run_id: &str) -> String { format!("rustfs-fault-{suffix}") } +fn generated_seed() -> u64 { + let run = Uuid::new_v4(); + let mut bytes = [0; 8]; + bytes.copy_from_slice(&run.as_bytes()[..8]); + 
u64::from_le_bytes(bytes) +} + fn warp_bucket_name(run_id: &str) -> String { format!("{}-warp", bucket_name(run_id)) } @@ -970,6 +1088,7 @@ mod tests { pod_replacement_observed, warp_bucket_name, }; use rustfs_operator_e2e::framework::history::OperationOutcome; + use rustfs_operator_e2e::framework::s3_workload::WorkloadPlan; #[test] fn fault_bucket_name_is_s3_compatible_and_run_scoped() { @@ -985,7 +1104,7 @@ mod tests { #[test] fn workload_summary_counts_disrupted_operations() { - let mut summary = WorkloadSummary::default(); + let mut summary = WorkloadSummary::new(&WorkloadPlan::seeded(42, 4000, 50)); summary.puts.record(OperationOutcome::Ok); summary.gets.record(OperationOutcome::Timeout); @@ -999,6 +1118,10 @@ mod tests { #[test] fn workload_summary_can_require_fault_evidence() { let summary = WorkloadSummary { + seed: 42, + object_count: 4000, + concurrency: 50, + total_payload_bytes: 2_033_745_920, puts: OutcomeCounts { ok: 1, ..OutcomeCounts::default() From df90a88ccea17c97f4ea5cfe369a8844530a27c5 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Fri, 19 Jun 2026 22:30:57 +0800 Subject: [PATCH 14/20] fix(chaos): bypass port-forward for concurrent workload --- FAULT_INJECTION_TEST_PLAN.md | 10 +++++ e2e/src/framework/fault_config.rs | 5 +++ e2e/tests/faults.rs | 73 ++++++++++++++++++++++--------- 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md index b1d9505..90ef931 100644 --- a/FAULT_INJECTION_TEST_PLAN.md +++ b/FAULT_INJECTION_TEST_PLAN.md @@ -198,6 +198,10 @@ export RUSTFS_FAULT_TEST_CHAOS_NAMESPACE=chaos-mesh export RUN_ROOT="target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" ``` +如果 runner 位于集群节点或 Pod 内,并且能够访问 Service ClusterIP,建议设置 +`RUSTFS_FAULT_TEST_USE_CLUSTER_IP=1`。50 并发 workload 不应经过 `kubectl port-forward`; +port-forward 仅适合从集群外执行的低并发控制和调试流量。 + 运行一个场景: ```bash @@ -487,6 +491,7 @@ kubectl get iochaos,podchaos,networkchaos -A | 
`RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `50` | prefill、故障 workload、恢复重写和 checker 的最大并发。 | | `RUSTFS_FAULT_TEST_SEED` | 随机生成 | 可选 u64 seed;设置后可重放尺寸顺序和对象内容。 | | `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | 单个 S3 操作超时。 | +| `RUSTFS_FAULT_TEST_USE_CLUSTER_IP` | `false` | 从可访问 ClusterIP 的节点/Pod 运行时直连本测试 Tenant Service,避免 port-forward 成为高并发瓶颈。 | | `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否强制要求客户端看到故障。 | | `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh resource namespace。 | | `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | Warp mixed workload 时间。 | @@ -653,6 +658,10 @@ export RUSTFS_FAULT_TEST_CHAOS_NAMESPACE=chaos-mesh export RUN_ROOT="target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" ``` +If the runner is on a cluster node or in a Pod and can reach Service ClusterIPs, set +`RUSTFS_FAULT_TEST_USE_CLUSTER_IP=1`. The 50-concurrency workload should not traverse +`kubectl port-forward`; port-forward is intended only for low-concurrency control and debugging traffic from outside the cluster. + Run one scenario: ```bash @@ -875,6 +884,7 @@ Finally verify nodes, the Operator, Chaos Mesh, PVs, and remaining Chaos resourc | `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `50` | Maximum concurrency for prefill, fault workload, recovery writes, and checker reads. | | `RUSTFS_FAULT_TEST_SEED` | generated randomly | Optional u64 seed for replaying the size order and object content. | | `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | S3 operation timeout. | +| `RUSTFS_FAULT_TEST_USE_CLUSTER_IP` | `false` | Directly use this test Tenant's Service from a node/Pod that can reach ClusterIP, avoiding port-forward as a high-concurrency bottleneck. | | `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | Require client-visible disruption when enabled. | | `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Namespace for Chaos resources. | | `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | Warp mixed workload duration. 
| diff --git a/e2e/src/framework/fault_config.rs b/e2e/src/framework/fault_config.rs index 49022d0..3abbc57 100644 --- a/e2e/src/framework/fault_config.rs +++ b/e2e/src/framework/fault_config.rs @@ -30,6 +30,7 @@ pub struct FaultTestConfig { pub workload_concurrency: usize, pub workload_seed: Option, pub request_timeout: Duration, + pub use_cluster_ip: bool, pub require_client_disruption: bool, pub dm_name: Option, pub dm_node: Option, @@ -107,6 +108,7 @@ impl FaultTestConfig { "RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS", 30, )), + use_cluster_ip: env_bool(&get_env, "RUSTFS_FAULT_TEST_USE_CLUSTER_IP"), require_client_disruption: env_bool( &get_env, "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION", @@ -301,6 +303,7 @@ mod tests { assert_eq!(config.workload_concurrency, 50); assert_eq!(config.workload_seed, None); assert_eq!(config.request_timeout, std::time::Duration::from_secs(30)); + assert!(!config.use_cluster_ip); assert!(config.dm_name.is_none()); assert!(config.dm_node.is_none()); assert!(config.dm_mount_path.is_none()); @@ -327,6 +330,7 @@ mod tests { "RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY" => Some("8".to_string()), "RUSTFS_FAULT_TEST_SEED" => Some("4242".to_string()), "RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS" => Some("7".to_string()), + "RUSTFS_FAULT_TEST_USE_CLUSTER_IP" => Some("true".to_string()), "RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION" => Some("true".to_string()), "RUSTFS_FAULT_TEST_DM_NAME" => Some("rustfs-test".to_string()), "RUSTFS_FAULT_TEST_DM_NODE" => Some("worker-a".to_string()), @@ -352,6 +356,7 @@ mod tests { assert_eq!(config.workload_concurrency, 8); assert_eq!(config.workload_seed, Some(4242)); assert_eq!(config.request_timeout, std::time::Duration::from_secs(7)); + assert!(config.use_cluster_ip); assert!(config.require_client_disruption); assert_eq!(config.dm_name.as_deref(), Some("rustfs-test")); assert_eq!(config.dm_node.as_deref(), Some("worker-a")); diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index 76fab74..c445b28 100644 
--- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -13,7 +13,7 @@ // limitations under the License. use anyhow::{Context, Result, bail, ensure}; -use futures::{StreamExt, stream}; +use futures::{StreamExt, TryStreamExt, stream}; use kube::Api; use operator::types::v1alpha1::tenant::Tenant; use rustfs_operator_e2e::framework::{ @@ -31,6 +31,7 @@ use rustfs_operator_e2e::framework::{ history::Recorder, host_faults::{self, DmFlakeyGuard, DmFlakeySpec, DmStatusSnapshot}, kube_client, + kubectl::Kubectl, port_forward::{PortForwardGuard, PortForwardSpec}, resources, s3_workload::{ObjectSpec, S3WorkloadClient, WorkloadPlan, wait_for_s3_endpoint}, @@ -115,11 +116,8 @@ async fn run_fault_case( ); let cluster = &config.cluster; - let port_forward_spec = - PortForwardSpec::tenant_io(&cluster.test_namespace, &cluster.tenant_name); - let endpoint = port_forward_spec.local_base_url(); - let mut port_forward = PortForwardSpec::start_tenant_io(cluster)?; - wait_for_tenant_s3(&mut port_forward, &endpoint, cluster.timeout).await?; + let (endpoint, mut port_forward) = s3_access(config)?; + ensure_s3_access(&mut port_forward, cluster, &endpoint).await?; let (access_key, secret_key) = resources::test_credentials(); let s3 = S3WorkloadClient::new( @@ -153,7 +151,7 @@ async fn run_fault_case( } let active_snapshot = fault.snapshot("active")?; - if let Err(error) = ensure_port_forward(&mut port_forward, cluster, &endpoint).await { + if let Err(error) = ensure_s3_access(&mut port_forward, cluster, &endpoint).await { collect_fault_artifacts(collector, scenario.case_name, &fault, "port-forward-failed")?; return Err(error); } @@ -173,7 +171,7 @@ async fn run_fault_case( return Err(error); } - if let Err(error) = ensure_port_forward(&mut port_forward, cluster, &endpoint).await { + if let Err(error) = ensure_s3_access(&mut port_forward, cluster, &endpoint).await { collect_fault_artifacts( collector, scenario.case_name, @@ -236,7 +234,7 @@ async fn run_fault_case( 
wait_for_ready_tenant(cluster).await?; let pods_after = rustfs_pod_identities(cluster)?; - ensure_port_forward(&mut port_forward, cluster, &endpoint).await?; + ensure_s3_access(&mut port_forward, cluster, &endpoint).await?; workload.summary.recommitted_after_recovery = recommit_unconfirmed_objects( &s3, &history, @@ -808,15 +806,52 @@ async fn wait_for_ready_tenant(config: &ClusterTestConfig) -> Result { wait::wait_for_tenant_ready(tenants, &config.tenant_name, config.timeout).await } -async fn ensure_port_forward( - port_forward: &mut PortForwardGuard, +fn s3_access(config: &FaultTestConfig) -> Result<(String, Option)> { + let cluster = &config.cluster; + if config.use_cluster_ip { + let service = format!("{}-io", cluster.tenant_name); + let output = Kubectl::new(cluster) + .namespaced(&cluster.test_namespace) + .command([ + "get".to_string(), + "service".to_string(), + service.clone(), + "-o".to_string(), + "jsonpath={.spec.clusterIP}".to_string(), + ]) + .run_checked() + .with_context(|| format!("read ClusterIP for fault-test service {service:?}"))?; + let cluster_ip = output.stdout.trim(); + ensure!( + !cluster_ip.is_empty() && cluster_ip != "None", + "fault-test service {service:?} does not have a ClusterIP" + ); + let host = if cluster_ip.contains(':') { + format!("[{cluster_ip}]") + } else { + cluster_ip.to_string() + }; + return Ok((format!("http://{host}:9000"), None)); + } + + let spec = PortForwardSpec::tenant_io(&cluster.test_namespace, &cluster.tenant_name); + let endpoint = spec.local_base_url(); + Ok((endpoint, Some(PortForwardSpec::start_tenant_io(cluster)?))) +} + +async fn ensure_s3_access( + port_forward: &mut Option, config: &ClusterTestConfig, endpoint: &str, ) -> Result<()> { - if port_forward.ensure_running().is_err() { - *port_forward = PortForwardSpec::start_tenant_io(config)?; + if let Some(guard) = port_forward { + if guard.ensure_running().is_err() { + *guard = PortForwardSpec::start_tenant_io(config)?; + } + return 
wait_for_tenant_s3(guard, endpoint, config.timeout).await; } - wait_for_tenant_s3(port_forward, endpoint, config.timeout).await + + wait_for_s3_endpoint(endpoint, config.timeout).await } async fn wait_for_tenant_s3( @@ -868,14 +903,10 @@ async fn prefill_objects( Ok::<_, anyhow::Error>((index, spec)) } }); - let results = stream::iter(tasks) + let mut objects = stream::iter(tasks) .buffer_unordered(plan.concurrency) - .collect::>() - .await; - let mut objects = Vec::with_capacity(count); - for result in results { - objects.push(result?); - } + .try_collect::>() + .await?; objects.sort_by_key(|(index, _)| *index); Ok(objects.into_iter().map(|(_, object)| object).collect()) From 0575ed92923cd9200e70e4e186f40da3a9675d04 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 11:15:22 +0800 Subject: [PATCH 15/20] test(chaos): package fault-test operations --- FAULT_INJECTION_TEST_PLAN.md | 896 --------------------------- Makefile | 10 +- README.md | 14 - e2e/FAULT_TESTING.md | 420 +++++++++++++ e2e/Makefile | 60 ++ e2e/README.md | 19 +- e2e/scripts/fault-test.sh | 453 ++++++++++++++ e2e/src/framework/fault_config.rs | 18 +- e2e/src/framework/fault_scenarios.rs | 6 +- e2e/src/framework/resources.rs | 2 +- e2e/src/framework/s3_workload.rs | 16 +- e2e/src/framework/tenant_factory.rs | 4 +- e2e/tests/faults.rs | 8 +- 13 files changed, 975 insertions(+), 951 deletions(-) delete mode 100644 FAULT_INJECTION_TEST_PLAN.md create mode 100644 e2e/FAULT_TESTING.md create mode 100644 e2e/Makefile create mode 100644 e2e/scripts/fault-test.sh diff --git a/FAULT_INJECTION_TEST_PLAN.md b/FAULT_INJECTION_TEST_PLAN.md deleted file mode 100644 index 90ef931..0000000 --- a/FAULT_INJECTION_TEST_PLAN.md +++ /dev/null @@ -1,896 +0,0 @@ - - -# RustFS Fault Injection Operations Manual / RustFS 故障注入测试操作手册 - -- [中文操作手册](#中文操作手册) -- [English Operations Manual](#english-operations-manual) - -## 中文操作手册 - -### 1. 
目的与范围 - -本手册用于在专用的真实 Kubernetes 测试集群中运行 RustFS 故障注入测试。测试对象是由 RustFS Operator 创建的测试 Tenant,不是现有业务 Tenant,也不是生产 Operator 控制面。 - -每次执行 `make fault-test` 只运行 `RUSTFS_FAULT_TEST_SCENARIO` 选择的一个场景,并只报告一个真实的 destructive test。七个场景必须串行执行。 - -测试分为两类: - -1. 六个 Kubernetes-native 场景,使用 Chaos Mesh 和动态 StorageClass。 -2. 一个 `dm-flakey` 场景,使用专用静态 Local PV、Linux Device Mapper 和 privileged helper Pod。 - -执行 `dm-flakey` 前不需要重装 Kubernetes、RustFS Operator、Chaos Mesh 或 Rust 工具链;只需要把 fault-test Tenant 的存储 fixture 切换为专用静态 Local PV。 - -### 2. 安全要求 - -必须满足以下要求: - -- 只能在专用测试集群执行,禁止在生产或共享开发集群执行。 -- 当前 context 不能以 `kind-` 开头。 -- 不得把 `RUSTFS_FAULT_TEST_NAMESPACE` 或 `RUSTFS_FAULT_TEST_TENANT` 指向现有业务资源。 -- 常规场景必须使用支持动态供给的 StorageClass。 -- `dm-flakey` 只能使用专用的 `kubernetes.io/no-provisioner` StorageClass 和专用块设备或 loop 文件。 -- DM Local PV 路径不得复用现有 RustFS 数据目录。 -- 所有场景必须串行运行;默认本地 port-forward 端口为 `19000`。 -- 失败时先保存 artifacts,再清理故障资源和测试 namespace。 - -测试 runner 默认创建: - -```text -namespace: rustfs-fault-test -tenant: fault-test-tenant -``` - -如果 namespace 已存在,必须同时具备: - -```text -app.kubernetes.io/managed-by=rustfs-operator-fault-test -rustfs.com/fault-test-tenant=fault-test-tenant -``` - -runner 不会自动认领未标记的 namespace,也不会删除不属于它的 namespace。 - -### 3. 
场景目录 - -| 场景 | 后端 | 隔离方式 | 主要验证 | -| --- | --- | --- | --- | -| `io-eio` | Chaos Mesh IOChaos | 新 Tenant/PVC | 一个数据卷发生 EIO 后,已提交对象不丢失、不损坏。 | -| `pod-kill-one` | Chaos Mesh PodChaos | 可复用 Ready Tenant | 删除一个 RustFS Pod 后,替代 Pod 出现且对象保持正确。 | -| `network-partition-one` | Chaos Mesh NetworkChaos | 可复用 Ready Tenant | 一个 Pod 与同 Tenant peers 分区后,恢复时对象保持正确。 | -| `io-read-mistake` | Chaos Mesh IOChaos | 新 Tenant/PVC | 读路径被篡改时,成功 GET 不能返回错误内容。 | -| `disk-full` | Chaos Mesh IOChaos | 新 Tenant/PVC | 写操作返回 ENOSPC 后,已提交对象保持正确。 | -| `warp-under-chaos` | Warp + IOChaos | 新 Tenant/PVC | 记录故障下性能,正确性仍由 history/checker 判断。 | -| `dm-flakey` | Linux Device Mapper | 专用静态 Local PV | 底层块设备间歇性 EIO 后,恢复时对象保持正确。 | - -默认 workload 写入或确认 4000 个对象,并使用 50 并发。尺寸计划先按固定比例生成,再由 seed 确定性打乱:4KiB 85%(3400 个)、16KiB 10%(400 个)、8MiB 4%(160 个)、16MiB 1%(40 个)。每个场景的逻辑 payload 为 2,033,745,920 bytes,约 1.89GiB。 - -对象内容由同一个 seed 和对象索引通过 `splitmix64-v1` 确定性生成。`workload-plan.json` 记录 seed、生成器版本、并发、尺寸分布和总 payload;`history.jsonl` 记录每个 key 的 size、SHA-256 和结果。设置 `RUSTFS_FAULT_TEST_SEED=` 可以重放相同尺寸顺序和对象内容。 - -客户端没有看到错误不代表故障未生效;权威故障证据来自 Chaos 状态或 DM table/status,以及 `fault-evidence.json`。 - -`RUSTFS_FAULT_TEST_PERCENT=20` 表示 Chaos Mesh 对匹配 I/O 操作的注入概率,不表示预先固定选择 20% 的对象。 - -### 4. 测试机要求 - -运行测试的主机需要: - -- `kubectl` -- Rust stable 和 Cargo,支持 Rust edition 2024 -- GNU Make -- 可访问 Kubernetes API 的 kubeconfig -- `warp` v1.3.1,仅 `warp-under-chaos` 需要 -- 足够空间保存 `target/fault-tests` artifacts - -建议测试账户在专用测试集群使用 cluster-admin。最小权限至少需要: - -- 读取 CRD、Node 和 StorageClass -- 创建、读取、更新和删除 namespace、Secret、Pod、Service、PVC、StatefulSet 和 Tenant -- 在 Chaos Mesh namespace 管理 IOChaos、PodChaos 和 NetworkChaos -- 读取 Pod 日志、events,并执行 `kubectl exec` -- `dm-flakey` 允许创建 privileged、`hostPID`、`hostPath: /` 的 helper Pod - -代码检查: - -```bash -rustc --version -cargo --version -kubectl version --client -make e2e-check -``` - -### 5. 
Kubernetes 和 RustFS 前置检查 - -切换并记录目标 context: - -```bash -kubectl config use-context -kubectl config current-context -kubectl get nodes -``` - -确认 RustFS Operator、Tenant CRD 和 StorageClass: - -```bash -kubectl get crd tenants.rustfs.com -kubectl -n rustfs-system get deployment -kubectl get storageclass -``` - -常规场景需要至少四个可调度节点和四个 `80Gi` RWO PVC。fault Tenant 使用 required pod anti-affinity,把四个 RustFS Pod 分散到不同的 `kubernetes.io/hostname`。StorageClass 必须支持动态供给,不能是 `kubernetes.io/no-provisioner`。每个承载 fault-test PVC 的节点应至少有 100Gi 可用空间;执行前必须按实际 StorageClass 拓扑核对容量。 - -不能只看 PVC 显示的 capacity。hostPath/local-path provisioner 通常不执行容量配额,必须检查它的实际 node path 和对应文件系统: - -```bash -kubectl -n kube-system get configmap local-path-config -o yaml -kubectl get pv -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.hostPath.path}{"\n"}{end}' -df -h -``` - -K3s 默认 `/var/lib/rancher/k3s/storage` 经常位于较小的系统盘。若该文件系统不足 100Gi,不得用于本测试;应部署专用的动态 provisioner/StorageClass,把新 fault-test PVC 放到 `/data/rustfs/rustfs-fault-local-path` 之类的独立数据盘目录。不要修改或迁移现有业务 PVC。 - -建议固定已验证的 RustFS image digest,避免 `latest` 漂移: - -```bash -export RUSTFS_IMAGE='docker.io/rustfs/rustfs@sha256:' -``` - -### 6. 安装和验证 Chaos Mesh - -以下示例使用已验证的 Chaos Mesh v2.8.3: - -```bash -helm repo add chaos-mesh https://charts.chaos-mesh.org -helm repo update - -helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh \ - -n chaos-mesh --create-namespace \ - --version 2.8.3 \ - --set chaosDaemon.runtime=containerd \ - --set chaosDaemon.socketPath=/run/containerd/containerd.sock \ - --set dashboard.create=false \ - --wait --timeout 10m -``` - -K3s 使用: - -```text -/run/k3s/containerd/containerd.sock -``` - -其他发行版必须根据实际容器运行时修改 `chaosDaemon.runtime` 和 `chaosDaemon.socketPath`。 - -验证: - -```bash -kubectl -n chaos-mesh get deployment,daemonset -kubectl get crd \ - iochaos.chaos-mesh.org \ - podchaos.chaos-mesh.org \ - networkchaos.chaos-mesh.org -``` - -要求 controller-manager 全部 Ready,chaos-daemon 在所有目标节点 Ready。 - -### 7. 
运行普通测试 - -先设置公共参数: - -```bash -export RUSTFS_FAULT_TEST_STORAGE_CLASS= -export RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" -export RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE=rustfs-system -export RUSTFS_FAULT_TEST_NAMESPACE=rustfs-fault-test -export RUSTFS_FAULT_TEST_TENANT=fault-test-tenant -export RUSTFS_FAULT_TEST_CHAOS_NAMESPACE=chaos-mesh -export RUN_ROOT="target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" -``` - -如果 runner 位于集群节点或 Pod 内,并且能够访问 Service ClusterIP,建议设置 -`RUSTFS_FAULT_TEST_USE_CLUSTER_IP=1`。50 并发 workload 不应经过 `kubectl port-forward`; -port-forward 仅适合从集群外执行的低并发控制和调试流量。 - -运行一个场景: - -```bash -RUSTFS_FAULT_TEST_SCENARIO=io-eio \ -RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/io-eio" \ -make fault-test -``` - -`make fault-test` 会在内部设置 `RUSTFS_FAULT_TEST_DESTRUCTIVE=1`。不要直接绕过 Make 入口运行 destructive test。 - -测试期间持续观察节点、现有业务 Tenant 和 fault-test Tenant。任一非目标资源变为非 Ready 时,应立即删除当前 managed Chaos resource、停止后续场景并收集现场。 - -按推荐顺序运行六个普通场景,并在首个失败后停止: - -```bash -for scenario in \ - io-eio \ - pod-kill-one \ - network-partition-one \ - io-read-mistake \ - disk-full \ - warp-under-chaos -do - RUSTFS_FAULT_TEST_SCENARIO="$scenario" \ - RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/$scenario" \ - make fault-test || break -done -``` - -`warp-under-chaos` 执行前验证: - -```bash -warp --version -``` - -Warp 性能数据不参与 correctness verdict。 - -### 8. `dm-flakey` 专用操作 - -#### 8.1 不需要重装集群 - -如果前六个场景已经执行,只需: - -1. 保留 Kubernetes、Operator、Chaos Mesh 和 Rust 工具链。 -2. 停止其他 fault-test 进程。 -3. 为四个测试 Pod 准备四个专用静态 Local PV。 -4. 其中一个 PV 必须由 Device Mapper 设备提供。 -5. 
使用新的静态 StorageClass 运行 `dm-flakey`。 - -runner 会 reset fault-test Tenant/PVC,但不会创建主机块设备、静态 PV 或 StorageClass。 - -#### 8.2 允许 privileged helper - -如果 fault-test namespace 已存在: - -```bash -kubectl label namespace rustfs-fault-test \ - pod-security.kubernetes.io/enforce=privileged \ - --overwrite -``` - -如果要在第一次运行前预创建 namespace: - -```bash -kubectl create namespace rustfs-fault-test -kubectl label namespace rustfs-fault-test \ - app.kubernetes.io/managed-by=rustfs-operator-fault-test \ - pod-security.kubernetes.io/enforce=privileged -kubectl annotate namespace rustfs-fault-test \ - rustfs.com/fault-test-tenant=fault-test-tenant -``` - -#### 8.3 准备四个专用卷 - -推荐使用四个真实专用测试块设备。loop 文件仅适用于实验室环境。每个 backing filesystem 建议至少 `90Gi`,静态 PV capacity 固定为 `80Gi`。 - -目标 DM 节点的实验室 loop 示例;使用真实专用块设备时跳过 `truncate` 和 `losetup`: - -```bash -export LAB=/data/rustfs/rustfs-fault-lab -export DM_NAME=rustfs-fault-dm - -mkdir -p "$LAB/volume" -truncate -s 90G "$LAB/disk.img" -BACKING=$(losetup --find --show "$LAB/disk.img") -SECTORS=$(blockdev --getsz "$BACKING") -dmsetup create "$DM_NAME" --table "0 $SECTORS linear $BACKING 0" -mkfs.ext4 -F "/dev/mapper/$DM_NAME" -mount "/dev/mapper/$DM_NAME" "$LAB/volume" -``` - -其他三个节点把各自专用块设备直接格式化并挂载到同一路径: - -```bash -mkdir -p /data/rustfs/rustfs-fault-lab/volume -mkfs.ext4 -F -mount /data/rustfs/rustfs-fault-lab/volume -``` - -不得格式化或挂载现有 RustFS 数据盘。 - -#### 8.4 创建静态 StorageClass 和 Local PV - -StorageClass: - -```yaml -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: rustfs-fault-dm - labels: - app.kubernetes.io/managed-by: rustfs-operator-fault-test -provisioner: kubernetes.io/no-provisioner -volumeBindingMode: WaitForFirstConsumer -reclaimPolicy: Retain -``` - -为四个节点分别创建一个 PV。每个 PV 使用唯一名称和对应 node affinity: - -```yaml -apiVersion: v1 -kind: PersistentVolume -metadata: - name: rustfs-fault-dm- - labels: - app.kubernetes.io/managed-by: rustfs-operator-fault-test -spec: - capacity: - storage: 80Gi - volumeMode: Filesystem - accessModes: - - 
ReadWriteOnce - persistentVolumeReclaimPolicy: Retain - storageClassName: rustfs-fault-dm - local: - path: /data/rustfs/rustfs-fault-lab/volume - nodeAffinity: - required: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: - - -``` - -验证四个 PV 均为 `Available`: - -```bash -kubectl get storageclass rustfs-fault-dm -kubectl get pv -l app.kubernetes.io/managed-by=rustfs-operator-fault-test -o wide -``` - -#### 8.5 运行 `dm-flakey` - -目标节点名必须是 Kubernetes `metadata.name`,挂载路径必须与目标 PV 的 `spec.local.path` 完全一致。 - -先在目标节点执行 `blockdev --getsz `,再把结果设置为测试机上的 `SECTORS`。 - -```bash -export DM_NODE= -export DM_MOUNT_PATH=/data/rustfs/rustfs-fault-lab/volume -export BACKING_DEVICE= -export SECTORS= - -RUSTFS_FAULT_TEST_SCENARIO=dm-flakey \ -RUSTFS_FAULT_TEST_STORAGE_CLASS=rustfs-fault-dm \ -RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" \ -RUSTFS_FAULT_TEST_DM_NAME=rustfs-fault-dm \ -RUSTFS_FAULT_TEST_DM_NODE="$DM_NODE" \ -RUSTFS_FAULT_TEST_DM_MOUNT_PATH="$DM_MOUNT_PATH" \ -RUSTFS_FAULT_TEST_DM_FAULT_TABLE="0 $SECTORS flakey $BACKING_DEVICE 0 1 15" \ -RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/dm-flakey" \ -make fault-test -``` - -该 table 表示底层设备正常 1 秒、故障 15 秒并循环。helper 会验证 Pod、PVC、PV、节点、Local PV 路径和 Device Mapper mount source 的关系,然后加载 fault table。恢复时使用注入前的 linear table。 - -#### 8.6 DM 紧急恢复 - -如果测试进程异常退出且设备仍为 flakey,立即在目标节点执行: - -```bash -dmsetup suspend --noflush rustfs-fault-dm -dmsetup load rustfs-fault-dm \ - --table "0 $SECTORS linear $BACKING_DEVICE 0" -dmsetup resume --noudevsync rustfs-fault-dm -dmsetup table rustfs-fault-dm -``` - -确认 table 已恢复为 `linear` 后再删除测试 Pod、PVC 或卸载文件系统。 - -### 9. 
验收标准 - -每个场景必须满足: - -- `make fault-test` 退出码为 0。 -- `fault-evidence.json` 中 `injected=true`、`active_during_workload=true`、`recovered=true`。 -- `checker-report.json` 中 `committed_puts=4000`。 -- `missing_committed_objects` 为空。 -- `hash_mismatches` 为空。 -- `successful_corrupted_reads` 为空。 -- `list_warnings` 为空。 -- fault-test Tenant 恢复 Ready。 - -`RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` 默认是 `false`。因此客户端没有失败或超时可以接受,只要故障后端明确证明故障已选中并注入。 - -主要 artifacts: - -```text -history.jsonl -workload-plan.json -workload-summary.json -checker-report.json -fault-evidence.json -chaos-manifest.yaml -dm-flakey-active.json -Kubernetes logs/events/snapshots -``` - -### 10. 清理 - -先确认 namespace 所有权: - -```bash -kubectl get namespace rustfs-fault-test --show-labels -kubectl get namespace rustfs-fault-test \ - -o jsonpath='{.metadata.annotations.rustfs\.com/fault-test-tenant}{"\n"}' -``` - -清理测试资源: - -```bash -kubectl delete namespace rustfs-fault-test --wait=true -kubectl delete iochaos,podchaos,networkchaos \ - -n chaos-mesh \ - -l app.kubernetes.io/managed-by=rustfs-operator-fault-test \ - --ignore-not-found -``` - -动态 PV 是否自动删除取决于 StorageClass reclaim policy。`Retain` PV 必须由运维手动删除并清理后端数据。 - -DM 场景额外清理: - -1. 删除 fault-test namespace,等待 Pod/PVC 消失。 -2. 删除四个静态 PV 和 `rustfs-fault-dm` StorageClass。 -3. 在目标节点确认 DM table 为 `linear`。 -4. 卸载四个实验卷。 -5. 删除 DM mapping。 -6. detach loop 设备并删除专用实验目录。 - -示例: - -```bash -umount /data/rustfs/rustfs-fault-lab/volume -dmsetup remove rustfs-fault-dm -losetup -d # 仅 loop 实验环境 -rm -rf /data/rustfs/rustfs-fault-lab -``` - -最后确认: - -```bash -kubectl get nodes -kubectl -n rustfs-system get deployment -kubectl -n chaos-mesh get deployment,daemonset -kubectl get pv -kubectl get iochaos,podchaos,networkchaos -A -``` - -### 11. 
常用环境变量 - -| 变量 | 默认值 | 说明 | -| --- | --- | --- | -| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | 必填 | 常规动态 StorageClass 或 DM 专用静态 StorageClass。 | -| `RUSTFS_FAULT_TEST_DESTRUCTIVE` | 由 Make 设置 | destructive opt-in,不应手动绕过 Make 入口。 | -| `RUSTFS_FAULT_TEST_SCENARIO` | `io-eio` | 选择七个场景之一。 | -| `RUSTFS_FAULT_TEST_NAMESPACE` | `rustfs-fault-test` | 专用测试 namespace。 | -| `RUSTFS_FAULT_TEST_TENANT` | `fault-test-tenant` | 专用测试 Tenant。 | -| `RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE` | `rustfs-system` | Operator namespace。 | -| `RUSTFS_FAULT_TEST_SERVER_IMAGE` | `rustfs/rustfs:latest` | 建议设置为已验证 digest。 | -| `RUSTFS_FAULT_TEST_ARTIFACTS` | `target/fault-tests/artifacts` | 当前场景 artifacts 目录。 | -| `RUSTFS_FAULT_TEST_TIMEOUT_SECONDS` | `300` | Kubernetes/Tenant 等待超时。 | -| `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `900` | Chaos 故障持续时间。 | -| `RUSTFS_FAULT_TEST_PERCENT` | `20`;`disk-full` 为 `100` | 支持百分比的故障注入比例。 | -| `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `4000` | workload 对象数量。 | -| `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `50` | prefill、故障 workload、恢复重写和 checker 的最大并发。 | -| `RUSTFS_FAULT_TEST_SEED` | 随机生成 | 可选 u64 seed;设置后可重放尺寸顺序和对象内容。 | -| `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | 单个 S3 操作超时。 | -| `RUSTFS_FAULT_TEST_USE_CLUSTER_IP` | `false` | 从可访问 ClusterIP 的节点/Pod 运行时直连本测试 Tenant Service,避免 port-forward 成为高并发瓶颈。 | -| `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否强制要求客户端看到故障。 | -| `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos Mesh resource namespace。 | -| `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | Warp mixed workload 时间。 | -| `RUSTFS_FAULT_TEST_DM_NAME` | 无 | DM mapping 名称,DM 场景必填。 | -| `RUSTFS_FAULT_TEST_DM_NODE` | 无 | DM 目标 Kubernetes 节点,DM 场景必填。 | -| `RUSTFS_FAULT_TEST_DM_MOUNT_PATH` | 无 | DM Local PV 路径,DM 场景必填。 | -| `RUSTFS_FAULT_TEST_DM_FAULT_TABLE` | 无 | 注入时的 dmsetup table,DM 场景必填。 | -| `RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE` | 注入前 table | 可选恢复 table。 | -| `RUSTFS_FAULT_TEST_DM_HELPER_IMAGE` | `rancher/mirrored-library-busybox:1.37.0` | 
privileged helper image。 | - -## English Operations Manual - -### 1. Purpose and scope - -This manual describes how to run RustFS fault-injection tests in a dedicated, real Kubernetes test cluster. The target is the test Tenant created by the RustFS Operator, not an existing application Tenant or the production Operator control plane. - -Each `make fault-test` invocation runs exactly one destructive test selected by `RUSTFS_FAULT_TEST_SCENARIO`. Run all seven scenarios serially. - -The suite has two operational groups: - -1. Six Kubernetes-native scenarios using Chaos Mesh and a dynamic StorageClass. -2. One `dm-flakey` scenario using dedicated static Local PVs, Linux Device Mapper, and a privileged helper Pod. - -Running `dm-flakey` does not require reinstalling Kubernetes, the RustFS Operator, Chaos Mesh, or the Rust toolchain. Only the fault-test Tenant storage fixture must be replaced with dedicated static Local PVs. - -### 2. Safety requirements - -- Run only in a dedicated test cluster; never use a production or shared development cluster. -- The current context must not start with `kind-`. -- Never point the configured namespace or Tenant at existing application resources. -- Use a dynamically provisioned StorageClass for regular scenarios. -- Use a dedicated `kubernetes.io/no-provisioner` StorageClass and dedicated devices or loop files for `dm-flakey`. -- Never reuse an existing RustFS data directory for a DM Local PV. -- Run scenarios serially because the default namespace, Tenant, and local port `19000` are shared. -- On failure, preserve artifacts before removing the fault and test resources. - -The default test resources are: - -```text -namespace: rustfs-fault-test -tenant: fault-test-tenant -``` - -An existing namespace must contain both ownership markers: - -```text -app.kubernetes.io/managed-by=rustfs-operator-fault-test -rustfs.com/fault-test-tenant=fault-test-tenant -``` - -The runner never claims an unmarked namespace. - -### 3. 
Scenario catalog - -| Scenario | Backend | Isolation | Main validation | -| --- | --- | --- | --- | -| `io-eio` | Chaos Mesh IOChaos | Fresh Tenant/PVC | Committed objects survive EIO on one data volume. | -| `pod-kill-one` | Chaos Mesh PodChaos | Reusable Ready Tenant | A killed Pod is replaced without losing committed objects. | -| `network-partition-one` | Chaos Mesh NetworkChaos | Reusable Ready Tenant | Objects remain correct after one Pod is partitioned from its peers. | -| `io-read-mistake` | Chaos Mesh IOChaos | Fresh Tenant/PVC | A successful GET never returns altered bytes. | -| `disk-full` | Chaos Mesh IOChaos | Fresh Tenant/PVC | Committed objects survive injected ENOSPC write failures. | -| `warp-under-chaos` | Warp + IOChaos | Fresh Tenant/PVC | Performance is reported separately from correctness. | -| `dm-flakey` | Linux Device Mapper | Dedicated static Local PV | Objects remain correct after intermittent block-device EIO. | - -The default workload commits or reconciles 4000 objects with concurrency 50. The size plan is generated with fixed weights and then deterministically shuffled by the seed: 4KiB 85% (3400 objects), 16KiB 10% (400), 8MiB 4% (160), and 16MiB 1% (40). The logical payload per scenario is 2,033,745,920 bytes, approximately 1.89GiB. - -Object content is deterministically generated from the same seed and object index by `splitmix64-v1`. `workload-plan.json` records the seed, generator version, concurrency, size distribution, and total payload. `history.jsonl` records each key's size, SHA-256, and outcome. Set `RUSTFS_FAULT_TEST_SEED=` to replay the same size order and object content. - -A lack of client-visible errors does not mean that injection failed. Backend state and `fault-evidence.json` are the authoritative fault evidence. - -`RUSTFS_FAULT_TEST_PERCENT=20` is an injection probability for matching I/O operations, not a fixed selection of 20 percent of the objects. - -### 4. 
Runner requirements - -The runner host needs: - -- `kubectl` -- Rust stable and Cargo with Rust edition 2024 support -- GNU Make -- A kubeconfig that can reach the target Kubernetes API -- `warp` v1.3.1 for `warp-under-chaos` -- Sufficient space for `target/fault-tests` artifacts - -Cluster-admin is recommended in a dedicated test cluster. At minimum, the account needs CRUD access to the fault-test Kubernetes resources and Chaos CRs, Pod logs/events/exec access, and permission to create the privileged DM helper Pod. - -Validate the code and tools: - -```bash -rustc --version -cargo --version -kubectl version --client -make e2e-check -``` - -### 5. Kubernetes and RustFS preflight - -```bash -kubectl config use-context -kubectl config current-context -kubectl get nodes -kubectl get crd tenants.rustfs.com -kubectl -n rustfs-system get deployment -kubectl get storageclass -``` - -Regular scenarios require four schedulable nodes and four `80Gi` RWO PVCs. The fault Tenant uses required Pod anti-affinity to spread the four RustFS Pods across distinct `kubernetes.io/hostname` values. The selected StorageClass must support dynamic provisioning and must not use `kubernetes.io/no-provisioner`. Each node that hosts a fault-test PVC should have at least 100Gi available; verify capacity against the actual StorageClass topology before running. - -Do not trust the capacity displayed on a PVC alone. hostPath/local-path provisioners commonly do not enforce capacity. Inspect the actual node path and its backing filesystem: - -```bash -kubectl -n kube-system get configmap local-path-config -o yaml -kubectl get pv -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.hostPath.path}{"\n"}{end}' -df -h -``` - -The K3s default `/var/lib/rancher/k3s/storage` is often on a smaller system disk. If that filesystem has less than 100Gi available, do not use it for this suite. 
Deploy a dedicated dynamic provisioner/StorageClass that places new fault-test PVCs under an isolated data-disk path such as `/data/rustfs/rustfs-fault-local-path`. Do not modify or migrate existing application PVCs. - -Pin a validated RustFS image digest instead of using `latest`: - -```bash -export RUSTFS_IMAGE='docker.io/rustfs/rustfs@sha256:' -``` - -### 6. Install and validate Chaos Mesh - -The following example uses the validated Chaos Mesh v2.8.3 release: - -```bash -helm repo add chaos-mesh https://charts.chaos-mesh.org -helm repo update - -helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh \ - -n chaos-mesh --create-namespace \ - --version 2.8.3 \ - --set chaosDaemon.runtime=containerd \ - --set chaosDaemon.socketPath=/run/containerd/containerd.sock \ - --set dashboard.create=false \ - --wait --timeout 10m -``` - -K3s uses `/run/k3s/containerd/containerd.sock`. Adjust the runtime and socket path for other distributions. - -```bash -kubectl -n chaos-mesh get deployment,daemonset -kubectl get crd \ - iochaos.chaos-mesh.org \ - podchaos.chaos-mesh.org \ - networkchaos.chaos-mesh.org -``` - -All controller-manager replicas and all target-node chaos-daemon Pods must be Ready. - -### 7. Run the regular scenarios - -Set common parameters: - -```bash -export RUSTFS_FAULT_TEST_STORAGE_CLASS= -export RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" -export RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE=rustfs-system -export RUSTFS_FAULT_TEST_NAMESPACE=rustfs-fault-test -export RUSTFS_FAULT_TEST_TENANT=fault-test-tenant -export RUSTFS_FAULT_TEST_CHAOS_NAMESPACE=chaos-mesh -export RUN_ROOT="target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" -``` - -If the runner is on a cluster node or in a Pod and can reach Service ClusterIPs, set -`RUSTFS_FAULT_TEST_USE_CLUSTER_IP=1`. The 50-concurrency workload should not traverse -`kubectl port-forward`; port-forward is intended only for low-concurrency control and debugging traffic from outside the cluster. 
- -Run one scenario: - -```bash -RUSTFS_FAULT_TEST_SCENARIO=io-eio \ -RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/io-eio" \ -make fault-test -``` - -`make fault-test` sets `RUSTFS_FAULT_TEST_DESTRUCTIVE=1` internally. Do not bypass the Make entry point to invoke the destructive test directly. - -Continuously monitor nodes, any existing application Tenant, and the fault-test Tenant. If a non-target resource becomes non-Ready, remove the current managed Chaos resource, stop subsequent scenarios, and collect evidence. - -Run all six regular scenarios in the recommended order and stop after the first failure: - -```bash -for scenario in \ - io-eio \ - pod-kill-one \ - network-partition-one \ - io-read-mistake \ - disk-full \ - warp-under-chaos -do - RUSTFS_FAULT_TEST_SCENARIO="$scenario" \ - RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/$scenario" \ - make fault-test || break -done -``` - -Run `warp --version` before `warp-under-chaos`. Warp output is performance evidence and does not determine the correctness verdict. - -### 8. Dedicated `dm-flakey` procedure - -#### 8.1 No cluster reinstall is required - -After running the six regular scenarios, keep Kubernetes, the Operator, Chaos Mesh, and the Rust toolchain. Stop other fault-test processes, prepare four dedicated static Local PVs, put one PV behind Device Mapper, and run the scenario with the static StorageClass. - -The runner resets the fault-test Tenant and PVCs, but it does not create host block devices, static PVs, or the StorageClass. 
- -#### 8.2 Allow the privileged helper - -For an existing namespace: - -```bash -kubectl label namespace rustfs-fault-test \ - pod-security.kubernetes.io/enforce=privileged \ - --overwrite -``` - -To pre-create the namespace before the first run: - -```bash -kubectl create namespace rustfs-fault-test -kubectl label namespace rustfs-fault-test \ - app.kubernetes.io/managed-by=rustfs-operator-fault-test \ - pod-security.kubernetes.io/enforce=privileged -kubectl annotate namespace rustfs-fault-test \ - rustfs.com/fault-test-tenant=fault-test-tenant -``` - -#### 8.3 Prepare four dedicated volumes - -Prefer four dedicated test block devices. Loop files are acceptable only in a lab. Each backing filesystem should be at least `90Gi`, while static PV capacity is fixed at `80Gi`. - -Lab loop example on the target DM node; skip `truncate` and `losetup` when using a real dedicated block device: - -```bash -export LAB=/data/rustfs/rustfs-fault-lab -export DM_NAME=rustfs-fault-dm - -mkdir -p "$LAB/volume" -truncate -s 90G "$LAB/disk.img" -BACKING=$(losetup --find --show "$LAB/disk.img") -SECTORS=$(blockdev --getsz "$BACKING") -dmsetup create "$DM_NAME" --table "0 $SECTORS linear $BACKING 0" -mkfs.ext4 -F "/dev/mapper/$DM_NAME" -mount "/dev/mapper/$DM_NAME" "$LAB/volume" -``` - -On each of the other three nodes, format and mount its dedicated device directly at `/data/rustfs/rustfs-fault-lab/volume`. Never format an existing RustFS data device. 
- -#### 8.4 Create the static StorageClass and Local PVs - -```yaml -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: rustfs-fault-dm - labels: - app.kubernetes.io/managed-by: rustfs-operator-fault-test -provisioner: kubernetes.io/no-provisioner -volumeBindingMode: WaitForFirstConsumer -reclaimPolicy: Retain -``` - -Create four copies of this PV template, with a unique name and the corresponding node affinity: - -```yaml -apiVersion: v1 -kind: PersistentVolume -metadata: - name: rustfs-fault-dm- - labels: - app.kubernetes.io/managed-by: rustfs-operator-fault-test -spec: - capacity: - storage: 80Gi - volumeMode: Filesystem - accessModes: [ReadWriteOnce] - persistentVolumeReclaimPolicy: Retain - storageClassName: rustfs-fault-dm - local: - path: /data/rustfs/rustfs-fault-lab/volume - nodeAffinity: - required: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: [] -``` - -Verify that all four PVs are `Available` before running the test. - -#### 8.5 Run `dm-flakey` - -The configured node must match Kubernetes `metadata.name`, and the mount path must exactly match the target PV `spec.local.path`. - -Run `blockdev --getsz ` on the target node first, then set that value as `SECTORS` on the runner host. - -```bash -export DM_NODE= -export DM_MOUNT_PATH=/data/rustfs/rustfs-fault-lab/volume -export BACKING_DEVICE= -export SECTORS= - -RUSTFS_FAULT_TEST_SCENARIO=dm-flakey \ -RUSTFS_FAULT_TEST_STORAGE_CLASS=rustfs-fault-dm \ -RUSTFS_FAULT_TEST_SERVER_IMAGE="$RUSTFS_IMAGE" \ -RUSTFS_FAULT_TEST_DM_NAME=rustfs-fault-dm \ -RUSTFS_FAULT_TEST_DM_NODE="$DM_NODE" \ -RUSTFS_FAULT_TEST_DM_MOUNT_PATH="$DM_MOUNT_PATH" \ -RUSTFS_FAULT_TEST_DM_FAULT_TABLE="0 $SECTORS flakey $BACKING_DEVICE 0 1 15" \ -RUSTFS_FAULT_TEST_ARTIFACTS="$RUN_ROOT/dm-flakey" \ -make fault-test -``` - -The fault table alternates between one second up and fifteen seconds down. 
The helper verifies the Pod-to-PVC-to-PV-to-node-to-mount relationship before loading the table, and restores the original linear table afterward. - -#### 8.6 Emergency DM recovery - -If the test process exits while the target is still flakey, restore it immediately on the target node: - -```bash -dmsetup suspend --noflush rustfs-fault-dm -dmsetup load rustfs-fault-dm \ - --table "0 $SECTORS linear $BACKING_DEVICE 0" -dmsetup resume --noudevsync rustfs-fault-dm -dmsetup table rustfs-fault-dm -``` - -Confirm that the table is `linear` before deleting Pods/PVCs or unmounting the filesystem. - -### 9. Acceptance criteria - -For every scenario: - -- `make fault-test` exits with status 0. -- `fault-evidence.json` reports `injected=true`, `active_during_workload=true`, and `recovered=true`. -- `checker-report.json` reports `committed_puts=4000`. -- `missing_committed_objects`, `hash_mismatches`, `successful_corrupted_reads`, and `list_warnings` are empty. -- The fault-test Tenant returns to Ready. - -`RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` defaults to `false`. No client-visible failure is acceptable when the backend evidence proves that the fault was selected and injected. - -Key artifacts are `workload-plan.json`, `history.jsonl`, `workload-summary.json`, `checker-report.json`, `fault-evidence.json`, Chaos manifests/status, DM snapshots, and Kubernetes logs/events. - -### 10. Cleanup - -Verify namespace ownership before deletion, then remove the test namespace and any managed Chaos resources: - -```bash -kubectl get namespace rustfs-fault-test --show-labels -kubectl delete namespace rustfs-fault-test --wait=true -kubectl delete iochaos,podchaos,networkchaos \ - -n chaos-mesh \ - -l app.kubernetes.io/managed-by=rustfs-operator-fault-test \ - --ignore-not-found -``` - -Dynamic PV deletion depends on the StorageClass reclaim policy. Retained PVs and backend data require manual cleanup. 
- -For `dm-flakey`, delete the namespace first, then the four static PVs and StorageClass. Confirm a linear DM table, unmount all four lab filesystems, remove the DM mapping, detach any loop devices, and delete only the dedicated lab directory. - -```bash -umount /data/rustfs/rustfs-fault-lab/volume -dmsetup remove rustfs-fault-dm -losetup -d # lab loop setup only -rm -rf /data/rustfs/rustfs-fault-lab -``` - -Finally verify nodes, the Operator, Chaos Mesh, PVs, and remaining Chaos resources. - -### 11. Environment variables - -| Variable | Default | Purpose | -| --- | --- | --- | -| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | required | Dynamic class for regular scenarios or dedicated static class for DM. | -| `RUSTFS_FAULT_TEST_DESTRUCTIVE` | set by Make | Destructive opt-in; do not bypass the Make entry point. | -| `RUSTFS_FAULT_TEST_SCENARIO` | `io-eio` | Selects one of the seven scenarios. | -| `RUSTFS_FAULT_TEST_NAMESPACE` | `rustfs-fault-test` | Dedicated test namespace. | -| `RUSTFS_FAULT_TEST_TENANT` | `fault-test-tenant` | Dedicated test Tenant. | -| `RUSTFS_FAULT_TEST_OPERATOR_NAMESPACE` | `rustfs-system` | Operator namespace. | -| `RUSTFS_FAULT_TEST_SERVER_IMAGE` | `rustfs/rustfs:latest` | Pin a validated digest in real runs. | -| `RUSTFS_FAULT_TEST_ARTIFACTS` | `target/fault-tests/artifacts` | Current scenario artifact directory. | -| `RUSTFS_FAULT_TEST_TIMEOUT_SECONDS` | `300` | Kubernetes/Tenant wait timeout. | -| `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `900` | Chaos duration. | -| `RUSTFS_FAULT_TEST_PERCENT` | `20`; `100` for `disk-full` | Injection percentage where supported. | -| `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `4000` | Workload object count. | -| `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `50` | Maximum concurrency for prefill, fault workload, recovery writes, and checker reads. | -| `RUSTFS_FAULT_TEST_SEED` | generated randomly | Optional u64 seed for replaying the size order and object content. 
| -| `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | S3 operation timeout. | -| `RUSTFS_FAULT_TEST_USE_CLUSTER_IP` | `false` | Directly use this test Tenant's Service from a node/Pod that can reach ClusterIP, avoiding port-forward as a high-concurrency bottleneck. | -| `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | Require client-visible disruption when enabled. | -| `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Namespace for Chaos resources. | -| `RUSTFS_FAULT_TEST_WARP_DURATION_SECONDS` | `60` | Warp mixed workload duration. | -| `RUSTFS_FAULT_TEST_DM_NAME` | unset | DM mapping name; required for DM. | -| `RUSTFS_FAULT_TEST_DM_NODE` | unset | Target Kubernetes node; required for DM. | -| `RUSTFS_FAULT_TEST_DM_MOUNT_PATH` | unset | Target Local PV path; required for DM. | -| `RUSTFS_FAULT_TEST_DM_FAULT_TABLE` | unset | Fault dmsetup table; required for DM. | -| `RUSTFS_FAULT_TEST_DM_RECOVERY_TABLE` | original table | Optional explicit recovery table. | -| `RUSTFS_FAULT_TEST_DM_HELPER_IMAGE` | `rancher/mirrored-library-busybox:1.37.0` | Privileged helper image. 
| diff --git a/Makefile b/Makefile index 6fda94e..35b9785 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ .PHONY: pre-commit fmt fmt-check clippy test build help .PHONY: docker-build-operator docker-build-console-web docker-build-all .PHONY: console-lint console-build console-fmt console-fmt-check -.PHONY: e2e-check e2e-live-create .e2e-live-install-cert-manager e2e-live-run e2e-live-update e2e-live-delete fault-test +.PHONY: e2e-check e2e-live-create .e2e-live-install-cert-manager e2e-live-run e2e-live-update e2e-live-delete # Default target IMAGE_REPO ?= rustfs/operator @@ -43,7 +43,6 @@ help: @echo " make e2e-check - Check Rust-native e2e harness (fmt + test + clippy)" @echo " make e2e-live-create - Clean dedicated storage, recreate live Kind environment, install cert-manager, and load e2e image" @echo " make e2e-live-run - Run all non-destructive live suites in the existing live environment" - @echo " make fault-test - Run destructive fault tests against the current real Kubernetes context" @echo " make e2e-live-update - Rebuild image and update the live environment (load + rollout)" @echo " make e2e-live-delete - Delete live Kind environment and clean dedicated storage" @@ -96,8 +95,6 @@ CERT_MANAGER_VERSION ?= v1.16.2 CERT_MANAGER_MANIFEST_URL ?= https://github.com/cert-manager/cert-manager/releases/download/$(CERT_MANAGER_VERSION)/cert-manager.yaml CERT_MANAGER_ROLLOUT_TIMEOUT ?= 180s E2E_LIVE_ENV ?= RUSTFS_E2E_LIVE=1 RUSTFS_E2E_CERT_MANAGER_VERSION=$(CERT_MANAGER_VERSION) -FAULT_TEST_MANIFEST ?= e2e/Cargo.toml -FAULT_TEST_THREADS ?= 1 # Rust-native e2e harness checks (non-live; ignored live tests remain opt-in) e2e-check: @@ -132,11 +129,6 @@ e2e-live-run: RUSTFS_E2E_LIVE=1 cargo test --manifest-path $(E2E_MANIFEST) --test cert_manager_tls -- --ignored --test-threads=$(E2E_TEST_THREADS) --nocapture @echo "configured live e2e suites passed." 
-fault-test: - @test -n "$(RUSTFS_FAULT_TEST_STORAGE_CLASS)" || (echo "RUSTFS_FAULT_TEST_STORAGE_CLASS is required" && exit 1) - @echo "running destructive RustFS fault tests against current Kubernetes context: $$(kubectl config current-context)" - RUSTFS_FAULT_TEST_DESTRUCTIVE=1 cargo test --manifest-path $(FAULT_TEST_MANIFEST) --test faults -- --ignored --test-threads=$(FAULT_TEST_THREADS) --nocapture - e2e-live-update: docker build --network host -t rustfs/operator:e2e . docker build --network host -t rustfs/console-web:e2e -f console-web/Dockerfile console-web diff --git a/README.md b/README.md index 93baa12..1dbab1f 100755 --- a/README.md +++ b/README.md @@ -73,25 +73,11 @@ From the repo root: | `make e2e-check` | Validate the e2e harness without creating a live cluster. | | `make e2e-live-create` | Build e2e images, recreate the dedicated Kind cluster, install cert-manager, and load images. | | `make e2e-live-run` | Deploy the dev control plane and run all non-destructive live suites. | -| `make fault-test` | Run destructive RustFS fault tests against the current real Kubernetes context. | | `make e2e-live-update` | Rebuild images, reload them into Kind, and roll out control-plane deployments. | | `make e2e-live-delete` | Delete the dedicated Kind cluster and its local storage. | CI (`.github/workflows/ci.yml`) runs Rust tests (including `nextest`), `cargo fmt --check`, `clippy`, the Rust-native e2e harness checks, and `console-web` lint/build/format checks. Use **`make pre-commit`** before opening a PR so local validation stays aligned. -### Run fault tests on a real Kubernetes cluster - -Fault tests are separate from the Kind e2e workflow. 
They use the current kubectl context, reject `kind-*` contexts, and run one explicitly selected destructive scenario at a time: - -```bash -kubectl config use-context -RUSTFS_FAULT_TEST_SCENARIO=io-eio \ -RUSTFS_FAULT_TEST_STORAGE_CLASS= \ -make fault-test -``` - -See the bilingual [Fault Injection Operations Manual](FAULT_INJECTION_TEST_PLAN.md) for cluster preparation, Chaos Mesh installation, all seven scenarios, the dedicated `dm-flakey` Local PV procedure, acceptance criteria, emergency recovery, and cleanup. - Contribution workflow, commit style, and PR expectations: [`CONTRIBUTING.md`](CONTRIBUTING.md). ### Run a local controller against e2e diff --git a/e2e/FAULT_TESTING.md b/e2e/FAULT_TESTING.md new file mode 100644 index 0000000..e5e9383 --- /dev/null +++ b/e2e/FAULT_TESTING.md @@ -0,0 +1,420 @@ + + +# RustFS Fault-Test Operations / RustFS 故障测试操作手册 + +本手册是 Agent 和开发人员使用 `e2e` package 故障测试工具的唯一操作入口。它说明执行步骤、步骤原因、安全边界、验收证据和清理方式。 + +This manual is the single operational entry point for agents and developers using the fault-test tooling in the `e2e` package. It explains the steps, reasons, safety boundaries, evidence, and cleanup. + +## 1. Purpose And Safety / 目的与安全边界 + +故障测试只允许在专用真实 Kubernetes 测试集群执行。测试会创建并删除专用 Tenant、PVC、Pod、Service、StatefulSet 和 Chaos resources。禁止把测试 namespace、Tenant、StorageClass 或 DM 路径指向现有业务资源。 + +Run fault tests only in a dedicated real Kubernetes test cluster. The suite creates and removes a dedicated Tenant, PVCs, Pods, Services, StatefulSets, and Chaos resources. Never point its namespace, Tenant, StorageClass, or DM path at application resources. 
+ +固定测试所有权: + +```text +namespace: rustfs-fault-test +tenant: fault-test-tenant +manager: app.kubernetes.io/managed-by=rustfs-operator-fault-test +annotation: rustfs.com/fault-test-tenant=fault-test-tenant +``` + +安全规则 / Safety rules: + +- 当前 context 必须与 `RUSTFS_FAULT_TEST_EXPECTED_CONTEXT` 完全一致,并且不能是 `kind-*`。 +- 四个 RustFS 测试 Pod 必须调度到至少四个 Ready 节点。 +- 常规场景使用独立动态 StorageClass;`dm-flakey` 使用独立静态 Local PV StorageClass。 +- Make 编排器会监控所有节点和运行前已有的非 fault Tenant;任一异常会撤销 managed Chaos 并停止测试。 +- `fault-cleanup` 只删除带正确所有权标记的 namespace 和 Chaos,不删除外部 StorageClass、PV 或主机设备。 +- The current context must exactly match `RUSTFS_FAULT_TEST_EXPECTED_CONTEXT` and must not be `kind-*`. +- The four RustFS test Pods require at least four Ready schedulable nodes. +- Regular scenarios use a dedicated dynamic StorageClass; `dm-flakey` uses a dedicated static Local PV StorageClass. +- The Make runner monitors every node and every pre-existing non-fault Tenant. It removes managed Chaos and stops on degradation. +- `fault-cleanup` removes only the owned namespace and managed Chaos. It never removes external StorageClasses, PVs, or host devices. + +## 2. Workload Profile / 工作负载 + +每个场景使用 seed 确定性生成对象内容和尺寸顺序。未设置 `RUSTFS_FAULT_TEST_SEED` 时自动生成 seed;所有重放信息写入 `workload-plan.json` 和 `history.jsonl`。 + +Each scenario deterministically generates object content and size order from a seed. A seed is generated when `RUSTFS_FAULT_TEST_SEED` is unset. Replay information is recorded in `workload-plan.json` and `history.jsonl`. 
+ +| Size | Weight | Objects | +| --- | ---: | ---: | +| 4KiB | 85% | 34,000 | +| 16KiB | 10% | 4,000 | +| 8MiB | 4% | 1,600 | +| 16MiB | 1% | 400 | + +```text +objects: 40,000 +concurrency: 100 +payload/scenario: 20,337,459,200 bytes (~18.94GiB) +PVCs: 4 × 100Gi +maximum fault TTL: 7,200 seconds +``` + +7,200 秒是故障资源的最大保护时间,不是固定等待时间。正常测试在 workload 完成后立即恢复故障。较长 TTL 防止 40,000 对象 workload 在完成前超过 Chaos duration。 + +The 7,200-second duration is a maximum fault-resource safety window, not a fixed wait. Successful runs recover immediately after the workload. The larger window prevents the 40,000-object workload from outliving Chaos. + +## 3. Package Commands / Package 命令 + +所有公共入口都位于 `e2e/Makefile`。从仓库根目录执行: + +All public entry points are in `e2e/Makefile`. Run them from the repository root: + +```bash +make -C e2e help +make -C e2e fault-check +make -C e2e fault-preflight SCENARIO=io-eio +make -C e2e fault-run SCENARIO=io-eio +make -C e2e fault-run-regular +make -C e2e fault-run-dm +make -C e2e fault-cleanup +``` + +| Target | Behavior / 行为 | +| --- | --- | +| `fault-check` | Rust fmt/test/clippy 和 Bash 语法检查;不访问集群。 / Rust fmt, tests, clippy, and Bash syntax; no cluster access. | +| `fault-preflight` | 校验 context、CRD、StorageClass、Chaos、节点、namespace 所有权和现有 Tenant。 / Validates context, CRDs, storage, Chaos, nodes, ownership, and existing Tenants. | +| `fault-run` | 运行一个场景,持续健康守护并验收 artifacts。 / Runs one guarded scenario and validates artifacts. | +| `fault-run-regular` | 串行运行六个常规场景,首败停止。 / Runs six regular scenarios serially and stops on first failure. | +| `fault-run-dm` | 使用预先准备的静态 PV 和 DM 设备运行 `dm-flakey`。 / Runs `dm-flakey` with pre-provisioned static PVs and DM storage. | +| `fault-cleanup` | 安全删除 owned namespace 和 managed Chaos。 / Safely removes the owned namespace and managed Chaos. | + +## 4. 
Cluster Preparation / 集群准备 + +### 4.1 Required Tools / 必需工具 + +```bash +rustc --version +cargo --version +kubectl version --client +jq --version +make -C e2e fault-check +``` + +`warp` v1.3.1 仅用于 `warp-under-chaos`。运行机必须能访问 Kubernetes API;如果设置 ClusterIP 直连,还必须能访问 Service ClusterIP。 + +`warp` v1.3.1 is required only for `warp-under-chaos`. The runner must reach the Kubernetes API and, when ClusterIP mode is enabled, Service ClusterIPs. + +### 4.2 Kubernetes And Storage / Kubernetes 与存储 + +```bash +kubectl config current-context +kubectl get nodes +kubectl get crd tenants.rustfs.com +kubectl get storageclass +kubectl get tenant -A +``` + +常规场景要求动态 StorageClass。每个承载测试 PVC 的节点应在实际 provisioner 路径上至少有 120Gi 可用空间。hostPath/local-path 的 PVC capacity 通常不执行真实配额,必须检查后端文件系统,而不能只看 `kubectl get pvc`。 + +Regular scenarios require a dynamic StorageClass. Every node that can host a test PVC should have at least 120Gi available on the actual provisioner filesystem. hostPath/local-path capacity is commonly not enforced, so inspect the backing filesystem instead of trusting only `kubectl get pvc`. + +```bash +kubectl -n kube-system get configmap local-path-config -o yaml +kubectl get pv -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.hostPath.path}{"\n"}{end}' +df -h +``` + +如果 K3s 默认 `/var/lib/rancher/k3s/storage` 位于小系统盘,应创建独立 provisioner/StorageClass,把 fault-test PVC 放到 `/data/rustfs/rustfs-fault-local-path` 等专用数据盘目录。不得修改现有业务 PVC 或默认 provisioner。 + +If K3s stores its default local-path data on a small system disk, create an independent provisioner and StorageClass backed by a dedicated data-disk path such as `/data/rustfs/rustfs-fault-local-path`. Do not modify existing application PVCs or the default provisioner. 
+ +### 4.3 Chaos Mesh / Chaos Mesh + +已验证版本为 Chaos Mesh v2.8.3: + +The validated version is Chaos Mesh v2.8.3: + +```bash +helm repo add chaos-mesh https://charts.chaos-mesh.org +helm repo update +helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh \ + -n chaos-mesh --create-namespace --version 2.8.3 \ + --set chaosDaemon.runtime=containerd \ + --set chaosDaemon.socketPath=/run/k3s/containerd/containerd.sock \ + --set dashboard.create=false \ + --wait --timeout 10m + +kubectl -n chaos-mesh get deployment,daemonset +kubectl get crd iochaos.chaos-mesh.org podchaos.chaos-mesh.org networkchaos.chaos-mesh.org +``` + +非 K3s 集群必须使用实际 container runtime socket。 + +Non-K3s clusters must use their actual container runtime socket. + +## 5. Regular Scenarios / 常规场景 + +先固定 context、动态 StorageClass 和 RustFS image digest。测试机位于集群节点或 Pod 内时使用 ClusterIP,避免 100 并发经过 `kubectl port-forward`。 + +Pin the context, dynamic StorageClass, and RustFS image digest. Use ClusterIP when the runner is on a cluster node or in a Pod so 100 concurrent requests do not traverse `kubectl port-forward`. 
+ +```bash +export RUSTFS_FAULT_TEST_EXPECTED_CONTEXT=default +export RUSTFS_FAULT_TEST_STORAGE_CLASS= +export RUSTFS_FAULT_TEST_SERVER_IMAGE='docker.io/rustfs/rustfs@sha256:' +export RUSTFS_FAULT_TEST_USE_CLUSTER_IP=1 +export RUSTFS_FAULT_TEST_RUN_ROOT="$PWD/e2e/target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" + +make -C e2e fault-preflight SCENARIO=io-eio +make -C e2e fault-run SCENARIO=io-eio +``` + +场景顺序 / Scenario order: + +```text +io-eio +pod-kill-one +network-partition-one +io-read-mistake +disk-full +warp-under-chaos +``` + +完整运行: + +Run all regular scenarios: + +```bash +make -C e2e fault-run-regular +``` + +分阶段验证时,可以先运行 `io-eio`,再通过 `RUSTFS_FAULT_TEST_SCENARIOS` 指定剩余场景: + +For staged validation, run `io-eio` first and then select the remaining scenarios with `RUSTFS_FAULT_TEST_SCENARIOS`: + +```bash +export RUSTFS_FAULT_TEST_SCENARIOS='pod-kill-one network-partition-one io-read-mistake disk-full warp-under-chaos' +make -C e2e fault-run-regular +unset RUSTFS_FAULT_TEST_SCENARIOS +``` + +测试可能持续数小时。不要并行运行场景。每个场景完成后编排脚本会校验 seed、尺寸分布、故障状态、40,000 committed PUT 和 checker verdict。 + +The suite can run for several hours. Do not run scenarios in parallel. After every scenario, the runner validates the seed, size distribution, fault state, 40,000 committed PUTs, and checker verdict. + +## 6. dm-flakey / dm-flakey + +`dm-flakey` 不需要重装 Kubernetes、Operator、Chaos Mesh 或 Rust。它只需要把 fault Tenant 的存储切换为四个专用静态 Local PV,其中一个 PV 由 Device Mapper 提供。 + +`dm-flakey` does not require reinstalling Kubernetes, the Operator, Chaos Mesh, or Rust. It only switches the fault Tenant to four dedicated static Local PVs, one backed by Device Mapper. + +### 6.1 Host Storage / 主机存储 + +真实专用块设备优先。loop 文件仅适用于实验室。每个 backing 至少 120Gi,并且路径必须只服务 fault-test。 + +Prefer dedicated block devices. Loop files are for lab use only. Each backing device must be at least 120Gi and serve only fault-test. 
+ +DM 节点示例 / DM-node example: + +```bash +export LAB=/data/rustfs/rustfs-fault-lab +export DM_NAME=rustfs-fault-dm +sudo mkdir -p "$LAB/volume" +sudo truncate -s 120G "$LAB/disk.img" +export BACKING=$(sudo losetup --find --show "$LAB/disk.img") +export SECTORS=$(sudo blockdev --getsz "$BACKING") +sudo dmsetup create "$DM_NAME" --table "0 $SECTORS linear $BACKING 0" +sudo mkfs.ext4 -F "/dev/mapper/$DM_NAME" +sudo mount "/dev/mapper/$DM_NAME" "$LAB/volume" +sudo chmod 0777 "$LAB/volume" +``` + +其他三个节点 / Other three nodes: + +```bash +export LAB=/data/rustfs/rustfs-fault-lab +sudo mkdir -p "$LAB/volume" +sudo truncate -s 120G "$LAB/disk.img" +export BACKING=$(sudo losetup --find --show "$LAB/disk.img") +sudo mkfs.ext4 -F "$BACKING" +sudo mount "$BACKING" "$LAB/volume" +sudo chmod 0777 "$LAB/volume" +``` + +### 6.2 Static StorageClass And PVs / 静态 StorageClass 与 PV + +创建 `kubernetes.io/no-provisioner` StorageClass,并为四个节点各创建一个 `100Gi` Local PV。每个 PV 的 node affinity 必须匹配实际节点;`local.path` 必须是 `/data/rustfs/rustfs-fault-lab/volume`。 + +Create a `kubernetes.io/no-provisioner` StorageClass and one `100Gi` Local PV per node. Each PV must use the matching node affinity and `/data/rustfs/rustfs-fault-lab/volume` as `local.path`. 
+ +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: rustfs-fault-dm +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Retain +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: rustfs-fault-dm-<node-name> + labels: + app.kubernetes.io/managed-by: rustfs-operator-fault-test +spec: + capacity: + storage: 100Gi + volumeMode: Filesystem + accessModes: [ReadWriteOnce] + persistentVolumeReclaimPolicy: Retain + storageClassName: rustfs-fault-dm + local: + path: /data/rustfs/rustfs-fault-lab/volume + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: [<node-name>] +``` + +验证四个 PV 为 `Available`: + +Verify all four PVs are `Available`: + +```bash +kubectl get storageclass rustfs-fault-dm +kubectl get pv -l app.kubernetes.io/managed-by=rustfs-operator-fault-test -o wide +``` + +helper Pod 需要 privileged Pod Security。复用常规场景创建的 namespace 时补充 label;如果 namespace 不存在,则预创建带完整所有权的 namespace: + +The helper Pod requires privileged Pod Security. 
Label the namespace left by regular scenarios, or pre-create an owned namespace when it does not exist: + +```bash +if kubectl get namespace rustfs-fault-test >/dev/null 2>&1; then + kubectl label namespace rustfs-fault-test \ + pod-security.kubernetes.io/enforce=privileged --overwrite +else + kubectl create namespace rustfs-fault-test + kubectl label namespace rustfs-fault-test \ + app.kubernetes.io/managed-by=rustfs-operator-fault-test \ + pod-security.kubernetes.io/enforce=privileged + kubectl annotate namespace rustfs-fault-test \ + rustfs.com/fault-test-tenant=fault-test-tenant +fi +``` + +### 6.3 Run / 执行 + +```bash +export RUSTFS_FAULT_TEST_STORAGE_CLASS=rustfs-fault-dm +export RUSTFS_FAULT_TEST_DM_NAME=rustfs-fault-dm +export RUSTFS_FAULT_TEST_DM_NODE= +export RUSTFS_FAULT_TEST_DM_MOUNT_PATH=/data/rustfs/rustfs-fault-lab/volume +export RUSTFS_FAULT_TEST_DM_FAULT_TABLE="0 $SECTORS flakey $BACKING 0 1 15" + +make -C e2e fault-preflight SCENARIO=dm-flakey +make -C e2e fault-run-dm +``` + +## 7. Evidence And Acceptance / 证据与验收 + +每个场景目录至少包含: + +Each scenario directory contains at least: + +```text +test.log +health-watch.log +workload-plan.json +history.jsonl +workload-summary.json +checker-report.json +fault-evidence.json +nodes-before.txt / nodes-after.txt +tenants-before.txt / tenants-after.txt +pods-before.txt / pods-after.txt +Chaos or DM snapshots +``` + +通过条件 / Pass criteria: + +- 测试退出码为 0。 +- `fault-evidence.json` 的 `injected`、`active_during_workload`、`recovered` 都为 `true`。 +- `workload-plan.json` 精确记录 40,000 对象、100 并发和四档尺寸分布。 +- `checker-report.json` 的 `committed_puts=40000`,并且 missing、hash mismatch、successful corrupted read、LIST warning 均为空。 +- fault Tenant 恢复 Ready;所有原有非 fault Tenant 和节点保持 Ready。 +- The test exits with zero. +- `fault-evidence.json` reports `injected`, `active_during_workload`, and `recovered` as `true`. +- `workload-plan.json` reports exactly 40,000 objects, concurrency 100, and the four size classes. 
+- `checker-report.json` reports `committed_puts=40000` with no missing object, hash mismatch, successful corrupted read, or LIST warning. +- The fault Tenant recovers Ready while every pre-existing non-fault Tenant and node remains Ready. + +客户端没有看到错误并不表示故障无效。故障是否生效由 Chaos/DM 后端证据判断;客户端 disruption 单独记录。 + +The absence of client-visible errors does not mean the fault was inactive. Chaos/DM backend evidence proves injection; client disruption is reported separately. + +## 8. Cleanup And Recovery / 清理与恢复 + +先运行安全清理: + +Start with owned-resource cleanup: + +```bash +make -C e2e fault-cleanup +``` + +然后由运维删除本次创建的外部 StorageClass、静态 PV、独立 provisioner 和主机设备。DM 实验室清理示例: + +Operators must then remove the external StorageClass, static PVs, independent provisioner, and host devices created for the run. Lab DM cleanup example: + +```bash +sudo umount /data/rustfs/rustfs-fault-lab/volume +sudo dmsetup remove rustfs-fault-dm # DM node only +sudo losetup -d "$BACKING" # loop device returned by losetup in 6.1 +sudo rm -rf /data/rustfs/rustfs-fault-lab +kubectl delete pv -l app.kubernetes.io/managed-by=rustfs-operator-fault-test +kubectl delete storageclass rustfs-fault-dm +``` + +最终确认 / Final checks: + +```bash +kubectl get nodes +kubectl get tenant -A +kubectl -n chaos-mesh get deployment,daemonset +kubectl get iochaos,podchaos,networkchaos -A +kubectl get namespace rustfs-fault-test +``` + +## 9. Runtime Variables / 运行参数 + +| Variable | Default | Purpose / 用途 | +| --- | --- | --- | +| `RUSTFS_FAULT_TEST_EXPECTED_CONTEXT` | required | 防止在错误 context 执行。 / Prevents execution against the wrong context. | +| `RUSTFS_FAULT_TEST_STORAGE_CLASS` | required | 常规动态 SC 或 DM 静态 SC。 / Dynamic regular SC or static DM SC. | +| `RUSTFS_FAULT_TEST_SERVER_IMAGE` | required by Make | 建议固定 digest。 / Pin an image digest. | +| `RUSTFS_FAULT_TEST_RUN_ROOT` | timestamp directory | 整次运行的 artifacts 根目录。 / Artifact root for the run. | +| `RUSTFS_FAULT_TEST_SCENARIOS` | six regular scenarios | `fault-run-regular` 的空格分隔场景列表。 / Space-separated regular scenario list. 
| +| `RUSTFS_FAULT_TEST_SEED` | generated | 固定后可重放相同对象。 / Replays the same objects when set. | +| `RUSTFS_FAULT_TEST_USE_CLUSTER_IP` | `false` | 集群节点/Pod 内建议设为 `1`。 / Set to `1` on a node or in-cluster runner. | +| `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `40000` | Make runner 强制验收该值。 / Required object count. | +| `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `100` | Make runner 强制验收该值。 / Required concurrency. | +| `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `7200` | 最大故障 TTL。 / Maximum fault TTL. | +| `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | 单次 S3 请求超时。 / Per-request S3 timeout. | +| `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求客户端可见错误。 / Whether client-visible disruption is mandatory. | +| `RUSTFS_FAULT_TEST_CHAOS_NAMESPACE` | `chaos-mesh` | Chaos 资源所在的 namespace。 / Namespace for Chaos resources. | +| `RUSTFS_FAULT_TEST_DM_*` | unset | `dm-flakey` 专用映射参数。 / DM mapping parameters. | diff --git a/e2e/Makefile b/e2e/Makefile new file mode 100644 index 0000000..950684a --- /dev/null +++ b/e2e/Makefile @@ -0,0 +1,60 @@ +# Copyright 2025 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +SHELL := /bin/bash + +FAULT_SCRIPT := $(CURDIR)/scripts/fault-test.sh +MANIFEST := $(CURDIR)/Cargo.toml + +.PHONY: help fault-check fault-preflight fault-run fault-run-regular fault-run-dm fault-cleanup + +help: + @echo "RustFS e2e fault-test package" + @echo "" + @echo "Usage:" + @echo " make -C e2e fault-check" + @echo " make -C e2e fault-preflight [SCENARIO=io-eio]" + @echo " make -C e2e fault-run SCENARIO=io-eio" + @echo " make -C e2e fault-run-regular" + @echo " make -C e2e fault-run-dm" + @echo " make -C e2e fault-cleanup" + @echo "" + @echo "Required runtime environment:" + @echo " RUSTFS_FAULT_TEST_EXPECTED_CONTEXT" + @echo " RUSTFS_FAULT_TEST_STORAGE_CLASS" + @echo " RUSTFS_FAULT_TEST_SERVER_IMAGE" + @echo "" + @echo "See e2e/FAULT_TESTING.md for cluster preparation and safety requirements." + +fault-check: + bash -n $(FAULT_SCRIPT) + cargo fmt --manifest-path $(MANIFEST) --all --check + cargo test --manifest-path $(MANIFEST) + cargo clippy --manifest-path $(MANIFEST) --all-targets -- -D warnings + +fault-preflight: + @bash $(FAULT_SCRIPT) preflight "$(or $(SCENARIO),io-eio)" + +fault-run: + @test -n "$(SCENARIO)" || (echo "SCENARIO is required" >&2; exit 2) + @bash $(FAULT_SCRIPT) run "$(SCENARIO)" + +fault-run-regular: + @bash $(FAULT_SCRIPT) run-regular + +fault-run-dm: + @bash $(FAULT_SCRIPT) run dm-flakey + +fault-cleanup: + @bash $(FAULT_SCRIPT) cleanup diff --git a/e2e/README.md b/e2e/README.md index 1ffce49..6d6bc12 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -16,6 +16,10 @@ The harness is split into four top-level domains: ```text e2e/ Cargo.toml + Makefile package-local fault-test entrypoints + FAULT_TESTING.md bilingual fault-test operations manual + scripts/ + fault-test.sh guarded real-cluster scenario orchestration manifests/ kind-rustfs-e2e.yaml dedicated 1 control-plane + 3 worker Kind cluster src/ @@ -88,7 +92,7 @@ make e2e-live-run The harness refuses to run live tests unless the active Kubernetes context matches the configured 
dedicated Kind context. -Fault tests have separate safety defaults and run exactly one selected scenario per invocation: +Fault tests have separate safety defaults and are operated entirely from this package: ```text context: current non-Kind kubectl context @@ -96,20 +100,21 @@ test namespace: rustfs-fault-test tenant name: fault-test-tenant storage class: required via RUSTFS_FAULT_TEST_STORAGE_CLASS artifacts: target/fault-tests/artifacts -PVCs: 4 × 80Gi -objects: 4000 with seeded weighted sizes -concurrency: 50 +PVCs: 4 × 100Gi +objects: 40000 with seeded weighted sizes +concurrency: 100 ``` Run them independently from the Kind lifecycle: ```bash -RUSTFS_FAULT_TEST_SCENARIO=io-eio \ +RUSTFS_FAULT_TEST_EXPECTED_CONTEXT= \ RUSTFS_FAULT_TEST_STORAGE_CLASS= \ -make fault-test +RUSTFS_FAULT_TEST_SERVER_IMAGE= \ +make -C e2e fault-run SCENARIO=io-eio ``` -The runner creates an absent namespace with ownership metadata and refuses to reset or claim an existing namespace unless its ownership markers match. See the bilingual [Fault Injection Operations Manual](../FAULT_INJECTION_TEST_PLAN.md) for prerequisites, all seven scenarios, the dedicated `dm-flakey` storage procedure, validation, recovery, and cleanup. +The runner creates an absent namespace with ownership metadata and refuses to reset or claim an existing namespace unless its ownership markers match. See the package-local bilingual [Fault-Test Operations Manual](FAULT_TESTING.md) for the Make targets, prerequisites, all seven scenarios, `dm-flakey` storage procedure, evidence, recovery, and cleanup. ## Non-live validation diff --git a/e2e/scripts/fault-test.sh b/e2e/scripts/fault-test.sh new file mode 100644 index 0000000..c29308d --- /dev/null +++ b/e2e/scripts/fault-test.sh @@ -0,0 +1,453 @@ +#!/usr/bin/env bash +# Copyright 2025 RustFS Team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -Eeuo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PACKAGE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +MANIFEST="$PACKAGE_DIR/Cargo.toml" +MANAGER="rustfs-operator-fault-test" +MANAGER_SELECTOR="app.kubernetes.io/managed-by=$MANAGER" +DEFAULT_SCENARIOS="io-eio pod-kill-one network-partition-one io-read-mistake disk-full warp-under-chaos" +EXPECTED_OBJECTS=40000 +EXPECTED_CONCURRENCY=100 +EXPECTED_PAYLOAD_BYTES=20337459200 + +FAULT_NAMESPACE="${RUSTFS_FAULT_TEST_NAMESPACE:-rustfs-fault-test}" +FAULT_TENANT="${RUSTFS_FAULT_TEST_TENANT:-fault-test-tenant}" +CHAOS_NAMESPACE="${RUSTFS_FAULT_TEST_CHAOS_NAMESPACE:-chaos-mesh}" +ACTIVE_PID="" +ACTIVE_ARTIFACTS="" + +usage() { + cat <<'EOF' +Usage: fault-test.sh [scenario] + +Commands: + preflight [scenario] Validate the current real-cluster environment. + run Run one destructive scenario with health guards. + run-regular Run the six regular scenarios serially. + cleanup Remove managed Chaos and the owned fault namespace. + +Run through the package Make targets documented in e2e/FAULT_TESTING.md. 
+EOF +} + +die() { + echo "fault-test: $*" >&2 + exit 1 +} + +require_command() { + command -v "$1" >/dev/null 2>&1 || die "required command not found: $1" +} + +kubectl_context() { + kubectl config current-context +} + +kubectl_ns() { + kubectl --context "$RUSTFS_FAULT_TEST_EXPECTED_CONTEXT" -n "$1" "${@:2}" +} + +kubectl_cluster() { + kubectl --context "$RUSTFS_FAULT_TEST_EXPECTED_CONTEXT" "$@" +} + +is_supported_scenario() { + case "$1" in + io-eio|pod-kill-one|network-partition-one|io-read-mistake|disk-full|warp-under-chaos|dm-flakey) + return 0 + ;; + *) + return 1 + ;; + esac +} + +scenario_crd() { + case "$1" in + pod-kill-one) echo "podchaos.chaos-mesh.org" ;; + network-partition-one) echo "networkchaos.chaos-mesh.org" ;; + dm-flakey) echo "" ;; + *) echo "iochaos.chaos-mesh.org" ;; + esac +} + +require_namespace_ownership() { + if ! kubectl_cluster get namespace "$FAULT_NAMESPACE" >/dev/null 2>&1; then + return 0 + fi + + local manager tenant + manager="$(kubectl_cluster get namespace "$FAULT_NAMESPACE" -o jsonpath='{.metadata.labels.app\.kubernetes\.io/managed-by}')" + tenant="$(kubectl_cluster get namespace "$FAULT_NAMESPACE" -o jsonpath='{.metadata.annotations.rustfs\.com/fault-test-tenant}')" + [[ "$manager" == "$MANAGER" ]] || die "namespace $FAULT_NAMESPACE is not managed by $MANAGER" + [[ "$tenant" == "$FAULT_TENANT" ]] || die "namespace $FAULT_NAMESPACE is not owned by tenant $FAULT_TENANT" +} + +require_non_fault_tenants_ready() { + local unhealthy + unhealthy="$(kubectl_cluster get tenants -A -o json | jq -r --arg namespace "$FAULT_NAMESPACE" ' + .items[] + | select(.metadata.namespace != $namespace) + | select((.status.currentState // "") != "Ready") + | "\(.metadata.namespace)/\(.metadata.name)=\(.status.currentState // "missing")" + ')" + [[ -z "$unhealthy" ]] || die "non-fault Tenant is not Ready: $unhealthy" +} + +require_chaos_ready() { + local deployment_ready daemon_ready + deployment_ready="$(kubectl_ns "$CHAOS_NAMESPACE" get deployment 
chaos-controller-manager -o json | jq -r ' + (.status.readyReplicas // 0) == (.spec.replicas // 0) and (.spec.replicas // 0) > 0 + ')" + daemon_ready="$(kubectl_ns "$CHAOS_NAMESPACE" get daemonset chaos-daemon -o json | jq -r ' + (.status.numberReady // 0) == (.status.desiredNumberScheduled // 0) and (.status.desiredNumberScheduled // 0) > 0 + ')" + [[ "$deployment_ready" == "true" ]] || die "Chaos Mesh controller-manager is not fully Ready" + [[ "$daemon_ready" == "true" ]] || die "Chaos Mesh chaos-daemon is not fully Ready" +} + +require_storage_class() { + local scenario="$1" + local storage_class provisioner pv_count + storage_class="${RUSTFS_FAULT_TEST_STORAGE_CLASS:-}" + [[ -n "$storage_class" ]] || die "RUSTFS_FAULT_TEST_STORAGE_CLASS is required" + provisioner="$(kubectl_cluster get storageclass "$storage_class" -o json | jq -r '.provisioner // ""')" + [[ -n "$provisioner" ]] || die "StorageClass $storage_class has no provisioner" + + if [[ "$scenario" == "dm-flakey" ]]; then + [[ "$provisioner" == "kubernetes.io/no-provisioner" ]] || die "dm-flakey requires a no-provisioner StorageClass" + pv_count="$(kubectl_cluster get pv -o json | jq -r --arg storage_class "$storage_class" ' + [.items[] + | select(.spec.storageClassName == $storage_class) + | select(.status.phase == "Available" or .status.phase == "Bound") + | select(.spec.capacity.storage == "100Gi")] + | length + ')" + [[ "$pv_count" -eq 4 ]] || die "dm-flakey requires exactly four Available/Bound 100Gi PVs, found $pv_count" + else + [[ "$provisioner" != "kubernetes.io/no-provisioner" ]] || die "regular scenarios require dynamic provisioning" + fi +} + +preflight() { + local scenario="${1:-io-eio}" + local current_context ready_nodes crd + is_supported_scenario "$scenario" || die "unsupported scenario: $scenario" + + require_command cargo + require_command jq + require_command kubectl + require_command pgrep + [[ -n "${RUSTFS_FAULT_TEST_EXPECTED_CONTEXT:-}" ]] || die 
"RUSTFS_FAULT_TEST_EXPECTED_CONTEXT is required" + [[ -n "${RUSTFS_FAULT_TEST_SERVER_IMAGE:-}" ]] || die "RUSTFS_FAULT_TEST_SERVER_IMAGE is required" + + current_context="$(kubectl_context)" + [[ "$current_context" == "$RUSTFS_FAULT_TEST_EXPECTED_CONTEXT" ]] || die "current context $current_context does not match expected context $RUSTFS_FAULT_TEST_EXPECTED_CONTEXT" + [[ "$current_context" != kind-* ]] || die "fault tests require a real Kubernetes cluster, got $current_context" + + kubectl_cluster get crd tenants.rustfs.com >/dev/null + ready_nodes="$(kubectl_cluster get nodes -o json | jq -r '[.items[] + | select(.spec.unschedulable != true) + | select(any(.status.conditions[]; .type == "Ready" and .status == "True"))] | length')" + [[ "$ready_nodes" -ge 4 ]] || die "at least four schedulable Ready nodes are required, found $ready_nodes" + + require_storage_class "$scenario" + require_namespace_ownership + require_non_fault_tenants_ready + + if [[ "$scenario" != "dm-flakey" ]]; then + crd="$(scenario_crd "$scenario")" + kubectl_cluster get crd "$crd" >/dev/null + require_chaos_ready + fi + if [[ "$scenario" == "warp-under-chaos" ]]; then + require_command warp + fi + if [[ "$scenario" == "dm-flakey" ]]; then + [[ -n "${RUSTFS_FAULT_TEST_DM_NAME:-}" ]] || die "RUSTFS_FAULT_TEST_DM_NAME is required" + [[ -n "${RUSTFS_FAULT_TEST_DM_NODE:-}" ]] || die "RUSTFS_FAULT_TEST_DM_NODE is required" + [[ -n "${RUSTFS_FAULT_TEST_DM_MOUNT_PATH:-}" ]] || die "RUSTFS_FAULT_TEST_DM_MOUNT_PATH is required" + [[ -n "${RUSTFS_FAULT_TEST_DM_FAULT_TABLE:-}" ]] || die "RUSTFS_FAULT_TEST_DM_FAULT_TABLE is required" + kubectl_cluster get namespace "$FAULT_NAMESPACE" >/dev/null 2>&1 || die "dm-flakey requires a pre-created owned fault namespace with privileged Pod Security" + [[ "$(kubectl_cluster get namespace "$FAULT_NAMESPACE" -o jsonpath='{.metadata.labels.pod-security\.kubernetes\.io/enforce}')" == "privileged" ]] || die "dm-flakey requires pod-security.kubernetes.io/enforce=privileged 
on $FAULT_NAMESPACE" + fi + + echo "preflight passed: context=$current_context scenario=$scenario nodes=$ready_nodes storageClass=${RUSTFS_FAULT_TEST_STORAGE_CLASS}" +} + +preflight_cleanup() { + local current_context + require_command jq + require_command kubectl + [[ -n "${RUSTFS_FAULT_TEST_EXPECTED_CONTEXT:-}" ]] || die "RUSTFS_FAULT_TEST_EXPECTED_CONTEXT is required" + current_context="$(kubectl_context)" + [[ "$current_context" == "$RUSTFS_FAULT_TEST_EXPECTED_CONTEXT" ]] || die "current context $current_context does not match expected context $RUSTFS_FAULT_TEST_EXPECTED_CONTEXT" + [[ "$current_context" != kind-* ]] || die "fault cleanup requires a real Kubernetes cluster, got $current_context" + require_namespace_ownership +} + +cleanup_managed_chaos() { + kubectl_ns "$CHAOS_NAMESPACE" delete iochaos,podchaos,networkchaos \ + -l "$MANAGER_SELECTOR" --ignore-not-found=true --wait=false >/dev/null 2>&1 || true +} + +terminate_process_tree() { + local parent="$1" + local child + for child in $(pgrep -P "$parent" 2>/dev/null || true); do + terminate_process_tree "$child" + done + kill -TERM "$parent" 2>/dev/null || true +} + +handle_signal() { + cleanup_managed_chaos + if [[ -n "$ACTIVE_PID" ]]; then + terminate_process_tree "$ACTIVE_PID" + fi + if [[ -n "$ACTIVE_ARTIFACTS" ]]; then + touch "$ACTIVE_ARTIFACTS/interrupted" + echo 130 >"$ACTIVE_ARTIFACTS/exit-code" + capture_cluster_snapshot "$ACTIVE_ARTIFACTS" interrupted + capture_fault_logs "$ACTIVE_ARTIFACTS" + fi + exit 130 +} + +capture_cluster_snapshot() { + local artifacts="$1" stage="$2" + kubectl_cluster get nodes -o wide >"$artifacts/nodes-$stage.txt" 2>&1 || true + kubectl_cluster get tenants -A -o wide >"$artifacts/tenants-$stage.txt" 2>&1 || true + kubectl_cluster get pods -A -o wide >"$artifacts/pods-$stage.txt" 2>&1 || true + kubectl_cluster get pv,pvc -A -o wide >"$artifacts/volumes-$stage.txt" 2>&1 || true + kubectl_ns "$CHAOS_NAMESPACE" get iochaos,podchaos,networkchaos -o yaml 
>"$artifacts/chaos-$stage.yaml" 2>&1 || true + kubectl_ns "$FAULT_NAMESPACE" get events --sort-by=.lastTimestamp >"$artifacts/events-$stage.txt" 2>&1 || true +} + +capture_fault_logs() { + local artifacts="$1" pod name + for pod in $(kubectl_ns "$FAULT_NAMESPACE" get pods -l "rustfs.tenant=$FAULT_TENANT" -o name 2>/dev/null || true); do + name="${pod#pod/}" + kubectl_ns "$FAULT_NAMESPACE" logs "$pod" >"$artifacts/$name.log" 2>&1 || true + kubectl_ns "$FAULT_NAMESPACE" logs "$pod" --previous >"$artifacts/$name-previous.log" 2>&1 || true + done +} + +health_is_safe() { + local baseline_nodes="$1" baseline_tenants="$2" + local current_nodes namespace tenant state + current_nodes="$(kubectl_cluster get nodes -o json 2>/dev/null | jq -r '[.items[] | select(any(.status.conditions[]; .type == "Ready" and .status == "True"))] | length' 2>/dev/null || echo 0)" + [[ "$current_nodes" -eq "$baseline_nodes" ]] || return 1 + + while IFS=$'\t' read -r namespace tenant; do + [[ -n "$namespace" ]] || continue + state="$(kubectl_ns "$namespace" get tenant "$tenant" -o jsonpath='{.status.currentState}' 2>/dev/null || true)" + [[ "$state" == "Ready" ]] || return 1 + done <"$baseline_tenants" + return 0 +} + +find_artifact() { + find "$1" -name "$2" -type f -print -quit +} + +validate_scenario_artifacts() { + local scenario="$1" artifacts="$2" run_root="$3" + local plan evidence checker summary seed disruptions recommitted committed + plan="$(find_artifact "$artifacts" workload-plan.json)" + evidence="$(find_artifact "$artifacts" fault-evidence.json)" + checker="$(find_artifact "$artifacts" checker-report.json)" + summary="$(find_artifact "$artifacts" workload-summary.json)" + [[ -f "$plan" ]] || die "$scenario did not produce workload-plan.json" + [[ -f "$evidence" ]] || die "$scenario did not produce fault-evidence.json" + [[ -f "$checker" ]] || die "$scenario did not produce checker-report.json" + [[ -f "$summary" ]] || die "$scenario did not produce workload-summary.json" + + jq -e 
--argjson objects "$EXPECTED_OBJECTS" --argjson concurrency "$EXPECTED_CONCURRENCY" --argjson payload "$EXPECTED_PAYLOAD_BYTES" ' + .object_count == $objects + and .concurrency == $concurrency + and .total_payload_bytes == $payload + and .size_distribution == [ + {"size_bytes":4096,"object_count":34000}, + {"size_bytes":16384,"object_count":4000}, + {"size_bytes":8388608,"object_count":1600}, + {"size_bytes":16777216,"object_count":400} + ] + ' "$plan" >/dev/null || die "$scenario workload plan does not match the required profile" + jq -e '.injected == true and .active_during_workload == true and .recovered == true' "$evidence" >/dev/null || die "$scenario fault evidence is incomplete" + jq -e --argjson objects "$EXPECTED_OBJECTS" ' + .committed_puts == $objects + and (.missing_committed_objects | length) == 0 + and (.hash_mismatches | length) == 0 + and (.successful_corrupted_reads | length) == 0 + and (.list_warnings | length) == 0 + and .tenant_recovered == true + and .passed == true + ' "$checker" >/dev/null || die "$scenario checker verdict failed" + + seed="$(jq -r '.seed' "$plan")" + disruptions="$(jq -r '.client_disruptions' "$evidence")" + recommitted="$(jq -r '.recommitted_after_recovery' "$summary")" + committed="$(jq -r '.committed_puts' "$checker")" + printf '%s\t%s\t0\t%s\t%s\t%s\t0\t0\t0\t0\ttrue\n' \ + "$scenario" "$seed" "$disruptions" "$recommitted" "$committed" >>"$run_root/validation-summary.tsv" +} + +run_scenario() { + local scenario="$1" run_root="$2" + local artifacts="$run_root/$scenario" + local baseline_nodes baseline_tenants test_pid rc current_time health_checks + preflight "$scenario" + mkdir -p "$artifacts" + baseline_nodes="$(kubectl_cluster get nodes -o json | jq -r '.items | length')" + baseline_tenants="$artifacts/baseline-tenants.tsv" + kubectl_cluster get tenants -A -o json | jq -r --arg namespace "$FAULT_NAMESPACE" ' + .items[] | select(.metadata.namespace != $namespace) | [.metadata.namespace,.metadata.name] | @tsv + ' 
>"$baseline_tenants" + capture_cluster_snapshot "$artifacts" before + + echo "starting scenario=$scenario artifacts=$artifacts" + ( + set +e + RUSTFS_FAULT_TEST_DESTRUCTIVE=1 \ + RUSTFS_FAULT_TEST_SCENARIO="$scenario" \ + RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS="$EXPECTED_OBJECTS" \ + RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY="$EXPECTED_CONCURRENCY" \ + RUSTFS_FAULT_TEST_DURATION_SECONDS="${RUSTFS_FAULT_TEST_DURATION_SECONDS:-7200}" \ + RUSTFS_FAULT_TEST_ARTIFACTS="$artifacts" \ + cargo test --manifest-path "$MANIFEST" --test faults -- --ignored --test-threads=1 --nocapture \ + >"$artifacts/test.log" 2>&1 + echo "$?" >"$artifacts/test-exit-code.tmp" + ) & + test_pid=$! + ACTIVE_PID="$test_pid" + ACTIVE_ARTIFACTS="$artifacts" + health_checks=0 + + while kill -0 "$test_pid" 2>/dev/null; do + current_time="$(date -u +%FT%TZ)" + health_checks=$((health_checks + 1)) + if health_is_safe "$baseline_nodes" "$baseline_tenants"; then + echo "$current_time safe=true" >>"$artifacts/health-watch.log" + if (( health_checks % 6 == 0 )); then + echo "scenario=$scenario running safe=true time=$current_time" + fi + else + echo "$current_time safe=false" >>"$artifacts/health-watch.log" + touch "$artifacts/health-guard-failed" + cleanup_managed_chaos + terminate_process_tree "$test_pid" + break + fi + sleep 10 + done + + wait "$test_pid" 2>/dev/null || true + ACTIVE_PID="" + ACTIVE_ARTIFACTS="" + rc=125 + [[ -f "$artifacts/test-exit-code.tmp" ]] && rc="$(cat "$artifacts/test-exit-code.tmp")" + [[ ! 
-f "$artifacts/health-guard-failed" ]] || rc=90 + echo "$rc" >"$artifacts/exit-code" + capture_cluster_snapshot "$artifacts" after + capture_fault_logs "$artifacts" + + if [[ "$rc" -ne 0 ]]; then + cleanup_managed_chaos + echo "scenario failed: $scenario rc=$rc log=$artifacts/test.log" >&2 + return "$rc" + fi + validate_scenario_artifacts "$scenario" "$artifacts" "$run_root" + echo "scenario passed: $scenario" +} + +new_run_root() { + if [[ -n "${RUSTFS_FAULT_TEST_RUN_ROOT:-}" ]]; then + echo "$RUSTFS_FAULT_TEST_RUN_ROOT" + else + echo "$PACKAGE_DIR/target/fault-tests/$(date -u +%Y%m%dT%H%M%SZ)" + fi +} + +initialize_summary() { + local run_root="$1" + mkdir -p "$run_root" + if [[ ! -f "$run_root/validation-summary.tsv" ]]; then + printf 'scenario\tseed\texit\tdisruptions\trecommitted\tcommitted\tmissing\thash_mismatch\tcorrupt_read\tlist_warning\trecovered\n' \ + >"$run_root/validation-summary.tsv" + fi +} + +run_one() { + local scenario="$1" run_root + is_supported_scenario "$scenario" || die "unsupported scenario: $scenario" + run_root="$(new_run_root)" + initialize_summary "$run_root" + run_scenario "$scenario" "$run_root" + echo "run artifacts: $run_root" +} + +run_regular() { + local run_root scenario + local scenarios="${RUSTFS_FAULT_TEST_SCENARIOS:-$DEFAULT_SCENARIOS}" + run_root="$(new_run_root)" + initialize_summary "$run_root" + for scenario in $scenarios; do + [[ "$scenario" != "dm-flakey" ]] || die "run-regular cannot include dm-flakey" + run_scenario "$scenario" "$run_root" || return $? 
+ done + echo "regular scenario artifacts: $run_root" +} + +cleanup() { + cleanup_managed_chaos + if kubectl_cluster get namespace "$FAULT_NAMESPACE" >/dev/null 2>&1; then + require_namespace_ownership + kubectl_cluster delete namespace "$FAULT_NAMESPACE" --wait=true + fi + if kubectl_ns "$CHAOS_NAMESPACE" get iochaos,podchaos,networkchaos -l "$MANAGER_SELECTOR" -o name 2>/dev/null | grep -q .; then + die "managed Chaos resources remain after cleanup" + fi + echo "managed fault-test resources cleaned; external StorageClasses, PVs, and host devices were not changed" +} + +trap handle_signal INT TERM HUP + +case "${1:-help}" in + help|-h|--help) + usage + ;; + preflight) + preflight "${2:-io-eio}" + ;; + run) + [[ -n "${2:-}" ]] || die "scenario is required" + run_one "$2" + ;; + run-regular) + run_regular + ;; + cleanup) + preflight_cleanup + cleanup + ;; + *) + usage >&2 + die "unknown command: $1" + ;; +esac diff --git a/e2e/src/framework/fault_config.rs b/e2e/src/framework/fault_config.rs index 3abbc57..53b73e4 100644 --- a/e2e/src/framework/fault_config.rs +++ b/e2e/src/framework/fault_config.rs @@ -97,11 +97,15 @@ impl FaultTestConfig { duration: Duration::from_secs(env_u64( &get_env, "RUSTFS_FAULT_TEST_DURATION_SECONDS", - 900, + 7200, )), percent: env_u8(&get_env, "RUSTFS_FAULT_TEST_PERCENT", default_percent), - workload_objects: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS", 4000), - workload_concurrency: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY", 50), + workload_objects: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS", 40000), + workload_concurrency: env_usize( + &get_env, + "RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY", + 100, + ), workload_seed: env_optional_u64(&get_env, "RUSTFS_FAULT_TEST_SEED")?, request_timeout: Duration::from_secs(env_u64( &get_env, @@ -135,7 +139,7 @@ impl FaultTestConfig { pub fn require_destructive_enabled(&self) -> Result<()> { ensure!( self.destructive_enabled, - "destructive fault tests are 
disabled; run through `make fault-test` or set RUSTFS_FAULT_TEST_DESTRUCTIVE=1 explicitly" + "destructive fault tests are disabled; run through an e2e package fault Make target or set RUSTFS_FAULT_TEST_DESTRUCTIVE=1 explicitly" ); Ok(()) } @@ -297,10 +301,10 @@ mod tests { std::path::PathBuf::from("target/fault-tests/artifacts") ); assert_eq!(config.scenario, "io-eio"); - assert_eq!(config.duration, std::time::Duration::from_secs(900)); + assert_eq!(config.duration, std::time::Duration::from_secs(7200)); assert_eq!(config.percent, 20); - assert_eq!(config.workload_objects, 4000); - assert_eq!(config.workload_concurrency, 50); + assert_eq!(config.workload_objects, 40000); + assert_eq!(config.workload_concurrency, 100); assert_eq!(config.workload_seed, None); assert_eq!(config.request_timeout, std::time::Duration::from_secs(30)); assert!(!config.use_cluster_ip); diff --git a/e2e/src/framework/fault_scenarios.rs b/e2e/src/framework/fault_scenarios.rs index 7827f5c..7f83c03 100644 --- a/e2e/src/framework/fault_scenarios.rs +++ b/e2e/src/framework/fault_scenarios.rs @@ -271,10 +271,10 @@ mod tests { scenario.case_name, "fault_io_eio_preserves_committed_objects" ); - assert_eq!(scenario.duration, Duration::from_secs(900)); + assert_eq!(scenario.duration, Duration::from_secs(7200)); assert_eq!(scenario.percent, 20); - assert_eq!(scenario.prefill_count(), 2000); - assert_eq!(scenario.mixed_workload_count(), 2000); + assert_eq!(scenario.prefill_count(), 20000); + assert_eq!(scenario.mixed_workload_count(), 20000); } #[test] diff --git a/e2e/src/framework/resources.rs b/e2e/src/framework/resources.rs index a1309db..40b51a2 100644 --- a/e2e/src/framework/resources.rs +++ b/e2e/src/framework/resources.rs @@ -450,7 +450,7 @@ mod tests { assert!(manifest.contains("namespace: rustfs-fault-test")); assert!(manifest.contains("storageClassName: fast-csi")); - assert!(manifest.contains("storage: 80Gi")); + assert!(manifest.contains("storage: 100Gi")); 
assert!(!manifest.contains("rustfs-storage")); assert!(!manifest.contains("RUSTFS_UNSAFE_BYPASS_DISK_CHECK")); } diff --git a/e2e/src/framework/s3_workload.rs b/e2e/src/framework/s3_workload.rs index 7b84c5e..d201b9e 100644 --- a/e2e/src/framework/s3_workload.rs +++ b/e2e/src/framework/s3_workload.rs @@ -587,9 +587,9 @@ mod tests { #[test] fn workload_plan_is_weighted_shuffled_and_reproducible() { - let plan = WorkloadPlan::seeded(42, 4000, 50); - let same = WorkloadPlan::seeded(42, 4000, 50); - let different = WorkloadPlan::seeded(43, 4000, 50); + let plan = WorkloadPlan::seeded(42, 40000, 100); + let same = WorkloadPlan::seeded(42, 40000, 100); + let different = WorkloadPlan::seeded(43, 40000, 100); assert_eq!(plan, same); assert_ne!(plan.sizes, different.sizes); @@ -599,12 +599,12 @@ mod tests { .map(|class| (class.size_bytes, class.object_count)) .collect::>(), vec![ - (4 * 1024, 3400), - (16 * 1024, 400), - (8 * 1024 * 1024, 160), - (16 * 1024 * 1024, 40), + (4 * 1024, 34000), + (16 * 1024, 4000), + (8 * 1024 * 1024, 1600), + (16 * 1024 * 1024, 400), ] ); - assert_eq!(plan.total_payload_bytes, 2_033_745_920); + assert_eq!(plan.total_payload_bytes, 20_337_459_200); } } diff --git a/e2e/src/framework/tenant_factory.rs b/e2e/src/framework/tenant_factory.rs index c12afa0..af806d3 100644 --- a/e2e/src/framework/tenant_factory.rs +++ b/e2e/src/framework/tenant_factory.rs @@ -85,7 +85,7 @@ impl TenantTemplate { credential_secret_name: credential_secret_name.into(), servers: 4, volumes_per_server: 1, - storage_request: "80Gi".to_string(), + storage_request: "100Gi".to_string(), pod_management_policy: Some(PodManagementPolicy::Parallel), unsafe_bypass_disk_check: false, node_selector: None, @@ -266,7 +266,7 @@ mod tests { .and_then(|resources| resources.requests.as_ref()) .and_then(|requests| requests.get("storage")) .map(|quantity| quantity.0.as_str()), - Some("80Gi") + Some("100Gi") ); assert!(tenant.spec.pools[0].scheduling.node_selector.is_none()); assert!( diff 
--git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index c445b28..53f97b5 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -1135,7 +1135,7 @@ mod tests { #[test] fn workload_summary_counts_disrupted_operations() { - let mut summary = WorkloadSummary::new(&WorkloadPlan::seeded(42, 4000, 50)); + let mut summary = WorkloadSummary::new(&WorkloadPlan::seeded(42, 40000, 100)); summary.puts.record(OperationOutcome::Ok); summary.gets.record(OperationOutcome::Timeout); @@ -1150,9 +1150,9 @@ mod tests { fn workload_summary_can_require_fault_evidence() { let summary = WorkloadSummary { seed: 42, - object_count: 4000, - concurrency: 50, - total_payload_bytes: 2_033_745_920, + object_count: 40000, + concurrency: 100, + total_payload_bytes: 20_337_459_200, puts: OutcomeCounts { ok: 1, ..OutcomeCounts::default() From 9485758580499476b888b9d108a55d09b6876c45 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 11:35:24 +0800 Subject: [PATCH 16/20] fix(chaos): limit package check build jobs --- e2e/FAULT_TESTING.md | 2 +- e2e/Makefile | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/e2e/FAULT_TESTING.md b/e2e/FAULT_TESTING.md index e5e9383..fdef2f4 100644 --- a/e2e/FAULT_TESTING.md +++ b/e2e/FAULT_TESTING.md @@ -91,7 +91,7 @@ make -C e2e fault-cleanup | Target | Behavior / 行为 | | --- | --- | -| `fault-check` | Rust fmt/test/clippy 和 Bash 语法检查;不访问集群。 / Rust fmt, tests, clippy, and Bash syntax; no cluster mutation. | +| `fault-check` | 单 job Rust fmt/test/clippy 和 Bash 语法检查;不访问集群。 / Single-job Rust fmt, tests, clippy, and Bash syntax; no cluster mutation. | | `fault-preflight` | 校验 context、CRD、StorageClass、Chaos、节点、namespace 所有权和现有 Tenant。 / Validates context, CRDs, storage, Chaos, nodes, ownership, and existing Tenants. | | `fault-run` | 运行一个场景,持续健康守护并验收 artifacts。 / Runs one guarded scenario and validates artifacts. 
| | `fault-run-regular` | 串行运行六个常规场景,首败停止。 / Runs six regular scenarios serially and stops on first failure. | diff --git a/e2e/Makefile b/e2e/Makefile index 950684a..cff152e 100644 --- a/e2e/Makefile +++ b/e2e/Makefile @@ -16,6 +16,7 @@ SHELL := /bin/bash FAULT_SCRIPT := $(CURDIR)/scripts/fault-test.sh MANIFEST := $(CURDIR)/Cargo.toml +FAULT_BUILD_JOBS ?= 1 .PHONY: help fault-check fault-preflight fault-run fault-run-regular fault-run-dm fault-cleanup @@ -39,9 +40,9 @@ help: fault-check: bash -n $(FAULT_SCRIPT) - cargo fmt --manifest-path $(MANIFEST) --all --check - cargo test --manifest-path $(MANIFEST) - cargo clippy --manifest-path $(MANIFEST) --all-targets -- -D warnings + CARGO_BUILD_JOBS=$(FAULT_BUILD_JOBS) cargo fmt --manifest-path $(MANIFEST) --all --check + CARGO_BUILD_JOBS=$(FAULT_BUILD_JOBS) cargo test --manifest-path $(MANIFEST) + CARGO_BUILD_JOBS=$(FAULT_BUILD_JOBS) cargo clippy --manifest-path $(MANIFEST) --all-targets -- -D warnings fault-preflight: @bash $(FAULT_SCRIPT) preflight "$(or $(SCENARIO),io-eio)" From 08b7b758f69b677a6f1658aa527693fff36edf8b Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 11:47:33 +0800 Subject: [PATCH 17/20] fix(chaos): wait for stable fault tenant pods --- e2e/FAULT_TESTING.md | 4 + e2e/tests/faults.rs | 177 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 179 insertions(+), 2 deletions(-) diff --git a/e2e/FAULT_TESTING.md b/e2e/FAULT_TESTING.md index fdef2f4..25eb234 100644 --- a/e2e/FAULT_TESTING.md +++ b/e2e/FAULT_TESTING.md @@ -73,6 +73,10 @@ maximum fault TTL: 7,200 seconds The 7,200-second duration is a maximum fault-resource safety window, not a fixed wait. Successful runs recover immediately after the workload. The larger window prevents the 40,000-object workload from outliving Chaos. 
+Tenant `Ready` 之后、注入故障之前,以及故障恢复之后,测试都会等待四个 RustFS Pod 连续 60 秒保持 `Running/Ready`,且 Pod UID 和容器重启数不变。这个稳定窗口避免把启动期 DNS 或 Pod 重启抖动误判为故障注入结果。 + +After Tenant `Ready`, both before injection and after recovery, the test requires all four RustFS Pods to remain `Running/Ready` for 60 seconds with unchanged Pod UIDs and container restart counts. This stability window prevents startup DNS or restart churn from being misclassified as a fault-injection result. + ## 3. Package Commands / Package 命令 所有公共入口都位于 `e2e/Makefile`。从仓库根目录执行: diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index 53f97b5..81a2a67 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -41,9 +41,12 @@ use serde::Serialize; use std::collections::BTreeSet; use std::thread::sleep; use std::time::{Duration, Instant}; +use tokio::time::sleep as async_sleep; use uuid::Uuid; const RUSTFS_DATA_VOLUME: &str = "/data/rustfs0"; +const FAULT_TENANT_POD_COUNT: usize = 4; +const RUSTFS_POD_STABLE_WINDOW: Duration = Duration::from_secs(60); #[tokio::test] #[ignore = "destructive RustFS workload fault scenario; select with RUSTFS_FAULT_TEST_SCENARIO"] @@ -91,6 +94,7 @@ async fn run_fault_case( prepare_fault_fixture(&config.cluster, spec.isolation)?; wait_for_ready_tenant(&config.cluster).await?; + wait_for_stable_rustfs_pods(&config.cluster, RUSTFS_POD_STABLE_WINDOW).await?; let run_id = format!("run-{}", Uuid::new_v4()); let workload_seed = config.workload_seed.unwrap_or_else(generated_seed); @@ -233,6 +237,7 @@ async fn run_fault_case( } wait_for_ready_tenant(cluster).await?; + wait_for_stable_rustfs_pods(cluster, RUSTFS_POD_STABLE_WINDOW).await?; let pods_after = rustfs_pod_identities(cluster)?; ensure_s3_access(&mut port_forward, cluster, &endpoint).await?; workload.summary.recommitted_after_recovery = recommit_unconfirmed_objects( @@ -674,6 +679,16 @@ struct PodIdentity { uid: String, } +#[derive(Debug, Clone, PartialEq, Eq)] +struct PodRuntimeState { + name: String, + uid: String, + phase: String, + 
containers_ready: bool, + restart_count: u64, + terminating: bool, +} + fn rustfs_pod_identities(config: &ClusterTestConfig) -> Result> { let selector = format!("rustfs.tenant={}", config.tenant_name); let output = rustfs_operator_e2e::framework::kubectl::Kubectl::new(config) @@ -704,6 +719,135 @@ fn rustfs_pod_identities(config: &ClusterTestConfig) -> Result> Ok(pods) } +fn rustfs_pod_runtime_states(config: &ClusterTestConfig) -> Result> { + let selector = format!("rustfs.tenant={}", config.tenant_name); + let output = Kubectl::new(config) + .namespaced(&config.test_namespace) + .command(["get", "pod", "-l", &selector, "-o", "json"]) + .run_checked()?; + let value = serde_json::from_str::(&output.stdout) + .context("parse RustFS pod list json")?; + let items = value + .pointer("/items") + .and_then(serde_json::Value::as_array) + .context("RustFS pod list did not contain an items array")?; + let mut pods = items + .iter() + .map(|item| { + let metadata = item + .get("metadata") + .context("RustFS pod did not contain metadata")?; + let name = metadata + .get("name") + .and_then(serde_json::Value::as_str) + .context("RustFS pod metadata did not contain a name")?; + let uid = metadata + .get("uid") + .and_then(serde_json::Value::as_str) + .context("RustFS pod metadata did not contain a uid")?; + let phase = item + .pointer("/status/phase") + .and_then(serde_json::Value::as_str) + .unwrap_or("Unknown"); + let container_statuses = item + .pointer("/status/containerStatuses") + .and_then(serde_json::Value::as_array); + let containers_ready = container_statuses.is_some_and(|statuses| { + !statuses.is_empty() + && statuses.iter().all(|status| { + status + .get("ready") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false) + }) + }); + let restart_count = container_statuses + .into_iter() + .flatten() + .filter_map(|status| status.get("restartCount")) + .filter_map(serde_json::Value::as_u64) + .sum(); + + Ok(PodRuntimeState { + name: name.to_string(), + uid: 
uid.to_string(), + phase: phase.to_string(), + containers_ready, + restart_count, + terminating: metadata.get("deletionTimestamp").is_some(), + }) + }) + .collect::>>()?; + pods.sort_by(|left, right| left.name.cmp(&right.name)); + Ok(pods) +} + +fn stable_pod_fingerprint(pods: &[PodRuntimeState]) -> Option> { + if pods.len() != FAULT_TENANT_POD_COUNT + || pods + .iter() + .any(|pod| pod.phase != "Running" || !pod.containers_ready || pod.terminating) + { + return None; + } + + Some( + pods.iter() + .map(|pod| (pod.uid.clone(), pod.restart_count)) + .collect(), + ) +} + +async fn wait_for_stable_rustfs_pods( + config: &ClusterTestConfig, + stable_window: Duration, +) -> Result<()> { + let deadline = Instant::now() + config.timeout; + let mut stable_since = None; + let mut stable_fingerprint = None; + let mut last_snapshot = Vec::new(); + let mut last_error = "not checked yet".to_string(); + + eprintln!( + "waiting for {FAULT_TENANT_POD_COUNT} RustFS pods to remain ready without restarts for {stable_window:?}" + ); + loop { + if Instant::now() >= deadline { + bail!( + "timed out waiting for stable RustFS pods after {:?}\nlast: {last_snapshot:?}\nlast error: {last_error}", + config.timeout + ); + } + + match rustfs_pod_runtime_states(config) { + Ok(current) => { + if let Some(fingerprint) = stable_pod_fingerprint(¤t) { + if stable_fingerprint.as_ref() != Some(&fingerprint) { + stable_since = Some(Instant::now()); + stable_fingerprint = Some(fingerprint); + } + if stable_since.is_some_and(|started| started.elapsed() >= stable_window) { + eprintln!("RustFS pods remained stable for {stable_window:?}"); + return Ok(()); + } + } else { + stable_since = None; + stable_fingerprint = None; + } + last_snapshot = current; + last_error = "none".to_string(); + } + Err(error) => { + stable_since = None; + stable_fingerprint = None; + last_error = error.to_string(); + } + } + + async_sleep(Duration::from_secs(1)).await; + } +} + fn wait_for_rustfs_pod_replacement( config: 
&ClusterTestConfig, before: &[PodIdentity], @@ -1115,8 +1259,8 @@ fn warp_bucket_name(run_id: &str) -> String { #[cfg(test)] mod tests { use super::{ - OutcomeCounts, PodIdentity, WorkloadSummary, bucket_name, pod_deletion_observed, - pod_replacement_observed, warp_bucket_name, + OutcomeCounts, PodIdentity, PodRuntimeState, WorkloadSummary, bucket_name, + pod_deletion_observed, pod_replacement_observed, stable_pod_fingerprint, warp_bucket_name, }; use rustfs_operator_e2e::framework::history::OperationOutcome; use rustfs_operator_e2e::framework::s3_workload::WorkloadPlan; @@ -1196,4 +1340,33 @@ mod tests { ], )); } + + #[test] + fn stable_pod_fingerprint_requires_four_ready_unchanged_pods() { + let pods = (0..4) + .map(|index| PodRuntimeState { + name: format!("rustfs-{index}"), + uid: format!("uid-{index}"), + phase: "Running".to_string(), + containers_ready: true, + restart_count: index, + terminating: false, + }) + .collect::>(); + + assert_eq!( + stable_pod_fingerprint(&pods), + Some(vec![ + ("uid-0".to_string(), 0), + ("uid-1".to_string(), 1), + ("uid-2".to_string(), 2), + ("uid-3".to_string(), 3), + ]) + ); + assert!(stable_pod_fingerprint(&pods[..3]).is_none()); + + let mut unready = pods; + unready[0].containers_ready = false; + assert!(stable_pod_fingerprint(&unready).is_none()); + } } From a78184f775dfcbe8be113920161c920d7fe63664 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 11:57:51 +0800 Subject: [PATCH 18/20] fix(chaos): isolate fault-test compilation --- e2e/FAULT_TESTING.md | 6 ++++ e2e/scripts/fault-test.sh | 63 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/e2e/FAULT_TESTING.md b/e2e/FAULT_TESTING.md index 25eb234..4a85c3f 100644 --- a/e2e/FAULT_TESTING.md +++ b/e2e/FAULT_TESTING.md @@ -102,6 +102,10 @@ make -C e2e fault-cleanup | `fault-run-dm` | 使用预先准备的静态 PV 和 DM 设备运行 `dm-flakey`。 / Runs `dm-flakey` with pre-provisioned static PVs 
and DM storage. | | `fault-cleanup` | 安全删除 owned namespace 和 managed Chaos。 / Safely removes the owned namespace and managed Chaos. | +`fault-run*` 会先用单 job、最低主机优先级预编译精确的 `faults` 测试二进制,再等待 60 秒并确认原有 RustFS Pod 的 UID、重启数和 Ready 状态没有变化。预编译不计入故障窗口;如果编译影响现有 Tenant,runner 会在创建故障 Tenant 前停止。 + +Before creating a fault Tenant, every `fault-run*` target prebuilds the exact `faults` binary with one job and the lowest host priority. It then verifies for 60 seconds that every pre-existing RustFS Pod keeps the same UID, restart count, and Ready state. Compilation is outside the fault window, and the runner stops if the build disturbs an existing Tenant. + ## 4. Cluster Preparation / 集群准备 ### 4.1 Required Tools / 必需工具 @@ -415,6 +419,8 @@ kubectl get namespace rustfs-fault-test | `RUSTFS_FAULT_TEST_SCENARIOS` | six regular scenarios | `fault-run-regular` 的空格分隔场景列表。 / Space-separated regular scenario list. | | `RUSTFS_FAULT_TEST_SEED` | generated | 固定后可重放相同对象。 / Replays the same objects when set. | | `RUSTFS_FAULT_TEST_USE_CLUSTER_IP` | `false` | 集群节点/Pod 内建议设为 `1`。 / Set to `1` on a node or in-cluster runner. | +| `RUSTFS_FAULT_TEST_BUILD_JOBS` | `1` | 预编译并行度;小型控制面保持为 1。 / Prebuild parallelism; keep at 1 on small control planes. | +| `RUSTFS_FAULT_TEST_BUILD_SETTLE_SECONDS` | `60` | 预编译后原有 RustFS Pod 的稳定校验时间。 / Existing-Pod stability check after prebuild. | | `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `40000` | Make runner 强制验收该值。 / Required object count. | | `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `100` | Make runner 强制验收该值。 / Required concurrency. | | `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `7200` | 最大故障 TTL。 / Maximum fault TTL. 
| diff --git a/e2e/scripts/fault-test.sh b/e2e/scripts/fault-test.sh index c29308d..a0b4c56 100644 --- a/e2e/scripts/fault-test.sh +++ b/e2e/scripts/fault-test.sh @@ -24,6 +24,8 @@ DEFAULT_SCENARIOS="io-eio pod-kill-one network-partition-one io-read-mistake dis EXPECTED_OBJECTS=40000 EXPECTED_CONCURRENCY=100 EXPECTED_PAYLOAD_BYTES=20337459200 +BUILD_JOBS="${RUSTFS_FAULT_TEST_BUILD_JOBS:-1}" +BUILD_SETTLE_SECONDS="${RUSTFS_FAULT_TEST_BUILD_SETTLE_SECONDS:-60}" FAULT_NAMESPACE="${RUSTFS_FAULT_TEST_NAMESPACE:-rustfs-fault-test}" FAULT_TENANT="${RUSTFS_FAULT_TEST_TENANT:-fault-test-tenant}" @@ -109,6 +111,59 @@ require_non_fault_tenants_ready() { [[ -z "$unhealthy" ]] || die "non-fault Tenant is not Ready: $unhealthy" } +snapshot_non_fault_rustfs_pods() { + kubectl_cluster get pods -A -o json | jq -r --arg namespace "$FAULT_NAMESPACE" ' + .items[] + | select(.metadata.namespace != $namespace) + | select(.metadata.labels["rustfs.tenant"] != null) + | [ + .metadata.namespace, + .metadata.name, + .metadata.uid, + ([.status.containerStatuses[]?.restartCount] | add // 0), + ((.status.phase == "Running") and ([.status.containerStatuses[]?.ready] | all)) + ] + | @tsv + ' | sort +} + +prepare_fault_binary() { + local scenario="$1" run_root="$2" + local before="$run_root/build-pods-before.tsv" + local current="$run_root/build-pods-current.tsv" + local changes="$run_root/build-pod-changes.diff" + local elapsed=0 interval=10 + local -a build_command=(cargo test --manifest-path "$MANIFEST" --test faults --no-run) + + [[ "$BUILD_JOBS" =~ ^[1-9][0-9]*$ ]] || die "RUSTFS_FAULT_TEST_BUILD_JOBS must be a positive integer" + [[ "$BUILD_SETTLE_SECONDS" =~ ^[0-9]+$ ]] || die "RUSTFS_FAULT_TEST_BUILD_SETTLE_SECONDS must be a non-negative integer" + preflight "$scenario" + snapshot_non_fault_rustfs_pods >"$before" + echo "preparing fault-test binary with jobs=$BUILD_JOBS and lowest host priority" + if command -v ionice >/dev/null 2>&1; then + CARGO_BUILD_JOBS="$BUILD_JOBS" nice -n 19 ionice 
-c3 "${build_command[@]}" \ + >"$run_root/fault-build.log" 2>&1 + else + CARGO_BUILD_JOBS="$BUILD_JOBS" nice -n 19 "${build_command[@]}" \ + >"$run_root/fault-build.log" 2>&1 + fi + + while (( elapsed <= BUILD_SETTLE_SECONDS )); do + snapshot_non_fault_rustfs_pods >"$current" + if ! cmp -s "$before" "$current"; then + diff -u "$before" "$current" >"$changes" || true + die "fault-test build changed a pre-existing RustFS Pod; see $changes" + fi + require_non_fault_tenants_ready + (( elapsed == BUILD_SETTLE_SECONDS )) && break + sleep "$interval" + elapsed=$((elapsed + interval)) + (( elapsed > BUILD_SETTLE_SECONDS )) && elapsed="$BUILD_SETTLE_SECONDS" + done + preflight "$scenario" + echo "fault-test binary ready; pre-existing RustFS Pods remained unchanged for ${BUILD_SETTLE_SECONDS}s" +} + require_chaos_ready() { local deployment_ready daemon_ready deployment_ready="$(kubectl_ns "$CHAOS_NAMESPACE" get deployment chaos-controller-manager -o json | jq -r ' @@ -152,6 +207,7 @@ preflight() { require_command cargo require_command jq require_command kubectl + require_command nice require_command pgrep [[ -n "${RUSTFS_FAULT_TEST_EXPECTED_CONTEXT:-}" ]] || die "RUSTFS_FAULT_TEST_EXPECTED_CONTEXT is required" [[ -n "${RUSTFS_FAULT_TEST_SERVER_IMAGE:-}" ]] || die "RUSTFS_FAULT_TEST_SERVER_IMAGE is required" @@ -398,17 +454,22 @@ run_one() { is_supported_scenario "$scenario" || die "unsupported scenario: $scenario" run_root="$(new_run_root)" initialize_summary "$run_root" + prepare_fault_binary "$scenario" "$run_root" run_scenario "$scenario" "$run_root" echo "run artifacts: $run_root" } run_regular() { - local run_root scenario + local run_root scenario prepared=false local scenarios="${RUSTFS_FAULT_TEST_SCENARIOS:-$DEFAULT_SCENARIOS}" run_root="$(new_run_root)" initialize_summary "$run_root" for scenario in $scenarios; do [[ "$scenario" != "dm-flakey" ]] || die "run-regular cannot include dm-flakey" + if [[ "$prepared" == "false" ]]; then + prepare_fault_binary "$scenario" 
"$run_root" + prepared=true + fi run_scenario "$scenario" "$run_root" || return $? done echo "regular scenario artifacts: $run_root" From 89aacec1ab6c162ec75b09f5e3b698432fa91c38 Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 12:06:00 +0800 Subject: [PATCH 19/20] fix(chaos): execute prebuilt fault test binary --- e2e/FAULT_TESTING.md | 4 ++-- e2e/scripts/fault-test.sh | 23 +++++++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/e2e/FAULT_TESTING.md b/e2e/FAULT_TESTING.md index 4a85c3f..a8eaa66 100644 --- a/e2e/FAULT_TESTING.md +++ b/e2e/FAULT_TESTING.md @@ -102,9 +102,9 @@ make -C e2e fault-cleanup | `fault-run-dm` | 使用预先准备的静态 PV 和 DM 设备运行 `dm-flakey`。 / Runs `dm-flakey` with pre-provisioned static PVs and DM storage. | | `fault-cleanup` | 安全删除 owned namespace 和 managed Chaos。 / Safely removes the owned namespace and managed Chaos. | -`fault-run*` 会先用单 job、最低主机优先级预编译精确的 `faults` 测试二进制,再等待 60 秒并确认原有 RustFS Pod 的 UID、重启数和 Ready 状态没有变化。预编译不计入故障窗口;如果编译影响现有 Tenant,runner 会在创建故障 Tenant 前停止。 +`fault-run*` 会先用单 job、最低主机优先级预编译精确的 `faults` 测试二进制,再等待 60 秒并确认原有 RustFS Pod 的 UID、重启数和 Ready 状态没有变化。故障窗口直接运行该二进制,不再次调用 Cargo。预编译不计入故障窗口;如果编译影响现有 Tenant,runner 会在创建故障 Tenant 前停止。 -Before creating a fault Tenant, every `fault-run*` target prebuilds the exact `faults` binary with one job and the lowest host priority. It then verifies for 60 seconds that every pre-existing RustFS Pod keeps the same UID, restart count, and Ready state. Compilation is outside the fault window, and the runner stops if the build disturbs an existing Tenant. +Before creating a fault Tenant, every `fault-run*` target prebuilds the exact `faults` binary with one job and the lowest host priority. It then verifies for 60 seconds that every pre-existing RustFS Pod keeps the same UID, restart count, and Ready state. The fault window executes that binary directly without invoking Cargo again. 
Compilation is outside the fault window, and the runner stops if the build disturbs an existing Tenant. ## 4. Cluster Preparation / 集群准备 diff --git a/e2e/scripts/fault-test.sh b/e2e/scripts/fault-test.sh index a0b4c56..03fc310 100644 --- a/e2e/scripts/fault-test.sh +++ b/e2e/scripts/fault-test.sh @@ -32,6 +32,7 @@ FAULT_TENANT="${RUSTFS_FAULT_TEST_TENANT:-fault-test-tenant}" CHAOS_NAMESPACE="${RUSTFS_FAULT_TEST_CHAOS_NAMESPACE:-chaos-mesh}" ACTIVE_PID="" ACTIVE_ARTIFACTS="" +FAULT_TEST_BINARY="" usage() { cat <<'EOF' @@ -132,8 +133,12 @@ prepare_fault_binary() { local before="$run_root/build-pods-before.tsv" local current="$run_root/build-pods-current.tsv" local changes="$run_root/build-pod-changes.diff" + local build_messages="$run_root/fault-build.jsonl" local elapsed=0 interval=10 - local -a build_command=(cargo test --manifest-path "$MANIFEST" --test faults --no-run) + local -a build_command=( + cargo test --manifest-path "$MANIFEST" --test faults --no-run + --message-format=json-render-diagnostics + ) [[ "$BUILD_JOBS" =~ ^[1-9][0-9]*$ ]] || die "RUSTFS_FAULT_TEST_BUILD_JOBS must be a positive integer" [[ "$BUILD_SETTLE_SECONDS" =~ ^[0-9]+$ ]] || die "RUSTFS_FAULT_TEST_BUILD_SETTLE_SECONDS must be a non-negative integer" @@ -142,11 +147,21 @@ prepare_fault_binary() { echo "preparing fault-test binary with jobs=$BUILD_JOBS and lowest host priority" if command -v ionice >/dev/null 2>&1; then CARGO_BUILD_JOBS="$BUILD_JOBS" nice -n 19 ionice -c3 "${build_command[@]}" \ - >"$run_root/fault-build.log" 2>&1 + >"$build_messages" 2>"$run_root/fault-build.log" else CARGO_BUILD_JOBS="$BUILD_JOBS" nice -n 19 "${build_command[@]}" \ - >"$run_root/fault-build.log" 2>&1 + >"$build_messages" 2>"$run_root/fault-build.log" fi + FAULT_TEST_BINARY="$(jq -r ' + select( + .reason == "compiler-artifact" + and .target.name == "faults" + and (.target.kind | index("test")) + ) + | .executable // empty + ' "$build_messages" | tail -n 1)" + [[ -x "$FAULT_TEST_BINARY" ]] || die "faults 
test binary was not produced; see $run_root/fault-build.log" + printf '%s\n' "$FAULT_TEST_BINARY" >"$run_root/fault-test-binary.path" while (( elapsed <= BUILD_SETTLE_SECONDS )); do snapshot_non_fault_rustfs_pods >"$current" @@ -386,7 +401,7 @@ run_scenario() { RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY="$EXPECTED_CONCURRENCY" \ RUSTFS_FAULT_TEST_DURATION_SECONDS="${RUSTFS_FAULT_TEST_DURATION_SECONDS:-7200}" \ RUSTFS_FAULT_TEST_ARTIFACTS="$artifacts" \ - cargo test --manifest-path "$MANIFEST" --test faults -- --ignored --test-threads=1 --nocapture \ + "$FAULT_TEST_BINARY" --ignored --test-threads=1 --nocapture \ >"$artifacts/test.log" 2>&1 echo "$?" >"$artifacts/test-exit-code.tmp" ) & From 386686f2d13d3e40895eb5ac76798870d477a9ac Mon Sep 17 00:00:00 2001 From: GatewayJ <18332154+GatewayJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 13:56:08 +0800 Subject: [PATCH 20/20] test(chaos): tune fault-test runner defaults --- e2e/FAULT_TESTING.md | 24 ++++++++++++++++------- e2e/README.md | 32 +++---------------------------- e2e/scripts/fault-test.sh | 2 +- e2e/src/framework/fault_config.rs | 8 ++------ e2e/src/framework/s3_workload.rs | 7 ++++--- e2e/tests/faults.rs | 4 ++-- 6 files changed, 29 insertions(+), 48 deletions(-) diff --git a/e2e/FAULT_TESTING.md b/e2e/FAULT_TESTING.md index a8eaa66..3440bd1 100644 --- a/e2e/FAULT_TESTING.md +++ b/e2e/FAULT_TESTING.md @@ -18,7 +18,7 @@ limitations under the License. 本手册是 Agent 和开发人员使用 `e2e` package 故障测试工具的唯一操作入口。它说明执行步骤、步骤原因、安全边界、验收证据和清理方式。 -This manual is the single operational entry point for agents and developers using the fault-test tooling in the `e2e` package. It explains the steps, reasons, safety boundaries, evidence, and cleanup. +This manual is the single operational entry point for agents and developers using the fault-test tooling in the `e2e` package. Fault-test commands, prerequisites, safety limits, evidence, and cleanup are intentionally kept here instead of duplicated in README files. ## 1. 
Purpose And Safety / 目的与安全边界 @@ -63,7 +63,7 @@ Each scenario deterministically generates object content and size order from a s ```text objects: 40,000 -concurrency: 100 +concurrency: 80 payload/scenario: 20,337,459,200 bytes (~18.94GiB) PVCs: 4 × 100Gi maximum fault TTL: 7,200 seconds @@ -106,6 +106,16 @@ make -C e2e fault-cleanup Before creating a fault Tenant, every `fault-run*` target prebuilds the exact `faults` binary with one job and the lowest host priority. It then verifies for 60 seconds that every pre-existing RustFS Pod keeps the same UID, restart count, and Ready state. The fault window executes that binary directly without invoking Cargo again. Compilation is outside the fault window, and the runner stops if the build disturbs an existing Tenant. +### 3.1 Recommended Flow / 推荐执行顺序 + +1. 运行 `make -C e2e fault-check`,先确认本地代码、脚本和普通测试可用。 / Run `make -C e2e fault-check` first to validate code, scripts, and non-live tests. +2. 准备真实测试集群、专用 StorageClass、Chaos Mesh 和固定 digest 的 RustFS image。 / Prepare the real test cluster, dedicated StorageClass, Chaos Mesh, and a pinned RustFS image digest. +3. 导出 `RUSTFS_FAULT_TEST_EXPECTED_CONTEXT`、`RUSTFS_FAULT_TEST_STORAGE_CLASS` 和 `RUSTFS_FAULT_TEST_SERVER_IMAGE`。 / Export the required context, StorageClass, and image variables. +4. 先执行 `make -C e2e fault-preflight SCENARIO=io-eio`,再单独跑 `io-eio`。 / Run `io-eio` preflight first, then run `io-eio` alone. +5. `io-eio` 通过后再执行 `make -C e2e fault-run-regular`。 / After `io-eio` passes, run the regular scenarios with `fault-run-regular`. +6. 只有准备好静态 Local PV 和 Device Mapper 后,才执行 `make -C e2e fault-run-dm`。 / Run `fault-run-dm` only after static Local PVs and Device Mapper are ready. +7. 结束后先收集 artifacts,再执行 `make -C e2e fault-cleanup`。 / Collect artifacts before running `fault-cleanup`. + ## 4. Cluster Preparation / 集群准备 ### 4.1 Required Tools / 必需工具 @@ -172,9 +182,9 @@ Non-K3s clusters must use their actual container runtime socket. ## 5.
Regular Scenarios / 常规场景 -先固定 context、动态 StorageClass 和 RustFS image digest。测试机位于集群节点或 Pod 内时使用 ClusterIP,避免 100 并发经过 `kubectl port-forward`。 +先固定 context、动态 StorageClass 和 RustFS image digest。测试机位于集群节点或 Pod 内时使用 ClusterIP,避免 80 并发经过 `kubectl port-forward`。 -Pin the context, dynamic StorageClass, and RustFS image digest. Use ClusterIP when the runner is on a cluster node or in a Pod so 100 concurrent requests do not traverse `kubectl port-forward`. +Pin the context, dynamic StorageClass, and RustFS image digest. Use ClusterIP when the runner is on a cluster node or in a Pod so 80 concurrent requests do not traverse `kubectl port-forward`. ```bash export RUSTFS_FAULT_TEST_EXPECTED_CONTEXT=default @@ -362,12 +372,12 @@ Chaos or DM snapshots - 测试退出码为 0。 - `fault-evidence.json` 的 `injected`、`active_during_workload`、`recovered` 都为 `true`。 -- `workload-plan.json` 精确记录 40,000 对象、100 并发和四档尺寸分布。 +- `workload-plan.json` 精确记录 40,000 对象、80 并发和四档尺寸分布。 - `checker-report.json` 的 `committed_puts=40000`,并且 missing、hash mismatch、successful corrupted read、LIST warning 均为空。 - fault Tenant 恢复 Ready;所有原有非 fault Tenant 和节点保持 Ready。 - The test exits with zero. - `fault-evidence.json` reports `injected`, `active_during_workload`, and `recovered` as `true`. -- `workload-plan.json` reports exactly 40,000 objects, concurrency 100, and the four size classes. +- `workload-plan.json` reports exactly 40,000 objects, concurrency 80, and the four size classes. - `checker-report.json` reports `committed_puts=40000` with no missing object, hash mismatch, successful corrupted read, or LIST warning. - The fault Tenant recovers Ready while every pre-existing non-fault Tenant and node remains Ready. @@ -422,7 +432,7 @@ kubectl get namespace rustfs-fault-test | `RUSTFS_FAULT_TEST_BUILD_JOBS` | `1` | 预编译并行度;小型控制面保持为 1。 / Prebuild parallelism; keep at 1 on small control planes. | | `RUSTFS_FAULT_TEST_BUILD_SETTLE_SECONDS` | `60` | 预编译后原有 RustFS Pod 的稳定校验时间。 / Existing-Pod stability check after prebuild. 
| | `RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS` | `40000` | Make runner 强制验收该值。 / Required object count. | -| `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `100` | Make runner 强制验收该值。 / Required concurrency. | +| `RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY` | `80` | Make runner 强制验收该值。 / Required concurrency. | | `RUSTFS_FAULT_TEST_DURATION_SECONDS` | `7200` | 最大故障 TTL。 / Maximum fault TTL. | | `RUSTFS_FAULT_TEST_REQUEST_TIMEOUT_SECONDS` | `30` | 单次 S3 请求超时。 / Per-request S3 timeout. | | `RUSTFS_FAULT_TEST_REQUIRE_CLIENT_DISRUPTION` | `false` | 是否要求客户端可见错误。 / Whether client-visible disruption is mandatory. | diff --git a/e2e/README.md b/e2e/README.md index 6d6bc12..837a1b7 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -16,10 +16,9 @@ The harness is split into four top-level domains: ```text e2e/ Cargo.toml - Makefile package-local fault-test entrypoints - FAULT_TESTING.md bilingual fault-test operations manual + FAULT_TESTING.md package-local fault-test operations manual scripts/ - fault-test.sh guarded real-cluster scenario orchestration + fault-test.sh guarded real-cluster fault-test orchestration manifests/ kind-rustfs-e2e.yaml dedicated 1 control-plane + 3 worker Kind cluster src/ @@ -64,8 +63,7 @@ e2e/ 6. `framework::live` owns live-run opt-in and dedicated-context checks. 7. `cases/*` should describe behavior and call framework helpers; avoid shell details there. 8. Kind e2e cases remain in `cases/*`; real-cluster fault tests are intentionally excluded from that inventory. -9. Fault tests use `FaultTestConfig`, reject Kind contexts, require a dedicated namespace and StorageClass, and never use Kind local-volume assumptions. -10. The fault-test runner creates its namespace with ownership metadata. Existing namespaces must already have the matching manager label and Tenant annotation before destructive reset is allowed. +9. Destructive real-cluster fault tests are documented only in [`FAULT_TESTING.md`](FAULT_TESTING.md). 
## Safety defaults @@ -92,30 +90,6 @@ make e2e-live-run The harness refuses to run live tests unless the active Kubernetes context matches the configured dedicated Kind context. -Fault tests have separate safety defaults and are operated entirely from this package: - -```text -context: current non-Kind kubectl context -test namespace: rustfs-fault-test -tenant name: fault-test-tenant -storage class: required via RUSTFS_FAULT_TEST_STORAGE_CLASS -artifacts: target/fault-tests/artifacts -PVCs: 4 × 100Gi -objects: 40000 with seeded weighted sizes -concurrency: 100 -``` - -Run them independently from the Kind lifecycle: - -```bash -RUSTFS_FAULT_TEST_EXPECTED_CONTEXT= \ -RUSTFS_FAULT_TEST_STORAGE_CLASS= \ -RUSTFS_FAULT_TEST_SERVER_IMAGE= \ -make -C e2e fault-run SCENARIO=io-eio -``` - -The runner creates an absent namespace with ownership metadata and refuses to reset or claim an existing namespace unless its ownership markers match. See the package-local bilingual [Fault-Test Operations Manual](FAULT_TESTING.md) for the Make targets, prerequisites, all seven scenarios, `dm-flakey` storage procedure, evidence, recovery, and cleanup. 
- ## Non-live validation ```bash diff --git a/e2e/scripts/fault-test.sh b/e2e/scripts/fault-test.sh index 03fc310..27e59ef 100644 --- a/e2e/scripts/fault-test.sh +++ b/e2e/scripts/fault-test.sh @@ -22,7 +22,7 @@ MANAGER="rustfs-operator-fault-test" MANAGER_SELECTOR="app.kubernetes.io/managed-by=$MANAGER" DEFAULT_SCENARIOS="io-eio pod-kill-one network-partition-one io-read-mistake disk-full warp-under-chaos" EXPECTED_OBJECTS=40000 -EXPECTED_CONCURRENCY=100 +EXPECTED_CONCURRENCY=80 EXPECTED_PAYLOAD_BYTES=20337459200 BUILD_JOBS="${RUSTFS_FAULT_TEST_BUILD_JOBS:-1}" BUILD_SETTLE_SECONDS="${RUSTFS_FAULT_TEST_BUILD_SETTLE_SECONDS:-60}" diff --git a/e2e/src/framework/fault_config.rs b/e2e/src/framework/fault_config.rs index 53b73e4..ab018d6 100644 --- a/e2e/src/framework/fault_config.rs +++ b/e2e/src/framework/fault_config.rs @@ -101,11 +101,7 @@ impl FaultTestConfig { )), percent: env_u8(&get_env, "RUSTFS_FAULT_TEST_PERCENT", default_percent), workload_objects: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_OBJECTS", 40000), - workload_concurrency: env_usize( - &get_env, - "RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY", - 100, - ), + workload_concurrency: env_usize(&get_env, "RUSTFS_FAULT_TEST_WORKLOAD_CONCURRENCY", 80), workload_seed: env_optional_u64(&get_env, "RUSTFS_FAULT_TEST_SEED")?, request_timeout: Duration::from_secs(env_u64( &get_env, @@ -304,7 +300,7 @@ mod tests { assert_eq!(config.duration, std::time::Duration::from_secs(7200)); assert_eq!(config.percent, 20); assert_eq!(config.workload_objects, 40000); - assert_eq!(config.workload_concurrency, 100); + assert_eq!(config.workload_concurrency, 80); assert_eq!(config.workload_seed, None); assert_eq!(config.request_timeout, std::time::Duration::from_secs(30)); assert!(!config.use_cluster_ip); diff --git a/e2e/src/framework/s3_workload.rs b/e2e/src/framework/s3_workload.rs index d201b9e..3e8d0a8 100644 --- a/e2e/src/framework/s3_workload.rs +++ b/e2e/src/framework/s3_workload.rs @@ -587,9 +587,9 @@ mod tests { #[test] 
fn workload_plan_is_weighted_shuffled_and_reproducible() { - let plan = WorkloadPlan::seeded(42, 40000, 100); - let same = WorkloadPlan::seeded(42, 40000, 100); - let different = WorkloadPlan::seeded(43, 40000, 100); + let plan = WorkloadPlan::seeded(42, 40000, 80); + let same = WorkloadPlan::seeded(42, 40000, 80); + let different = WorkloadPlan::seeded(43, 40000, 80); assert_eq!(plan, same); assert_ne!(plan.sizes, different.sizes); @@ -606,5 +606,6 @@ mod tests { ] ); assert_eq!(plan.total_payload_bytes, 20_337_459_200); + assert_eq!(plan.concurrency, 80); } } diff --git a/e2e/tests/faults.rs b/e2e/tests/faults.rs index 81a2a67..54cf4de 100644 --- a/e2e/tests/faults.rs +++ b/e2e/tests/faults.rs @@ -1279,7 +1279,7 @@ mod tests { #[test] fn workload_summary_counts_disrupted_operations() { - let mut summary = WorkloadSummary::new(&WorkloadPlan::seeded(42, 40000, 100)); + let mut summary = WorkloadSummary::new(&WorkloadPlan::seeded(42, 40000, 80)); summary.puts.record(OperationOutcome::Ok); summary.gets.record(OperationOutcome::Timeout); @@ -1295,7 +1295,7 @@ mod tests { let summary = WorkloadSummary { seed: 42, object_count: 40000, - concurrency: 100, + concurrency: 80, total_payload_bytes: 20_337_459_200, puts: OutcomeCounts { ok: 1,