diff --git a/.github/workflows/ci-spring-ydb.yaml b/.github/workflows/ci-spring-ydb.yaml
new file mode 100644
index 00000000..318109db
--- /dev/null
+++ b/.github/workflows/ci-spring-ydb.yaml
@@ -0,0 +1,76 @@
+name: Spring YDB CI with Maven
+
+on:
+ push:
+ paths:
+ - 'spring-ydb/**'
+ branches:
+ - main
+ pull_request:
+ paths:
+ - 'spring-ydb/**'
+
+env:
+ MAVEN_ARGS: --batch-mode --update-snapshots -Dstyle.color=always
+
+jobs:
+ prepare:
+ name: Prepare Maven cache
+ runs-on: ubuntu-24.04
+
+ env:
+ MAVEN_ARGS: --batch-mode -Dstyle.color=always
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Set up JDK
+ uses: actions/setup-java@v4
+ with:
+ java-version: '17'
+ distribution: 'temurin'
+ cache: 'maven'
+
+ - name: Download dependencies (Default)
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS dependency:resolve-plugins dependency:go-offline
+
+ - name: Download dependencies (Spring Boot 3)
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS -Pspring-boot3 dependency:resolve-plugins dependency:go-offline
+
+ - name: Download dependencies (Spring Boot 4)
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS -Pspring-boot4 dependency:resolve-plugins dependency:go-offline
+
+ build:
+ name: Spring YDB build & tests
+ runs-on: ubuntu-24.04
+ needs: prepare
+
+ strategy:
+ matrix:
+ java: [ '17', '21', '24' ]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up JDK ${{matrix.java}}
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{matrix.java}}
+ distribution: 'temurin'
+ cache: maven
+
+ - name: Build spring-ydb
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS package
+
+ - name: Tests with Spring Boot 3
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS -Pspring-boot3 test
+
+ - name: Tests with Spring Boot 4
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS -Pspring-boot4 test
diff --git a/.github/workflows/publish-spring-ydb.yaml b/.github/workflows/publish-spring-ydb.yaml
new file mode 100644
index 00000000..d64cf870
--- /dev/null
+++ b/.github/workflows/publish-spring-ydb.yaml
@@ -0,0 +1,82 @@
+name: Publish Spring YDB
+
+on:
+ push:
+ tags:
+ - 'spring-ydb/v*.*.*'
+
+env:
+ MAVEN_ARGS: --batch-mode --no-transfer-progress -Dstyle.color=always
+
+jobs:
+ validate:
+ name: Validate Spring YDB
+ runs-on: ubuntu-24.04
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Extract spring-ydb version
+ run: |
+ cd spring-ydb
+ SPRING_YDB_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
+ echo "SPRING_YDB_VERSION=$SPRING_YDB_VERSION" >> "$GITHUB_ENV"
+
+ - name: Fail workflow if version is snapshot
+ if: endsWith(env.SPRING_YDB_VERSION, 'SNAPSHOT')
+ uses: actions/github-script@v6
+ with:
+ script: core.setFailed('SNAPSHOT version cannot be published')
+
+ - name: Fail workflow if version is not equal to tag name
+ if: format('spring-ydb/v{0}', env.SPRING_YDB_VERSION) != github.ref_name
+ uses: actions/github-script@v6
+ with:
+ script: core.setFailed('Release name must be equal to project version')
+
+ - name: Set up JDK
+ uses: actions/setup-java@v4
+ with:
+ java-version: 17
+ distribution: 'temurin'
+ cache: 'maven'
+
+ - name: Download dependencies
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS -Pspring-boot-minimal dependency:go-offline
+
+ - name: Build with Maven
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS -Pspring-boot-minimal package
+
+ publish:
+ name: Publish Spring YDB
+ runs-on: ubuntu-latest
+ needs: validate
+
+ steps:
+ - name: Install gpg secret key
+ run: |
+ # Install gpg secret key
+ cat <(echo -e "${{ secrets.MAVEN_OSSRH_GPG_SECRET_KEY }}") | gpg --batch --import
+ # Verify gpg secret key
+ gpg --list-secret-keys --keyid-format LONG
+
+ - uses: actions/checkout@v4
+
+ - name: Set up Maven Central Repository
+ uses: actions/setup-java@v4
+ with:
+ java-version: 17
+ distribution: 'temurin'
+ cache: 'maven'
+ server-id: ossrh-s01
+ server-username: MAVEN_USERNAME
+ server-password: MAVEN_PASSWORD
+
+ - name: Publish package
+ working-directory: ./spring-ydb
+ run: mvn $MAVEN_ARGS -Possrh-s01,spring-boot-minimal -Dgpg.passphrase=${{ secrets.MAVEN_OSSRH_GPG_PASSWORD }} clean deploy
+ env:
+ MAVEN_USERNAME: ${{ secrets.MAVEN_OSSRH_USERNAME }}
+ MAVEN_PASSWORD: ${{ secrets.MAVEN_OSSRH_TOKEN }}
diff --git a/spring-ydb/pom.xml b/spring-ydb/pom.xml
new file mode 100644
index 00000000..a4e80020
--- /dev/null
+++ b/spring-ydb/pom.xml
@@ -0,0 +1,142 @@
+
+
+
+ 4.0.0
+
+ tech.ydb
+ spring-ydb
+ 1.0.0-SNAPSHOT
+
+ Spring YDB
+ Spring integration modules for YDB
+ https://github.com/ydb-platform/ydb-java-dialects
+
+ pom
+
+
+
+ Ekaterina Isaeva
+ ikaterina0909@gmail.com
+ YDB
+ https://ydb.tech/
+
+
+ Kirill Kurdyukov
+ kurdyukov-kir@ydb.tech
+ YDB
+ https://ydb.tech/
+
+
+
+
+ https://github.com/ydb-platform/ydb-java-dialects
+ scm:git:https://github.com/ydb-platform/ydb-java-dialects.git
+ scm:git:https://github.com/ydb-platform/ydb-java-dialects.git
+
+
+
+
+ Apache License, Version 2.0
+ https://www.apache.org/licenses/LICENSE-2.0
+
+
+
+
+ spring-ydb-retry
+
+
+
+ 17
+ 17
+ 17
+ 17
+ UTF-8
+ 6.2.0
+ 3.4.0
+ 2.3.22
+
+
+
+
+
+ org.springframework.boot
+ spring-boot-dependencies
+ ${spring-boot.version}
+ pom
+ import
+
+
+ tech.ydb.jdbc
+ ydb-jdbc-driver
+ ${ydb-jdbc.version}
+
+
+
+
+
+
+ spring-boot-minimal
+
+ true
+
+
+ 3.4.0
+
+
+
+ spring-boot3
+
+ 3.5.7
+
+
+
+ spring-boot4
+
+ 4.0.0
+
+
+
+ ossrh-s01
+
+ false
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-gpg-plugin
+ 3.2.7
+
+
+ sign-artifacts
+ verify
+
+ sign
+
+
+
+
+
+ --pinentry-mode
+ loopback
+
+
+
+
+ org.sonatype.central
+ central-publishing-maven-plugin
+ 0.7.0
+ true
+
+ ossrh-s01
+
+
+
+
+
+
+
+
diff --git a/spring-ydb/spring-ydb-retry/README.md b/spring-ydb/spring-ydb-retry/README.md
new file mode 100644
index 00000000..67141d45
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/README.md
@@ -0,0 +1,84 @@
+# Spring YDB Retry
+
+## Overview
+
+This project is a Spring Boot auto-configuration module that provides automatic retry
+for transactional operations with [YDB](https://ydb.tech).
+
+### Features
+
+- Automatic retry of failed `@Transactional` methods on YDB retryable status codes
+- `@YdbTransactional` annotation with per-method retry settings (maxRetries, backoff, idempotency)
+- Dual backoff strategy (fast/slow) with jitter tailored to YDB error semantics
+- Idempotent mode for extended retry coverage on non-deterministic status codes
+- Fully configurable via `application.properties`
+
+## Getting Started
+
+### Requirements
+
+- Java 21 or above
+- Spring Boot 3.4+ / Spring Framework 6.2+
+- [YDB JDBC Driver](https://github.com/ydb-platform/ydb-jdbc-driver)
+- Access to a YDB Database instance
+
+### Installation
+
+For Maven, add the following dependency to your pom.xml:
+
+```xml
+
+ tech.ydb
+ spring-ydb-retry
+
+ ${spring-ydb-retry.version}
+
+```
+
+For Gradle, add the following to your build.gradle (or build.gradle.kts):
+
+```groovy
+dependencies {
+ implementation 'tech.ydb:spring-ydb-retry:$version' // Set actual version
+}
+```
+
+## Usage
+
+The module is auto-configured via Spring Boot. Once the dependency is on the classpath,
+all `@Transactional` (and `@YdbTransactional`) methods are intercepted with retry logic.
+
+### Annotation
+
+Use `@YdbTransactional` as a drop-in replacement for `@Transactional` with additional
+retry parameters:
+
+```java
+@YdbTransactional(maxRetries = 5, idempotent = true)
+public void save(User user) {
+ // retried up to 5 times on YDB retryable errors
+}
+```
+
+### Configuration
+
+Configure retry behavior in `application.properties`:
+
+```properties
+# Enable/disable retry (default: true)
+ydb.transaction.retry.enabled=true
+
+# Maximum retry attempts (default: 10)
+ydb.transaction.retry.max-retries=10
+
+# Backoff settings for slow errors
+ydb.transaction.retry.slow-backoff-base-ms=50
+ydb.transaction.retry.slow-cap-backoff-ms=5000
+
+# Backoff settings for fast errors
+ydb.transaction.retry.fast-backoff-base-ms=5
+ydb.transaction.retry.fast-cap-backoff-ms=500
+
+```
+
+Idempotent-only retry is configured per method via `@YdbTransactional(idempotent = true)`.
diff --git a/spring-ydb/spring-ydb-retry/pom.xml b/spring-ydb/spring-ydb-retry/pom.xml
new file mode 100644
index 00000000..26ebd729
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/pom.xml
@@ -0,0 +1,176 @@
+
+
+
+ 4.0.0
+
+
+ tech.ydb
+ spring-ydb
+ 1.0.0-SNAPSHOT
+
+
+ spring-ydb-retry
+ jar
+
+ Spring YDB Retry
+ Spring retry module for YDB
+
+
+
+ org.springframework.boot
+ spring-boot-autoconfigure
+ provided
+ true
+
+
+ org.springframework.boot
+ spring-boot
+ provided
+ true
+
+
+ org.springframework
+ spring-tx
+ provided
+
+
+ org.springframework
+ spring-core
+ provided
+
+
+ tech.ydb.jdbc
+ ydb-jdbc-driver
+ provided
+
+
+ tech.ydb.dialects
+ spring-data-jdbc-ydb
+ 1.1.0
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter
+ 5.11.4
+ test
+
+
+ org.mockito
+ mockito-core
+ 5.15.2
+ test
+
+
+ org.springframework
+ spring-test
+ test
+
+
+ org.springframework.data
+ spring-data-jdbc
+ test
+
+
+ org.flywaydb
+ flyway-core
+ test
+
+
+ tech.ydb.dialects
+ flyway-ydb-dialect
+ 1.0.0
+ test
+
+
+ org.springframework.boot
+ spring-boot-starter-jdbc
+ test
+
+
+ org.springframework.boot
+ spring-boot-starter-test
+ test
+
+
+ tech.ydb.test
+ ydb-junit5-support
+ 2.3.22
+ test
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+ 3.5.0
+
+ 17
+
+
+
+ attach-javadocs
+
+ jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+ 3.3.0
+
+
+
+ tech.ydb.spring.retry
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.11.0
+
+ UTF-8
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+ 3.2.1
+
+
+ attach-sources
+ verify
+
+ jar-no-fork
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 3.5.2
+
+
+
+ junit.jupiter.execution.parallel.enabled = true
+ junit.jupiter.execution.parallel.mode.default = concurrent
+ junit.jupiter.execution.parallel.mode.classes.default = concurrent
+
+
+
+ true
+
+
+
+
+
+
diff --git a/spring-ydb/spring-ydb-retry/slo/Dockerfile b/spring-ydb/spring-ydb-retry/slo/Dockerfile
new file mode 100644
index 00000000..b1e79e2e
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/Dockerfile
@@ -0,0 +1,22 @@
+FROM maven:3.9-eclipse-temurin-21 AS build
+
+WORKDIR /build
+
+COPY pom.xml ./pom.xml
+RUN mvn install -N -B
+
+COPY spring-ydb-retry/pom.xml ./spring-ydb-retry/pom.xml
+COPY spring-ydb-retry/src ./spring-ydb-retry/src
+RUN mvn install -DskipTests -B -pl spring-ydb-retry || mvn install -DskipTests -B -pl spring-ydb-retry
+
+COPY spring-ydb-retry/slo/pom.xml ./spring-ydb-retry/slo/pom.xml
+COPY spring-ydb-retry/slo/src ./spring-ydb-retry/slo/src
+RUN cd spring-ydb-retry/slo && mvn package -DskipTests -B || mvn package -DskipTests -B
+
+FROM eclipse-temurin:21-jre
+
+WORKDIR /app
+COPY --from=build /build/spring-ydb-retry/slo/target/ydb-slo-workload-1.0.0-SNAPSHOT-exec.jar app.jar
+
+EXPOSE 8080
+ENTRYPOINT ["java", "-jar", "app.jar"]
diff --git a/spring-ydb/spring-ydb-retry/slo/README.md b/spring-ydb/spring-ydb-retry/slo/README.md
new file mode 100644
index 00000000..3a49a295
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/README.md
@@ -0,0 +1,126 @@
+# SLO Testing for YDB Spring Retry
+
+SLO (Service Level Objectives) testing validates that the **spring-ydb-retry** library reduces visible application
+errors during YDB cluster node failures — restarts, shutdowns, network issues, and kill signals.
+
+## How It Works
+
+Two identical Spring Boot applications run the same workload (read/write) against the same YDB cluster:
+
+| Instance | Port | Retry | Description |
+|------------------|------|------------------------------|-------------------------------------------------------------------|
+| `app-with-retry` | 8081 | **Enabled** (max 10 retries) | Uses the same workload with retry enabled |
+| `app-no-retry` | 8082 | **Disabled** | Uses the same workload with `YDB_TRANSACTION_RETRY_ENABLED=false` |
+
+A chaos script periodically stops, restarts, and kills random YDB nodes. The Grafana dashboard shows an error rate
+comparison, clearly demonstrating that retry significantly reduces visible application errors.
+
+## Test Scenarios
+
+Two chaos levels are available:
+
+| Scenario | Directory | Description |
+|----------------------|--------------------------------|--------------------------------------------------------------------------------------------------|
+| **chaos** | `playground/chaos/` | Baseline: stop/start, restart, SIGKILL of individual nodes |
+| **chaos-aggressive** | `playground/chaos-aggressive/` | Aggressive: pause/unpause, multi-node kill, rapid kill/start, triple kill + resource constraints |
+
+See [`playground/README.md`](playground/README.md) for details.
+
+## Quick Start
+
+### 1. Start (baseline chaos)
+
+```bash
+cd slo/playground/chaos
+docker compose up --build -d
+```
+
+Wait ~60 seconds for YDB to initialize and apps to seed data.
+
+### 2. Start (aggressive chaos)
+
+```bash
+cd slo/playground/chaos-aggressive
+docker compose up --build -d
+```
+
+### 3. Open Grafana
+
+Navigate to **http://localhost:3000** (login: `admin` / `admin`).
+
+The **"YDB Spring Retry SLO - Retry vs No-Retry Comparison"** dashboard is pre-loaded and auto-refreshes every 5
+seconds.
+
+### 4. Stop
+
+```bash
+docker compose down
+```
+
+To also remove data volumes:
+
+```bash
+docker compose down -v
+```
+
+## Services
+
+| Service | URL | Description |
+|---------------------------|-----------------------------------------------|---------------------------------|
+| Grafana | http://localhost:3000 | Metrics dashboard (admin/admin) |
+| Prometheus | http://localhost:9090 | Metrics storage |
+| YDB Monitoring | http://localhost:8765 | YDB cluster UI |
+| YDB gRPC | grpc://localhost:2136 | YDB endpoint |
+| App with retry metrics | internal `http://app-with-retry:9464/metrics` | Prometheus scrape target |
+| App without retry metrics | internal `http://app-no-retry:9464/metrics` | Prometheus scrape target |
+
+The app containers do not publish their internal Spring Boot or metrics ports to the host. Prometheus scrapes them over
+the Docker network at `:9464/metrics`.
+
+## Metrics
+
+The SLO application exports Prometheus metrics via OpenTelemetry SDK:
+
+| Metric | Type | Labels | Description |
+|----------------------------------|-----------|-----------------------------------------|-------------------|
+| `slo_operations_total` | Counter | ref, operation_type, status, error_type | Total operations |
+| `slo_operation_duration_seconds` | Histogram | ref, operation_type, status, error_type | Operation latency |
+
+### Labels
+
+| Label | Values | Description |
+|------------------|----------------------------------------------------------------------------------|-----------------------------------------|
+| `ref` | `with-retry`, `no-retry` | Instance identifier |
+| `operation_type` | `read`, `write` | Operation type |
+| `status` | `success`, `failure` | Operation result |
+| `error_type` | `none`, `UNAVAILABLE`, `TRANSPORT_UNAVAILABLE`, `OVERLOADED`, `BAD_SESSION`, ... | YDB status code or exception class name |
+
+## Configuration
+
+Environment variables for the app containers:
+
+| Variable | Default | Description |
+|-------------------------------------|--------------------------|---------------------------------------------------|
+| `SERVER_PORT` | 8080 | HTTP port |
+| `SPRING_DATASOURCE_URL` | - | YDB JDBC URL |
+| `YDB_TRANSACTION_RETRY_ENABLED` | true | Enable/disable retry |
+| `YDB_TRANSACTION_RETRY_MAX_RETRIES` | 10 | Max retry attempts |
+| `SLO_RUN_ID` | auto | Shared run identifier used for result folder name |
+| `SLO_RESULTS_DIR` | `/app/results` in Docker | Root directory for saved run results |
+| `REF` | unknown | Label for metrics (with-retry / no-retry) |
+| `SLO_READ_RPS` | 100 | Target read RPS |
+| `SLO_WRITE_RPS` | 100 | Target write RPS |
+| `SLO_INITIAL_DATA` | 1000 | Initial rows to seed |
+| `SLO_TIME` | 600 | Workload duration in seconds |
+
+## Saved Results
+
+```text
+results/
+ /
+ retry
+ no-retry
+```
+
+The `retry` file contains the final summary for `app-with-retry`, and `no-retry` contains the final summary for
+`app-no-retry`.
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/README.md b/spring-ydb/spring-ydb-retry/slo/playground/README.md
new file mode 100644
index 00000000..802648fc
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/README.md
@@ -0,0 +1,96 @@
+# Playground
+
+Docker Compose environments for running SLO tests with chaos injection. Each scenario deploys a full YDB cluster, two workload applications (with and without retry), Prometheus, Grafana, and a chaos container.
+
+## Shared Infrastructure
+
+All scenarios use the same architecture:
+
+| Component | Count | Description |
+|---|---|---|
+| YDB static node | 1 | Storage node + discovery (`static-0`) |
+| YDB database nodes | 5 | Tenant nodes (`database-1` .. `database-5`) |
+| SLO app with retry | 1 | Port 8081, retry enabled |
+| SLO app without retry | 1 | Port 8082, retry disabled |
+| Prometheus | 1 | Scrapes metrics every 5s |
+| Grafana | 1 | Visualization on port 3000 |
+| Chaos container | 1 | Docker container with docker.sock access |
+
+All services run on a single Docker network `slo-network`. The YDB cluster uses erasure `none` (no storage-level replication), which amplifies the impact of failures.
+
+---
+
+## Scenario 1: `chaos/` — Baseline Chaos
+
+A mild scenario modeling typical operational failures: graceful shutdown, restart, and crash of a single node at a time.
+
+### Start
+
+```bash
+cd slo/playground/chaos
+docker compose up --build -d
+```
+
+### Chaos Phases (`chaos.sh`)
+
+The chaos script starts 60 seconds after launch (once YDB and apps are ready).
+
+| Phase | Iterations | Action | Pause | Generated Errors |
+|---|---|---|---|---|
+| Stop/Start | 5 | `docker stop` → `docker start` a random node | 60s | `UNAVAILABLE`, `TRANSPORT_UNAVAILABLE` |
+| Restart | 3 | `docker restart -t 0` a random node (instant) | 60s | `TRANSPORT_UNAVAILABLE` |
+| Final Kill | 1 | `docker kill -s SIGKILL` a random node | — | `UNAVAILABLE`, `BAD_SESSION` |
+
+**Total chaos duration:** ~8 minutes after the 60s delay.
+
+---
+
+## Scenario 2: `chaos-aggressive/` — Aggressive Chaos
+
+An intensive scenario with multi-node failures, pause/unpause, and rapid kill/start cycles. YDB nodes run with constrained resources (768 MB RAM, 1 CPU), amplifying the effect.
+
+### Start
+
+```bash
+cd slo/playground/chaos-aggressive
+docker compose up --build -d
+```
+
+### Chaos Phases (`chaos.sh`)
+
+| Phase | Iterations | Action | Pause |
+|---|---|---|---|
+| 1. Pause/Unpause | 4 | `docker pause` 20s → `docker unpause` one node | 15s |
+| 2. Multi-node Kill | 3 | `SIGKILL` **two** nodes simultaneously → `docker start` both | 25s |
+| 3. Instant Restart | 3 | `docker restart -t 0` one node | 20s |
+| 4. Dual Pause | 1 | `docker pause` **two** nodes for 30s → unpause | 15s |
+| 5. Rapid Kill/Start | 5 | `SIGKILL` → `docker start` with no gap | 8s |
+| 6. Final Triple Kill | 1 | `SIGKILL` **three** nodes simultaneously | — |
+
+**Total chaos duration:** ~7 minutes after the 60s delay.
+
+---
+
+## Configuration Files
+
+### `configs/ydb.yaml`
+
+YDB cluster configuration with erasure `none`, a single storage pool (SSD), and 5 database nodes connected to the tenant `/Root/testdb`.
+
+### `configs/prometheus/prometheus.yaml`
+
+Scrape configuration: both apps are scraped every 5 seconds at `:9464/metrics`.
+
+### `configs/grafana/provisioning/`
+
+- **datasource.yaml** — Prometheus datasource
+- **dashboard.yaml** — Auto-loads JSON dashboards from the directory
+- **slo.json** — Pre-built dashboard
+
+## Cleanup
+
+```bash
+docker compose down -v
+```
+
+Removes containers, networks, and volumes (Prometheus data, Grafana DB).
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/chaos-aggressive/chaos.sh b/spring-ydb/spring-ydb-retry/slo/playground/chaos-aggressive/chaos.sh
new file mode 100755
index 00000000..20ba8376
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/chaos-aggressive/chaos.sh
@@ -0,0 +1,118 @@
+#!/bin/sh -e
+
+get_random_container() {
+ # Get a list of all containers starting with ydb-database-*
+ containers=$(docker ps --format '{{.Names}}' | grep '^ydb-database-')
+
+ # Convert the list to a newline-separated string
+ containers=$(echo "$containers" | tr ' ' '\n')
+
+ # Count the number of containers
+ containersCount=$(echo "$containers" | wc -l)
+
+ # Generate a random number between 0 and containersCount - 1
+ randomIndex=$(shuf -i 0-$(($containersCount - 1)) -n 1)
+
+ # Get the container name at the random index
+ nodeForChaos=$(echo "$containers" | sed -n "$(($randomIndex + 1))p")
+}
+
+get_two_random_containers() {
+ containers=$(docker ps --format '{{.Names}}' | grep '^ydb-database-')
+ containers=$(echo "$containers" | tr ' ' '\n')
+ containersCount=$(echo "$containers" | wc -l)
+ if [ "$containersCount" -lt 2 ]; then
+ get_random_container
+ nodeForChaos2=""
+ return
+ fi
+ randomIndex1=$(shuf -i 0-$(($containersCount - 1)) -n 1)
+ randomIndex2=$(shuf -i 0-$(($containersCount - 2)) -n 1)
+ if [ "$randomIndex2" -ge "$randomIndex1" ]; then
+ randomIndex2=$(($randomIndex2 + 1))
+ fi
+ nodeForChaos=$(echo "$containers" | sed -n "$(($randomIndex1 + 1))p")
+ nodeForChaos2=$(echo "$containers" | sed -n "$(($randomIndex2 + 1))p")
+}
+
+sleep 60
+
+echo "Start AGGRESSIVE CHAOS on YDB cluster!"
+
+# Phase 1: Pause/unpause
+echo "=== Phase 1: docker pause/unpause ==="
+for i in $(seq 1 4)
+do
+ get_random_container
+ echo "[$(date)]: PAUSE ${nodeForChaos} (iteration $i) — in-flight ops will hang"
+ docker pause ${nodeForChaos}
+ sleep 20
+ echo "[$(date)]: UNPAUSE ${nodeForChaos}"
+ docker unpause ${nodeForChaos}
+ sleep 15
+done
+
+# Phase 2: Multi-node simultaneous kill
+echo "=== Phase 2: multi-node kill ==="
+for i in $(seq 1 3)
+do
+ get_two_random_containers
+ echo "[$(date)]: KILL ${nodeForChaos} and ${nodeForChaos2} simultaneously (iteration $i)"
+ docker kill -s SIGKILL ${nodeForChaos} &
+ docker kill -s SIGKILL ${nodeForChaos2} &
+ wait
+ echo "[$(date)]: Starting both nodes back"
+ docker start ${nodeForChaos} &
+ docker start ${nodeForChaos2} &
+ wait
+ sleep 25
+done
+
+# Phase 3: Single-node instant restart
+echo "=== Phase 3: instant restart ==="
+for i in $(seq 1 3)
+do
+ get_random_container
+ echo "[$(date)]: INSTANT RESTART ${nodeForChaos} (iteration $i)"
+ docker restart ${nodeForChaos} -t 0
+ sleep 20
+done
+
+# Phase 4: Pause 2 nodes simultaneously
+echo "=== Phase 4: dual pause 30s ==="
+get_two_random_containers
+echo "[$(date)]: PAUSE ${nodeForChaos} and ${nodeForChaos2} for 30s"
+docker pause ${nodeForChaos} &
+docker pause ${nodeForChaos2} &
+wait
+sleep 30
+echo "[$(date)]: UNPAUSE both"
+docker unpause ${nodeForChaos} &
+docker unpause ${nodeForChaos2} &
+wait
+sleep 15
+
+# Phase 5: Rapid kill/start cycle (session pool thrashing)
+echo "=== Phase 5: rapid kill/start ==="
+for i in $(seq 1 5)
+do
+ get_random_container
+ echo "[$(date)]: RAPID kill+start ${nodeForChaos} (iteration $i)"
+ docker kill -s SIGKILL ${nodeForChaos}
+ docker start ${nodeForChaos}
+ sleep 8
+done
+
+# Phase 6: Final triple kill
+echo "=== Phase 6: FINAL triple SIGKILL ==="
+containers=$(docker ps --format '{{.Names}}' | grep '^ydb-database-' | shuf)
+c1=$(echo "$containers" | sed -n '1p')
+c2=$(echo "$containers" | sed -n '2p')
+c3=$(echo "$containers" | sed -n '3p')
+echo "[$(date)]: SIGKILL ${c1}, ${c2}, ${c3} simultaneously"
+docker kill -s SIGKILL ${c1} &
+docker kill -s SIGKILL ${c2} &
+docker kill -s SIGKILL ${c3} &
+wait
+
+echo "[$(date)]: Chaos complete."
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/chaos-aggressive/compose.yaml b/spring-ydb/spring-ydb-retry/slo/playground/chaos-aggressive/compose.yaml
new file mode 100644
index 00000000..9501c198
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/chaos-aggressive/compose.yaml
@@ -0,0 +1,380 @@
+networks:
+ slo-network:
+ driver: bridge
+
+x-ydb-node: &ydb-node
+ image: cr.yandex/crptqonuodf51kdj7a7d/ydb:24.4.4.12
+ restart: always
+ platform: linux/amd64
+ privileged: true
+ networks:
+ - slo-network
+ volumes:
+ - ../configs/ydb.yaml:/opt/ydb/cfg/config.yaml
+ deploy:
+ resources:
+ limits:
+ cpus: '1.0'
+ memory: 768M
+ reservations:
+ cpus: '0.5'
+ memory: 512M
+
+name: ydb-slo
+
+services:
+ static-0:
+ <<: *ydb-node
+ container_name: ydb-static-0
+ hostname: static-0
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2135"
+ - --mon-port
+ - "8765"
+ - --ic-port
+ - "19001"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --node
+ - static
+ - --label
+ - deployment=docker
+ ports:
+ - "2135:2135"
+ - "8765:8765"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2135"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+
+ static-init:
+ <<: *ydb-node
+ restart: on-failure
+ container_name: ydb-static-init
+ command:
+ - /opt/ydb/bin/ydbd
+ - -s
+ - grpc://static-0:2135
+ - admin
+ - blobstorage
+ - config
+ - init
+ - --yaml-file
+ - /opt/ydb/cfg/config.yaml
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ tenant-init:
+ <<: *ydb-node
+ restart: on-failure
+ container_name: ydb-tenant-init
+ command:
+ - /opt/ydb/bin/ydbd
+ - -s
+ - grpc://static-0:2135
+ - admin
+ - database
+ - /Root/testdb
+ - create
+ - ssd:1
+ depends_on:
+ static-init:
+ condition: service_completed_successfully
+
+ database-1:
+ <<: *ydb-node
+ container_name: ydb-database-1
+ hostname: database-1
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2136"
+ - --mon-port
+ - "8766"
+ - --ic-port
+ - "19002"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2136:2136"
+ - "8766:8766"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2136"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-2:
+ <<: *ydb-node
+ container_name: ydb-database-2
+ hostname: database-2
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2137"
+ - --mon-port
+ - "8767"
+ - --ic-port
+ - "19003"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2137:2137"
+ - "8767:8767"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2137"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-3:
+ <<: *ydb-node
+ container_name: ydb-database-3
+ hostname: database-3
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2138"
+ - --mon-port
+ - "8768"
+ - --ic-port
+ - "19004"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2138:2138"
+ - "8768:8768"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2138"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-4:
+ <<: *ydb-node
+ container_name: ydb-database-4
+ hostname: database-4
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2139"
+ - --mon-port
+ - "8769"
+ - --ic-port
+ - "19005"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2139:2139"
+ - "8769:8769"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2139"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-5:
+ <<: *ydb-node
+ container_name: ydb-database-5
+ hostname: database-5
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2140"
+ - --mon-port
+ - "8770"
+ - --ic-port
+ - "19006"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2140:2140"
+ - "8770:8770"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2140"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ app-with-retry:
+ build:
+ context: ../../../..
+ dockerfile: spring-ydb-retry/slo/Dockerfile
+ container_name: ydb-app-with-retry
+ platform: linux/amd64
+ networks:
+ - slo-network
+ environment:
+ SERVER_PORT: "8081"
+ SPRING_DATASOURCE_URL: jdbc:ydb:grpc://static-0:2135/Root/testdb
+ SPRING_DATASOURCE_DRIVER_CLASS_NAME: tech.ydb.jdbc.YdbDriver
+ YDB_TRANSACTION_RETRY_ENABLED: "true"
+ YDB_TRANSACTION_RETRY_MAX_RETRIES: "10"
+ REF: with-retry
+ SLO_RUN_ID: ${SLO_RUN_ID:-}
+ SLO_RESULTS_DIR: /app/results
+ SLO_READ_RPS: "100"
+ SLO_WRITE_RPS: "100"
+ SLO_INITIAL_DATA: "1000"
+ SLO_TIME: "600"
+ volumes:
+ - ../results:/app/results
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ app-no-retry:
+ build:
+ context: ../../../..
+ dockerfile: spring-ydb-retry/slo/Dockerfile
+ container_name: ydb-app-no-retry
+ platform: linux/amd64
+ networks:
+ - slo-network
+ environment:
+ SERVER_PORT: "8082"
+ SPRING_DATASOURCE_URL: jdbc:ydb:grpc://static-0:2135/Root/testdb
+ SPRING_DATASOURCE_DRIVER_CLASS_NAME: tech.ydb.jdbc.YdbDriver
+ YDB_TRANSACTION_RETRY_ENABLED: "false"
+ REF: no-retry
+ SLO_RUN_ID: ${SLO_RUN_ID:-}
+ SLO_RESULTS_DIR: /app/results
+ SLO_READ_RPS: "100"
+ SLO_WRITE_RPS: "100"
+ SLO_INITIAL_DATA: "1000"
+ SLO_TIME: "600"
+ volumes:
+ - ../results:/app/results
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ chaos:
+ image: docker:latest
+ restart: on-failure
+ container_name: ydb-chaos
+ platform: linux/amd64
+ networks:
+ - slo-network
+ entrypoint: ["/bin/sh", "-c", "chmod +x /opt/ydb/chaos.sh && /opt/ydb/chaos.sh"]
+ volumes:
+ - ./chaos.sh:/opt/ydb/chaos.sh
+ - ../configs/ydb.yaml:/opt/ydb/cfg/config.yaml
+ - /var/run/docker.sock:/var/run/docker.sock
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ prometheus:
+ image: prom/prometheus:v3.3.1
+ container_name: prometheus
+ networks:
+ - slo-network
+ volumes:
+ - ../configs/prometheus:/etc/prometheus
+ - ../data/prometheus:/prometheus
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yaml'
+ - '--storage.tsdb.path=/prometheus'
+ - '--storage.tsdb.retention.time=200h'
+ - '--web.enable-lifecycle'
+ - '--web.enable-otlp-receiver'
+ ports:
+ - "9090:9090"
+
+ grafana:
+ image: grafana/grafana:9.5.3
+ container_name: grafana
+ networks:
+ - slo-network
+ volumes:
+ - ../configs/grafana/provisioning:/etc/grafana/provisioning
+ - ../data/grafana:/var/lib/grafana
+ environment:
+ - GF_SECURITY_ADMIN_USER=admin
+ - GF_SECURITY_ADMIN_PASSWORD=admin
+ - GF_AUTH_ANONYMOUS_ENABLED=true
+ - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
+ ports:
+ - "3000:3000"
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/chaos/chaos.sh b/spring-ydb/spring-ydb-retry/slo/playground/chaos/chaos.sh
new file mode 100755
index 00000000..521bed8b
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/chaos/chaos.sh
@@ -0,0 +1,52 @@
+#!/bin/sh -e
+
+get_random_container() {
+ # Get a list of all containers starting with ydb-database-*
+ containers=$(docker ps --format '{{.Names}}' | grep '^ydb-database-')
+
+ # Convert the list to a newline-separated string
+ containers=$(echo "$containers" | tr ' ' '\n')
+
+ # Count the number of containers
+ containersCount=$(echo "$containers" | wc -l)
+
+ # Generate a random number between 0 and containersCount - 1
+ randomIndex=$(shuf -i 0-$(($containersCount - 1)) -n 1)
+
+ # Get the container name at the random index
+ nodeForChaos=$(echo "$containers" | sed -n "$(($randomIndex + 1))p")
+}
+
+
+sleep 60
+
+echo "Start CHAOS YDB cluster!"
+
+for i in $(seq 1 5)
+do
+ echo "[$(date)]: docker stop/start iteration $i"
+
+ get_random_container
+
+ sh -c "docker stop ${nodeForChaos} -t 10"
+ sh -c "docker start ${nodeForChaos}"
+
+ sleep 60
+done
+
+for i in $(seq 1 3)
+do
+ echo "[$(date)]: docker restart iteration $i"
+
+ get_random_container
+
+ sh -c "docker restart ${nodeForChaos} -t 0"
+
+ sleep 60
+done
+
+get_random_container
+
+echo "[$(date)]: docker kill -s SIGKILL ${nodeForChaos}"
+
+sh -c "docker kill -s SIGKILL ${nodeForChaos}"
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/chaos/compose.yaml b/spring-ydb/spring-ydb-retry/slo/playground/chaos/compose.yaml
new file mode 100644
index 00000000..b876ef0b
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/chaos/compose.yaml
@@ -0,0 +1,372 @@
+networks:
+ slo-network:
+ driver: bridge
+
+x-ydb-node: &ydb-node
+ image: cr.yandex/crptqonuodf51kdj7a7d/ydb:24.4.4.12
+ restart: always
+ platform: linux/amd64
+ privileged: true
+ networks:
+ - slo-network
+ volumes:
+ - ../configs/ydb.yaml:/opt/ydb/cfg/config.yaml
+
+name: ydb-slo
+
+services:
+ static-0:
+ <<: *ydb-node
+ container_name: ydb-static-0
+ hostname: static-0
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2135"
+ - --mon-port
+ - "8765"
+ - --ic-port
+ - "19001"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --node
+ - static
+ - --label
+ - deployment=docker
+ ports:
+ - "2135:2135"
+ - "8765:8765"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2135"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+
+ static-init:
+ <<: *ydb-node
+ restart: on-failure
+ container_name: ydb-static-init
+ command:
+ - /opt/ydb/bin/ydbd
+ - -s
+ - grpc://static-0:2135
+ - admin
+ - blobstorage
+ - config
+ - init
+ - --yaml-file
+ - /opt/ydb/cfg/config.yaml
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ tenant-init:
+ <<: *ydb-node
+ restart: on-failure
+ container_name: ydb-tenant-init
+ command:
+ - /opt/ydb/bin/ydbd
+ - -s
+ - grpc://static-0:2135
+ - admin
+ - database
+ - /Root/testdb
+ - create
+ - ssd:1
+ depends_on:
+ static-init:
+ condition: service_completed_successfully
+
+ database-1:
+ <<: *ydb-node
+ container_name: ydb-database-1
+ hostname: database-1
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2136"
+ - --mon-port
+ - "8766"
+ - --ic-port
+ - "19002"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2136:2136"
+ - "8766:8766"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2136"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-2:
+ <<: *ydb-node
+ container_name: ydb-database-2
+ hostname: database-2
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2137"
+ - --mon-port
+ - "8767"
+ - --ic-port
+ - "19003"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2137:2137"
+ - "8767:8767"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2137"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-3:
+ <<: *ydb-node
+ container_name: ydb-database-3
+ hostname: database-3
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2138"
+ - --mon-port
+ - "8768"
+ - --ic-port
+ - "19004"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2138:2138"
+ - "8768:8768"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2138"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-4:
+ <<: *ydb-node
+ container_name: ydb-database-4
+ hostname: database-4
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2139"
+ - --mon-port
+ - "8769"
+ - --ic-port
+ - "19005"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2139:2139"
+ - "8769:8769"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2139"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ database-5:
+ <<: *ydb-node
+ container_name: ydb-database-5
+ hostname: database-5
+ command:
+ - /opt/ydb/bin/ydbd
+ - server
+ - --grpc-port
+ - "2140"
+ - --mon-port
+ - "8770"
+ - --ic-port
+ - "19006"
+ - --yaml-config
+ - /opt/ydb/cfg/config.yaml
+ - --tenant
+ - /Root/testdb
+ - --node-broker
+ - grpc://static-0:2135
+ - --label
+ - deployment=docker
+ ports:
+ - "2140:2140"
+ - "8770:8770"
+ healthcheck:
+ test: bash -c "exec 6<> /dev/tcp/localhost/2140"
+ interval: 10s
+ timeout: 1s
+ retries: 3
+ start_period: 30s
+ depends_on:
+ static-0:
+ condition: service_healthy
+ static-init:
+ condition: service_completed_successfully
+ tenant-init:
+ condition: service_completed_successfully
+
+ app-with-retry:
+ build:
+ context: ../../../..
+ dockerfile: spring-ydb-retry/slo/Dockerfile
+ container_name: ydb-app-with-retry
+ platform: linux/amd64
+ networks:
+ - slo-network
+ environment:
+ SERVER_PORT: "8081"
+ SPRING_DATASOURCE_URL: jdbc:ydb:grpc://static-0:2135/Root/testdb
+ SPRING_DATASOURCE_DRIVER_CLASS_NAME: tech.ydb.jdbc.YdbDriver
+ YDB_TRANSACTION_RETRY_ENABLED: "true"
+ YDB_TRANSACTION_RETRY_MAX_RETRIES: "10"
+ REF: with-retry
+ SLO_RUN_ID: ${SLO_RUN_ID:-}
+ SLO_RESULTS_DIR: /app/results
+ SLO_READ_RPS: "100"
+ SLO_WRITE_RPS: "100"
+ SLO_INITIAL_DATA: "1000"
+ SLO_TIME: "600"
+ volumes:
+ - ../results:/app/results
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ app-no-retry:
+ build:
+ context: ../../../..
+ dockerfile: spring-ydb-retry/slo/Dockerfile
+ container_name: ydb-app-no-retry
+ platform: linux/amd64
+ networks:
+ - slo-network
+ environment:
+ SERVER_PORT: "8082"
+ SPRING_DATASOURCE_URL: jdbc:ydb:grpc://static-0:2135/Root/testdb
+ SPRING_DATASOURCE_DRIVER_CLASS_NAME: tech.ydb.jdbc.YdbDriver
+ YDB_TRANSACTION_RETRY_ENABLED: "false"
+ REF: no-retry
+ SLO_RUN_ID: ${SLO_RUN_ID:-}
+ SLO_RESULTS_DIR: /app/results
+ SLO_READ_RPS: "100"
+ SLO_WRITE_RPS: "100"
+ SLO_INITIAL_DATA: "1000"
+ SLO_TIME: "600"
+ volumes:
+ - ../results:/app/results
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ chaos:
+ image: docker:latest
+ restart: on-failure
+ container_name: ydb-chaos
+ platform: linux/amd64
+ networks:
+ - slo-network
+ entrypoint: ["/bin/sh", "-c", "chmod +x /opt/ydb/chaos.sh && /opt/ydb/chaos.sh"]
+ volumes:
+ - ./chaos.sh:/opt/ydb/chaos.sh
+ - ../configs/ydb.yaml:/opt/ydb/cfg/config.yaml
+ - /var/run/docker.sock:/var/run/docker.sock
+ depends_on:
+ static-0:
+ condition: service_healthy
+
+ prometheus:
+ image: prom/prometheus:v3.3.1
+ container_name: prometheus
+ networks:
+ - slo-network
+ volumes:
+ - ../configs/prometheus:/etc/prometheus
+ - ../data/prometheus:/prometheus
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yaml'
+ - '--storage.tsdb.path=/prometheus'
+ - '--storage.tsdb.retention.time=200h'
+ - '--web.enable-lifecycle'
+ - '--web.enable-otlp-receiver'
+ ports:
+ - "9090:9090"
+
+ grafana:
+ image: grafana/grafana:9.5.3
+ container_name: grafana
+ networks:
+ - slo-network
+ volumes:
+ - ../configs/grafana/provisioning:/etc/grafana/provisioning
+ - ../data/grafana:/var/lib/grafana
+ environment:
+ - GF_SECURITY_ADMIN_USER=admin
+ - GF_SECURITY_ADMIN_PASSWORD=admin
+ - GF_AUTH_ANONYMOUS_ENABLED=true
+ - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
+ ports:
+ - "3000:3000"
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/dashboards/dashboard.yaml b/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/dashboards/dashboard.yaml
new file mode 100644
index 00000000..c8442c2f
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/dashboards/dashboard.yaml
@@ -0,0 +1,9 @@
+apiVersion: 1
+providers:
+ - name: 'SLO'
+ folder: ''
+ type: file
+ disableDeletion: false
+ editable: true
+ options:
+ path: /etc/grafana/provisioning/dashboards
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/dashboards/slo.json b/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/dashboards/slo.json
new file mode 100644
index 00000000..6e608796
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/dashboards/slo.json
@@ -0,0 +1,638 @@
+{
+ "annotations": {
+ "list": []
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 1,
+ "links": [],
+ "panels": [
+ {
+ "title": "Error Rate (%)",
+ "type": "stat",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 0
+ },
+ "id": 1,
+ "options": {
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ },
+ "orientation": "horizontal",
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (ref) (rate(slo_operations_total{status=\"failure\"}[1m])) / sum by (ref) (rate(slo_operations_total[1m])) * 100",
+ "legendFormat": "{{ref}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "yellow",
+ "value": 5
+ },
+ {
+ "color": "red",
+ "value": 20
+ }
+ ]
+ },
+ "mappings": []
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Total Errors (cumulative)",
+ "type": "stat",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 2,
+ "options": {
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ },
+ "orientation": "horizontal",
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (ref) (increase(slo_operations_total{status=\"failure\"}[$__range]))",
+ "legendFormat": "{{ref}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 100
+ }
+ ]
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Total Successes (cumulative)",
+ "type": "stat",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 12,
+ "y": 0
+ },
+ "id": 3,
+ "options": {
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ },
+ "orientation": "horizontal",
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (ref) (increase(slo_operations_total{status=\"success\"}[$__range]))",
+ "legendFormat": "{{ref}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 1000
+ }
+ ]
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Error Reduction from Retry",
+ "type": "stat",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 0
+ },
+ "id": 4,
+ "options": {
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ },
+ "orientation": "horizontal",
+ "textMode": "auto",
+ "wideLayout": true
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "(sum(increase(slo_operations_total{ref=\"no-retry\",status=\"failure\"}[$__range])) - sum(increase(slo_operations_total{ref=\"with-retry\",status=\"failure\"}[$__range]))) / sum(increase(slo_operations_total{ref=\"no-retry\",status=\"failure\"}[$__range])) * 100",
+ "legendFormat": "error reduction %"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "unit": "percent",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "red",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 50
+ }
+ ]
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Failed Operations / sec",
+ "type": "timeseries",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 6
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (ref) (rate(slo_operations_total{status=\"failure\"}[1m]))",
+ "legendFormat": "{{ref}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "fillOpacity": 30,
+ "lineWidth": 2
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": ".*no-retry.*"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "red",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": ".*with-retry.*"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ },
+ {
+ "title": "Operations / sec (by type and status)",
+ "type": "timeseries",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 6
+ },
+ "id": 7,
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (ref, operation_type, status) (rate(slo_operations_total[1m]))",
+ "legendFormat": "{{ref}} {{operation_type}} {{status}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "fillOpacity": 20,
+ "lineWidth": 1
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Operation Latency p99 (seconds)",
+ "type": "timeseries",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 14
+ },
+ "id": 8,
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "histogram_quantile(0.99, sum by (ref, le) (rate(slo_operation_duration_seconds_bucket[1m])))",
+ "legendFormat": "{{ref}} p99"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "histogram_quantile(0.95, sum by (ref, le) (rate(slo_operation_duration_seconds_bucket[1m])))",
+ "legendFormat": "{{ref}} p95"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "histogram_quantile(0.50, sum by (ref, le) (rate(slo_operation_duration_seconds_bucket[1m])))",
+ "legendFormat": "{{ref}} p50"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "fillOpacity": 10,
+ "lineWidth": 2
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Failed Ops / sec by Error Type (no-retry)",
+ "type": "timeseries",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 22
+ },
+ "id": 10,
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (error_type) (rate(slo_operations_total{ref=\"no-retry\",status=\"failure\",error_type!=\"none\"}[1m]))",
+ "legendFormat": "no-retry {{error_type}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "fillOpacity": 30,
+ "lineWidth": 2
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Failed Ops / sec by Error Type (with-retry)",
+ "type": "timeseries",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 22
+ },
+ "id": 11,
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (error_type) (rate(slo_operations_total{ref=\"with-retry\",status=\"failure\",error_type!=\"none\"}[1m]))",
+ "legendFormat": "with-retry {{error_type}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "fillOpacity": 30,
+ "lineWidth": 2
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Error Type Distribution (no-retry)",
+ "type": "piechart",
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 0,
+ "y": 30
+ },
+ "id": 12,
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ },
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (error_type) (increase(slo_operations_total{ref=\"no-retry\",status=\"failure\",error_type!=\"none\"}[$__range]))",
+ "legendFormat": "{{error_type}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Error Type Distribution (with-retry)",
+ "type": "piechart",
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 6,
+ "y": 30
+ },
+ "id": 13,
+ "options": {
+ "legend": {
+ "displayMode": "table",
+ "placement": "right"
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
+ },
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (error_type) (increase(slo_operations_total{ref=\"with-retry\",status=\"failure\",error_type!=\"none\"}[$__range]))",
+ "legendFormat": "{{error_type}}"
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ }
+ },
+ "overrides": []
+ }
+ },
+ {
+ "title": "Error Rate by Type (%) — Comparison",
+ "type": "table",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 30
+ },
+ "id": 14,
+ "options": {
+ "showHeader": true,
+ "footer": {
+ "show": true,
+ "reducer": ["sum"]
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "prometheus"
+ },
+ "expr": "sum by (ref, error_type) (increase(slo_operations_total{status=\"failure\",error_type!=\"none\"}[$__range]))",
+ "format": "table",
+ "instant": true
+ }
+ ],
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true
+ },
+ "renameByName": {
+ "Value": "Errors",
+ "error_type": "Error Type",
+ "ref": "Version"
+ }
+ }
+ }
+ ],
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ }
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 39,
+ "tags": [
+ "slo",
+ "ydb",
+ "retry"
+ ],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "YDB Spring Retry SLO - Retry vs No-Retry Comparison",
+ "uid": "ydb-slo-retry",
+ "version": 2
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/datasources/datasource.yaml b/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/datasources/datasource.yaml
new file mode 100644
index 00000000..415d5684
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/configs/grafana/provisioning/datasources/datasource.yaml
@@ -0,0 +1,8 @@
+apiVersion: 1
+datasources:
+ - name: Prometheus
+ type: prometheus
+ access: proxy
+ url: http://prometheus:9090
+ isDefault: true
+ uid: prometheus
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/configs/prometheus/prometheus.yaml b/spring-ydb/spring-ydb-retry/slo/playground/configs/prometheus/prometheus.yaml
new file mode 100644
index 00000000..fb4e212f
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/configs/prometheus/prometheus.yaml
@@ -0,0 +1,16 @@
+global:
+ scrape_interval: 1s
+ evaluation_interval: 1s
+
+scrape_configs:
+ - job_name: 'app-with-retry'
+ static_configs:
+ - targets: ['app-with-retry:9464']
+ metrics_path: '/metrics'
+ scrape_interval: 5s
+
+ - job_name: 'app-no-retry'
+ static_configs:
+ - targets: ['app-no-retry:9464']
+ metrics_path: '/metrics'
+ scrape_interval: 5s
diff --git a/spring-ydb/spring-ydb-retry/slo/playground/configs/ydb.yaml b/spring-ydb/spring-ydb-retry/slo/playground/configs/ydb.yaml
new file mode 100644
index 00000000..ff3f4f3b
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/playground/configs/ydb.yaml
@@ -0,0 +1,63 @@
+pqconfig:
+ require_credentials_in_new_protocol: false
+
+actor_system_config:
+ cpu_count: 1
+ node_type: STORAGE
+ use_auto_config: true
+blob_storage_config:
+ service_set:
+ groups:
+ - erasure_species: none
+ rings:
+ - fail_domains:
+ - vdisk_locations:
+ - node_id: 1
+ path: SectorMap:1:64
+ pdisk_category: SSD
+channel_profile_config:
+ profile:
+ - channel:
+ - erasure_species: none
+ pdisk_category: 0
+ storage_pool_kind: ssd
+ - erasure_species: none
+ pdisk_category: 0
+ storage_pool_kind: ssd
+ - erasure_species: none
+ pdisk_category: 0
+ storage_pool_kind: ssd
+ profile_id: 0
+domains_config:
+ domain:
+ - name: Root
+ storage_pool_types:
+ - kind: ssd
+ pool_config:
+ box_id: 1
+ erasure_species: none
+ kind: ssd
+ pdisk_filter:
+ - property:
+ - type: SSD
+ vdisk_kind: Default
+ state_storage:
+ - ring:
+ node: [ 1 ]
+ nto_select: 1
+ ssid: 1
+host_configs:
+ - drive:
+ - path: SectorMap:1:64
+ type: SSD
+ host_config_id: 1
+hosts:
+ - host: static-0
+ host_config_id: 1
+ node_id: 1
+ port: 19001
+ walle_location:
+ body: 1
+ data_center: az-1
+ rack: "0"
+static_erasure: none
diff --git a/spring-ydb/spring-ydb-retry/slo/pom.xml b/spring-ydb/spring-ydb-retry/slo/pom.xml
new file mode 100644
index 00000000..eed68110
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/pom.xml
@@ -0,0 +1,92 @@
+
+
+
+ 4.0.0
+
+
+ org.springframework.boot
+ spring-boot-starter-parent
+ 3.4.0
+
+
+
+ tech.ydb
+ ydb-slo-workload
+ 1.0.0-SNAPSHOT
+ jar
+
+ YDB SLO Workload
+
+
+ 21
+ UTF-8
+ 1.43.0
+
+
+
+
+
+ io.opentelemetry
+ opentelemetry-bom
+ ${opentelemetry.version}
+ pom
+ import
+
+
+ io.opentelemetry
+ opentelemetry-bom-alpha
+ ${opentelemetry.version}-alpha
+ pom
+ import
+
+
+
+
+
+
+ org.springframework.boot
+ spring-boot-starter-jdbc
+
+
+ tech.ydb.jdbc
+ ydb-jdbc-driver
+ 2.3.22
+
+
+ tech.ydb
+ spring-ydb-retry
+ 1.0.0-SNAPSHOT
+
+
+ io.opentelemetry
+ opentelemetry-api
+
+
+ io.opentelemetry
+ opentelemetry-sdk
+
+
+ io.opentelemetry
+ opentelemetry-exporter-otlp
+
+
+ io.opentelemetry
+ opentelemetry-exporter-prometheus
+
+
+
+
+
+
+ org.springframework.boot
+ spring-boot-maven-plugin
+
+ tech.ydb.slo.SloApplication
+ exec
+
+
+
+
+
diff --git a/spring-ydb/spring-ydb-retry/slo/src/README.md b/spring-ydb/spring-ydb-retry/slo/src/README.md
new file mode 100644
index 00000000..7f7bca6b
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/README.md
@@ -0,0 +1,141 @@
+# SLO Workload Application
+
+A Spring Boot load-generation tool that drives read/write traffic against a YDB table and exposes
+metrics to Prometheus via OpenTelemetry. Used to measure how the **spring-ydb-retry** library
+reduces visible application errors under cluster-level fault injection.
+
+## Quick Start
+
+### 1. Build
+
+```bash
+cd slo
+mvn package -DskipTests
+```
+
+### 2. Run
+
+Two instances are typically launched side-by-side — one with retry enabled and one without:
+
+```bash
+# With retry
+java -jar target/ydb-slo-workload-1.0.0-SNAPSHOT-exec.jar \
+ --server.port=8081 \
+ --spring.datasource.url=jdbc:ydb:grpc://localhost:2136/Root/testdb \
+ --ydb.transaction.retry.enabled=true \
+ --ydb.transaction.retry.max-retries=10 \
+ --slo.ref=with-retry
+
+# Without retry
+java -jar target/ydb-slo-workload-1.0.0-SNAPSHOT-exec.jar \
+ --server.port=8082 \
+ --spring.datasource.url=jdbc:ydb:grpc://localhost:2136/Root/testdb \
+ --ydb.transaction.retry.enabled=false \
+ --slo.ref=no-retry
+```
+
+In the Docker Compose playground, both instances are launched automatically — see
+[`playground/README.md`](../playground/README.md).
+
+## Lifecycle
+
+On startup the application (`SloRunner`, a `CommandLineRunner`) performs three steps:
+
+1. **Create table** — creates `slo_test_table` (up to 10 attempts with retry between each)
+2. **Seed data** — populates `initialDataCount` rows via single-row UPSERTs
+3. **Run workload** — starts concurrent read and write jobs for `runTimeSeconds`
+
+After the workload finishes, the app stays alive to serve Prometheus metrics on port 9464.
+
+## Table Schema
+
+```sql
+CREATE TABLE slo_test_table (
+ guid Text,
+ id Int32,
+ payload_str Text,
+ payload_double Double,
+ payload_timestamp Timestamp,
+ PRIMARY KEY (guid, id)
+);
+```
+
+## What the Workload Does
+
+- **Read job** —reads rows by random IDs (keys generated by writeJob)
+- **Write job** — generates and upserts new rows
+
+## Configuration
+
+All parameters are set via environment variables (or Spring Boot command-line arguments).
+
+### Application
+
+| Variable | Default | Description |
+|-------------------------------------|----------------------------------------------|--------------------------------------------------------|
+| `SERVER_PORT` | `8080` | HTTP port (Actuator endpoints) |
+| `SPRING_DATASOURCE_URL` | `jdbc:ydb:grpc://localhost:2136/Root/testdb` | YDB JDBC URL |
+| `YDB_TRANSACTION_RETRY_ENABLED` | `true` | Enable/disable retry |
+| `YDB_TRANSACTION_RETRY_MAX_RETRIES` | `10` | Max retry attempts |
+| `SLO_RUN_ID` | auto | Shared run identifier used for the result folder name |
+| `SLO_RESULTS_DIR` | `results` | Root directory where per-run result folders are stored |
+
+### Workload
+
+| Variable | Default | Description |
+|--------------------|-----------|--------------------------------------------------------|
+| `SLO_READ_RPS` | `100` | Target read requests per second |
+| `SLO_WRITE_RPS` | `100` | Target write requests per second |
+| `SLO_INITIAL_DATA` | `1000` | Number of rows to pre-populate |
+| `SLO_TIME` | `600` | Total run duration (seconds) |
+| `REF` | `unknown` | Instance label for metrics (`with-retry` / `no-retry`) |
+
+## Saved Results
+
+```text
+/
+ /
+ retry
+ no-retry
+```
+
+The `retry` file is written by the `with-retry` instance, and `no-retry` is written by the `no-retry` instance.
+
+## Collected Metrics (exposed via OpenTelemetry on :9464)
+
+| Metric | Type | Labels | Description |
+|----------------------------------|-----------|-----------------------------------------|-----------------------------|
+| `slo_operations_total` | Counter | ref, operation_type, status, error_type | Total number of operations |
+| `slo_operation_duration_seconds` | Histogram | ref, operation_type, status, error_type | Operation latency (seconds) |
+
+### Labels
+
+| Label | Values | Description |
+|------------------|--------------------------------------------------------------------------------|-----------------------------------------|
+| `ref` | `with-retry`, `no-retry` | Instance identifier |
+| `operation_type` | `read`, `write` | Operation type |
+| `status` | `success`, `failure` | Operation result |
+| `error_type` | `none`, `UNAVAILABLE`, `TRANSPORT_UNAVAILABLE`, `OVERLOADED`, `BAD_SESSION`, … | YDB status code or exception class name |
+
+### Error Classification
+
+`extractErrorType` walks the exception cause chain looking for `YdbStatusable`. If found, returns
+`Status.Code.name()` (e.g. `UNAVAILABLE`, `TRANSPORT_UNAVAILABLE`). Otherwise returns the
+exception class name (e.g. `SqlTransientException`).
+
+## Classes
+
+| Class | Description |
+|------------------|---------------------------------------------------------------------------------------------|
+| `SloApplication` | `@SpringBootApplication` entry point with `@EnableConfigurationProperties(SloConfig.class)` |
+| `SloConfig` | `@ConfigurationProperties(prefix = "slo")` — binds workload parameters |
+| `SloService` | `@YdbTransactional` service: `upsert()`, `upsert2()`, `select()`, `selectMaxId()` |
+| `SloRunner` | `CommandLineRunner` — table creation, data seeding, load generation, metrics |
+| `OtelConfig` | OpenTelemetry SDK bean — `PrometheusHttpServer` on port 9464 |
+
+## Grafana Dashboard
+
+Import the pre-built SLO dashboard from
+[
+`playground/configs/grafana/provisioning/dashboards/slo.json`](../playground/configs/grafana/provisioning/dashboards/slo.json)
+into your Grafana instance to visualize the collected metrics.
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/OtelConfig.java b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/OtelConfig.java
new file mode 100644
index 00000000..43f591c1
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/OtelConfig.java
@@ -0,0 +1,24 @@
+package tech.ydb.slo;
+
+import io.opentelemetry.exporter.prometheus.PrometheusHttpServer;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.metrics.SdkMeterProvider;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+@Configuration
+public class OtelConfig {
+
+ private static final int PROMETHEUS_PORT = 9464;
+
+ @Bean(destroyMethod = "close")
+ public OpenTelemetrySdk openTelemetry() {
+ PrometheusHttpServer prometheusHttpServer =
+ PrometheusHttpServer.builder().setPort(PROMETHEUS_PORT).build();
+
+ SdkMeterProvider meterProvider =
+ SdkMeterProvider.builder().registerMetricReader(prometheusHttpServer).build();
+
+ return OpenTelemetrySdk.builder().setMeterProvider(meterProvider).build();
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloApplication.java b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloApplication.java
new file mode 100644
index 00000000..182b1edf
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloApplication.java
@@ -0,0 +1,13 @@
+package tech.ydb.slo;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+
+@SpringBootApplication
+@EnableConfigurationProperties(SloConfig.class)
+public class SloApplication {
+ public static void main(String[] args) {
+ SpringApplication.run(SloApplication.class, args);
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloConfig.java b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloConfig.java
new file mode 100644
index 00000000..9d87eea5
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloConfig.java
@@ -0,0 +1,71 @@
+package tech.ydb.slo;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+@ConfigurationProperties(prefix = "slo")
+public class SloConfig {
+
+ private int readRps = 100;
+ private int writeRps = 100;
+ private int initialDataCount = 1000;
+ private int runTimeSeconds = 600;
+ private String ref = "unknown";
+ private String runId = "";
+ private String resultsDir = "results";
+
+ public int getReadRps() {
+ return readRps;
+ }
+
+ public void setReadRps(int readRps) {
+ this.readRps = readRps;
+ }
+
+ public int getWriteRps() {
+ return writeRps;
+ }
+
+ public void setWriteRps(int writeRps) {
+ this.writeRps = writeRps;
+ }
+
+ public int getInitialDataCount() {
+ return initialDataCount;
+ }
+
+ public void setInitialDataCount(int initialDataCount) {
+ this.initialDataCount = initialDataCount;
+ }
+
+ public int getRunTimeSeconds() {
+ return runTimeSeconds;
+ }
+
+ public void setRunTimeSeconds(int runTimeSeconds) {
+ this.runTimeSeconds = runTimeSeconds;
+ }
+
+ public String getRef() {
+ return ref;
+ }
+
+ public void setRef(String ref) {
+ this.ref = ref;
+ }
+
+ public String getRunId() {
+ return runId;
+ }
+
+ public void setRunId(String runId) {
+ this.runId = runId;
+ }
+
+ public String getResultsDir() {
+ return resultsDir;
+ }
+
+ public void setResultsDir(String resultsDir) {
+ this.resultsDir = resultsDir;
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloResultWriter.java b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloResultWriter.java
new file mode 100644
index 00000000..58027767
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloResultWriter.java
@@ -0,0 +1,221 @@
+package tech.ydb.slo;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileLock;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.Map;
+import java.util.UUID;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Component;
+import tech.ydb.retry.YdbRetryProperties;
+
+@Component
+public class SloResultWriter {
+
+ private static final Logger log = LoggerFactory.getLogger(SloResultWriter.class);
+ private static final String CURRENT_RUN_ID_FILE = ".current-run-id";
+ private static final String RUN_ID_PREFIX = "run-";
+ private static final String RUN_ID_TIMESTAMP_PATTERN = "yyyyMMdd-HHmmss";
+ private static final int RUN_ID_RANDOM_SUFFIX_LENGTH = 8;
+ private static final String WITH_RETRY_REF = "with-retry";
+ private static final String NO_RETRY_REF = "no-retry";
+ private static final String RETRY_RESULT_FILE_NAME = "retry";
+ private static final String NO_RETRY_RESULT_FILE_NAME = "no-retry";
+ private static final String FILE_NAME_SANITIZE_REGEX = "[^a-zA-Z0-9._-]";
+ private static final String FILE_NAME_SANITIZE_REPLACEMENT = "_";
+
+ public String resolveRunId(SloConfig config, Instant startedAt) {
+ if (config.getRunId() != null && !config.getRunId().isBlank()) {
+ return config.getRunId();
+ }
+
+ Path resultsRoot = resultsRoot(config);
+ try {
+ Files.createDirectories(resultsRoot);
+ Path currentRunIdFile = resultsRoot.resolve(CURRENT_RUN_ID_FILE);
+ try (FileChannel channel =
+ FileChannel.open(
+ currentRunIdFile,
+ StandardOpenOption.CREATE,
+ StandardOpenOption.READ,
+ StandardOpenOption.WRITE);
+ FileLock ignored = channel.lock()) {
+ String existingRunId = readCurrentRunId(channel);
+ if (!existingRunId.isBlank() && isReusableRun(resultsRoot.resolve(existingRunId))) {
+ return existingRunId;
+ }
+
+ String generatedRunId = generateRunId(startedAt);
+ writeCurrentRunId(channel, generatedRunId);
+ return generatedRunId;
+ }
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to resolve shared SLO runId", exception);
+ }
+ }
+
+ public void writeSummary(
+ SloConfig config, YdbRetryProperties retryProperties, RunSummary summary) {
+ Path runDirectory = resultsRoot(config).resolve(summary.runId());
+ Path resultFile = runDirectory.resolve(resultFileName(config.getRef()));
+
+ try {
+ Files.createDirectories(runDirectory);
+ Files.writeString(
+ resultFile,
+ buildRunSummaryText(config, retryProperties, summary),
+ StandardOpenOption.CREATE,
+ StandardOpenOption.TRUNCATE_EXISTING,
+ StandardOpenOption.WRITE);
+ log.info(
+ "SLO run result file written: runId={}, ref={}, path={}",
+ summary.runId(),
+ config.getRef(),
+ resultFile.toAbsolutePath());
+ } catch (IOException exception) {
+ throw new RuntimeException("Failed to write SLO run summary file", exception);
+ }
+ }
+
+ public Path resultsRoot(SloConfig config) {
+ return Path.of(config.getResultsDir());
+ }
+
+ private String generateRunId(Instant startedAt) {
+ String timestamp =
+ DateTimeFormatter.ofPattern(RUN_ID_TIMESTAMP_PATTERN)
+ .withZone(ZoneOffset.UTC)
+ .format(startedAt);
+ return RUN_ID_PREFIX
+ + timestamp
+ + "-"
+ + UUID.randomUUID().toString().substring(0, RUN_ID_RANDOM_SUFFIX_LENGTH);
+ }
+
+ private String buildRunSummaryText(
+ SloConfig config, YdbRetryProperties retryProperties, RunSummary summary) {
+ StringBuilder builder = new StringBuilder();
+ builder.append("runId: ").append(summary.runId()).append('\n');
+ builder.append("ref: ").append(config.getRef()).append('\n');
+ builder.append("startedAt: ").append(summary.startedAt()).append('\n');
+ builder.append("finishedAt: ").append(summary.finishedAt()).append('\n');
+ builder.append("durationMs: ")
+ .append(Duration.between(summary.startedAt(), summary.finishedAt()).toMillis())
+ .append('\n');
+ builder.append("resultsDir: ").append(resultsRoot(config).toAbsolutePath()).append('\n');
+ builder.append('\n');
+ builder.append("readRps: ").append(config.getReadRps()).append('\n');
+ builder.append("writeRps: ").append(config.getWriteRps()).append('\n');
+ builder.append("initialDataCount: ").append(config.getInitialDataCount()).append('\n');
+ builder.append("runTimeSeconds: ").append(config.getRunTimeSeconds()).append('\n');
+ builder.append('\n');
+ builder.append("retryEnabled: ").append(retryProperties.isEnabled()).append('\n');
+ builder.append("retryMaxRetries: ").append(retryProperties.getMaxRetries()).append('\n');
+ builder.append("retrySlowBackoffBaseMs: ")
+ .append(retryProperties.getSlowBackoffBaseMs())
+ .append('\n');
+ builder.append("retryFastBackoffBaseMs: ")
+ .append(retryProperties.getFastBackoffBaseMs())
+ .append('\n');
+ builder.append("retrySlowCapBackoffMs: ")
+ .append(retryProperties.getSlowCapBackoffMs())
+ .append('\n');
+ builder.append("retryFastCapBackoffMs: ")
+ .append(retryProperties.getFastCapBackoffMs())
+ .append('\n');
+ builder.append('\n');
+ builder.append("totalOperations: ").append(summary.totalOperations()).append('\n');
+ builder.append("totalSuccess: ").append(summary.totalSuccess()).append('\n');
+ builder.append("totalFailure: ").append(summary.totalFailure()).append('\n');
+ builder.append("failureRatePercent: ").append(summary.failureRatePercent()).append('\n');
+ builder.append("readSuccess: ").append(summary.readSuccess()).append('\n');
+ builder.append("readFailure: ").append(summary.readFailure()).append('\n');
+ builder.append("writeSuccess: ").append(summary.writeSuccess()).append('\n');
+ builder.append("writeFailure: ").append(summary.writeFailure()).append('\n');
+ builder.append('\n');
+ builder.append("overallP50Ms: ").append(summary.overallP50()).append('\n');
+ builder.append("overallP95Ms: ").append(summary.overallP95()).append('\n');
+ builder.append("overallP99Ms: ").append(summary.overallP99()).append('\n');
+ builder.append("readP50Ms: ").append(summary.readP50()).append('\n');
+ builder.append("readP95Ms: ").append(summary.readP95()).append('\n');
+ builder.append("readP99Ms: ").append(summary.readP99()).append('\n');
+ builder.append("writeP50Ms: ").append(summary.writeP50()).append('\n');
+ builder.append("writeP95Ms: ").append(summary.writeP95()).append('\n');
+ builder.append("writeP99Ms: ").append(summary.writeP99()).append('\n');
+ builder.append('\n');
+ builder.append("errorTypes:").append('\n');
+ if (summary.errorCounts().isEmpty()) {
+ builder.append(" none").append('\n');
+ } else {
+ summary.errorCounts()
+ .forEach(
+ (errorType, count) ->
+ builder.append(" ").append(errorType).append(": ").append(count).append('\n'));
+ }
+ return builder.toString();
+ }
+
+ private static String resultFileName(String ref) {
+ if (WITH_RETRY_REF.equals(ref)) {
+ return RETRY_RESULT_FILE_NAME;
+ }
+ if (NO_RETRY_REF.equals(ref)) {
+ return NO_RETRY_RESULT_FILE_NAME;
+ }
+ return ref.replaceAll(FILE_NAME_SANITIZE_REGEX, FILE_NAME_SANITIZE_REPLACEMENT);
+ }
+
+ private static boolean isReusableRun(Path runDirectory) {
+ return !Files.exists(runDirectory.resolve(RETRY_RESULT_FILE_NAME))
+ && !Files.exists(runDirectory.resolve(NO_RETRY_RESULT_FILE_NAME));
+ }
+
+ private static String readCurrentRunId(FileChannel channel) throws IOException {
+ channel.position(0);
+ ByteBuffer buffer = ByteBuffer.allocate((int) channel.size());
+ channel.read(buffer);
+ buffer.flip();
+ return StandardCharsets.UTF_8.decode(buffer).toString().trim();
+ }
+
+ private static void writeCurrentRunId(FileChannel channel, String runId) throws IOException {
+ channel.truncate(0);
+ channel.position(0);
+ channel.write(StandardCharsets.UTF_8.encode(runId));
+ channel.force(true);
+ }
+
+ public record RunSummary(
+ String runId,
+ Instant startedAt,
+ Instant finishedAt,
+ long totalOperations,
+ long totalSuccess,
+ long totalFailure,
+ String failureRatePercent,
+ long readSuccess,
+ long readFailure,
+ long writeSuccess,
+ long writeFailure,
+ String overallP50,
+ String overallP95,
+ String overallP99,
+ String readP50,
+ String readP95,
+ String readP99,
+ String writeP50,
+ String writeP95,
+ String writeP99,
+ Map errorCounts) {
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloRunner.java b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloRunner.java
new file mode 100644
index 00000000..741b7a6b
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloRunner.java
@@ -0,0 +1,374 @@
+package tech.ydb.slo;
+
+import io.opentelemetry.api.OpenTelemetry;
+import io.opentelemetry.api.common.AttributeKey;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.api.metrics.DoubleHistogram;
+import io.opentelemetry.api.metrics.LongCounter;
+import io.opentelemetry.api.metrics.Meter;
+import java.security.MessageDigest;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.boot.CommandLineRunner;
+import org.springframework.jdbc.core.JdbcTemplate;
+import org.springframework.stereotype.Component;
+import tech.ydb.core.Status;
+import tech.ydb.jdbc.exception.YdbStatusable;
+import tech.ydb.retry.YdbRetryProperties;
+
+@Component
+public class SloRunner implements CommandLineRunner {
+
+ private static final Logger log = LoggerFactory.getLogger(SloRunner.class);
+ private static final String OPERATIONS_METRIC_NAME = "slo.operations";
+ private static final String DURATION_METRIC_NAME = "slo.operation.duration.seconds";
+ private static final String DURATION_METRIC_UNIT = "s";
+ private static final List DURATION_BUCKETS =
+ List.of(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0);
+
+ private static final String TABLE_NAME = "slo_test_table";
+ private static final String READ_OPERATION = "read";
+ private static final String WRITE_OPERATION = "write";
+ private static final String SUCCESS_STATUS = "success";
+ private static final String FAILURE_STATUS = "failure";
+ private static final String NO_ERROR_TYPE = "none";
+
+ private final JdbcTemplate jdbcTemplate;
+ private final SloService sloService;
+ private final SloConfig config;
+ private final YdbRetryProperties retryProperties;
+ private final SloResultWriter resultWriter;
+ private final LongCounter operationsCounter;
+ private final DoubleHistogram durationHistogram;
+ private final SloStats sloStats = new SloStats();
+
+ private final AtomicInteger nextId = new AtomicInteger(0);
+ private final List readableIds = Collections.synchronizedList(new ArrayList<>());
+
+ private static final AttributeKey REF_KEY = AttributeKey.stringKey("ref");
+ private static final AttributeKey OP_TYPE_KEY = AttributeKey.stringKey("operation_type");
+ private static final AttributeKey STATUS_KEY = AttributeKey.stringKey("status");
+ private static final AttributeKey ERROR_TYPE_KEY = AttributeKey.stringKey("error_type");
+
+ public SloRunner(
+ JdbcTemplate jdbcTemplate,
+ SloService sloService,
+ SloConfig config,
+ YdbRetryProperties retryProperties,
+ SloResultWriter resultWriter,
+ OpenTelemetry openTelemetry) {
+ this.jdbcTemplate = jdbcTemplate;
+ this.sloService = sloService;
+ this.config = config;
+ this.retryProperties = retryProperties;
+ this.resultWriter = resultWriter;
+
+ Meter meter = openTelemetry.getMeter("slo");
+ this.operationsCounter =
+ meter.counterBuilder(OPERATIONS_METRIC_NAME)
+ .setDescription("Total number of SLO operations")
+ .build();
+ this.durationHistogram =
+ meter.histogramBuilder(DURATION_METRIC_NAME)
+ .setDescription("SLO operation latency")
+ .setUnit(DURATION_METRIC_UNIT)
+ .setExplicitBucketBoundariesAdvice(DURATION_BUCKETS)
+ .build();
+ }
+
+ @Override
+ public void run(String... args) {
+ Instant startedAt = Instant.now();
+ String runId = resultWriter.resolveRunId(config, startedAt);
+ createTable();
+ seedData();
+ runWorkload(runId);
+ Instant finishedAt = Instant.now();
+ writeRunSummaryFile(runId, startedAt, finishedAt);
+ waitForPrometheusScrapes(runId);
+ log.info("SLO workload completed and final metrics were exposed for scraping: runId={}", runId);
+ }
+
+ private void createTable() {
+ for (int attempt = 0; attempt < 10; attempt++) {
+ try {
+ jdbcTemplate.execute(
+ "CREATE TABLE "
+ + TABLE_NAME
+ + " ("
+ + "guid Text, "
+ + "id Int32, "
+ + "payload_str Text, "
+ + "payload_double Double, "
+ + "payload_timestamp Timestamp, "
+ + "PRIMARY KEY (guid, id)"
+ + ")");
+ log.info("Created table {}", TABLE_NAME);
+ return;
+ } catch (Exception e) {
+ String msg = e.getMessage();
+ if (msg != null
+ && (msg.contains("already exists")
+ || msg.contains("ALREADY_EXISTS")
+ || msg.contains("path exist"))) {
+ log.info("Table slo_test_table already exists");
+ return;
+ }
+ log.warn("Failed to create table (attempt {}/{}): {}", attempt + 1, 10, msg);
+ if (attempt == 9) {
+ log.warn("Max attempts reached, proceeding anyway");
+ return;
+ }
+ try {
+ Thread.sleep((attempt + 1) * 1000L);
+ } catch (InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(ie);
+ }
+ }
+ }
+ }
+
+ private void seedData() {
+ log.info("Seeding {} initial rows...", config.getInitialDataCount());
+ int success = 0;
+ for (int i = 1; i <= config.getInitialDataCount(); i++) {
+ try {
+ String guid = guidFromInt(i);
+ String payload = randomString();
+ sloService.upsert(guid, i, payload, Math.random(), LocalDateTime.now());
+ registerReadableId(i);
+ success++;
+ } catch (Exception e) {
+ log.warn("Failed to seed row {}: {}", i, e.getMessage());
+ }
+ }
+ nextId.set(config.getInitialDataCount());
+ log.info("Seeded {}/{} rows", success, config.getInitialDataCount());
+ }
+
+ private void runWorkload(String runId) {
+ String ref = config.getRef();
+ log.info(
+ "Starting workload: runId={}, ref={}, readRps={}, writeRps={}, time={}s",
+ runId,
+ ref,
+ config.getReadRps(),
+ config.getWriteRps(),
+ config.getRunTimeSeconds());
+
+ ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2);
+ ExecutorService workers = Executors.newFixedThreadPool(20);
+
+ int intervalMs = 100;
+ int readsPerInterval = Math.max(1, config.getReadRps() / 10);
+ int writesPerInterval = Math.max(1, config.getWriteRps() / 10);
+
+ ScheduledFuture> readFuture =
+ scheduler.scheduleAtFixedRate(
+ () -> {
+ for (int i = 0; i < readsPerInterval; i++) {
+ workers.submit(() -> doRead(ref));
+ }
+ },
+ 0,
+ intervalMs,
+ TimeUnit.MILLISECONDS);
+
+ ScheduledFuture> writeFuture =
+ scheduler.scheduleAtFixedRate(
+ () -> {
+ for (int i = 0; i < writesPerInterval; i++) {
+ workers.submit(() -> doWrite(ref));
+ }
+ },
+ 0,
+ intervalMs,
+ TimeUnit.MILLISECONDS);
+
+ try {
+ Thread.sleep(TimeUnit.SECONDS.toMillis(config.getRunTimeSeconds()));
+ } catch (InterruptedException interruptedException) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException("SLO workload interrupted", interruptedException);
+ }
+
+ readFuture.cancel(false);
+ writeFuture.cancel(false);
+ scheduler.shutdown();
+ workers.shutdown();
+ awaitTermination("scheduler", scheduler, 30L, TimeUnit.SECONDS);
+ awaitTermination("workers", workers, 30L, TimeUnit.SECONDS);
+ log.info("Workload finished: runId={}, ref={}", runId, ref);
+ }
+
+ private void doWrite(String ref) {
+ int id = nextId.incrementAndGet();
+ String guid = guidFromInt(id);
+ String payload = randomString();
+ double payloadDouble = Math.random();
+ LocalDateTime ts = LocalDateTime.now();
+
+ long start = System.nanoTime();
+ try {
+ sloService.upsert2(guid, id, payload, payloadDouble, ts);
+ registerReadableId(id);
+ long durationNanos = System.nanoTime() - start;
+ sloStats.recordSuccess(WRITE_OPERATION, durationNanos);
+ recordLatency(ref, WRITE_OPERATION, SUCCESS_STATUS, NO_ERROR_TYPE, durationNanos);
+ incrementCounter(ref, WRITE_OPERATION, SUCCESS_STATUS, NO_ERROR_TYPE);
+ } catch (Exception e) {
+ String errorType = extractErrorType(e);
+ long durationNanos = System.nanoTime() - start;
+ sloStats.recordFailure(WRITE_OPERATION, errorType, durationNanos);
+ recordLatency(ref, WRITE_OPERATION, FAILURE_STATUS, errorType, durationNanos);
+ incrementCounter(ref, WRITE_OPERATION, FAILURE_STATUS, errorType);
+ log.debug("Write failed: [{}] {}", errorType, e.getMessage());
+ }
+ }
+
+ private void doRead(String ref) {
+ Integer id = pickReadableId();
+ if (id == null) {
+ return;
+ }
+ String guid = guidFromInt(id);
+
+ long start = System.nanoTime();
+ try {
+ sloService.select(guid, id);
+ long durationNanos = System.nanoTime() - start;
+ sloStats.recordSuccess(READ_OPERATION, durationNanos);
+ recordLatency(ref, READ_OPERATION, SUCCESS_STATUS, NO_ERROR_TYPE, durationNanos);
+ incrementCounter(ref, READ_OPERATION, SUCCESS_STATUS, NO_ERROR_TYPE);
+ } catch (Exception e) {
+ String errorType = extractErrorType(e);
+ long durationNanos = System.nanoTime() - start;
+ sloStats.recordFailure(READ_OPERATION, errorType, durationNanos);
+ recordLatency(ref, READ_OPERATION, FAILURE_STATUS, errorType, durationNanos);
+ incrementCounter(ref, READ_OPERATION, FAILURE_STATUS, errorType);
+ log.debug("Read failed: [{}] {}", errorType, e.getMessage());
+ }
+ }
+
+ private void registerReadableId(int id) {
+ readableIds.add(id);
+ }
+
+ private Integer pickReadableId() {
+ synchronized (readableIds) {
+ if (readableIds.isEmpty()) {
+ return null;
+ }
+ return readableIds.get(ThreadLocalRandom.current().nextInt(readableIds.size()));
+ }
+ }
+
+ private void incrementCounter(String ref, String operationType, String status, String errorType) {
+ Attributes attrs =
+ Attributes.builder()
+ .put(REF_KEY, ref)
+ .put(OP_TYPE_KEY, operationType)
+ .put(STATUS_KEY, status)
+ .put(ERROR_TYPE_KEY, errorType)
+ .build();
+ operationsCounter.add(1, attrs);
+ }
+
+ private void recordLatency(
+ String ref, String operationType, String status, String errorType, long durationNanos) {
+ Attributes attrs =
+ Attributes.builder()
+ .put(REF_KEY, ref)
+ .put(OP_TYPE_KEY, operationType)
+ .put(STATUS_KEY, status)
+ .put(ERROR_TYPE_KEY, errorType)
+ .build();
+ durationHistogram.record(durationNanos / 1_000_000_000.0, attrs);
+ }
+
+ static String extractErrorType(Throwable throwable) {
+ Throwable current = throwable;
+ while (current != null) {
+ if (current instanceof YdbStatusable statusable) {
+ Status status = statusable.getStatus();
+ if (status != null && status.getCode() != null) {
+ return status.getCode().name();
+ }
+ }
+ current = current.getCause();
+ }
+ return throwable.getClass().getSimpleName();
+ }
+
+ static String guidFromInt(int value) {
+ try {
+ byte[] intBytes = new byte[4];
+ intBytes[0] = (byte) (value >> 24);
+ intBytes[1] = (byte) (value >> 16);
+ intBytes[2] = (byte) (value >> 8);
+ intBytes[3] = (byte) value;
+ byte[] hash = MessageDigest.getInstance("SHA-1").digest(intBytes);
+ StringBuilder sb = new StringBuilder(36);
+ for (int i = 0; i < 16; i++) {
+ sb.append(String.format("%02x", hash[i]));
+ if (i == 3 || i == 5 || i == 7 || i == 9) {
+ sb.append('-');
+ }
+ }
+ return sb.toString();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ static String randomString() {
+ ThreadLocalRandom rng = ThreadLocalRandom.current();
+ int len = 20 + rng.nextInt(21);
+ StringBuilder sb = new StringBuilder(len);
+ for (int i = 0; i < len; i++) {
+ sb.append((char) (32 + rng.nextInt(95)));
+ }
+ return sb.toString();
+ }
+
+ private void writeRunSummaryFile(String runId, Instant startedAt, Instant finishedAt) {
+ resultWriter.writeSummary(
+ config, retryProperties, sloStats.calculate(runId, startedAt, finishedAt, sloStats));
+ }
+
+ private void waitForPrometheusScrapes(String runId) {
+ log.info("Waiting {}s before shutdown to allow final Prometheus scrapes: runId={}", 10, runId);
+ try {
+ Thread.sleep(TimeUnit.SECONDS.toMillis(10));
+ } catch (InterruptedException interruptedException) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(
+ "Interrupted while waiting for final Prometheus scrapes", interruptedException);
+ }
+ }
+
+ private static void awaitTermination(
+ String name, ExecutorService executorService, long timeout, TimeUnit unit) {
+ try {
+ if (!executorService.awaitTermination(timeout, unit)) {
+ throw new IllegalStateException(name + " did not terminate in time");
+ }
+ } catch (InterruptedException interruptedException) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(name + " termination interrupted", interruptedException);
+ }
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloService.java b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloService.java
new file mode 100644
index 00000000..7d925001
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloService.java
@@ -0,0 +1,85 @@
+package tech.ydb.slo;
+
+import java.sql.Timestamp;
+import java.time.LocalDateTime;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.jdbc.core.JdbcTemplate;
+import org.springframework.stereotype.Service;
+import tech.ydb.retry.YdbTransactional;
+
+@Service
+public class SloService {
+
+ private static final Logger log = LoggerFactory.getLogger(SloService.class);
+ private static final String TABLE_NAME = "slo_test_table";
+ private static final String SELECT_MAX_ID_SQL = "SELECT MAX(id) FROM " + TABLE_NAME;
+ private static final int SECOND_UPSERT_ID_OFFSET = 1;
+
+ private final JdbcTemplate jdbcTemplate;
+
+ public SloService(JdbcTemplate jdbcTemplate) {
+ this.jdbcTemplate = jdbcTemplate;
+ }
+
+ @YdbTransactional(idempotent = true)
+ public void upsert(
+ String guid,
+ int id,
+ String payloadStr,
+ double payloadDouble,
+ LocalDateTime payloadTimestamp) {
+ jdbcTemplate.update(
+ "UPSERT INTO "
+ + TABLE_NAME
+ + " (guid, id, payload_str, payload_double, payload_timestamp) VALUES (?, ?, ?, ?, ?)",
+ guid,
+ id,
+ payloadStr,
+ payloadDouble,
+ Timestamp.valueOf(payloadTimestamp));
+ }
+
+ @YdbTransactional(idempotent = true)
+ public void upsert2(
+ String guid,
+ int id,
+ String payloadStr,
+ double payloadDouble,
+ LocalDateTime payloadTimestamp) {
+ jdbcTemplate.update(
+ "UPSERT INTO "
+ + TABLE_NAME
+ + " (guid, id, payload_str, payload_double, payload_timestamp) VALUES (?, ?, ?, ?, ?)",
+ guid,
+ id,
+ payloadStr,
+ payloadDouble,
+ Timestamp.valueOf(payloadTimestamp));
+
+ jdbcTemplate.update(
+ "UPSERT INTO "
+ + TABLE_NAME
+ + " (guid, id, payload_str, payload_double, payload_timestamp) VALUES (?, ?, ?, ?, ?)",
+ guid,
+ id + SECOND_UPSERT_ID_OFFSET,
+ payloadStr,
+ payloadDouble,
+ Timestamp.valueOf(payloadTimestamp));
+ }
+
+ @YdbTransactional(idempotent = true, readOnly = true)
+ public String select(String guid, int id) {
+ return jdbcTemplate.queryForObject(
+ "SELECT payload_str FROM " + TABLE_NAME + " WHERE guid = ? AND id = ?",
+ String.class,
+ guid,
+ id);
+ }
+
+ @YdbTransactional(idempotent = true, readOnly = true)
+ public int selectMaxId() {
+ Integer result = jdbcTemplate.queryForObject(SELECT_MAX_ID_SQL, Integer.class);
+ return result != null ? result : 0;
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloStats.java b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloStats.java
new file mode 100644
index 00000000..ceac8a97
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/java/tech/ydb/slo/SloStats.java
@@ -0,0 +1,169 @@
+package tech.ydb.slo;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.LongAdder;
+
+public class SloStats {
+
+ private static final String READ_OPERATION = "read";
+ private static final String EMPTY_PERCENTILE = "n/a";
+ private static final String FAILURE_RATE_FORMAT = "%.4f";
+ private static final String LATENCY_FORMAT = "%.3f";
+ private static final double PERCENTILE_50 = 0.50;
+ private static final double PERCENTILE_95 = 0.95;
+ private static final double PERCENTILE_99 = 0.99;
+ private static final double NANOS_IN_MILLISECOND = 1_000_000.0;
+ private static final double PERCENT_FACTOR = 100.0;
+
+ private final AtomicLong readSuccess = new AtomicLong();
+ private final AtomicLong readFailure = new AtomicLong();
+ private final AtomicLong writeSuccess = new AtomicLong();
+ private final AtomicLong writeFailure = new AtomicLong();
+ private final List overallLatenciesNanos = Collections.synchronizedList(new ArrayList<>());
+ private final List readLatenciesNanos = Collections.synchronizedList(new ArrayList<>());
+ private final List writeLatenciesNanos = Collections.synchronizedList(new ArrayList<>());
+ private final ConcurrentHashMap errorCounts = new ConcurrentHashMap<>();
+
+ public void recordSuccess(String operationType, long durationNanos) {
+ recordLatency(operationType, durationNanos);
+ if (READ_OPERATION.equals(operationType)) {
+ readSuccess.incrementAndGet();
+ return;
+ }
+ writeSuccess.incrementAndGet();
+ }
+
+ public void recordFailure(String operationType, String errorType, long durationNanos) {
+ recordLatency(operationType, durationNanos);
+ errorCounts.computeIfAbsent(errorType, ignored -> new LongAdder()).increment();
+ if (READ_OPERATION.equals(operationType)) {
+ readFailure.incrementAndGet();
+ return;
+ }
+ writeFailure.incrementAndGet();
+ }
+
+ public long getReadSuccess() {
+ return readSuccess.get();
+ }
+
+ public long getReadFailure() {
+ return readFailure.get();
+ }
+
+ public long getWriteSuccess() {
+ return writeSuccess.get();
+ }
+
+ public long getWriteFailure() {
+ return writeFailure.get();
+ }
+
+ public List overallLatenciesSnapshot() {
+ return snapshotLatencies(overallLatenciesNanos);
+ }
+
+ public List readLatenciesSnapshot() {
+ return snapshotLatencies(readLatenciesNanos);
+ }
+
+ public List writeLatenciesSnapshot() {
+ return snapshotLatencies(writeLatenciesNanos);
+ }
+
+ public Map errorCountsSnapshot() {
+ return errorCounts.entrySet().stream()
+ .sorted(
+ Map.Entry.comparingByValue(Comparator.comparingLong(LongAdder::sum))
+ .reversed())
+ .collect(
+ LinkedHashMap::new,
+ (map, entry) -> map.put(entry.getKey(), entry.getValue().sum()),
+ LinkedHashMap::putAll);
+ }
+
+ private void recordLatency(String operationType, long durationNanos) {
+ overallLatenciesNanos.add(durationNanos);
+ if (READ_OPERATION.equals(operationType)) {
+ readLatenciesNanos.add(durationNanos);
+ return;
+ }
+ writeLatenciesNanos.add(durationNanos);
+ }
+
+ private static List snapshotLatencies(List latenciesNanos) {
+ synchronized (latenciesNanos) {
+ return new ArrayList<>(latenciesNanos);
+ }
+ }
+
+ public SloResultWriter.RunSummary calculate(
+ String runId, Instant startedAt, Instant finishedAt, SloStats runStats) {
+ long readSuccess = runStats.getReadSuccess();
+ long readFailure = runStats.getReadFailure();
+ long writeSuccess = runStats.getWriteSuccess();
+ long writeFailure = runStats.getWriteFailure();
+ long totalSuccess = readSuccess + writeSuccess;
+ long totalFailure = readFailure + writeFailure;
+ long totalOperations = totalSuccess + totalFailure;
+ double failureRatePercent =
+ totalOperations == 0 ? 0.0 : (double) totalFailure * PERCENT_FACTOR / totalOperations;
+ List overallLatencies = sortedLatenciesSnapshot(runStats.overallLatenciesNanos);
+ List readLatencies = sortedLatenciesSnapshot(runStats.readLatenciesNanos);
+ List writeLatencies = sortedLatenciesSnapshot(runStats.writeLatenciesNanos);
+
+ return new SloResultWriter.RunSummary(
+ runId,
+ startedAt,
+ finishedAt,
+ totalOperations,
+ totalSuccess,
+ totalFailure,
+ String.format(Locale.ROOT, FAILURE_RATE_FORMAT, failureRatePercent),
+ readSuccess,
+ readFailure,
+ writeSuccess,
+ writeFailure,
+ formatPercentileMillis(overallLatencies, PERCENTILE_50),
+ formatPercentileMillis(overallLatencies, PERCENTILE_95),
+ formatPercentileMillis(overallLatencies, PERCENTILE_99),
+ formatPercentileMillis(readLatencies, PERCENTILE_50),
+ formatPercentileMillis(readLatencies, PERCENTILE_95),
+ formatPercentileMillis(readLatencies, PERCENTILE_99),
+ formatPercentileMillis(writeLatencies, PERCENTILE_50),
+ formatPercentileMillis(writeLatencies, PERCENTILE_95),
+ formatPercentileMillis(writeLatencies, PERCENTILE_99),
+ runStats.errorCountsSnapshot());
+ }
+
+ private static List sortedLatenciesSnapshot(List latenciesNanos) {
+ List snapshot = snapshotLatencies(latenciesNanos);
+ snapshot.sort(Long::compareTo);
+ return snapshot;
+ }
+
+ private static String formatPercentileMillis(List latenciesNanos, double percentile) {
+ if (latenciesNanos.isEmpty()) {
+ return EMPTY_PERCENTILE;
+ }
+ double millis = percentileValue(latenciesNanos, percentile) / NANOS_IN_MILLISECOND;
+ return String.format(Locale.ROOT, LATENCY_FORMAT, millis);
+ }
+
+ private static long percentileValue(List sortedLatenciesNanos, double percentile) {
+ int index =
+ Math.min(
+ sortedLatenciesNanos.size() - 1,
+ (int) Math.ceil(percentile * sortedLatenciesNanos.size()) - 1);
+ return sortedLatenciesNanos.get(index);
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/slo/src/main/resources/application.properties b/spring-ydb/spring-ydb-retry/slo/src/main/resources/application.properties
new file mode 100644
index 00000000..34fdbddb
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/slo/src/main/resources/application.properties
@@ -0,0 +1,23 @@
+server.port=${SERVER_PORT:8080}
+
+spring.datasource.url=${SPRING_DATASOURCE_URL:jdbc:ydb:grpc://localhost:2136/Root/testdb}
+spring.datasource.driver-class-name=tech.ydb.jdbc.YdbDriver
+spring.autoconfigure.exclude=org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration
+
+ydb.transaction.retry.enabled=${YDB_TRANSACTION_RETRY_ENABLED:true}
+ydb.transaction.retry.max-retries=${YDB_TRANSACTION_RETRY_MAX_RETRIES:10}
+
+slo.read-rps=${SLO_READ_RPS:100}
+slo.write-rps=${SLO_WRITE_RPS:100}
+slo.initial-data-count=${SLO_INITIAL_DATA:1000}
+slo.run-time-seconds=${SLO_TIME:600}
+slo.ref=${REF:unknown}
+slo.run-id=${SLO_RUN_ID:}
+slo.results-dir=${SLO_RESULTS_DIR:results}
+
+management.endpoints.web.exposure.include=prometheus,health,info
+management.metrics.export.prometheus.enabled=true
+management.health.db.enabled=false
+
+logging.level.tech.ydb=INFO
+logging.level.tech.ydb.slo=INFO
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/BackoffSleeper.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/BackoffSleeper.java
new file mode 100644
index 00000000..2650548b
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/BackoffSleeper.java
@@ -0,0 +1,6 @@
+package tech.ydb.retry;
+
+@FunctionalInterface
+public interface BackoffSleeper {
+ void sleep(long delayMs) throws InterruptedException;
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbDelayCalculator.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbDelayCalculator.java
new file mode 100644
index 00000000..5bc7179c
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbDelayCalculator.java
@@ -0,0 +1,52 @@
+package tech.ydb.retry;
+
+import org.springframework.lang.Nullable;
+import tech.ydb.core.StatusCode;
+
+public class YdbDelayCalculator {
+ public static long calculateDelay(
+ @Nullable StatusCode statusCode, YdbRetryPolicyConfig retryConfig, int attempt) {
+ if (statusCode == null) {
+ return 0;
+ }
+
+ return switch (statusCode) {
+ case BAD_SESSION, SESSION_BUSY -> 0;
+ case UNDETERMINED, ABORTED, CLIENT_CANCELLED, CLIENT_INTERNAL_ERROR -> delayWithFullJitter(
+ retryConfig.getFastBackoffBaseMs(),
+ retryConfig.getFastCapBackoffMs(),
+ retryConfig.getFastPow(),
+ attempt,
+ retryConfig);
+ case UNAVAILABLE, TRANSPORT_UNAVAILABLE -> delayWithEqualJitter(
+ retryConfig.getFastBackoffBaseMs(),
+ retryConfig.getFastCapBackoffMs(),
+ retryConfig.getFastPow(),
+ attempt,
+ retryConfig);
+ case OVERLOADED, CLIENT_RESOURCE_EXHAUSTED -> delayWithEqualJitter(
+ retryConfig.getSlowBackoffBaseMs(),
+ retryConfig.getSlowCapBackoffMs(),
+ retryConfig.getSlowPow(),
+ attempt,
+ retryConfig);
+ default -> 0;
+ };
+ }
+
+ static long calculateBackoff(int baseMs, int capMs, int pow, int attempt) {
+ return Math.min((long) baseMs * (1L << Math.min(pow, attempt)), capMs);
+ }
+
+ private static long delayWithFullJitter(
+ int baseMs, int capMs, int pow, int attempt, YdbRetryPolicyConfig retryConfig) {
+ return retryConfig.getJitter(calculateBackoff(baseMs, capMs, pow, attempt));
+ }
+
+ private static long delayWithEqualJitter(
+ int baseMs, int capMs, int pow, int attempt, YdbRetryPolicyConfig retryConfig) {
+ long calculatedBackoff = calculateBackoff(baseMs, capMs, pow, attempt);
+ long temp = calculatedBackoff / 2;
+ return temp + calculatedBackoff % 2 + retryConfig.getJitter(temp);
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryPolicy.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryPolicy.java
new file mode 100644
index 00000000..4ff793c4
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryPolicy.java
@@ -0,0 +1,10 @@
+package tech.ydb.retry;
+
+import org.springframework.lang.Nullable;
+import tech.ydb.core.StatusCode;
+
+public final class YdbRetryPolicy {
+ public static boolean shouldRetry(@Nullable StatusCode statusCode, boolean isIdempotent) {
+ return statusCode != null && statusCode.isRetryable(isIdempotent);
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryPolicyConfig.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryPolicyConfig.java
new file mode 100644
index 00000000..2b9c8742
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryPolicyConfig.java
@@ -0,0 +1,143 @@
+package tech.ydb.retry;
+
+import java.util.concurrent.ThreadLocalRandom;
+import org.springframework.lang.Nullable;
+
+public final class YdbRetryPolicyConfig {
+ public static final boolean DEFAULT_ENABLED = true;
+ public static final int DEFAULT_MAX_RETRIES = 10;
+ public static final int DEFAULT_SLOW_BACKOFF_BASE_MS = 50;
+ public static final int DEFAULT_FAST_BACKOFF_BASE_MS = 5;
+ public static final int DEFAULT_SLOW_CAP_BACKOFF_MS = 5_000;
+ public static final int DEFAULT_FAST_CAP_BACKOFF_MS = 500;
+
+ private final boolean enabled;
+ private final int maxRetries;
+ private final int slowBackoffBaseMs;
+ private final int fastBackoffBaseMs;
+ private final int slowCapBackoffMs;
+ private final int fastCapBackoffMs;
+ private final int slowPow;
+ private final int fastPow;
+
+ public YdbRetryPolicyConfig() {
+ this(
+ DEFAULT_ENABLED,
+ DEFAULT_MAX_RETRIES,
+ DEFAULT_SLOW_BACKOFF_BASE_MS,
+ DEFAULT_FAST_BACKOFF_BASE_MS,
+ DEFAULT_SLOW_CAP_BACKOFF_MS,
+ DEFAULT_FAST_CAP_BACKOFF_MS);
+ }
+
+ public YdbRetryPolicyConfig(
+ boolean enabled,
+ int maxRetries,
+ int slowBackoffBaseMs,
+ int fastBackoffBaseMs,
+ int slowCapBackoffMs,
+ int fastCapBackoffMs) {
+ if (maxRetries < 1) {
+ throw new IllegalArgumentException("maxRetries must be >= 1");
+ }
+ if (slowBackoffBaseMs < 0
+ || fastBackoffBaseMs < 0
+ || slowCapBackoffMs < 0
+ || fastCapBackoffMs < 0) {
+ throw new IllegalArgumentException("backoff values must be >= 0");
+ }
+ this.enabled = enabled;
+ this.slowBackoffBaseMs = slowBackoffBaseMs;
+ this.fastBackoffBaseMs = fastBackoffBaseMs;
+ this.slowCapBackoffMs = slowCapBackoffMs;
+ this.fastCapBackoffMs = fastCapBackoffMs;
+ this.maxRetries = maxRetries;
+ this.slowPow = powerForCap(this.slowCapBackoffMs);
+ this.fastPow = powerForCap(this.fastCapBackoffMs);
+ }
+
+ public long getJitter(long bound) {
+ if (bound <= 0) {
+ return 0;
+ }
+ return ThreadLocalRandom.current().nextLong(bound + 1);
+ }
+
+ public boolean isEnabled() {
+ return enabled;
+ }
+
+ public int getMaxRetries() {
+ return maxRetries;
+ }
+
+ public int getSlowBackoffBaseMs() {
+ return slowBackoffBaseMs;
+ }
+
+ public int getFastBackoffBaseMs() {
+ return fastBackoffBaseMs;
+ }
+
+ public int getSlowCapBackoffMs() {
+ return slowCapBackoffMs;
+ }
+
+ public int getFastCapBackoffMs() {
+ return fastCapBackoffMs;
+ }
+
+ public int getSlowPow() {
+ return slowPow;
+ }
+
+ public int getFastPow() {
+ return fastPow;
+ }
+
+ public YdbRetryPolicyConfig merge(@Nullable YdbTransactional transactionPolicy) {
+ if (transactionPolicy == null) {
+ return this;
+ }
+ return new YdbRetryPolicyConfig(
+ enabled && transactionPolicy.enabled(),
+ mergeMaxRetries(transactionPolicy.maxRetries(), maxRetries),
+ mergeNonNegativeInt(
+ "slowBackoffBaseMs", transactionPolicy.slowBackoffBaseMs(), slowBackoffBaseMs),
+ mergeNonNegativeInt(
+ "fastBackoffBaseMs", transactionPolicy.fastBackoffBaseMs(), fastBackoffBaseMs),
+ mergeNonNegativeInt(
+ "slowCapBackoffMs", transactionPolicy.slowCapBackoffMs(), slowCapBackoffMs),
+ mergeNonNegativeInt(
+ "fastCapBackoffMs", transactionPolicy.fastCapBackoffMs(), fastCapBackoffMs));
+ }
+
+ private static int mergeMaxRetries(int candidate, int fallback) {
+ return switch (candidate) {
+ case -1 -> fallback;
+ case 0 -> throw new IllegalArgumentException(
+ "maxRetries must not be 0; use enabled = false to disable retry");
+ default -> {
+ if (candidate < -1) {
+ throw new IllegalArgumentException("maxRetries must be -1 or >= 1");
+ }
+ yield candidate;
+ }
+ };
+ }
+
+ private static int mergeNonNegativeInt(String name, int candidate, int fallback)
+ throws IllegalArgumentException {
+ if (candidate < -1) {
+ throw new IllegalArgumentException(String.format("%s is invalid", name));
+ }
+ return candidate == -1 ? fallback : candidate;
+ }
+
+ private static int powerForCap(int capMs) {
+ if (capMs <= 0) {
+ return 0;
+ }
+ return Integer.SIZE - Integer.numberOfLeadingZeros(capMs);
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryProperties.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryProperties.java
new file mode 100644
index 00000000..6e8033ce
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbRetryProperties.java
@@ -0,0 +1,72 @@
+package tech.ydb.retry;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+@ConfigurationProperties(prefix = "ydb.transaction.retry")
+public class YdbRetryProperties {
+
+ private boolean enabled = YdbRetryPolicyConfig.DEFAULT_ENABLED;
+ private int maxRetries = YdbRetryPolicyConfig.DEFAULT_MAX_RETRIES;
+ private int slowBackoffBaseMs = YdbRetryPolicyConfig.DEFAULT_SLOW_BACKOFF_BASE_MS;
+ private int fastBackoffBaseMs = YdbRetryPolicyConfig.DEFAULT_FAST_BACKOFF_BASE_MS;
+ private int slowCapBackoffMs = YdbRetryPolicyConfig.DEFAULT_SLOW_CAP_BACKOFF_MS;
+ private int fastCapBackoffMs = YdbRetryPolicyConfig.DEFAULT_FAST_CAP_BACKOFF_MS;
+
+ public boolean isEnabled() {
+ return enabled;
+ }
+
+ public void setEnabled(boolean enabled) {
+ this.enabled = enabled;
+ }
+
+ public int getMaxRetries() {
+ return maxRetries;
+ }
+
+ public void setMaxRetries(int maxRetries) {
+ this.maxRetries = maxRetries;
+ }
+
+ public int getSlowBackoffBaseMs() {
+ return slowBackoffBaseMs;
+ }
+
+ public void setSlowBackoffBaseMs(int slowBackoffBaseMs) {
+ this.slowBackoffBaseMs = slowBackoffBaseMs;
+ }
+
+ public int getFastBackoffBaseMs() {
+ return fastBackoffBaseMs;
+ }
+
+ public void setFastBackoffBaseMs(int fastBackoffBaseMs) {
+ this.fastBackoffBaseMs = fastBackoffBaseMs;
+ }
+
+ public int getSlowCapBackoffMs() {
+ return slowCapBackoffMs;
+ }
+
+ public void setSlowCapBackoffMs(int slowCapBackoffMs) {
+ this.slowCapBackoffMs = slowCapBackoffMs;
+ }
+
+ public int getFastCapBackoffMs() {
+ return fastCapBackoffMs;
+ }
+
+ public void setFastCapBackoffMs(int fastCapBackoffMs) {
+ this.fastCapBackoffMs = fastCapBackoffMs;
+ }
+
+ public YdbRetryPolicyConfig toConfig() {
+ return new YdbRetryPolicyConfig(
+ enabled,
+ maxRetries,
+ slowBackoffBaseMs,
+ fastBackoffBaseMs,
+ slowCapBackoffMs,
+ fastCapBackoffMs);
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionAutoConfiguration.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionAutoConfiguration.java
new file mode 100644
index 00000000..f226f6a9
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionAutoConfiguration.java
@@ -0,0 +1,25 @@
+package tech.ydb.retry;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.boot.autoconfigure.AutoConfiguration;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.transaction.interceptor.TransactionInterceptor;
+
+@AutoConfiguration
+@ConditionalOnClass(TransactionInterceptor.class)
+@EnableConfigurationProperties(YdbRetryProperties.class)
+public class YdbTransactionAutoConfiguration {
+
+ private static final Logger log = LoggerFactory.getLogger(YdbTransactionAutoConfiguration.class);
+
+ @Bean
+ @ConditionalOnMissingBean
+ public static YdbTransactionInterceptorReplacer ydbBeanDefinitionRegistryPostProcessor() {
+ log.debug("creating YdbBeanDefinitionRegistryPostProcessor bean");
+ return new YdbTransactionInterceptorReplacer();
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptor.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptor.java
new file mode 100644
index 00000000..480b432a
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptor.java
@@ -0,0 +1,183 @@
+package tech.ydb.retry;
+
+import java.lang.reflect.AnnotatedElement;
+import java.lang.reflect.Method;
+import org.aopalliance.intercept.MethodInvocation;
+import org.springframework.aop.ProxyMethodInvocation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.aop.support.AopUtils;
+import org.springframework.core.annotation.AnnotatedElementUtils;
+import org.springframework.lang.Nullable;
+import org.springframework.transaction.TransactionDefinition;
+import org.springframework.transaction.interceptor.TransactionAttribute;
+import org.springframework.transaction.interceptor.TransactionAttributeSource;
+import org.springframework.transaction.interceptor.TransactionInterceptor;
+import org.springframework.transaction.support.TransactionSynchronizationManager;
+import tech.ydb.core.StatusCode;
+import tech.ydb.jdbc.exception.YdbStatusable;
+
+public class YdbTransactionInterceptor extends TransactionInterceptor {
+
+ private static final Logger log = LoggerFactory.getLogger(YdbTransactionInterceptor.class);
+ private final YdbRetryPolicyConfig retryConfig;
+ private final BackoffSleeper backoffSleeper;
+
+ public YdbTransactionInterceptor() {
+ this(new YdbRetryPolicyConfig(), Thread::sleep);
+ }
+
+ YdbTransactionInterceptor(YdbRetryPolicyConfig retryConfig, BackoffSleeper backoffSleeper) {
+ this.retryConfig = retryConfig;
+ this.backoffSleeper = backoffSleeper;
+ }
+
+ @Override
+ @Nullable
+ public Object invoke(final MethodInvocation invocation) throws Throwable {
+ Class> targetClass = invocation.getThis() != null ? AopUtils.getTargetClass(invocation.getThis()) : null;
+
+ TransactionAttributeSource tas = getTransactionAttributeSource();
+ final TransactionAttribute txAttr = (tas != null ? tas.getTransactionAttribute(invocation.getMethod(), targetClass) : null);
+ if (txAttr == null) {
+ return this.invokeWithinTransaction(invocation.getMethod(), targetClass, createCallback(invocation));
+ }
+
+ if (isParticipatingInExistingTransaction(txAttr)) {
+ log.debug(
+ "YDB retry is disabled for method {} because it participates in an existing transaction",
+ invocation.getMethod().toGenericString());
+ return this.invokeWithinTransaction(invocation.getMethod(), targetClass, createCallback(invocation));
+ }
+
+ YdbTransactional ydbTransactional = resolveYdbTransactionAnnotation(invocation.getMethod(), targetClass);
+ YdbRetryPolicyConfig retryConfig = this.retryConfig.merge(ydbTransactional);
+ boolean isIdempotent = ydbTransactional != null && ydbTransactional.idempotent();
+
+ if (!retryConfig.isEnabled()) {
+ log.debug("YDB retry is disabled for method {}", invocation.getMethod().toGenericString());
+ return this.invokeWithinTransaction(invocation.getMethod(), targetClass, createCallback(invocation));
+ }
+
+ return invokeWithinTransactionWithRetryContext(invocation, targetClass, retryConfig, isIdempotent);
+ }
+
+ @Nullable
+ private Object invokeWithinTransactionWithRetryContext(
+ final MethodInvocation invocation,
+ @Nullable Class> targetClass,
+ YdbRetryPolicyConfig retryConfig,
+ boolean isIdempotent)
+ throws Throwable {
+ for (int attempt = 0; ; attempt++) {
+ try {
+ MethodInvocation cloneInvocation = cloneInvocation(invocation);
+ return this.invokeWithinTransaction(
+ invocation.getMethod(), targetClass, createCallback(cloneInvocation));
+ } catch (Throwable ex) {
+ if (ex instanceof Error) {
+ throw ex;
+ }
+ StatusCode statusCode = extractStatusCode(ex);
+ if (!YdbRetryPolicy.shouldRetry(statusCode, isIdempotent)) {
+ throw ex;
+ }
+ if (attempt >= retryConfig.getMaxRetries()) {
+ throw ex;
+ }
+ long delay = YdbDelayCalculator.calculateDelay(statusCode, retryConfig, attempt);
+ sleep(delay, ex);
+ }
+ }
+ }
+
+ private void sleep(long delay, Throwable originalException) throws Throwable {
+ try {
+ backoffSleeper.sleep(delay);
+ } catch (InterruptedException interruptedException) {
+ Thread.currentThread().interrupt();
+ interruptedException.addSuppressed(originalException);
+ throw interruptedException;
+ }
+ }
+
+ private boolean isParticipatingInExistingTransaction(TransactionAttribute txAttr) {
+ if (!TransactionSynchronizationManager.isActualTransactionActive()) {
+ return false;
+ }
+ int propagationBehavior = txAttr.getPropagationBehavior();
+
+ return propagationBehavior != TransactionDefinition.PROPAGATION_REQUIRES_NEW
+ && propagationBehavior != TransactionDefinition.PROPAGATION_NOT_SUPPORTED
+ && propagationBehavior != TransactionDefinition.PROPAGATION_NEVER;
+ }
+
+ @Nullable
+ private YdbTransactional resolveYdbTransactionAnnotation(
+ Method method, @Nullable Class> targetClass) {
+ Method specificMethod =
+ targetClass != null ? AopUtils.getMostSpecificMethod(method, targetClass) : method;
+
+ YdbTransactional annotation = findYdbTransactional(specificMethod);
+ if (annotation != null) {
+ return annotation;
+ }
+
+ annotation = findYdbTransactional(targetClass);
+ if (annotation != null) {
+ return annotation;
+ }
+
+ if (!specificMethod.equals(method)) {
+ annotation = findYdbTransactional(method);
+ if (annotation != null) {
+ return annotation;
+ }
+ }
+
+ return findYdbTransactional(method.getDeclaringClass());
+ }
+
+ @Nullable
+ private YdbTransactional findYdbTransactional(@Nullable AnnotatedElement element) {
+ return element != null
+ ? AnnotatedElementUtils.findMergedAnnotation(element, YdbTransactional.class)
+ : null;
+ }
+
+ @Nullable
+ private StatusCode extractStatusCode(Throwable throwable) {
+ Throwable current = throwable;
+ while (current != null) {
+ if (current instanceof YdbStatusable statusable && statusable.getStatus() != null) {
+ return statusable.getStatus().getCode();
+ }
+ current = current.getCause();
+ }
+ return null;
+ }
+
+ private InvocationCallback createCallback(MethodInvocation invocation) {
+ return new InvocationCallback() {
+ @Nullable
+ public Object proceedWithInvocation() throws Throwable {
+ return invocation.proceed();
+ }
+
+ public Object getTarget() {
+ return invocation.getThis();
+ }
+
+ public Object[] getArguments() {
+ return invocation.getArguments();
+ }
+ };
+ }
+
+ private MethodInvocation cloneInvocation(MethodInvocation invocation) {
+ if (invocation instanceof ProxyMethodInvocation proxyMethodInvocation) {
+ return proxyMethodInvocation.invocableClone();
+ }
+ return invocation;
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptorFactory.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptorFactory.java
new file mode 100644
index 00000000..7f175300
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptorFactory.java
@@ -0,0 +1,85 @@
+package tech.ydb.retry;
+
+import org.springframework.beans.BeansException;
+import org.springframework.beans.factory.BeanFactory;
+import org.springframework.beans.factory.BeanFactoryAware;
+import org.springframework.beans.factory.FactoryBean;
+import org.springframework.lang.Nullable;
+import org.springframework.transaction.TransactionManager;
+import org.springframework.transaction.annotation.TransactionManagementConfigurer;
+import org.springframework.transaction.interceptor.TransactionAttributeSource;
+
+public class YdbTransactionInterceptorFactory
+ implements FactoryBean, BeanFactoryAware {
+
+ private YdbRetryProperties retryProperties;
+ private TransactionAttributeSource transactionAttributeSource;
+
+ @Nullable
+ private BeanFactory beanFactory;
+
+ public void setRetryProperties(YdbRetryProperties retryProperties) {
+ this.retryProperties = retryProperties;
+ }
+
+ public void setTransactionAttributeSource(TransactionAttributeSource transactionAttributeSource) {
+ this.transactionAttributeSource = transactionAttributeSource;
+ }
+
+ @Override
+ public void setBeanFactory(BeanFactory beanFactory) throws BeansException {
+ this.beanFactory = beanFactory;
+ }
+
+ @Override
+ public YdbTransactionInterceptor getObject() {
+ requireRetryProperties();
+ requireTransactionAttributeSource();
+
+ YdbTransactionInterceptor interceptor = new YdbTransactionInterceptor(retryProperties.toConfig(), Thread::sleep);
+ interceptor.setTransactionAttributeSource(transactionAttributeSource);
+ if (beanFactory != null) {
+ interceptor.setBeanFactory(beanFactory);
+ }
+
+ TransactionManager defaultTransactionManager = resolveTransactionManager();
+ if (defaultTransactionManager != null) {
+ interceptor.setTransactionManager(defaultTransactionManager);
+ }
+
+ return interceptor;
+ }
+
+ private void requireRetryProperties() {
+ if (retryProperties == null) {
+ throw new IllegalStateException(
+ "retryProperties must be set before creating YdbTransactionInterceptor");
+ }
+ }
+
+ private void requireTransactionAttributeSource() {
+ if (transactionAttributeSource == null) {
+ throw new IllegalStateException(
+ "transactionAttributeSource must be set before creating YdbTransactionInterceptor");
+ }
+ }
+
+ @Nullable
+ private TransactionManager resolveTransactionManager() {
+ if (beanFactory == null) {
+ return null;
+ }
+
+ TransactionManagementConfigurer configurer = beanFactory.getBeanProvider(TransactionManagementConfigurer.class).getIfAvailable();
+ if (configurer == null) {
+ return null;
+ }
+
+ return configurer.annotationDrivenTransactionManager();
+ }
+
+ @Override
+ public Class> getObjectType() {
+ return YdbTransactionInterceptor.class;
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptorReplacer.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptorReplacer.java
new file mode 100644
index 00000000..c88ff316
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactionInterceptorReplacer.java
@@ -0,0 +1,89 @@
+package tech.ydb.retry;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.BeansException;
+import org.springframework.beans.factory.config.BeanDefinition;
+import org.springframework.beans.factory.support.AbstractBeanDefinition;
+import org.springframework.beans.factory.support.BeanDefinitionBuilder;
+import org.springframework.beans.factory.support.BeanDefinitionRegistry;
+import org.springframework.beans.factory.support.BeanDefinitionRegistryPostProcessor;
+import org.springframework.core.Ordered;
+
+public class YdbTransactionInterceptorReplacer
+ implements BeanDefinitionRegistryPostProcessor, Ordered {
+
+ private static final Logger log = LoggerFactory.getLogger(YdbTransactionInterceptorReplacer.class);
+
+ private static final String TRANSACTION_INTERCEPTOR_BEAN_NAME = "transactionInterceptor";
+
+ @Override
+ public void postProcessBeanDefinitionRegistry(BeanDefinitionRegistry registry)
+ throws BeansException {
+ if (!registry.containsBeanDefinition(TRANSACTION_INTERCEPTOR_BEAN_NAME)) {
+ log.debug("BeanDefinition '{}' not found", TRANSACTION_INTERCEPTOR_BEAN_NAME);
+ return;
+ }
+
+ BeanDefinition existingBd = registry.getBeanDefinition(TRANSACTION_INTERCEPTOR_BEAN_NAME);
+
+ if (YdbTransactionInterceptorFactory.class.getName().equals(existingBd.getBeanClassName())) {
+ log.debug(
+ "BeanDefinition '{}' is already YdbTransactionInterceptorFactory",
+ TRANSACTION_INTERCEPTOR_BEAN_NAME);
+ return;
+ }
+
+ AbstractBeanDefinition newBd = buildYdbInterceptorBeanDefinition(existingBd);
+
+ registry.removeBeanDefinition(TRANSACTION_INTERCEPTOR_BEAN_NAME);
+ registry.registerBeanDefinition(TRANSACTION_INTERCEPTOR_BEAN_NAME, newBd);
+
+ log.debug(
+ "registered YdbTransactionInterceptorFactory as bean '{}'",
+ TRANSACTION_INTERCEPTOR_BEAN_NAME);
+ }
+
+ private AbstractBeanDefinition buildYdbInterceptorBeanDefinition(BeanDefinition existingBd) {
+ AbstractBeanDefinition newBd =
+ BeanDefinitionBuilder.genericBeanDefinition(YdbTransactionInterceptorFactory.class)
+ .setAutowireMode(AbstractBeanDefinition.AUTOWIRE_BY_TYPE)
+ .getBeanDefinition();
+
+ copyBeanDefinitionMetadata(existingBd, newBd);
+ return newBd;
+ }
+
+ private void copyBeanDefinitionMetadata(BeanDefinition source, AbstractBeanDefinition target) {
+ target.setParentName(source.getParentName());
+ target.setRole(source.getRole());
+ target.setScope(source.getScope());
+ target.setLazyInit(source.isLazyInit());
+ target.setPrimary(source.isPrimary());
+ target.setFallback(source.isFallback());
+ target.setDependsOn(source.getDependsOn());
+ target.setDescription(source.getDescription());
+ target.setSource(source.getSource());
+
+ if (source instanceof AbstractBeanDefinition abstractSource) {
+ target.setAutowireCandidate(abstractSource.isAutowireCandidate());
+ target.setDefaultCandidate(abstractSource.isDefaultCandidate());
+ target.setSynthetic(abstractSource.isSynthetic());
+ target.setResource(abstractSource.getResource());
+ target.setResourceDescription(abstractSource.getResourceDescription());
+ if (abstractSource.getOriginatingBeanDefinition() != null) {
+ target.setOriginatingBeanDefinition(abstractSource.getOriginatingBeanDefinition());
+ }
+ target.copyQualifiersFrom(abstractSource);
+
+ for (String attributeName : abstractSource.attributeNames()) {
+ target.setAttribute(attributeName, abstractSource.getAttribute(attributeName));
+ }
+ }
+ }
+
+ @Override
+ public int getOrder() {
+ return LOWEST_PRECEDENCE;
+ }
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactional.java b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactional.java
new file mode 100644
index 00000000..009e6efb
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/java/tech/ydb/retry/YdbTransactional.java
@@ -0,0 +1,71 @@
+package tech.ydb.retry;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Inherited;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+import org.springframework.core.annotation.AliasFor;
+import org.springframework.transaction.TransactionDefinition;
+import org.springframework.transaction.annotation.Isolation;
+import org.springframework.transaction.annotation.Propagation;
+import org.springframework.transaction.annotation.Transactional;
+
+@Target({ElementType.TYPE, ElementType.METHOD})
+@Retention(RetentionPolicy.RUNTIME)
+@Documented
+@Inherited
+@Transactional
+public @interface YdbTransactional {
+
+ @AliasFor(annotation = Transactional.class, attribute = "value")
+ String value() default "";
+
+ @AliasFor(annotation = Transactional.class, attribute = "transactionManager")
+ String transactionManager() default "";
+
+ @AliasFor(annotation = Transactional.class, attribute = "label")
+ String[] label() default {};
+
+ @AliasFor(annotation = Transactional.class, attribute = "propagation")
+ Propagation propagation() default Propagation.REQUIRED;
+
+ @AliasFor(annotation = Transactional.class, attribute = "isolation")
+ Isolation isolation() default Isolation.DEFAULT;
+
+ @AliasFor(annotation = Transactional.class, attribute = "timeout")
+ int timeout() default TransactionDefinition.TIMEOUT_DEFAULT;
+
+ @AliasFor(annotation = Transactional.class, attribute = "timeoutString")
+ String timeoutString() default "";
+
+ @AliasFor(annotation = Transactional.class, attribute = "readOnly")
+ boolean readOnly() default false;
+
+ @AliasFor(annotation = Transactional.class, attribute = "rollbackFor")
+ Class extends Throwable>[] rollbackFor() default {};
+
+ @AliasFor(annotation = Transactional.class, attribute = "rollbackForClassName")
+ String[] rollbackForClassName() default {};
+
+ @AliasFor(annotation = Transactional.class, attribute = "noRollbackFor")
+ Class extends Throwable>[] noRollbackFor() default {};
+
+ @AliasFor(annotation = Transactional.class, attribute = "noRollbackForClassName")
+ String[] noRollbackForClassName() default {};
+
+ boolean enabled() default true;
+
+ int maxRetries() default -1;
+
+ int slowBackoffBaseMs() default -1;
+
+ int fastBackoffBaseMs() default -1;
+
+ int slowCapBackoffMs() default -1;
+
+ int fastCapBackoffMs() default -1;
+
+ boolean idempotent() default false;
+}
diff --git a/spring-ydb/spring-ydb-retry/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/spring-ydb/spring-ydb-retry/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports
new file mode 100644
index 00000000..e20c6892
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports
@@ -0,0 +1 @@
+tech.ydb.retry.YdbTransactionAutoConfiguration
diff --git a/spring-ydb/spring-ydb-retry/src/test/java/tech/ydb/retry/InterceptorTestSupport.java b/spring-ydb/spring-ydb-retry/src/test/java/tech/ydb/retry/InterceptorTestSupport.java
new file mode 100644
index 00000000..5f7347c7
--- /dev/null
+++ b/spring-ydb/spring-ydb-retry/src/test/java/tech/ydb/retry/InterceptorTestSupport.java
@@ -0,0 +1,220 @@
+package tech.ydb.retry;
+
+import java.lang.reflect.Method;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.aopalliance.intercept.MethodInvocation;
+import org.junit.jupiter.api.AfterEach;
+import org.mockito.Mockito;
+import org.springframework.transaction.annotation.AnnotationTransactionAttributeSource;
+import org.springframework.transaction.annotation.Propagation;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.transaction.support.TransactionSynchronizationManager;
+import tech.ydb.core.Status;
+import tech.ydb.core.StatusCode;
+import tech.ydb.jdbc.exception.YdbStatusable;
+
+abstract class InterceptorTestSupport {
+
+ @AfterEach
+ void cleanupTransactionContext() {
+ TransactionSynchronizationManager.clear();
+ }
+
+ static TestableInterceptor interceptorWithConfig(
+ boolean enabled, int maxRetries, int slowBase, int fastBase, int slowCap, int fastCap) {
+ return interceptorWithSleeper(
+ enabled, maxRetries, slowBase, fastBase, slowCap, fastCap, delay -> {
+ });
+ }
+
+ static TestableInterceptor interceptorWithSleeper(
+ boolean enabled,
+ int maxRetries,
+ int slowBase,
+ int fastBase,
+ int slowCap,
+ int fastCap,
+ BackoffSleeper sleeper) {
+ TestableInterceptor interceptor =
+ new TestableInterceptor(
+ new YdbRetryPolicyConfig(enabled, maxRetries, slowBase, fastBase, slowCap, fastCap),
+ sleeper);
+ interceptor.setTransactionAttributeSource(new AnnotationTransactionAttributeSource());
+ return interceptor;
+ }
+
+ static MethodInvocation invocationFor(String methodName) {
+ Method method = methodOf(methodName);
+ Object target = targetFor(methodName);
+ return invocationFor(method, target);
+ }
+
+ static MethodInvocation invocationFor(Method method, Object target) {
+ MethodInvocation invocation = Mockito.mock(MethodInvocation.class);
+ Mockito.when(invocation.getMethod()).thenReturn(method);
+ Mockito.when(invocation.getThis()).thenReturn(target);
+ Mockito.when(invocation.getArguments()).thenReturn(new Object[0]);
+ return invocation;
+ }
+
+ private static Object targetFor(String methodName) {
+ if (methodName.startsWith("ydb") || methodName.startsWith("default")) {
+ return new YdbTransactionalTestService();
+ }
+ return new TransactionalTestService();
+ }
+
+ static Method methodOf(String methodName) {
+ try {
+ if (methodName.startsWith("ydb") || methodName.startsWith("default")) {
+ return YdbTransactionalTestService.class.getMethod(methodName);
+ }
+ return TransactionalTestService.class.getMethod(methodName);
+ } catch (NoSuchMethodException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ static final class TestableInterceptor extends YdbTransactionInterceptor {
+ private final Deque