Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
- Add constructor/builder parameters to supply the initial Lua script as a string or as a file path, and optional additional script paths copied into the container data directory (`Tarantool2Container`, `CartridgeClusterContainer`, `VshardClusterContainer`); simplify bundled `server.lua` accordingly.
- Upgrade TQE to v3.5.0.
- Extract `ObjectMapper` to a static field in the test `tdg.Utils` helper to avoid recreating it on every `sendUsers`/`getUsers` call.
- Wait for vshard storages to complete the handshake (`vshard.router.info()` reports every replica as `available` and zero unreachable buckets) before declaring a `VshardClusterContainer` ready, preventing intermittent `VHANDSHAKE_NOT_COMPLETE` CRUD failures right after bootstrap.

### Documentation

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public void configure() {
this.container.waitUntilVshardIsBootstrapped(
VshardClusterContainer.TIMEOUT_VSHARD_BOOTSTRAP_IN_SECONDS);
this.container.waitUntilCrudIsUp(VshardClusterContainer.TIMEOUT_CRUD_HEALTH_IN_SECONDS);
this.container.waitUntilVshardStoragesAreReady(
VshardClusterContainer.TIMEOUT_VSHARD_STORAGES_READY_IN_SECONDS);
this.configured.set(true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ public class VshardClusterContainer extends GenericContainer<VshardClusterContai
// Lua snippet that calls vshard.router.bootstrap with if_not_bootstrapped = true and returns
// its result.
protected static final String VSHARD_BOOTSTRAP_COMMAND =
"return require('vshard').router.bootstrap({if_not_bootstrapped = true})";
// Lua snippet that returns box.info.status of the instance it is evaluated on.
protected static final String ROUTER_HEALTH_COMMAND = "return box.info.status";
/**
 * Lua probe that returns {@code true} only when {@code vshard.router.info()} reports every
 * replicaset instance as {@code available} and no unreachable buckets.
 *
 * <p>{@code vshard.router.info()} describes each replicaset through the {@code master} and
 * {@code replica} entries, each carrying a {@code status} field ({@code available}, {@code
 * unreachable} or {@code missing}), so both entries are inspected. A {@code replicas} list is
 * also checked if the response happens to contain one, which keeps the probe tolerant of other
 * response layouts.
 */
protected static final String VSHARD_STORAGES_READY_COMMAND =
    "local info = require('vshard').router.info();"
        // An instance is ready only if it is a table whose status is exactly 'available';
        // nil, box.NULL and {status = 'missing'} all count as not ready.
        + " local function ok(i) return type(i) == 'table' and i.status == 'available' end;"
        + " for _, rs in pairs(info.replicasets or {}) do"
        + " if not ok(rs.master) then return false end;"
        + " if rs.replica ~= nil and not ok(rs.replica) then return false end;"
        + " for _, r in pairs(rs.replicas or {}) do"
        + " if not ok(r) then return false end;"
        + " end;"
        + " end;"
        + " if info.bucket and (info.bucket.unreachable or 0) > 0 then return false end;"
        + " return true";
private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory());

public static final String ENV_TARANTOOL_VERSION = "TARANTOOL_VERSION";
Expand All @@ -82,6 +92,7 @@ public class VshardClusterContainer extends GenericContainer<VshardClusterContai
// Upper bounds, in seconds, for the individual readiness waits performed while the cluster
// starts up. NOTE(review): the consumers of the router-health and container-start timeouts are
// not visible in this hunk — confirm where they are applied.
protected static final int TIMEOUT_CRUD_HEALTH_IN_SECONDS = 60;
protected static final int TIMEOUT_ROUTER_HEALTH_IN_SECONDS = 90;
protected static final int TIMEOUT_VSHARD_BOOTSTRAP_IN_SECONDS = 90;
// Passed to waitUntilVshardStoragesAreReady(int) as the maximum wait for the storage handshake.
protected static final int TIMEOUT_VSHARD_STORAGES_READY_IN_SECONDS = 120;
protected static final int TIMEOUT_CONTAINER_START_IN_SECONDS = 600;

protected final String TARANTOOL_RUN_DIR;
Expand Down Expand Up @@ -557,6 +568,23 @@ protected void waitUntilVshardIsBootstrapped(int secondsToWait) {
}
}

/**
 * Blocks until {@link #vshardStoragesAreReady()} reports success or the timeout elapses.
 *
 * <p>A clean return from {@code vshard.router.bootstrap}, which is all that {@link
 * #vshardIsBootstrapped()} verifies, does not guarantee that every storage has finished the
 * vshard handshake: during the initial rebalance individual storages may still answer with
 * {@code VHANDSHAKE_NOT_COMPLETE} (vshard code 40), and a CRUD request sent at that moment fails
 * immediately. This wait therefore additionally relies on the per-replica status surfaced by
 * {@code vshard.router.info()}.
 *
 * @param secondsToWait maximum time to wait, in seconds
 * @throws RuntimeException if the storages are still not ready once the timeout is exceeded
 */
protected void waitUntilVshardStoragesAreReady(int secondsToWait) {
  final boolean storagesReady = waitUntilTrue(secondsToWait, this::vshardStoragesAreReady);
  if (storagesReady) {
    return;
  }
  throw new RuntimeException(
      "Timeout exceeded while waiting for vshard storages to complete handshake."
          + " See the specific error in logs.");
}

protected boolean waitUntilTrue(int secondsToWait, Supplier<Boolean> waitFunc) {
int secondsPassed = 0;
boolean result = waitFunc.get();
Expand Down Expand Up @@ -629,6 +657,31 @@ protected boolean vshardIsBootstrapped() {
}
}

/**
 * Runs {@link #VSHARD_STORAGES_READY_COMMAND} once against this container and interprets the
 * outcome as a readiness flag.
 *
 * <p>The probe never throws: an empty response, a first value other than {@code true}, and any
 * exception raised while executing or decoding the command are all logged as warnings and
 * reported as "not ready". This makes the method suitable as the polling condition used by
 * {@link #waitUntilVshardStoragesAreReady(int)}.
 *
 * @return {@code true} only when the first decoded value of the response is {@code true}
 */
protected boolean vshardStoragesAreReady() {
  try {
    final List<?> probeOutput =
        TarantoolContainerClientHelper.executeCommandDecoded(
            this, VSHARD_STORAGES_READY_COMMAND, null);
    if (probeOutput.isEmpty()) {
      logger().warn("Vshard storages readiness probe returned an empty response");
      return false;
    }
    // Only a literal boolean true counts; null or any other decoded value means "not ready".
    if (Boolean.TRUE.equals(probeOutput.get(0))) {
      return true;
    }
    logger().warn("Vshard storages are not handshaked yet");
    return false;
  } catch (Exception probeFailure) {
    logger().warn("Vshard storages readiness probe failed: {}", probeFailure.getMessage());
    return false;
  }
}

protected String getFileName(String filePath) {
if (filePath == null || filePath.isBlank()) {
throw new IllegalArgumentException("File path must not be null or empty");
Expand Down
Loading