From 71d774c3007e25aaf27dd1c26bb4f05d7f80ead9 Mon Sep 17 00:00:00 2001
From: yyoyoian-pixel <279225925+yyoyoian-pixel@users.noreply.github.com>
Date: Sun, 3 May 2026 12:52:43 +0200
Subject: [PATCH] =?UTF-8?q?tune:=20lower=20coalesce/settle=20step=20from?=
 =?UTF-8?q?=2040=20=E2=86=92=2010=20ms,=20raise=20tunnel-node=20settle=20m?=
 =?UTF-8?q?ax=20to=201=20s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The batch coalesce step controls how long the client (and the
tunnel-node's straggler settle) waits between checking for more ops to
pack into the same batch. At 40 ms the wait was conservative — good for
packing uploads but needlessly slow on the download path where the
tunnel-node round-trip, not coalescing, is the bottleneck.

Lowering the step to 10 ms means we fire batches almost immediately
when there's nothing else queued, cutting ~30 ms of dead air on every
download-dominated round-trip. When both sides DO have data in flight
(uploads, bursty page loads), the adaptive reset still works: each
arriving op resets the 10 ms step timer, so a rapid burst naturally
coalesces up to the 1 s hard cap without wasting quota on many small
batches. In short: don't wait when there's nothing to wait for; batch
aggressively when there is.

Client side:
- DEFAULT_COALESCE_STEP_MS 40 → 10 ms
- DEFAULT_COALESCE_MAX_MS unchanged at 1000 ms

Tunnel-node side:
- STRAGGLER_SETTLE_STEP 40 → 10 ms (matches client step)
- STRAGGLER_SETTLE_MAX 500 → 1000 ms (more room to pack straggler
  responses when upstream targets reply at different speeds — saves
  Apps Script quota on the return leg)

Users who prefer the old behaviour can set "coalesce_step_ms": 40 in
config.json.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../main/java/com/therealaleph/mhrv/ConfigStore.kt |  6 +++---
 src/config.rs                                      |  2 +-
 src/proxy_server.rs                                |  2 +-
 src/tunnel_client.rs                               | 13 ++++++++++++-
 tunnel-node/src/main.rs                            |  4 ++--
 5 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt b/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt
index 8f7e6713..9cb535b4 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt
@@ -96,7 +96,7 @@ data class MhrvConfig(
     val verifySsl: Boolean = true,
     val logLevel: String = "info",
     val parallelRelay: Int = 1,
-    val coalesceStepMs: Int = 40,
+    val coalesceStepMs: Int = 10,
     val coalesceMaxMs: Int = 1000,
     val upstreamSocks5: String = "",
@@ -210,7 +210,7 @@ data class MhrvConfig(
         put("verify_ssl", verifySsl)
         put("log_level", logLevel)
         put("parallel_relay", parallelRelay)
-        if (coalesceStepMs != 40) put("coalesce_step_ms", coalesceStepMs)
+        if (coalesceStepMs != 10) put("coalesce_step_ms", coalesceStepMs)
         if (coalesceMaxMs != 1000) put("coalesce_max_ms", coalesceMaxMs)
         if (upstreamSocks5.isNotBlank()) {
             put("upstream_socks5", upstreamSocks5.trim())
@@ -422,7 +422,7 @@ object ConfigStore {
            verifySsl = obj.optBoolean("verify_ssl", true),
            logLevel = obj.optString("log_level", "info"),
            parallelRelay = obj.optInt("parallel_relay", 1),
-           coalesceStepMs = obj.optInt("coalesce_step_ms", 40),
+           coalesceStepMs = obj.optInt("coalesce_step_ms", 10),
            coalesceMaxMs = obj.optInt("coalesce_max_ms", 1000),
            upstreamSocks5 = obj.optString("upstream_socks5", ""),
            passthroughHosts = obj.optJSONArray("passthrough_hosts")?.let { arr ->
diff --git a/src/config.rs b/src/config.rs
index 628eb7ca..d0281a6c 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -104,7 +104,7 @@ pub struct Config {
     pub parallel_relay: u8,
     /// Adaptive batch coalesce: after each op arrives, wait this many ms
     /// for more ops before firing the batch. Resets on every arrival.
-    /// 0 = use compiled default (40ms).
+    /// 0 = use compiled default (10ms).
     #[serde(default)]
     pub coalesce_step_ms: u16,
     /// Hard cap on total coalesce wait (ms). 0 = use compiled default (1000ms).
diff --git a/src/proxy_server.rs b/src/proxy_server.rs
index 06ed7feb..81f9071a 100644
--- a/src/proxy_server.rs
+++ b/src/proxy_server.rs
@@ -518,7 +518,7 @@ impl ProxyServer {
            mitm,
            rewrite_ctx,
            tunnel_mux: None, // initialized in run() inside the tokio runtime
-           coalesce_step_ms: if config.coalesce_step_ms > 0 { config.coalesce_step_ms as u64 } else { 40 },
+           coalesce_step_ms: if config.coalesce_step_ms > 0 { config.coalesce_step_ms as u64 } else { 10 },
            coalesce_max_ms: if config.coalesce_max_ms > 0 { config.coalesce_max_ms as u64 } else { 1000 },
         })
     }
diff --git a/src/tunnel_client.rs b/src/tunnel_client.rs
index c3444a44..57c27366 100644
--- a/src/tunnel_client.rs
+++ b/src/tunnel_client.rs
@@ -59,7 +59,18 @@ const CLIENT_FIRST_DATA_WAIT: Duration = Duration::from_millis(50);
 /// Adaptive coalesce defaults: after each new op arrives, wait another
 /// step for more ops. Resets on every arrival, up to max from the first
 /// op. Overridable via config `coalesce_step_ms` / `coalesce_max_ms`.
-const DEFAULT_COALESCE_STEP_MS: u64 = 40;
+///
+/// 10 ms is enough to catch ops that arrive in the same event-loop tick
+/// (e.g. a browser opening 6 parallel connections) without adding
+/// perceptible latency to downloads where the tunnel-node reply — not
+/// coalescing — is the real bottleneck. When both sides *do* have data
+/// in flight (uploads, bursty page loads), the adaptive reset still
+/// packs batches efficiently: each arriving op resets the step timer, so
+/// a rapid burst naturally coalesces up to `DEFAULT_COALESCE_MAX_MS`
+/// without an explicit upload/download distinction. The net effect is
+/// "don't wait when there's nothing to wait for; batch aggressively when
+/// there is."
+const DEFAULT_COALESCE_STEP_MS: u64 = 10;
 const DEFAULT_COALESCE_MAX_MS: u64 = 1000;

 /// Structured error code the tunnel-node returns when it doesn't know the
diff --git a/tunnel-node/src/main.rs b/tunnel-node/src/main.rs
index 69e361cb..c63c534a 100644
--- a/tunnel-node/src/main.rs
+++ b/tunnel-node/src/main.rs
@@ -47,8 +47,8 @@ const ACTIVE_DRAIN_DEADLINE: Duration = Duration::from_millis(350);
 /// still arriving. Stops when no new data arrived in the last STEP (the
 /// burst is over) or MAX is reached. Packing more session responses into
 /// one batch saves quota on high-latency relays (~1.5s Apps Script overhead).
-const STRAGGLER_SETTLE_STEP: Duration = Duration::from_millis(40);
-const STRAGGLER_SETTLE_MAX: Duration = Duration::from_millis(500);
+const STRAGGLER_SETTLE_STEP: Duration = Duration::from_millis(10);
+const STRAGGLER_SETTLE_MAX: Duration = Duration::from_millis(1000);

 /// Drain-phase deadline when the batch is a pure poll (no writes, no new
 /// connections — clients just asking "any push data?"). Holding the
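
For reference, the sketch below illustrates the adaptive coalesce loop that the commit message and the comments above describe. It is not code from this repository: Op, the mpsc channel, and coalesce_batch are hypothetical stand-ins, and the real client and tunnel-node loops differ in detail; it only shows how the step/max pair interacts under those assumptions.

    use std::time::{Duration, Instant};
    use tokio::sync::mpsc;
    use tokio::time::timeout;

    struct Op; // stand-in for one queued tunnel operation

    /// Collect ops into one batch: block for the first op, then keep waiting one
    /// `step` for the next (the wait restarts on every arrival), but never longer
    /// than `max` measured from the first op.
    async fn coalesce_batch(rx: &mut mpsc::Receiver<Op>, step: Duration, max: Duration) -> Vec<Op> {
        let mut batch = Vec::new();
        let first = match rx.recv().await {
            Some(op) => op,
            None => return batch, // channel closed before anything arrived
        };
        batch.push(first);
        let deadline = Instant::now() + max; // hard cap counted from the first op

        loop {
            // Wait at most one step, clipped to whatever remains of the hard cap.
            let budget = step.min(deadline.saturating_duration_since(Instant::now()));
            if budget.is_zero() {
                break; // hit the max: fire the batch
            }
            match timeout(budget, rx.recv()).await {
                Ok(Some(op)) => batch.push(op), // arrival restarts the step wait
                _ => break, // a full step passed with nothing new (or channel closed)
            }
        }
        batch
    }

With step = 10 ms and max = 1000 ms this matches the numbers above: an isolated op waits only 10 ms before its batch fires, while a rapid burst keeps extending the window up to the one-second cap.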