diff --git a/fixtures/issue-283/init.lua b/fixtures/issue-283/init.lua new file mode 100644 index 00000000..a8998d46 --- /dev/null +++ b/fixtures/issue-283/init.lua @@ -0,0 +1,99 @@ +-- Fixture for issue #283: +-- "find_available_port probe-then-rebind races; create_server has no retry -> +-- EADDRINUSE with parallel Neovim instances (regression in #282)" +-- https://github.com/coder/claudecode.nvim/issues/283 +-- +-- This fixture starts the REAL claudecode WebSocket server on launch and prints +-- a big banner showing whether THIS instance got a listening port or failed. +-- +-- Reproduction (from repo root), in TWO terminals: +-- source fixtures/nvim-aliases.sh +-- vv issue-283 # terminal 1 -> "LISTENING on port 48811" +-- vv issue-283 # terminal 2 -> "FAILED ... Failed to listen on port 48811: EADDRINUSE" +-- +-- Because #282 dropped the per-process RNG seeding, every fresh Neovim picks the +-- SAME port (48811 with the default 10000-65535 range), so the second instance +-- always collides. The probe in find_available_port cannot notice the first +-- instance's listener (libuv defers EADDRINUSE to listen()), and create_server +-- does not retry, so the integration never starts in instance 2. +-- +-- :ReproStatus re-print this instance's server status +-- :ReproStop stop this instance's server (frees the port / lockfile) + +local config_dir = vim.fn.stdpath("config") +local repo_root = vim.fn.fnamemodify(config_dir, ":h:h") +vim.opt.rtp:prepend(repo_root) + +vim.g.mapleader = " " +vim.g.maplocalleader = "\\" +vim.o.showtabline = 0 +vim.o.laststatus = 2 + +local ok, claudecode = pcall(require, "claudecode") +assert(ok, "Failed to load claudecode.nvim from repo root: " .. tostring(claudecode)) + +-- auto_start = false so we can call start() ourselves and capture its result. +claudecode.setup({ + auto_start = false, + log_level = "info", + terminal = { + provider = "native", + auto_close = false, + }, +}) + +local started_ok, started_info = claudecode.start(false) + +local function status_lines() + local running = claudecode.state and claudecode.state.server ~= nil + local port = claudecode.state and claudecode.state.port or nil + local lines = { + "claudecode.nvim -- issue #283 reproduction fixture", + "", + "Run `vv issue-283` in a SECOND terminal while this one is open.", + "", + } + if started_ok and running then + lines[#lines + 1] = "THIS INSTANCE: ✅ server LISTENING on port " .. tostring(port) + lines[#lines + 1] = "" + lines[#lines + 1] = "Now open a second instance: it should FAIL on the same port" + lines[#lines + 1] = "with EADDRINUSE, because every fresh Neovim deterministically" + lines[#lines + 1] = "selects this same port (lost RNG seeding in #282)." + else + lines[#lines + 1] = "THIS INSTANCE: ❌ server FAILED to start" + lines[#lines + 1] = "" + lines[#lines + 1] = " " .. tostring(started_info) + lines[#lines + 1] = "" + lines[#lines + 1] = "This is #283: another Neovim already holds this port, the probe" + lines[#lines + 1] = "could not detect it, and create_server did not retry." + end + lines[#lines + 1] = "" + lines[#lines + 1] = ":ReproStatus re-print status :ReproStop stop this server" + return lines, (started_ok and running) +end + +local function show_banner() + local lines, good = status_lines() + vim.bo.modifiable = true + vim.api.nvim_buf_set_lines(0, 0, -1, false, lines) + vim.bo.modifiable = false + vim.bo.modified = false + -- Keep the echo SHORT (port only) so it stays below the hit-enter threshold; + -- the full error text lives in the banner buffer above. + local msg = good and ("issue283: LISTENING on port " .. tostring(claudecode.state.port)) + or "issue283: FAILED -- port in use (EADDRINUSE); see buffer above" + vim.api.nvim_echo({ { msg, good and "MoreMsg" or "ErrorMsg" } }, false, {}) +end + +vim.api.nvim_create_user_command("ReproStatus", show_banner, { desc = "Re-print #283 server status" }) +vim.api.nvim_create_user_command("ReproStop", function() + claudecode.stop() + vim.api.nvim_echo({ { "issue283: server stopped", "MoreMsg" } }, false, {}) +end, { desc = "Stop this instance's server (#283)" }) + +-- Populate the buffer synchronously at load time so it is already non-empty when +-- startup finishes -- this suppresses Neovim's intro screen without depending on +-- a deferred redraw (a hit-enter prompt from the plugin's own error log can +-- otherwise block a scheduled callback). The plugin's native error message still +-- appears in the message area, exactly as a real user sees it. +show_banner() diff --git a/lua/claudecode/server/tcp.lua b/lua/claudecode/server/tcp.lua index 695d02ef..a33de204 100644 --- a/lua/claudecode/server/tcp.lua +++ b/lua/claudecode/server/tcp.lua @@ -13,7 +13,58 @@ local M = {} ---@field on_disconnect function Callback for client disconnections ---@field on_error fun(err_msg: string) Callback for errors ----Find an available port by attempting to bind +-- Seed Lua's PRNG exactly once per process. #282 removed the implicit seeding +-- that used to happen via utils.shuffle_array (math.randomseed(os.time())), which +-- left LuaJIT's fixed default seed in place -- so every fresh Neovim picked the +-- *same* starting port and parallel instances always collided (#283). Mixing in a +-- sub-second source avoids two instances launched in the same second seeding +-- identically. hrtime is guarded because some test stubs omit it. +local rng_seeded = false +local function ensure_rng_seeded() + if rng_seeded then + return + end + local jitter + local ok, hr = pcall(function() + return vim.loop and vim.loop.hrtime and vim.loop.hrtime() + end) + if ok and type(hr) == "number" then + jitter = hr % 1000000 + else + jitter = math.floor((os.clock() % 1) * 1000000) + end + math.randomseed((os.time() * 1000000) + jitter) + rng_seeded = true +end + +-- Iterate the port range exactly once, starting from a random offset and wrapping +-- around. Returns a closure rather than materializing the range: the default +-- 10000-65535 range is ~55k entries, and building/shuffling it on every startup +-- was the cost #282 set out to remove. +local function port_iterator(min_port, max_port) + local port_count = max_port - min_port + 1 + if port_count <= 0 then + return function() + return nil + end + end + ensure_rng_seeded() + local start_offset = math.random(port_count) - 1 + local checked = -1 + return function() + checked = checked + 1 + if checked >= port_count then + return nil + end + return min_port + ((start_offset + checked) % port_count) + end +end + +---Find an available port using a best-effort bind probe. +---NOTE: this is only a pre-filter. A successful throwaway bind does NOT guarantee +---the port is free: libuv's bind() defers EADDRINUSE to listen()/connect(), so a +---port another process is actively listening on still passes this probe. The +---authoritative check is create_server's bind+listen with retry. ---@param min_port number Minimum port to try ---@param max_port number Maximum port to try ---@return number|nil port Available port number, or nil if none found @@ -25,14 +76,7 @@ function M.find_available_port(min_port, max_port) return nil end - local port_count = max_port - min_port + 1 - local start_offset = math.random(port_count) - 1 - - -- Pick a random starting point, then scan the range once. This keeps the - -- selection spread across the configured range without building and shuffling - -- a 55k-entry table for the default 10000-65535 range on every startup. - for checked = 0, port_count - 1 do - local port = min_port + ((start_offset + checked) % port_count) + for port in port_iterator(min_port, max_port) do local test_server = vim.loop.new_tcp() if test_server then local success = test_server:bind("127.0.0.1", port) @@ -47,6 +91,44 @@ function M.find_available_port(min_port, max_port) return nil end +---Bind AND listen on a single fresh TCP handle, returning that same handle. +---Binding then listening on one socket (instead of probing a throwaway socket and +---re-binding) is what makes a busy port detectable: libuv's bind() defers +---EADDRINUSE to listen(), so the listen() call is the real test, and keeping the +---handle we listened on removes the probe/rebind TOCTOU window. +---@param server TCPServer The server object whose connection handler to wire up +---@param port number Port to bind and listen on +---@return table|nil handle The bound+listening TCP handle, or nil on failure +---@return string|nil error Error message if failed +function M._bind_and_listen(server, port) + local handle = vim.loop.new_tcp() + if not handle then + return nil, "Failed to create TCP server" + end + + local bind_success, bind_err = handle:bind("127.0.0.1", port) + if not bind_success then + handle:close() + return nil, "Failed to bind to port " .. port .. ": " .. (bind_err or "unknown error") + end + + local listen_success, listen_err = handle:listen(128, function(err) + if err then + server.on_error("Listen error: " .. err) + return + end + + M._handle_new_connection(server) + end) + + if not listen_success then + handle:close() + return nil, "Failed to listen on port " .. port .. ": " .. (listen_err or "unknown error") + end + + return handle, nil +end + ---Create and start a TCP server ---@param config ClaudeCodeConfig Server configuration ---@param callbacks table Callback functions @@ -54,20 +136,13 @@ end ---@return TCPServer|nil server The server object, or nil on error ---@return string|nil error Error message if failed function M.create_server(config, callbacks, auth_token) - local port = M.find_available_port(config.port_range.min, config.port_range.max) - if not port then - return nil, "No available ports in range " .. config.port_range.min .. "-" .. config.port_range.max - end - - local tcp_server = vim.loop.new_tcp() - if not tcp_server then - return nil, "Failed to create TCP server" - end + local min_port = config.port_range.min + local max_port = config.port_range.max - -- Create server object + -- Build the server object up front so the listen callback can close over it. local server = { - server = tcp_server, - port = port, + server = nil, + port = nil, auth_token = auth_token, clients = {}, on_message = callbacks.on_message or function() end, @@ -76,28 +151,28 @@ function M.create_server(config, callbacks, auth_token) on_error = callbacks.on_error or function() end, } - local bind_success, bind_err = tcp_server:bind("127.0.0.1", port) - if not bind_success then - tcp_server:close() - return nil, "Failed to bind to port " .. port .. ": " .. (bind_err or "unknown error") - end - - -- Start listening - local listen_success, listen_err = tcp_server:listen(128, function(err) - if err then - callbacks.on_error("Listen error: " .. err) - return + -- Walk candidate ports and bind+listen on each until one succeeds. Retrying + -- here (rather than committing to a single pre-probed port) is what fixes #283: + -- when several Neovim instances race for the same port, the losers just advance + -- to the next candidate instead of giving up with EADDRINUSE. + local last_err + local tried_any = false + for port in port_iterator(min_port, max_port) do + tried_any = true + local handle, err = M._bind_and_listen(server, port) + if handle then + server.server = handle + server.port = port + return server, nil end - - M._handle_new_connection(server) - end) - - if not listen_success then - tcp_server:close() - return nil, "Failed to listen on port " .. port .. ": " .. (listen_err or "unknown error") + last_err = err end - return server, nil + if not tried_any then + return nil, "No available ports in range " .. min_port .. "-" .. max_port + end + return nil, + "Failed to bind to any port in range " .. min_port .. "-" .. max_port .. ": " .. (last_err or "unknown error") end ---Handle a new client connection diff --git a/lua/claudecode/server/utils.lua b/lua/claudecode/server/utils.lua index 6ae94302..5959b5cc 100644 --- a/lua/claudecode/server/utils.lua +++ b/lua/claudecode/server/utils.lua @@ -386,25 +386,6 @@ function M.apply_mask(data, mask) return table.concat(result) end -local rng_seeded = false - ----Shuffle an array in place using Fisher-Yates algorithm ----@param tbl table The array to shuffle -function M.shuffle_array(tbl) - -- Seed the PRNG once per process so port selection order varies across editor - -- starts. Seeding lazily on first use (rather than on every call, as a prior - -- version did with os.time()) avoids identical orderings within the same - -- second while still giving each process a distinct sequence. - if not rng_seeded then - math.randomseed(os.time()) - rng_seeded = true - end - for i = #tbl, 2, -1 do - local j = math.random(i) - tbl[i], tbl[j] = tbl[j], tbl[i] - end -end - ---Compare two strings in constant time relative to their length. ---Returns false immediately on a length mismatch; otherwise every byte is ---examined so total work does not depend on the matching-prefix length. diff --git a/scripts/repro_issue_283.lua b/scripts/repro_issue_283.lua new file mode 100644 index 00000000..06f5c1be --- /dev/null +++ b/scripts/repro_issue_283.lua @@ -0,0 +1,183 @@ +-- Reproduction / verification for issue #283: +-- "find_available_port probe-then-rebind races; create_server has no retry -> +-- EADDRINUSE with parallel Neovim instances (regression in #282)" +-- https://github.com/coder/claudecode.nvim/issues/283 +-- +-- WHAT ACTUALLY BROKE (more than the issue's own analysis): +-- +-- 1. LOST RNG SEEDING (the regression trigger, NEW in #282). +-- Before #282, find_available_port shuffled the port list via +-- utils.shuffle_array, which calls `math.randomseed(os.time())` the first +-- time it runs. So every Neovim process seeded its PRNG and picked a +-- different starting port (as long as two instances did not start in the +-- same wall-clock second). +-- #282 replaced the shuffle with a direct, UNSEEDED `math.random(port_count)` +-- and dropped the `require("claudecode.server.utils")` line, so nothing +-- seeds the PRNG anymore. LuaJIT's math.random has a FIXED default seed, so +-- EVERY fresh Neovim process now computes the IDENTICAL start_offset -> the +-- IDENTICAL port (48811 with the default 10000-65535 range). Two instances +-- therefore ALWAYS collide, deterministically, regardless of timing. +-- +-- 2. BROKEN PROBE (pre-existing, but now always hit). find_available_port +-- probes a candidate by binding a THROWAWAY socket, closing it, and +-- returning the port. libuv's uv_tcp_bind SWALLOWS EADDRINUSE: instead of +-- failing, it stores the error as a `delayed_error` and returns success, +-- deferring the failure to listen()/connect(). The probe never listens, so +-- its bind "succeeds" even when another process is actively LISTENING on the +-- port -> the probe reports a taken port as available. +-- +-- 3. NO RETRY (pre-existing, but now always hit). create_server selects the +-- port once; the deferred EADDRINUSE then surfaces at listen() (hence the +-- user's error text "Failed to listen on port ...", NOT "Failed to bind"), +-- and create_server gives up instead of advancing to the next port. +-- +-- This script proves mechanism (2)+(3) deterministically in a single process, +-- and exposes the deterministic port (1) for the cross-process check driven by +-- scripts/repro_issue_283.sh. +-- +-- Run from the repo root: +-- nvim --headless -u NONE -l scripts/repro_issue_283.lua # mechanism proof +-- REPRO283_MODE=port nvim --headless -u NONE -l scripts/repro_issue_283.lua +-- REPRO283_MODE=serve REPRO283_LABEL=A nvim --headless -u NONE -l scripts/repro_issue_283.lua +-- +-- Exit code (mechanism mode): 1 if the broken probe + listen-time EADDRINUSE +-- reproduce (#283 confirmed), 0 if the probe correctly rejects a busy port. + +local script_path = debug.getinfo(1, "S").source:sub(2) +local repo_root = vim.fn.fnamemodify(script_path, ":h:h") +vim.opt.rtp:prepend(repo_root) + +local function out(msg) + io.stdout:write(msg .. "\n") + io.stdout:flush() +end + +-- luv returns 0 (or a handle) on success and nil+err on failure. 0 is TRUTHY in +-- Lua, which is exactly why find_available_port's `if success then` accepts it. +local function ok_truthy(v) + return v and true or false +end + +local uv = vim.loop +local mode = vim.env.REPRO283_MODE or "mechanism" + +-- Match a real startup's PRNG draw order: requiring server/init.lua performs the +-- module-load draw `module_instance_id = math.random(10000, 99999)` BEFORE +-- find_available_port's own draw, so the port we compute equals what a real +-- :ClaudeCodeStart selects (48811 with the default range). +require("claudecode.server.init") +local tcp = require("claudecode.server.tcp") +local config = { port_range = { min = 10000, max = 65535 } } + +------------------------------------------------------------------------------- +-- MODE: port -- print the deterministically-selected port, then exit. +-- The harness runs this in several fresh processes and asserts they all match. +------------------------------------------------------------------------------- +if mode == "port" then + local port = tcp.find_available_port(config.port_range.min, config.port_range.max) + out("SELECTED_PORT=" .. tostring(port)) + vim.cmd("qa!") + return +end + +------------------------------------------------------------------------------- +-- MODE: serve -- start the REAL server (create_server) and stay alive, exactly +-- as lua/claudecode/server/init.lua does. Used for the two-instance end-to-end +-- reproduction. +------------------------------------------------------------------------------- +if mode == "serve" then + local label = vim.env.REPRO283_LABEL or "?" + local wait_ms = tonumber(vim.env.REPRO283_WAIT_MS or "") or 6000 + local server, err = tcp.create_server(config, {}, nil) + if server then + out(("INSTANCE_%s: LISTENING port=%d"):format(label, server.port)) + else + -- Mirror the exact init.lua user-facing wording. + out( + ("INSTANCE_%s: [ClaudeCode] [init] [ERROR] Failed to start Claude Code server: %s"):format(label, tostring(err)) + ) + end + vim.wait(wait_ms, function() + return false + end) + if server then + tcp.stop_server(server) + end + vim.cmd("qa!") + return +end + +------------------------------------------------------------------------------- +-- MODE: mechanism (default) -- in-process, deterministic proof that the probe +-- cannot detect an active listener and that EADDRINUSE surfaces at listen(). +------------------------------------------------------------------------------- +out("== issue #283 reproduction (broken probe + listen-time EADDRINUSE) ==") +out(("Neovim: %s"):format(tostring(vim.version()))) + +-- Stand up a real, actively-LISTENING socket on an OS-assigned free port. +local listener = uv.new_tcp() +listener:bind("127.0.0.1", 0) +local listen_ok = listener:listen(128, function() end) +assert(ok_truthy(listen_ok), "harness: could not start listener") +local P = listener:getsockname().port +out(("\nA real server is now LISTENING on 127.0.0.1:%d"):format(P)) + +-- STEP 1: run find_available_port's exact probe against the busy port. +local probe = uv.new_tcp() +local probe_bind = probe:bind("127.0.0.1", P) +probe:close() +local probe_says_available = ok_truthy(probe_bind) +out( + ("\n[probe] throwaway bind to busy port %d -> %s => find_available_port would treat it as %s"):format( + P, + tostring(probe_bind), + probe_says_available and "AVAILABLE (FALSE POSITIVE)" or "taken (correct)" + ) +) + +-- STEP 2: reproduce create_server's bind-then-listen on that same busy port. +local real = uv.new_tcp() +local bind_ok, bind_err = real:bind("127.0.0.1", P) +out(("[create_server] bind to busy port %d -> ok=%s err=%s"):format(P, tostring(bind_ok), tostring(bind_err))) +local lst_ok, lst_err = real:listen(128, function() end) +out(("[create_server] listen on busy port %d -> ok=%s err=%s"):format(P, tostring(lst_ok), tostring(lst_err))) + +local listen_failed_eaddrinuse = (not ok_truthy(lst_ok)) and (tostring(lst_err):match("EADDRINUSE") ~= nil) + +-- Cleanup +if not real:is_closing() then + real:close() +end +if not listener:is_closing() then + listener:close() +end + +out("\n== verdict ==") +out( + (" probe false-positive : %s"):format( + probe_says_available and "YES -- probe says a LISTENING port is available" or "no" + ) +) +out( + (" bind swallowed error : %s"):format(ok_truthy(bind_ok) and "YES -- bind() returned success on a busy port" or "no") +) +out( + (" listen() EADDRINUSE : %s"):format( + listen_failed_eaddrinuse and "YES -- error surfaces at listen(), matching the user's report" or "no" + ) +) + +local reproduced = probe_says_available and listen_failed_eaddrinuse +if reproduced then + out( + "\n=> #283 confirmed: the probe cannot detect an active listener (libuv defers EADDRINUSE\n" + .. " to listen()), so find_available_port returns a busy port and create_server fails\n" + .. " at listen() with no retry. Combined with the lost RNG seeding (see repro .sh),\n" + .. " every parallel Neovim instance deterministically collides on the same port." + ) +else + out("\n=> NOT reproduced: the probe correctly rejected the busy port on this platform/libuv build.") +end + +io.stdout:flush() +vim.cmd("cquit " .. (reproduced and 1 or 0)) diff --git a/scripts/repro_issue_283.sh b/scripts/repro_issue_283.sh new file mode 100755 index 00000000..79186139 --- /dev/null +++ b/scripts/repro_issue_283.sh @@ -0,0 +1,140 @@ +#!/usr/bin/env bash +# Reproduction / verification for issue #283: +# "find_available_port probe-then-rebind races; create_server has no retry -> +# EADDRINUSE with parallel Neovim instances (regression in #282)" +# https://github.com/coder/claudecode.nvim/issues/283 +# +# Drives three checks against the REAL plugin code: +# +# Part 1 - REGRESSION TRIGGER (lost RNG seeding): runs the port selector in +# several fresh Neovim processes and asserts they ALL pick the same +# port. Pre-#282 this varied per process (shuffle_array seeded the +# PRNG via os.time()); post-#282 it is deterministic, so every +# instance collides. +# +# Part 2 - MECHANISM (broken probe + listen-time EADDRINUSE): the in-process +# proof from repro_issue_283.lua (mechanism mode). +# +# Part 3 - END-TO-END: two real plugin servers (create_server). Instance A +# listens; Instance B fails with the user's exact error, +# "Failed to listen on port

: EADDRINUSE", on the SAME port. +# +# Usage (from repo root): +# scripts/repro_issue_283.sh +# NVIM=/path/to/nvim scripts/repro_issue_283.sh # to pin a Neovim binary +# +# Exit code: 1 if #283 reproduces (deterministic collision + B fails), else 0. + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LUA="$SCRIPT_DIR/repro_issue_283.lua" +NVIM="${NVIM:-nvim}" + +if ! command -v "$NVIM" >/dev/null 2>&1; then + echo "ERROR: nvim not found (set NVIM=/path/to/nvim)" >&2 + exit 2 +fi + +WORK="$(mktemp -d)" +A_OUT="$WORK/a.out" +A_PID="" +# shellcheck disable=SC2329 # invoked indirectly via `trap cleanup EXIT` below +cleanup() { + [ -n "$A_PID" ] && kill "$A_PID" 2>/dev/null + rm -rf "$WORK" +} +trap cleanup EXIT + +run_port() { + REPRO283_MODE=port "$NVIM" --headless -u NONE -l "$LUA" 2>/dev/null | + tr -d '\r' | sed -n 's/^SELECTED_PORT=//p' +} + +echo "############################################################" +echo "# issue #283 reproduction" +echo "# nvim: $("$NVIM" --version | head -1)" +echo "############################################################" + +############################################# +# Part 1: cross-process determinism +############################################# +echo +echo "=== Part 1: is port selection deterministic across fresh processes? ===" +P1="$(run_port)" +P2="$(run_port)" +P3="$(run_port)" +echo " fresh process #1 -> $P1" +echo " fresh process #2 -> $P2" +echo " fresh process #3 -> $P3" +DETERMINISTIC=0 +if [ -n "$P1" ] && [ "$P1" = "$P2" ] && [ "$P2" = "$P3" ]; then + DETERMINISTIC=1 + echo " RESULT: DETERMINISTIC -- every fresh Neovim picks port $P1 (regression: lost RNG seeding)" +else + echo " RESULT: varies across processes (RNG appears seeded)" +fi + +############################################# +# Part 2: in-process mechanism proof +############################################# +echo +echo "=== Part 2: broken probe + listen-time EADDRINUSE (in-process) ===" +"$NVIM" --headless -u NONE -l "$LUA" +MECH_RC=$? +echo " (mechanism script exit code: $MECH_RC; 1 = reproduced)" + +############################################# +# Part 3: two real instances, end-to-end +############################################# +echo +echo "=== Part 3: two real plugin servers in parallel ===" +REPRO283_MODE=serve REPRO283_LABEL=A REPRO283_WAIT_MS=8000 \ + "$NVIM" --headless -u NONE -l "$LUA" >"$A_OUT" 2>&1 & +A_PID=$! + +# Wait until A reports its listening port. +A_LINE="" +for _ in $(seq 1 80); do + A_LINE="$(tr -d '\r' <"$A_OUT" | sed -n 's/.*\(INSTANCE_A: .*\)/\1/p')" + [ -n "$A_LINE" ] && break + sleep 0.1 +done +echo " A: ${A_LINE:-}" + +B_OUT="$(REPRO283_MODE=serve REPRO283_LABEL=B REPRO283_WAIT_MS=200 \ + "$NVIM" --headless -u NONE -l "$LUA" 2>&1 | tr -d '\r' | sed -n 's/.*\(INSTANCE_B: .*\)/\1/p')" +echo " B: ${B_OUT:-}" + +A_PORT="$(printf '%s' "$A_LINE" | sed -n 's/.*LISTENING port=\([0-9]*\).*/\1/p')" +E2E=0 +if printf '%s' "$B_OUT" | grep -q "Failed to listen on port .*EADDRINUSE"; then + B_PORT="$(printf '%s' "$B_OUT" | sed -n 's/.*Failed to listen on port \([0-9]*\).*/\1/p')" + if [ -n "$A_PORT" ] && [ "$A_PORT" = "$B_PORT" ]; then + E2E=1 + echo " RESULT: B failed with EADDRINUSE on the SAME port A holds ($A_PORT)" + else + echo " RESULT: B failed with EADDRINUSE but on port $B_PORT (A holds $A_PORT)" + E2E=1 + fi +else + echo " RESULT: B did NOT collide (started on a different port)" +fi + +############################################# +# Verdict +############################################# +echo +echo "=== VERDICT ===" +if [ "$DETERMINISTIC" -eq 1 ] && [ "$MECH_RC" -eq 1 ] && [ "$E2E" -eq 1 ]; then + echo "#283 REPRODUCED: parallel Neovim instances deterministically pick the same" + echo "port ($P1); the probe cannot detect the active listener; create_server fails" + echo "at listen() with EADDRINUSE and no retry." + exit 1 +else + echo "#283 NOT fully reproduced on this environment:" + echo " deterministic port = $DETERMINISTIC, mechanism = $((MECH_RC == 1 ? 1 : 0)), end-to-end = $E2E" + echo "(After a fix that seeds the RNG + retries on EADDRINUSE, Part 1 should vary" + echo " and Part 3's instance B should start on a different port.)" + exit 0 +fi diff --git a/tests/unit/server/tcp_spec.lua b/tests/unit/server/tcp_spec.lua index 9adcce03..a6b42563 100644 --- a/tests/unit/server/tcp_spec.lua +++ b/tests/unit/server/tcp_spec.lua @@ -72,6 +72,127 @@ describe("TCP server disconnect handling", function() end) end) + -- Regression tests for #283: create_server must retry across candidate ports + -- when a port is taken, because the bind-only probe cannot detect an active + -- listener (libuv defers EADDRINUSE to listen()). + describe("create_server port-collision retry (#283)", function() + local original_new_tcp + local original_random + + before_each(function() + original_new_tcp = vim.loop.new_tcp + original_random = math.random + -- Deterministic start_offset 0 => candidates scanned in ascending order. + rawset(math, "random", function() + return 1 + end) + end) + + after_each(function() + vim.loop.new_tcp = original_new_tcp + rawset(math, "random", original_random) + end) + + -- bind_result/listen_result: true => succeed (return 0, as luv does); + -- a string => fail with that message (return nil, msg). + local function make_handle(records, bind_result, listen_result) + local handle = { closed = false } + handle.bind = function(_, _host, port) + handle.bound_port = port + if bind_result == true then + return 0 + end + return nil, bind_result + end + handle.listen = function(_, _backlog, cb) + handle.listen_cb = cb + if listen_result == true then + return 0 + end + return nil, listen_result + end + handle.close = function() + handle.closed = true + end + handle.is_closing = function() + return handle.closed + end + table.insert(records, handle) + return handle + end + + local function new_tcp_from_specs(records, specs) + local i = 0 + return function() + i = i + 1 + local spec = specs[i] or { bind = true, listen = true } + return make_handle(records, spec.bind, spec.listen) + end + end + + it("advances to the next port when listen() reports EADDRINUSE", function() + local handles = {} + vim.loop.new_tcp = new_tcp_from_specs(handles, { + { bind = true, listen = "EADDRINUSE: address already in use" }, -- 10000 busy + { bind = true, listen = true }, -- 10001 free + }) + + local server, err = tcp.create_server({ port_range = { min = 10000, max = 10002 } }, {}, nil) + + assert.is_nil(err) + assert.is_table(server) + assert.are.equal(10001, server.port) + assert.is_true(handles[1].closed) -- busy handle discarded + assert.are.equal(handles[2], server.server) -- the listening handle is kept + assert.is_false(handles[2].closed) + end) + + it("returns an error after exhausting the range, closing every handle", function() + local handles = {} + vim.loop.new_tcp = function() + return make_handle(handles, true, "EADDRINUSE: address already in use") + end + + local server, err = tcp.create_server({ port_range = { min = 10000, max = 10002 } }, {}, nil) + + assert.is_nil(server) + assert.is_string(err) + assert.is_truthy(err:find("Failed to bind to any port in range 10000%-10002")) + assert.are.equal(3, #handles) -- every candidate tried exactly once + for _, h in ipairs(handles) do + assert.is_true(h.closed) + end + end) + + it("treats bind-success-but-listen-EADDRINUSE as unavailable", function() + local handles = {} + vim.loop.new_tcp = function() + return make_handle(handles, true, "EADDRINUSE: address already in use") + end + + local server, err = tcp.create_server({ port_range = { min = 10000, max = 10000 } }, {}, nil) + + assert.is_nil(server) + assert.is_string(err) + assert.is_truthy(err:find("Failed to listen on port 10000")) + assert.is_true(handles[1].closed) + end) + + it("keeps the exact handle whose listen() succeeded", function() + local handles = {} + vim.loop.new_tcp = function() + return make_handle(handles, true, true) + end + + local server, err = tcp.create_server({ port_range = { min = 10000, max = 10000 } }, {}, nil) + + assert.is_nil(err) + assert.are.equal(handles[1], server.server) + assert.are.equal(10000, server.port) + assert.is_function(handles[1].listen_cb) + end) + end) + it("should call on_disconnect and remove client on EOF", function() local callbacks = { on_message = spy.new(function() end),