From 89ef26527fe0d4181c61ccf991bd7b65825f72d7 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 16:56:53 +0800 Subject: [PATCH 1/7] test: extend ui-smoke with g-code execution and endpoint check Each per-GUI test now also drives estop reset, machine on, home all, mode auto, program_open + auto(RUN) on a tiny shared smoke.ngc, waits for sustained INTERP_IDLE, and asserts stat.position delta against --expect-delta-mm 1,1,0 converted via stat.linear_units so the same arg works on inch (axis, touchy) and mm (gmoccapy, qtdragon) sims. State/mode commands use ensure_state/ensure_mode helpers with a retry-and-stability pattern: gmoccapy and qtdragon re-issue their own mode commands during startup and can revert task_mode AUTO -> MANUAL right after we set it. The helpers wait for the desired state, then re-check after STATE_STABILITY_S; on revert they retry up to STATE_RETRY_BUDGET times. Intermediate timeouts use a quiet variant so spurious UI_SMOKE_FAIL lines do not pollute the log during retries (checkresult.sh greps for ^UI_SMOKE_FAIL on any line). smoke.ngc is G21 G91 G0 X1 Y1 G90 M2 - relative move in mm, sim- agnostic. The driver snapshots stat.position[:3] after homing and checks (final - start) against the converted delta, sidestepping each sim's HOME offset. Adds python3-zmq and python3-opencv to debian/control.top.in under !nocheck: qtdragon's hal_bridge and the camview widget segfault on startup without them, which is invisible to the connect-only Phase 1 smoke but breaks the run-program path before the program can start. 5 consecutive local runs all green at 2m43s wall each. 
--- debian/control.top.in | 2 + tests/ui-smoke/_lib/drive.py | 293 +++++++++++++++++++++++++++++++- tests/ui-smoke/_lib/launch.sh | 15 +- tests/ui-smoke/_lib/run-gui.sh | 8 +- tests/ui-smoke/_lib/smoke.ngc | 13 ++ tests/ui-smoke/axis/test.sh | 4 +- tests/ui-smoke/gmoccapy/test.sh | 4 +- tests/ui-smoke/qtdragon/test.sh | 4 +- tests/ui-smoke/touchy/test.sh | 4 +- 9 files changed, 330 insertions(+), 17 deletions(-) create mode 100644 tests/ui-smoke/_lib/smoke.ngc diff --git a/debian/control.top.in b/debian/control.top.in index 1246fcdabfa..8b7a66ebed0 100644 --- a/debian/control.top.in +++ b/debian/control.top.in @@ -59,6 +59,8 @@ Build-Depends: python3-dbus , python3-dbus.mainloop.pyqt5 , python3-qtpy , + python3-zmq , + python3-opencv , python3-cairo , python3-gi , python3-gi-cairo , diff --git a/tests/ui-smoke/_lib/drive.py b/tests/ui-smoke/_lib/drive.py index 1ee90a42234..06becabff45 100755 --- a/tests/ui-smoke/_lib/drive.py +++ b/tests/ui-smoke/_lib/drive.py @@ -1,15 +1,36 @@ #!/usr/bin/env python3 -# Minimal UI smoke driver: confirm linuxcnc task came up and the GUI -# did not crash. The smoke layer answers Bertho's "does it start" -# question only; functional behaviour (home, run a file, verify -# position) belongs in tests/ui-functional/ (Phase 2). +# UI smoke driver. +# +# Default mode (Phase 1): confirm linuxcnc task came up and the GUI did +# not crash. The driver only proves the GUI started and NML is reachable. +# +# --run-program mode (Phase 2): also estop-reset, machine-on, home, +# program_open + auto(RUN), wait for sustained INTERP_IDLE, and assert +# (stat.position_after - stat.position_after_home) equals --expect-delta-mm +# converted to machine units via stat.linear_units. Snapshot-and-delta +# sidesteps per-sim HOME offsets; mm-input + linear_units conversion +# sidesteps per-sim LINEAR_UNITS (axis and touchy sims are inch). 
+import argparse import linuxcnc import sys import time CONNECT_TIMEOUT_S = 60.0 SETTLE_S = 3.0 +SETTLE_POLLS = 5 +POLL_INTERVAL_S = 0.01 +# Per-attempt wait timeout for ensure_state / ensure_mode. The state +# normally lands well under 1s; profiling showed nothing benefits from +# more than 3s here, and shorter timeouts trim wall time when a retry +# is needed (notably gmoccapy reverting task_mode AUTO -> MANUAL). +ENSURE_ATTEMPT_TIMEOUT_S = 3.0 +# After the desired task_state / task_mode is reached, re-check after +# this long. Some GUIs (notably gmoccapy and qtdragon) run their own +# startup commands that can revert a state we just set; the post-reach +# stability check catches that. +STATE_STABILITY_S = 0.5 +STATE_RETRY_BUDGET = 6 def connect_and_wait_ready(timeout): @@ -17,7 +38,12 @@ def connect_and_wait_ready(timeout): reports a non-negative echo_serial_number. The NML status buffer can be 'invalid err=3' for the first ~30s while linuxcncsvr is still initialising; recreate the stat object on every iteration so - a stale invalid buffer does not stick after linuxcncsvr is ready.""" + a stale invalid buffer does not stick after linuxcncsvr is ready. + + Catch the full Exception hierarchy: in early startup stat.poll() + can raise SystemError ('error return without exception set') when + the underlying C function reports failure without setting a Python + exception. Treat that the same as linuxcnc.error and retry.""" deadline = time.monotonic() + timeout last_err = None while time.monotonic() < deadline: @@ -26,7 +52,7 @@ def connect_and_wait_ready(timeout): stat.poll() if stat.echo_serial_number >= 0: return linuxcnc.command(), stat - except linuxcnc.error as e: + except Exception as e: last_err = e time.sleep(0.5) sys.stderr.write( @@ -35,7 +61,256 @@ def connect_and_wait_ready(timeout): return None, None +def wait_until_quiet(stat, predicate, timeout): + """Poll stat until predicate(stat) is true. Returns True on success, + False on timeout. 
Never writes UI_SMOKE_FAIL: caller decides whether + a timeout here is fatal (and writes its own UI_SMOKE_FAIL line) or + is part of a retry that may still succeed. checkresult.sh greps for + any '^UI_SMOKE_FAIL' line, so spurious emissions during retries + must not happen.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + stat.poll() + if predicate(stat): + return True + time.sleep(POLL_INTERVAL_S) + return False + + +def wait_until(stat, predicate, timeout, label): + """Like wait_until_quiet but emits UI_SMOKE_FAIL on timeout. Use + only when timeout is fatal at the call site (no retry above).""" + if wait_until_quiet(stat, predicate, timeout): + return True + sys.stderr.write(f"UI_SMOKE_FAIL: timeout waiting for {label} after {timeout}s\n") + return False + + +def home_all(cmd, stat, timeout): + """Home every joint. Uses c.home(-1) which respects HOME_SEQUENCE + if configured. Caller must have already ensured task_state is ON + via ensure_state; otherwise the home command is rejected with + 'cannot be executed until the machine is out of E-stop and turned + on'. Mode change uses ensure_mode so a GUI that reverts mode mid- + sequence (gmoccapy) is detected and retried.""" + if not ensure_mode(cmd, stat, linuxcnc.MODE_MANUAL, "MODE_MANUAL"): + return False + cmd.teleop_enable(0) + cmd.wait_complete() + stat.poll() + njoints = stat.joints + cmd.home(-1) + if not wait_until( + stat, + lambda s: all(s.homed[i] for i in range(njoints)), + timeout, "all joints homed"): + return False + cmd.teleop_enable(1) + cmd.wait_complete() + return True + + +def wait_state(stat, target_state, timeout, label): + """Poll until stat.task_state == target_state. wait_complete on a + state-change command only proves task ack'd the NML message, not + that the underlying state machine has transitioned. 
Polling + task_state is the only deterministic signal.""" + return wait_until( + stat, + lambda s: s.task_state == target_state, + timeout, label) + + +def ensure_state(cmd, stat, target_state, label): + """Issue c.state(target_state), wait for stat.task_state to reach + target_state, then verify it stays there across STATE_STABILITY_S. + If the GUI reverts (e.g. gmoccapy re-issues its own ESTOP on + startup), retry up to STATE_RETRY_BUDGET times. Returns True on + stable success, False on exhausted budget.""" + for attempt in range(1, STATE_RETRY_BUDGET + 1): + cmd.state(target_state) + cmd.wait_complete() + if not wait_until_quiet( + stat, lambda s: s.task_state == target_state, + ENSURE_ATTEMPT_TIMEOUT_S): + sys.stderr.write( + f"WARN: {label} not reached on attempt {attempt}, retrying\n") + continue + time.sleep(STATE_STABILITY_S) + stat.poll() + if stat.task_state == target_state: + return True + sys.stderr.write( + f"WARN: {label} reverted to task_state={stat.task_state} " + f"after attempt {attempt}, retrying\n") + sys.stderr.write( + f"UI_SMOKE_FAIL: {label} did not hold stable across " + f"{STATE_RETRY_BUDGET} attempts\n") + return False + + +def ensure_mode(cmd, stat, target_mode, label): + """Same retry+stability pattern as ensure_state, for task_mode.""" + for attempt in range(1, STATE_RETRY_BUDGET + 1): + cmd.mode(target_mode) + cmd.wait_complete() + if not wait_until_quiet( + stat, lambda s: s.task_mode == target_mode, + ENSURE_ATTEMPT_TIMEOUT_S): + sys.stderr.write( + f"WARN: {label} not reached on attempt {attempt}, retrying\n") + continue + time.sleep(STATE_STABILITY_S) + stat.poll() + if stat.task_mode == target_mode: + return True + sys.stderr.write( + f"WARN: {label} reverted to task_mode={stat.task_mode} " + f"after attempt {attempt}, retrying\n") + sys.stderr.write( + f"UI_SMOKE_FAIL: {label} did not hold stable across " + f"{STATE_RETRY_BUDGET} attempts\n") + return False + + +PROGRAM_START_TIMEOUT_S = 5.0 + + +def snapshot(stat): + 
"""Best-effort one-line summary of state fields relevant to Phase 2 + debugging. Caller is expected to have just polled.""" + return ( + f"task_state={stat.task_state} task_mode={stat.task_mode} " + f"interp_state={stat.interp_state} exec_state={stat.exec_state} " + f"motion_type={stat.motion_type} queue={stat.queue} " + f"queued_mdi_commands={stat.queued_mdi_commands} " + f"file={stat.file!r}") + + +def wait_program_started(stat, timeout): + """Wait until interp_state leaves INTERP_IDLE, i.e. the program + has actually begun executing. Without this guard, a short program + can finish before wait_program_idle gets its first poll, and the + settle-window then mistakes the pre-start IDLE for the post-end + IDLE; we then read stat.position at (0,0,0).""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + stat.poll() + if stat.interp_state != linuxcnc.INTERP_IDLE: + return True + time.sleep(POLL_INTERVAL_S) + stat.poll() + sys.stderr.write( + f"UI_SMOKE_FAIL: program did not start within {timeout}s " + f"(interp_state stayed INTERP_IDLE) state: {snapshot(stat)}\n") + return False + + +def wait_program_idle(stat, timeout): + """Wait until interp_state returns to INTERP_IDLE and the motion + queue is drained for SETTLE_POLLS consecutive polls. 
Caller must + have already proven the program started via wait_program_started; + otherwise this returns immediately on the pre-start IDLE.""" + deadline = time.monotonic() + timeout + consecutive = 0 + while time.monotonic() < deadline: + stat.poll() + idle = ( + stat.interp_state == linuxcnc.INTERP_IDLE + and stat.queue == 0 + ) + if idle: + consecutive += 1 + if consecutive >= SETTLE_POLLS: + return True + else: + consecutive = 0 + time.sleep(POLL_INTERVAL_S) + sys.stderr.write(f"UI_SMOKE_FAIL: program did not reach idle within {timeout}s\n") + return False + + +def run_program(cmd, stat, ngc_path, expect_delta_mm, tol, run_timeout): + """Estop reset, machine on, home, snapshot position, load + run ngc, + verify (final - start) delta matches expect_delta_mm converted to + machine units.""" + if not ensure_state(cmd, stat, linuxcnc.STATE_ESTOP_RESET, + "STATE_ESTOP_RESET"): + return False + if not ensure_state(cmd, stat, linuxcnc.STATE_ON, "STATE_ON"): + return False + + if not home_all(cmd, stat, timeout=60.0): + return False + + if not ensure_mode(cmd, stat, linuxcnc.MODE_AUTO, "MODE_AUTO"): + return False + + # Snapshot start position AFTER homing + AFTER mode transition. The + # GUI might re-issue mode commands during its own startup; doing the + # snapshot last means we record the position right before AUTO_RUN. + stat.poll() + start_pos = stat.position[:3] + + cmd.program_open(ngc_path) + cmd.wait_complete() + # No wait_complete after auto(AUTO_RUN, 0): wait_complete blocks + # until the operation finishes, which for AUTO_RUN means the whole + # program completes. That would race wait_program_started; by the + # time we polled, interp would already be back at INTERP_IDLE. + cmd.auto(linuxcnc.AUTO_RUN, 0) + + if not wait_program_started(stat, PROGRAM_START_TIMEOUT_S): + return False + if not wait_program_idle(stat, run_timeout): + return False + + # stat.linear_units: machine units per mm. mm machine -> 1.0; + # inch machine -> 1/25.4 = 0.03937. 
Multiplying the expected mm + # delta by linear_units gives the expected delta in machine units, + # which is what stat.position reports. + units_per_mm = stat.linear_units + expect_machine = [d * units_per_mm for d in expect_delta_mm] + final_pos = stat.position[:3] + actual_delta = [final_pos[i] - start_pos[i] for i in range(3)] + err = [abs(actual_delta[i] - expect_machine[i]) for i in range(3)] + if any(e > tol for e in err): + sys.stderr.write( + f"UI_SMOKE_FAIL: delta mismatch " + f"expect_mm={expect_delta_mm} units_per_mm={units_per_mm} " + f"expect_machine={expect_machine} " + f"start={start_pos} final={final_pos} " + f"actual_delta={actual_delta} err={err} tol={tol}\n") + return False + return True + + +def parse_xyz(s): + parts = [float(p) for p in s.split(",")] + if len(parts) != 3: + raise argparse.ArgumentTypeError("expected x,y,z (three comma-separated floats)") + return parts + + def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--run-program", metavar="NGC", + help="g-code file to load and run (enables Phase 2 mode)") + ap.add_argument("--expect-delta-mm", type=parse_xyz, metavar="DX,DY,DZ", + help="expected XYZ delta in mm from post-home position " + "(required with --run-program). 
Driver converts to " + "machine units via stat.linear_units so the same " + "value works on inch and mm sims.") + ap.add_argument("--tol", type=float, default=1e-4, + help="position tolerance per axis in machine units " + "(default: 1e-4)") + ap.add_argument("--run-timeout", type=float, default=60.0, + help="program-completion timeout in seconds (default: 60)") + args = ap.parse_args() + + if args.run_program and args.expect_delta_mm is None: + ap.error("--run-program requires --expect-delta-mm DX,DY,DZ") + cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S) if cmd is None: return 1 @@ -54,6 +329,12 @@ def main(): sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n") return 1 + if args.run_program: + if not run_program(cmd, stat, + args.run_program, args.expect_delta_mm, + args.tol, args.run_timeout): + return 1 + print("UI_SMOKE_OK") return 0 diff --git a/tests/ui-smoke/_lib/launch.sh b/tests/ui-smoke/_lib/launch.sh index 5f3672e786c..1db06423dd1 100755 --- a/tests/ui-smoke/_lib/launch.sh +++ b/tests/ui-smoke/_lib/launch.sh @@ -16,6 +16,8 @@ set -u CONFIG_INI="$1" +shift +DRIVER_ARGS=("$@") TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -28,9 +30,10 @@ rm -f ui-smoke.out ui-smoke.err linuxcnc.pid bash "$LIB_DIR/cleanup-runtime.sh" # Launch linuxcnc inside xvfb-run. The outer timeout is a safety net -# so a wedged GUI cannot hang CI. -LINUXCNC_TIMEOUT=240 -DRIVER_TIMEOUT=90 +# so a wedged GUI cannot hang CI. Driver timeout covers connect (60s) +# + GUI settle (3s) + optional Phase 2 run (estop/home/program ~90s). +LINUXCNC_TIMEOUT=300 +DRIVER_TIMEOUT=180 # Force software OpenGL (Mesa llvmpipe). CI runners have no GPU and # Qt/GL widgets segfault under hardware GL with no display. The Qt- @@ -71,7 +74,9 @@ xvfb-run -a --server-args="-screen 0 1024x768x24" \ # The driver polls NML readiness itself (BsAtHome review: # avoid real-clock waits where status polling will do). 
- timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" >ui-smoke.out 2>ui-smoke.err + # Driver args (Phase 2: --run-program/--expect-delta-mm) come through + # as positional $@ from the inner bash -c. + timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" "$@" >ui-smoke.out 2>ui-smoke.err DRIVE_RC=$? # Clean shutdown: GUI-specific quit first (lets linuxcnc end @@ -97,7 +102,7 @@ xvfb-run -a --server-args="-screen 0 1024x768x24" \ fi exit "$DRIVE_RC" - ' + ' _launch "${DRIVER_ARGS[@]}" RC=$? # Surface logs so checkresult and CI artifact upload can see them. diff --git a/tests/ui-smoke/_lib/run-gui.sh b/tests/ui-smoke/_lib/run-gui.sh index 01840944bc0..9f4f596afe0 100755 --- a/tests/ui-smoke/_lib/run-gui.sh +++ b/tests/ui-smoke/_lib/run-gui.sh @@ -1,9 +1,10 @@ #!/bin/bash # Dispatcher invoked from each per-GUI test.sh. Resolves an INI path # under configs/sim/ and execs launch.sh in the caller's test dir. -# Usage: run-gui.sh +# Usage: run-gui.sh [driver-args...] # e.g. run-gui.sh axis/axis.ini # run-gui.sh qtdragon/qtdragon_xyz/qtdragon_metric.ini +# run-gui.sh axis/axis.ini --run-program /abs/smoke.ngc --expect-delta-mm 1,1,0 set -u @@ -11,5 +12,8 @@ LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" CONFIGS_DIR="$(cd "$LIB_DIR/../../../configs/sim" && pwd)" +INI_REL="$1" +shift + export TEST_DIR -exec "$LIB_DIR/launch.sh" "$CONFIGS_DIR/$1" +exec "$LIB_DIR/launch.sh" "$CONFIGS_DIR/$INI_REL" "$@" diff --git a/tests/ui-smoke/_lib/smoke.ngc b/tests/ui-smoke/_lib/smoke.ngc new file mode 100644 index 00000000000..036bff7bd09 --- /dev/null +++ b/tests/ui-smoke/_lib/smoke.ngc @@ -0,0 +1,13 @@ +(Phase 2 UI smoke program. Force mm input units with G21 so the move) +(commanded here is the same physical distance regardless of the sim) +(config's LINEAR_UNITS. Use G91 relative so the move is independent of) +(each sim's HOME position, then return to G90 absolute and end with M2.) 
+(stat.position is still reported in the machine's LINEAR_UNITS; the) +(driver converts the expected mm delta to machine units via) +(stat.linear_units before comparing. Note: axis and touchy sims are) +(inch machines, so a 1mm move shows as ~0.03937 in stat.position.) +G21 +G91 +G0 X1 Y1 +G90 +M2 diff --git a/tests/ui-smoke/axis/test.sh b/tests/ui-smoke/axis/test.sh index efa45dd9590..ba4fddfc6d0 100755 --- a/tests/ui-smoke/axis/test.sh +++ b/tests/ui-smoke/axis/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" axis/axis.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec "$LIB_DIR/run-gui.sh" axis/axis.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/gmoccapy/test.sh b/tests/ui-smoke/gmoccapy/test.sh index 29adc2b9397..de93beaed99 100755 --- a/tests/ui-smoke/gmoccapy/test.sh +++ b/tests/ui-smoke/gmoccapy/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" gmoccapy/gmoccapy.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec "$LIB_DIR/run-gui.sh" gmoccapy/gmoccapy.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index 7df11989920..a6a72e01c3b 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" qtdragon/qtdragon_xyz/qtdragon_metric.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec "$LIB_DIR/run-gui.sh" qtdragon/qtdragon_xyz/qtdragon_metric.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/touchy/test.sh b/tests/ui-smoke/touchy/test.sh index 4b9c904d700..831fe81b346 100755 --- a/tests/ui-smoke/touchy/test.sh +++ b/tests/ui-smoke/touchy/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" touchy/touchy.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec 
"$LIB_DIR/run-gui.sh" touchy/touchy.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From bc0d86db5aa733720437fb35945beb2c2843c073 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 17:20:28 +0800 Subject: [PATCH 2/7] test: home_all retries home(-1) and dumps stat on timeout CI run hit 'timeout waiting for all joints homed after 60.0s' on qtdragon only; locally homing completes in <4s on all four sims. Likely cause: same task_mode revert race as ensure_mode catches for MODE_AUTO, except home() lives outside that helper, so a mid-sequence mode flip back to a non-MANUAL mode silently drops the home command. Wrap the post-c.home(-1) wait in a poll loop that re-asserts MANUAL and re-issues home(-1) every HOME_REISSUE_S (10s). Final timeout now also dumps homed[], task_state, task_mode and exec_state so the next CI failure has actionable diagnostics. --- tests/ui-smoke/_lib/drive.py | 41 ++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/tests/ui-smoke/_lib/drive.py b/tests/ui-smoke/_lib/drive.py index 06becabff45..24a5893bc82 100755 --- a/tests/ui-smoke/_lib/drive.py +++ b/tests/ui-smoke/_lib/drive.py @@ -86,13 +86,18 @@ def wait_until(stat, predicate, timeout, label): return False +HOME_REISSUE_S = 10.0 + + def home_all(cmd, stat, timeout): """Home every joint. Uses c.home(-1) which respects HOME_SEQUENCE if configured. Caller must have already ensured task_state is ON via ensure_state; otherwise the home command is rejected with 'cannot be executed until the machine is out of E-stop and turned on'. Mode change uses ensure_mode so a GUI that reverts mode mid- - sequence (gmoccapy) is detected and retried.""" + sequence (gmoccapy) is detected and retried. 
The outer poll loop + re-issues c.home(-1) every HOME_REISSUE_S in case a GUI swallowed + the first one by switching mode after we set it (qtdragon CI).""" if not ensure_mode(cmd, stat, linuxcnc.MODE_MANUAL, "MODE_MANUAL"): return False cmd.teleop_enable(0) @@ -100,14 +105,32 @@ def home_all(cmd, stat, timeout): stat.poll() njoints = stat.joints cmd.home(-1) - if not wait_until( - stat, - lambda s: all(s.homed[i] for i in range(njoints)), - timeout, "all joints homed"): - return False - cmd.teleop_enable(1) - cmd.wait_complete() - return True + + deadline = time.monotonic() + timeout + next_reissue = time.monotonic() + HOME_REISSUE_S + while time.monotonic() < deadline: + stat.poll() + if all(stat.homed[i] for i in range(njoints)): + cmd.teleop_enable(1) + cmd.wait_complete() + return True + if time.monotonic() >= next_reissue: + # Re-assert MANUAL in case it got reverted, then re-home. + cmd.mode(linuxcnc.MODE_MANUAL) + cmd.wait_complete() + cmd.home(-1) + sys.stderr.write( + f"WARN: re-issued home(-1); homed={list(stat.homed[:njoints])} " + f"task_state={stat.task_state} task_mode={stat.task_mode}\n") + next_reissue = time.monotonic() + HOME_REISSUE_S + time.sleep(POLL_INTERVAL_S) + stat.poll() + sys.stderr.write( + f"UI_SMOKE_FAIL: timeout waiting for all joints homed after " + f"{timeout}s; homed={list(stat.homed[:njoints])} " + f"task_state={stat.task_state} task_mode={stat.task_mode} " + f"exec_state={stat.exec_state} njoints={njoints}\n") + return False def wait_state(stat, target_state, timeout, label): From b51642962e1a6595248af39eb9d8bb7ee1bc8eb9 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 17:44:06 +0800 Subject: [PATCH 3/7] test: qtdragon ui-smoke runs against writable config mirror CI run hit a PermissionError in qtvcp's logger when it tried to open configs/sim/qtdragon/qtdragon_xyz/qtdragon.log for write: the GitHub Actions workspace is mounted read-only for the docker build user, 
and qtvcp resolves LOG_FILE = qtdragon.log into the config dir. hal_bridge then exits, linuxcnc tears down, and the driver retries ESTOP_RESET until the budget is exhausted. qtdragon test.sh now mirrors the qtdragon_xyz config dir to a mktemp directory, seds LOG_FILE to ~/qtdragon.log, and passes the absolute INI path to run-gui.sh. run-gui.sh treats any path starting with / as absolute; everything else still resolves under configs/sim. Trap cleans the tmp dir on exit so the working tree stays clean. Does not touch the shipped qtdragon config to avoid changing default behaviour for real users. The same fix would work for any other config that turns out to write into its own dir on CI. --- tests/ui-smoke/_lib/run-gui.sh | 13 +++++++++++-- tests/ui-smoke/qtdragon/test.sh | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/ui-smoke/_lib/run-gui.sh b/tests/ui-smoke/_lib/run-gui.sh index 9f4f596afe0..980dff7f51b 100755 --- a/tests/ui-smoke/_lib/run-gui.sh +++ b/tests/ui-smoke/_lib/run-gui.sh @@ -12,8 +12,17 @@ LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" CONFIGS_DIR="$(cd "$LIB_DIR/../../../configs/sim" && pwd)" -INI_REL="$1" +INI_ARG="$1" shift +# Accept either a relative path under configs/sim/ or an absolute path. +# Absolute paths are used by tests that need to point at a writable +# mirror of a shipped config (qtdragon writes a log file inside the +# config dir, which is read-only on CI). 
+case "$INI_ARG" in + /*) INI_PATH="$INI_ARG" ;; + *) INI_PATH="$CONFIGS_DIR/$INI_ARG" ;; +esac + export TEST_DIR -exec "$LIB_DIR/launch.sh" "$CONFIGS_DIR/$INI_REL" "$@" +exec "$LIB_DIR/launch.sh" "$INI_PATH" "$@" diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index a6a72e01c3b..d2cedd8d5c0 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -1,4 +1,21 @@ #!/bin/bash +# qtdragon's qtvcp logger writes its log file (path from INI [DISPLAY] +# LOG_FILE) into the config directory. CI mounts the workspace read- +# only for the runtime user, so a relative LOG_FILE like 'qtdragon.log' +# resolves to a path qtvcp cannot create, hal_bridge then exits, and +# linuxcnc tears down before our driver can do anything. Mirror the +# config dir to a writable tmp location and patch LOG_FILE to be +# rooted at $HOME so the log lands in a directory we can write to. +set -u + LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" -exec "$LIB_DIR/run-gui.sh" qtdragon/qtdragon_xyz/qtdragon_metric.ini \ +SRC_DIR="$(cd "$LIB_DIR/../../../configs/sim/qtdragon/qtdragon_xyz" && pwd)" + +WORK_DIR="$(mktemp -d -t ui-smoke-qtdragon.XXXXXX)" +trap 'rm -rf "$WORK_DIR"' EXIT +cp -r "$SRC_DIR/." "$WORK_DIR/" +sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ + "$WORK_DIR/qtdragon_metric.ini" + +exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From 7206b82e045f836c0cded1e360a7a6a96f623aff Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 18:18:57 +0800 Subject: [PATCH 4/7] test: drop python3-opencv dep and set QT_XCB_GL_INTEGRATION=xcb_egl Ubuntu 24.04 rip-and-test runs hit a qtvcp segfault after the log- permission fix let qtvcp get further than Phase 1 had. Debian package-arch passes the same code. 
Two known asymmetries match: - python3-opencv on Ubuntu pulls Qt5 GUI bits whose cv2/qt/plugins directory overrides the system PyQt5 platform plugin path under xvfb (opencv-python issue #572, Qt Forum 119109). qtvcp's camview_widget tolerates ImportError on cv2 and just logs a warning, so dropping the dep restores the harmless fallback path Phase 1 was already exercising. - xcb_glx is the historical fragile integration under xvfb (Launchpad #1761708, QTBUG-67537); xcb_egl is what software-GL stacks expect anyway. Set as defense in depth. Local 4/4 still green with both changes. --- debian/control.top.in | 1 - tests/ui-smoke/_lib/launch.sh | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/debian/control.top.in b/debian/control.top.in index 8b7a66ebed0..c4526315981 100644 --- a/debian/control.top.in +++ b/debian/control.top.in @@ -60,7 +60,6 @@ Build-Depends: python3-dbus.mainloop.pyqt5 , python3-qtpy , python3-zmq , - python3-opencv , python3-cairo , python3-gi , python3-gi-cairo , diff --git a/tests/ui-smoke/_lib/launch.sh b/tests/ui-smoke/_lib/launch.sh index 1db06423dd1..2686c13fa37 100755 --- a/tests/ui-smoke/_lib/launch.sh +++ b/tests/ui-smoke/_lib/launch.sh @@ -43,6 +43,10 @@ export GALLIUM_DRIVER=llvmpipe export QT_QUICK_BACKEND=software export QSG_RHI_BACKEND=software export QT_OPENGL=software +# Dodge a long-known xcb_glx integration crash that hits QtWebEngine +# and related Qt5 widgets under xvfb (Launchpad #1761708, QTBUG-67537). +# Forces the egl path which is what software-GL stacks expect anyway. +export QT_XCB_GL_INTEGRATION=xcb_egl # Silence audio: xvfb covers X but not sound. Demote every Gst # Audio/Sink and disable canberra/SDL/pulse/ALSA-default paths. 
From 98284f7db8b7afb0a585c53db0ddb17cdbb6b029 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 18:45:02 +0800 Subject: [PATCH 5/7] test: force QT_QPA_PLATFORM=offscreen for qtdragon xvfb + xcb + xcb_egl was not enough for Ubuntu 24.04 rip-and-test: qtvcp still segfaults during widget construction even with opencv and qtwebengine paths quiet, and the same code passes on Debian package-arch. Offscreen renders entirely in memory and exercises a different Qt plugin entirely, dodging the xcb-stack instability. scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless LINUXCNC_OPENGL_PLATFORM is set to a non-glx value, so pin both. Only qtdragon needs this; axis (Tk), touchy and gmoccapy (GTK) are unaffected. Trade-off: no Phase 3 screenshot from qtdragon under this config; Phase 3 would need an opt-out for offscreen tests. --- tests/ui-smoke/qtdragon/test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index d2cedd8d5c0..d2b9224f59c 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -6,6 +6,15 @@ # linuxcnc tears down before our driver can do anything. Mirror the # config dir to a writable tmp location and patch LOG_FILE to be # rooted at $HOME so the log lands in a directory we can write to. +# +# Force the Qt offscreen platform plugin. qtvcp under xvfb + xcb on +# Ubuntu 24.04 segfaults during widget construction (no backtrace); +# Debian containers in the same CI matrix do not. Offscreen renders +# entirely in memory, no X server needed (xvfb-run still wraps the +# call so the rest of scripts/linuxcnc's X-display assumptions hold). +# scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless +# LINUXCNC_OPENGL_PLATFORM is set to something other than glx, so we +# pin both env vars. set -u LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" @@ -17,5 +26,8 @@ cp -r "$SRC_DIR/." 
"$WORK_DIR/" sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ "$WORK_DIR/qtdragon_metric.ini" +export LINUXCNC_OPENGL_PLATFORM=offscreen +export QT_QPA_PLATFORM=offscreen + exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From 2132da422494cfef3b19b98362c6ed7f10332e75 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 20:04:14 +0800 Subject: [PATCH 6/7] test: disable QtWebEngine sandbox+GPU for qtdragon ui-smoke qtdragon embeds QWebEngineView. On rip-and-test (gcc) CI it racy-crashed during Chromium browser-process spawn under offscreen + xvfb, no GPU, no user namespaces. rip-and-test-clang got past it by luck. Force --no-sandbox --single-process --no-zygote --disable-gpu so the renderer runs in-process with software rendering. --- tests/ui-smoke/qtdragon/test.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index d2b9224f59c..faad7f33ab5 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -15,6 +15,12 @@ # scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless # LINUXCNC_OPENGL_PLATFORM is set to something other than glx, so we # pin both env vars. +# +# qtdragon embeds a QWebEngineView (Chromium). Under offscreen + xvfb +# with no GPU and no user namespaces in the CI runner sandbox, +# QtWebEngine racy-crashes during browser-process spawn. Disable the +# Chromium sandbox and force single-process + software rendering so +# the renderer runs in the same process as Qt with no GPU thread. 
set -u LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" @@ -28,6 +34,8 @@ sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ export LINUXCNC_OPENGL_PLATFORM=offscreen export QT_QPA_PLATFORM=offscreen +export QTWEBENGINE_DISABLE_SANDBOX=1 +export QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer --single-process --no-zygote" exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From a26c5fb2c62604a6cf9c45928b3097ec025c7403 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 20:55:35 +0800 Subject: [PATCH 7/7] test: block QtWebEngine import in qtdragon ui-smoke via meta_path shim QtWebEngine browser-process init segfaults inside the qtvcp process on Ubuntu 24.04 CI even with --no-sandbox --single-process --disable-gpu. The smoke test never touches the WebWidget, so block the qtpy.QtWebEngineWidgets import via a sitecustomize meta_path finder; WebWidget already has a fallback that swaps in a plain QWidget when that import fails. No Chromium spawn, no segfault. The previous chromium-flags attempt was retracted: 'Sandboxing disabled by user.' confirmed Chromium got the flags but still crashed during init, so we are not going to win that race. --- tests/ui-smoke/qtdragon/test.sh | 41 +++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index faad7f33ab5..f08c5a1491a 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -18,9 +18,14 @@ # # qtdragon embeds a QWebEngineView (Chromium). Under offscreen + xvfb # with no GPU and no user namespaces in the CI runner sandbox, -# QtWebEngine racy-crashes during browser-process spawn. 
Disable the -# Chromium sandbox and force single-process + software rendering so -# the renderer runs in the same process as Qt with no GPU thread. +# QtWebEngine browser-process init segfaults even with --no-sandbox +# --single-process --disable-gpu (Chromium logs "Sandboxing disabled +# by user." then crashes inside the same qtvcp PID). Rather than keep +# tuning Chromium flags for a widget the smoke test never touches, +# we shim qtpy.QtWebEngineWidgets to raise ImportError; web_widget.py +# already has a fallback path that swaps the QWebEngineView for a +# plain QWidget when the import fails (its "fail safe - mostly for +# designer" branch). No Chromium spawn = no crash. set -u LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" @@ -34,8 +39,34 @@ sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ export LINUXCNC_OPENGL_PLATFORM=offscreen export QT_QPA_PLATFORM=offscreen -export QTWEBENGINE_DISABLE_SANDBOX=1 -export QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer --single-process --no-zygote" + +# sitecustomize.py is auto-imported by Python from any sys.path entry +# at interpreter startup. Drop a meta_path finder that blocks the +# qtpy.QtWebEngineWidgets import so WebWidget falls back to QWidget. 
+SHIM_DIR="$WORK_DIR/_pyshim" +mkdir -p "$SHIM_DIR" +cat >"$SHIM_DIR/sitecustomize.py" <<'PY' +import sys +from importlib.abc import MetaPathFinder, Loader +from importlib.util import spec_from_loader + +_BLOCK = {'qtpy.QtWebEngineWidgets', 'PyQt5.QtWebEngineWidgets'} + +class _BlockLoader(Loader): + def create_module(self, spec): + raise ImportError('QtWebEngineWidgets blocked for ui-smoke CI') + def exec_module(self, module): + pass + +class _BlockFinder(MetaPathFinder): + def find_spec(self, name, path, target=None): + if name in _BLOCK: + return spec_from_loader(name, _BlockLoader()) + return None + +sys.meta_path.insert(0, _BlockFinder()) +PY +export PYTHONPATH="$SHIM_DIR${PYTHONPATH:+:$PYTHONPATH}" exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0