From 25e442395fa317cd7b3392084ba1b78d5a8a07f7 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Wed, 15 Apr 2026 20:28:43 -0700 Subject: [PATCH 01/20] feat(server,sandbox): move SSH connect and exec onto supervisor session relay Introduce a persistent supervisor-to-gateway session (ConnectSupervisor bidirectional gRPC RPC) and migrate /connect/ssh and ExecSandbox onto relay channels coordinated through it. Architecture: - gRPC control plane: carries session lifecycle (hello, heartbeat) and relay lifecycle (RelayOpen, RelayOpenResult, RelayClose) - HTTP data plane: for each relay, the supervisor opens a reverse HTTP CONNECT to /relay/{channel_id} on the gateway; the gateway bridges the client stream with the supervisor stream - The supervisor is a dumb byte bridge with no SSH/NSSH1 awareness; the gateway sends the NSSH1 preface through the relay Key changes: - Add ConnectSupervisor RPC and session/relay proto messages - Add gateway session registry (SupervisorSessionRegistry) with pending-relay map for channel correlation - Add /relay/{channel_id} HTTP CONNECT endpoint - Rewire /connect/ssh: session lookup + RelayOpen instead of direct TCP dial to sandbox:2222 - Rewire ExecSandbox: relay-based proxy instead of direct sandbox dial - Add supervisor session client with reconnect and relay bridge - Remove ResolveSandboxEndpoint from proto, gateway, and K8s driver Closes OS-86 --- Cargo.lock | 4 + .../tests/ensure_providers_integration.rs | 24 +- .../openshell-cli/tests/mtls_integration.rs | 10 + .../tests/provider_commands_integration.rs | 24 +- .../sandbox_create_lifecycle_integration.rs | 25 +- .../sandbox_name_fallback_integration.rs | 22 +- .../openshell-driver-kubernetes/src/driver.rs | 69 +-- .../openshell-driver-kubernetes/src/grpc.rs | 18 +- crates/openshell-sandbox/Cargo.toml | 7 + crates/openshell-sandbox/src/grpc_client.rs | 5 + crates/openshell-sandbox/src/lib.rs | 16 + .../src/supervisor_session.rs | 351 ++++++++++++++ 
crates/openshell-server/src/compute/mod.rs | 98 +--- crates/openshell-server/src/grpc/mod.rs | 25 +- crates/openshell-server/src/grpc/sandbox.rs | 441 ++++++------------ crates/openshell-server/src/http.rs | 1 + crates/openshell-server/src/lib.rs | 6 + crates/openshell-server/src/relay.rs | 67 +++ crates/openshell-server/src/ssh_tunnel.rs | 233 ++++----- .../src/supervisor_session.rs | 440 +++++++++++++++++ .../tests/auth_endpoint_integration.rs | 10 + .../tests/edge_tunnel_auth.rs | 23 +- .../tests/multiplex_integration.rs | 23 +- .../tests/multiplex_tls_integration.rs | 23 +- .../tests/ws_tunnel_integration.rs | 23 +- proto/compute_driver.proto | 25 - proto/openshell.proto | 89 ++++ 27 files changed, 1408 insertions(+), 694 deletions(-) create mode 100644 crates/openshell-sandbox/src/supervisor_session.rs create mode 100644 crates/openshell-server/src/relay.rs create mode 100644 crates/openshell-server/src/supervisor_session.rs diff --git a/Cargo.lock b/Cargo.lock index e4057f75c..31144fc41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3162,6 +3162,10 @@ dependencies = [ "futures", "hex", "hmac", + "http", + "http-body-util", + "hyper", + "hyper-util", "ipnet", "landlock", "libc", diff --git a/crates/openshell-cli/tests/ensure_providers_integration.rs b/crates/openshell-cli/tests/ensure_providers_integration.rs index 2cd362023..d5e813931 100644 --- a/crates/openshell-cli/tests/ensure_providers_integration.rs +++ b/crates/openshell-cli/tests/ensure_providers_integration.rs @@ -11,13 +11,14 @@ use openshell_core::proto::open_shell_server::{OpenShell, OpenShellServer}; use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - 
GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, Provider, ProviderResponse, - RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, - ServiceStatus, UpdateProviderRequest, WatchSandboxRequest, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + Provider, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, + SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, + WatchSandboxRequest, }; use rcgen::{ BasicConstraints, Certificate, CertificateParams, ExtendedKeyUsagePurpose, IsCa, KeyPair, @@ -298,6 +299,8 @@ impl OpenShell for TestOpenShell { tokio_stream::wrappers::ReceiverStream>; type ExecSandboxStream = tokio_stream::wrappers::ReceiverStream>; + type ConnectSupervisorStream = + tokio_stream::wrappers::ReceiverStream>; async fn watch_sandbox( &self, @@ -423,6 +426,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } // ── TLS helpers ────────────────────────────────────────────────────── diff --git a/crates/openshell-cli/tests/mtls_integration.rs b/crates/openshell-cli/tests/mtls_integration.rs index 5d04239bf..c98b7eae4 100644 --- a/crates/openshell-cli/tests/mtls_integration.rs +++ b/crates/openshell-cli/tests/mtls_integration.rs @@ 
-200,6 +200,9 @@ impl OpenShell for TestOpenShell { >; type ExecSandboxStream = tokio_stream::wrappers::ReceiverStream>; + type ConnectSupervisorStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; async fn watch_sandbox( &self, @@ -325,6 +328,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } fn build_ca() -> (Certificate, KeyPair) { diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index c5476afee..1d1323371 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -7,13 +7,14 @@ use openshell_core::proto::open_shell_server::{OpenShell, OpenShellServer}; use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, Provider, ProviderResponse, - RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, - ServiceStatus, UpdateProviderRequest, WatchSandboxRequest, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + 
GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + Provider, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, + SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, + WatchSandboxRequest, }; use rcgen::{ BasicConstraints, Certificate, CertificateParams, ExtendedKeyUsagePurpose, IsCa, KeyPair, @@ -252,6 +253,8 @@ impl OpenShell for TestOpenShell { tokio_stream::wrappers::ReceiverStream>; type ExecSandboxStream = tokio_stream::wrappers::ReceiverStream>; + type ConnectSupervisorStream = + tokio_stream::wrappers::ReceiverStream>; async fn watch_sandbox( &self, @@ -377,6 +380,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } fn install_rustls_provider() { diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index d5d39f082..e4c658b7b 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -8,14 +8,14 @@ use openshell_core::proto::open_shell_server::{OpenShell, OpenShellServer}; use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, 
HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, PlatformEvent, ProviderResponse, - RevokeSshSessionRequest, RevokeSshSessionResponse, Sandbox, SandboxPhase, SandboxResponse, - SandboxStreamEvent, ServiceStatus, UpdateProviderRequest, WatchSandboxRequest, - sandbox_stream_event, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + PlatformEvent, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, Sandbox, + SandboxPhase, SandboxResponse, SandboxStreamEvent, ServiceStatus, SupervisorMessage, + UpdateProviderRequest, WatchSandboxRequest, sandbox_stream_event, }; use rcgen::{ BasicConstraints, Certificate, CertificateParams, ExtendedKeyUsagePurpose, IsCa, KeyPair, @@ -242,6 +242,8 @@ impl OpenShell for TestOpenShell { tokio_stream::wrappers::ReceiverStream>; type ExecSandboxStream = tokio_stream::wrappers::ReceiverStream>; + type ConnectSupervisorStream = + tokio_stream::wrappers::ReceiverStream>; async fn watch_sandbox( &self, @@ -403,6 +405,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } fn install_rustls_provider() { diff --git a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs index fbadec4c3..7824d141a 100644 --- a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs +++ 
b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs @@ -8,12 +8,13 @@ use openshell_core::proto::open_shell_server::{OpenShell, OpenShellServer}; use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, Sandbox, SandboxResponse, - SandboxStreamEvent, ServiceStatus, UpdateProviderRequest, WatchSandboxRequest, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + ProviderResponse, Sandbox, SandboxResponse, SandboxStreamEvent, ServiceStatus, + SupervisorMessage, UpdateProviderRequest, WatchSandboxRequest, }; use rcgen::{ BasicConstraints, Certificate, CertificateParams, ExtendedKeyUsagePurpose, IsCa, KeyPair, @@ -210,6 +211,8 @@ impl OpenShell for TestOpenShell { tokio_stream::wrappers::ReceiverStream>; type ExecSandboxStream = tokio_stream::wrappers::ReceiverStream>; + type ConnectSupervisorStream = + tokio_stream::wrappers::ReceiverStream>; async fn watch_sandbox( &self, @@ -335,6 +338,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + 
_request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } // ── helpers ─────────────────────────────────────────────────────────── diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 440703af5..3e0240d0f 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -5,7 +5,7 @@ use crate::config::KubernetesComputeConfig; use futures::{Stream, StreamExt, TryStreamExt}; -use k8s_openapi::api::core::v1::{Event as KubeEventObj, Node, Pod}; +use k8s_openapi::api::core::v1::{Event as KubeEventObj, Node}; use kube::api::{Api, ApiResource, DeleteParams, ListParams, PostParams}; use kube::core::gvk::GroupVersionKind; use kube::core::{DynamicObject, ObjectMeta}; @@ -15,12 +15,10 @@ use openshell_core::proto::compute::v1::{ DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, DriverSandbox as Sandbox, DriverSandboxSpec as SandboxSpec, DriverSandboxStatus as SandboxStatus, DriverSandboxTemplate as SandboxTemplate, - GetCapabilitiesResponse, ResolveSandboxEndpointResponse, SandboxEndpoint, - WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesPlatformEvent, - WatchSandboxesSandboxEvent, sandbox_endpoint, watch_sandboxes_event, + GetCapabilitiesResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent, + WatchSandboxesPlatformEvent, WatchSandboxesSandboxEvent, watch_sandboxes_event, }; use std::collections::BTreeMap; -use std::net::IpAddr; use std::pin::Pin; use std::time::Duration; use tokio::sync::mpsc; @@ -271,21 +269,6 @@ impl KubernetesComputeDriver { &self.config.ssh_handshake_secret } - async fn agent_pod_ip(&self, pod_name: &str) -> Result, KubeError> { - let api: Api = Api::namespaced(self.client.clone(), &self.config.namespace); - match api.get(pod_name).await { - Ok(pod) => { - let ip = pod - .status - .and_then(|status| status.pod_ip) - .and_then(|ip| 
ip.parse().ok()); - Ok(ip) - } - Err(KubeError::Api(err)) if err.code == 404 => Ok(None), - Err(err) => Err(err), - } - } - pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result<(), KubernetesDriverError> { let name = sandbox.name.as_str(); info!( @@ -407,52 +390,6 @@ impl KubernetesComputeDriver { } } - pub async fn resolve_sandbox_endpoint( - &self, - sandbox: &Sandbox, - ) -> Result { - if let Some(status) = sandbox.status.as_ref() - && !status.instance_id.is_empty() - { - match self.agent_pod_ip(&status.instance_id).await { - Ok(Some(ip)) => { - return Ok(ResolveSandboxEndpointResponse { - endpoint: Some(SandboxEndpoint { - target: Some(sandbox_endpoint::Target::Ip(ip.to_string())), - port: u32::from(self.config.ssh_port), - }), - }); - } - Ok(None) => { - return Err(KubernetesDriverError::Precondition( - "sandbox agent pod IP is not available".to_string(), - )); - } - Err(err) => { - return Err(KubernetesDriverError::Message(format!( - "failed to resolve agent pod IP: {err}" - ))); - } - } - } - - if sandbox.name.is_empty() { - return Err(KubernetesDriverError::Precondition( - "sandbox has no name".to_string(), - )); - } - - Ok(ResolveSandboxEndpointResponse { - endpoint: Some(SandboxEndpoint { - target: Some(sandbox_endpoint::Target::Host(format!( - "{}.{}.svc.cluster.local", - sandbox.name, self.config.namespace - ))), - port: u32::from(self.config.ssh_port), - }), - }) - } - pub async fn watch_sandboxes(&self) -> Result { let namespace = self.config.namespace.clone(); let sandbox_api = self.api(); diff --git a/crates/openshell-driver-kubernetes/src/grpc.rs b/crates/openshell-driver-kubernetes/src/grpc.rs index 2c5a94467..75e131d41 100644 --- a/crates/openshell-driver-kubernetes/src/grpc.rs +++ b/crates/openshell-driver-kubernetes/src/grpc.rs @@ -5,8 +5,7 @@ use futures::{Stream, StreamExt}; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, GetCapabilitiesRequest, 
GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse, - ListSandboxesRequest, ListSandboxesResponse, ResolveSandboxEndpointRequest, - ResolveSandboxEndpointResponse, StopSandboxRequest, StopSandboxResponse, + ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_server::ComputeDriver, }; @@ -128,21 +127,6 @@ impl ComputeDriver for ComputeDriverService { Ok(Response::new(DeleteSandboxResponse { deleted })) } - async fn resolve_sandbox_endpoint( - &self, - request: Request, - ) -> Result, Status> { - let sandbox = request - .into_inner() - .sandbox - .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; - self.driver - .resolve_sandbox_endpoint(&sandbox) - .await - .map(Response::new) - .map_err(status_from_driver_error) - } - type WatchSandboxesStream = Pin> + Send + 'static>>; diff --git a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index 541784ee6..b21b1948f 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -51,8 +51,15 @@ rcgen = { workspace = true } webpki-roots = { workspace = true } # HTTP +hyper = { workspace = true } +hyper-util = { workspace = true } +http = "1" +http-body-util = "0.1" bytes = { workspace = true } +# UUID +uuid = { workspace = true } + # Encoding base64 = { workspace = true } diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index 5503637ee..09e7b607d 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -74,6 +74,11 @@ async fn connect_channel(endpoint: &str) -> Result { .wrap_err("failed to connect to OpenShell server") } +/// Create a channel to the OpenShell server (public for use by supervisor_session). 
+pub async fn connect_channel_pub(endpoint: &str) -> Result { + connect_channel(endpoint).await +} + /// Connect to the OpenShell server (mTLS or plaintext based on endpoint scheme). async fn connect(endpoint: &str) -> Result> { let channel = connect_channel(endpoint).await?; diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index b81dd4a6c..76da6bb3f 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -21,6 +21,7 @@ pub mod proxy; mod sandbox; mod secrets; mod ssh; +mod supervisor_session; use miette::{IntoDiagnostic, Result}; #[cfg(target_os = "linux")] @@ -676,6 +677,21 @@ pub async fn run_sandbox( } } + // Spawn the persistent supervisor session if we have a gateway endpoint + // and sandbox identity. The session provides relay channels for SSH + // connect and ExecSandbox through the gateway. + if let (Some(endpoint), Some(id)) = (openshell_endpoint.as_ref(), sandbox_id.as_ref()) { + // The SSH listen address was consumed above, so we use the configured + // SSH port (default 2222) for loopback connections from the relay. + let ssh_port = std::env::var("OPENSHELL_SSH_PORT") + .ok() + .and_then(|p| p.parse::().ok()) + .unwrap_or(2222); + + supervisor_session::spawn(endpoint.clone(), id.clone(), ssh_port); + info!("supervisor session task spawned"); + } + #[cfg(target_os = "linux")] let mut handle = ProcessHandle::spawn( program, diff --git a/crates/openshell-sandbox/src/supervisor_session.rs b/crates/openshell-sandbox/src/supervisor_session.rs new file mode 100644 index 000000000..2b571df08 --- /dev/null +++ b/crates/openshell-sandbox/src/supervisor_session.rs @@ -0,0 +1,351 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Persistent supervisor-to-gateway session. +//! +//! Maintains a long-lived `ConnectSupervisor` bidirectional gRPC stream to the +//! gateway. 
When the gateway sends `RelayOpen`, the supervisor opens a reverse +//! HTTP CONNECT tunnel back to the gateway and bridges it to the local SSH +//! daemon. The supervisor is a dumb byte bridge — it has no protocol awareness +//! of the SSH or NSSH1 bytes flowing through the tunnel. + +use std::time::Duration; + +use openshell_core::proto::open_shell_client::OpenShellClient; +use openshell_core::proto::{ + GatewayMessage, SupervisorHeartbeat, SupervisorHello, SupervisorMessage, gateway_message, + supervisor_message, +}; +use tokio::sync::mpsc; +use tonic::transport::Channel; +use tracing::{info, warn}; + +use crate::grpc_client; + +const INITIAL_BACKOFF: Duration = Duration::from_secs(1); +const MAX_BACKOFF: Duration = Duration::from_secs(30); + +/// Spawn the supervisor session task. +/// +/// The task runs for the lifetime of the sandbox process, reconnecting with +/// exponential backoff on failures. +pub fn spawn( + endpoint: String, + sandbox_id: String, + ssh_listen_port: u16, +) -> tokio::task::JoinHandle<()> { + tokio::spawn(run_session_loop(endpoint, sandbox_id, ssh_listen_port)) +} + +async fn run_session_loop(endpoint: String, sandbox_id: String, ssh_listen_port: u16) { + let mut backoff = INITIAL_BACKOFF; + let mut attempt: u64 = 0; + + loop { + attempt += 1; + + match run_single_session(&endpoint, &sandbox_id, ssh_listen_port).await { + Ok(()) => { + info!(sandbox_id = %sandbox_id, "supervisor session ended cleanly"); + break; + } + Err(e) => { + warn!( + sandbox_id = %sandbox_id, + attempt = attempt, + backoff_ms = backoff.as_millis() as u64, + error = %e, + "supervisor session failed, reconnecting" + ); + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(MAX_BACKOFF); + } + } + } +} + +async fn run_single_session( + endpoint: &str, + sandbox_id: &str, + ssh_listen_port: u16, +) -> Result<(), Box> { + // Connect to the gateway. 
+ let channel = grpc_client::connect_channel_pub(endpoint) + .await + .map_err(|e| format!("connect failed: {e}"))?; + let mut client = OpenShellClient::new(channel.clone()); + + // Create the outbound message stream. + let (tx, rx) = mpsc::channel::(64); + let outbound = tokio_stream::wrappers::ReceiverStream::new(rx); + + // Send hello as the first message. + let instance_id = uuid::Uuid::new_v4().to_string(); + tx.send(SupervisorMessage { + payload: Some(supervisor_message::Payload::Hello(SupervisorHello { + sandbox_id: sandbox_id.to_string(), + instance_id: instance_id.clone(), + })), + }) + .await + .map_err(|_| "failed to queue hello")?; + + // Open the bidirectional stream. + let response = client + .connect_supervisor(outbound) + .await + .map_err(|e| format!("connect_supervisor RPC failed: {e}"))?; + let mut inbound = response.into_inner(); + + // Wait for SessionAccepted. + let accepted = match inbound.message().await? { + Some(msg) => match msg.payload { + Some(gateway_message::Payload::SessionAccepted(a)) => a, + Some(gateway_message::Payload::SessionRejected(r)) => { + return Err(format!("session rejected: {}", r.reason).into()); + } + _ => return Err("expected SessionAccepted or SessionRejected".into()), + }, + None => return Err("stream closed before session accepted".into()), + }; + + let heartbeat_secs = accepted.heartbeat_interval_secs.max(5); + info!( + sandbox_id = %sandbox_id, + session_id = %accepted.session_id, + instance_id = %instance_id, + heartbeat_secs = heartbeat_secs, + "supervisor session established" + ); + + // Main loop: receive gateway messages + send heartbeats. + let mut heartbeat_interval = + tokio::time::interval(Duration::from_secs(u64::from(heartbeat_secs))); + heartbeat_interval.tick().await; // skip immediate tick + + loop { + tokio::select! 
{ + msg = inbound.message() => { + match msg { + Ok(Some(msg)) => { + handle_gateway_message( + &msg, + sandbox_id, + &endpoint, + ssh_listen_port, + &channel, + ).await; + } + Ok(None) => { + info!(sandbox_id = %sandbox_id, "supervisor session: gateway closed stream"); + return Ok(()); + } + Err(e) => { + return Err(format!("stream error: {e}").into()); + } + } + } + _ = heartbeat_interval.tick() => { + let hb = SupervisorMessage { + payload: Some(supervisor_message::Payload::Heartbeat( + SupervisorHeartbeat {}, + )), + }; + if tx.send(hb).await.is_err() { + return Err("outbound channel closed".into()); + } + } + } + } +} + +async fn handle_gateway_message( + msg: &GatewayMessage, + sandbox_id: &str, + endpoint: &str, + ssh_listen_port: u16, + _channel: &Channel, +) { + match &msg.payload { + Some(gateway_message::Payload::Heartbeat(_)) => { + // Gateway heartbeat — nothing to do. + } + Some(gateway_message::Payload::RelayOpen(open)) => { + let channel_id = open.channel_id.clone(); + let endpoint = endpoint.to_string(); + let sandbox_id = sandbox_id.to_string(); + + info!( + sandbox_id = %sandbox_id, + channel_id = %channel_id, + "supervisor session: relay open request, spawning bridge" + ); + + tokio::spawn(async move { + if let Err(e) = handle_relay_open(&channel_id, &endpoint, ssh_listen_port).await { + warn!( + sandbox_id = %sandbox_id, + channel_id = %channel_id, + error = %e, + "supervisor session: relay bridge failed" + ); + } + }); + } + Some(gateway_message::Payload::RelayClose(close)) => { + info!( + sandbox_id = %sandbox_id, + channel_id = %close.channel_id, + reason = %close.reason, + "supervisor session: relay close from gateway" + ); + } + _ => { + warn!(sandbox_id = %sandbox_id, "supervisor session: unexpected gateway message"); + } + } +} + +/// Handle a RelayOpen by opening a reverse HTTP CONNECT to the gateway and +/// bridging it to the local SSH daemon. 
+async fn handle_relay_open( + channel_id: &str, + endpoint: &str, + ssh_listen_port: u16, +) -> Result<(), Box> { + // Build the relay URL from the gateway endpoint. + // The endpoint is like "https://gateway:8080" or "http://gateway:8080". + let relay_url = format!("{endpoint}/relay/{channel_id}"); + + // Open a reverse HTTP CONNECT to the gateway's relay endpoint. + let mut relay_stream = open_reverse_connect(&relay_url).await?; + + // Connect to the local SSH daemon on loopback. + let mut ssh_conn = tokio::net::TcpStream::connect(("127.0.0.1", ssh_listen_port)).await?; + + info!(channel_id = %channel_id, "relay bridge: connected to local SSH daemon, bridging"); + + // Bridge the relay stream to the local SSH connection. + // The gateway sends NSSH1 preface + SSH bytes through the relay. + // The SSH daemon receives them as if the gateway connected directly. + let _ = tokio::io::copy_bidirectional(&mut relay_stream, &mut ssh_conn).await; + + Ok(()) +} + +/// Open an HTTP CONNECT tunnel to the given URL and return the upgraded stream. +/// +/// This uses a raw hyper HTTP/1.1 client to send a CONNECT request and upgrade +/// the connection to a raw byte stream. +async fn open_reverse_connect( + url: &str, +) -> Result< + hyper_util::rt::TokioIo, + Box, +> { + let uri: http::Uri = url.parse()?; + let host = uri.host().ok_or("missing host")?; + let port = uri + .port_u16() + .unwrap_or(if uri.scheme_str() == Some("https") { + 443 + } else { + 80 + }); + let authority = format!("{host}:{port}"); + let path = uri.path().to_string(); + let use_tls = uri.scheme_str() == Some("https"); + + // Connect TCP. + let tcp = tokio::net::TcpStream::connect(&authority).await?; + tcp.set_nodelay(true)?; + + if use_tls { + // Build TLS connector using the same env-var certs as the gRPC client. 
+ let tls_stream = connect_tls(tcp, host).await?; + send_connect_request(tls_stream, &authority, &path).await + } else { + send_connect_request(tcp, &authority, &path).await + } +} + +async fn send_connect_request( + io: IO, + authority: &str, + path: &str, +) -> Result< + hyper_util::rt::TokioIo, + Box, +> +where + IO: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin + Send + 'static, +{ + use http::Method; + + let (mut sender, conn) = + hyper::client::conn::http1::handshake(hyper_util::rt::TokioIo::new(io)).await?; + + // Spawn the connection driver. + tokio::spawn(async move { + if let Err(e) = conn.with_upgrades().await { + warn!(error = %e, "relay CONNECT connection driver error"); + } + }); + + let req = http::Request::builder() + .method(Method::CONNECT) + .uri(path) + .header(http::header::HOST, authority) + .body(http_body_util::Empty::::new())?; + + let resp = sender.send_request(req).await?; + + if resp.status() != http::StatusCode::OK + && resp.status() != http::StatusCode::SWITCHING_PROTOCOLS + { + return Err(format!("relay CONNECT failed: {}", resp.status()).into()); + } + + let upgraded = hyper::upgrade::on(resp).await?; + Ok(hyper_util::rt::TokioIo::new(upgraded)) +} + +/// Connect TLS using the same cert env vars as the gRPC client. 
+async fn connect_tls( + tcp: tokio::net::TcpStream, + host: &str, +) -> Result< + tokio_rustls::client::TlsStream, + Box, +> { + use rustls::pki_types::ServerName; + use std::sync::Arc; + + let ca_path = std::env::var("OPENSHELL_TLS_CA")?; + let cert_path = std::env::var("OPENSHELL_TLS_CERT")?; + let key_path = std::env::var("OPENSHELL_TLS_KEY")?; + + let ca_pem = std::fs::read(&ca_path)?; + let cert_pem = std::fs::read(&cert_path)?; + let key_pem = std::fs::read(&key_path)?; + + let mut root_store = rustls::RootCertStore::empty(); + for cert in rustls_pemfile::certs(&mut ca_pem.as_slice()) { + root_store.add(cert?)?; + } + + let certs: Vec<_> = + rustls_pemfile::certs(&mut cert_pem.as_slice()).collect::>()?; + let key = + rustls_pemfile::private_key(&mut key_pem.as_slice())?.ok_or("no private key found")?; + + let config = rustls::ClientConfig::builder() + .with_root_certificates(root_store) + .with_client_auth_cert(certs, key)?; + + let connector = tokio_rustls::TlsConnector::from(Arc::new(config)); + let server_name = ServerName::try_from(host.to_string())?; + let tls_stream = connector.connect(server_name, tcp).await?; + + Ok(tls_stream) +} diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 846782c65..181a5a819 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -13,9 +13,8 @@ use openshell_core::proto::compute::v1::{ CreateSandboxRequest, DeleteSandboxRequest, DriverCondition, DriverPlatformEvent, DriverResourceRequirements, DriverSandbox, DriverSandboxSpec, DriverSandboxStatus, DriverSandboxTemplate, GetCapabilitiesRequest, GetSandboxRequest, ListSandboxesRequest, - ResolveSandboxEndpointRequest, ResolveSandboxEndpointResponse, ValidateSandboxCreateRequest, - WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_server::ComputeDriver, - sandbox_endpoint, watch_sandboxes_event, + ValidateSandboxCreateRequest, WatchSandboxesEvent, 
WatchSandboxesRequest, + compute_driver_server::ComputeDriver, watch_sandboxes_event, }; use openshell_core::proto::{ PlatformEvent, Sandbox, SandboxCondition, SandboxPhase, SandboxSpec, SandboxStatus, @@ -26,7 +25,6 @@ use openshell_driver_kubernetes::{ }; use prost::Message; use std::fmt; -use std::net::IpAddr; use std::pin::Pin; use std::sync::Arc; use std::time::Duration; @@ -55,12 +53,6 @@ pub enum ComputeError { Message(String), } -#[derive(Debug)] -pub enum ResolvedEndpoint { - Ip(IpAddr, u16), - Host(String, u16), -} - #[derive(Clone)] pub struct ComputeRuntime { driver: SharedComputeDriver, @@ -243,29 +235,6 @@ impl ComputeRuntime { Ok(deleted) } - pub async fn resolve_sandbox_endpoint( - &self, - sandbox: &Sandbox, - ) -> Result { - let driver_sandbox = driver_sandbox_from_public(sandbox); - self.driver - .resolve_sandbox_endpoint(Request::new(ResolveSandboxEndpointRequest { - sandbox: Some(driver_sandbox), - })) - .await - .map(|response| response.into_inner()) - .map_err(|status| match status.code() { - Code::FailedPrecondition => { - Status::failed_precondition(status.message().to_string()) - } - _ => Status::internal(status.message().to_string()), - }) - .and_then(|response| { - resolved_endpoint_from_response(&response) - .map_err(|err| Status::internal(err.to_string())) - }) - } - pub fn spawn_watchers(&self) { let runtime = Arc::new(self.clone()); let watch_runtime = runtime.clone(); @@ -813,30 +782,6 @@ fn decode_sandbox_record(record: &ObjectRecord) -> Result { Sandbox::decode(record.payload.as_slice()).map_err(|e| e.to_string()) } -fn resolved_endpoint_from_response( - response: &ResolveSandboxEndpointResponse, -) -> Result { - let endpoint = response - .endpoint - .as_ref() - .ok_or_else(|| ComputeError::Message("compute driver returned no endpoint".to_string()))?; - let port = u16::try_from(endpoint.port) - .map_err(|_| ComputeError::Message("compute driver returned invalid port".to_string()))?; - - match endpoint.target.as_ref() { - 
Some(sandbox_endpoint::Target::Ip(ip)) => ip - .parse() - .map(|ip| ResolvedEndpoint::Ip(ip, port)) - .map_err(|e| ComputeError::Message(format!("invalid endpoint IP: {e}"))), - Some(sandbox_endpoint::Target::Host(host)) => { - Ok(ResolvedEndpoint::Host(host.clone(), port)) - } - None => Err(ComputeError::Message( - "compute driver returned endpoint without target".to_string(), - )), - } -} - fn public_status_from_driver(status: &DriverSandboxStatus) -> SandboxStatus { SandboxStatus { sandbox_name: status.sandbox_name.clone(), @@ -929,8 +874,7 @@ mod tests { use futures::stream; use openshell_core::proto::compute::v1::{ CreateSandboxResponse, DeleteSandboxResponse, GetCapabilitiesResponse, GetSandboxRequest, - GetSandboxResponse, ResolveSandboxEndpointResponse, SandboxEndpoint, StopSandboxRequest, - StopSandboxResponse, ValidateSandboxCreateResponse, sandbox_endpoint, + GetSandboxResponse, StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateResponse, }; use std::sync::Arc; @@ -938,7 +882,6 @@ mod tests { struct TestDriver { listed_sandboxes: Vec, current_sandboxes: Vec, - resolve_precondition: Option, } #[tonic::async_trait] @@ -1031,24 +974,6 @@ mod tests { })) } - async fn resolve_sandbox_endpoint( - &self, - _request: Request, - ) -> Result, Status> { - if let Some(message) = &self.resolve_precondition { - return Err(Status::failed_precondition(message.clone())); - } - - Ok(tonic::Response::new(ResolveSandboxEndpointResponse { - endpoint: Some(SandboxEndpoint { - target: Some(sandbox_endpoint::Target::Host( - "sandbox.default.svc.cluster.local".to_string(), - )), - port: 2222, - }), - })) - } - async fn watch_sandboxes( &self, _request: Request, @@ -1322,23 +1247,6 @@ mod tests { ); } - #[tokio::test] - async fn resolve_sandbox_endpoint_preserves_precondition_errors() { - let runtime = test_runtime(Arc::new(TestDriver { - resolve_precondition: Some("sandbox agent pod IP is not available".to_string()), - ..Default::default() - })) - .await; - - let err 
= runtime - .resolve_sandbox_endpoint(&sandbox_record("sb-1", "sandbox-a", SandboxPhase::Ready)) - .await - .expect_err("endpoint resolution should preserve failed-precondition errors"); - - assert_eq!(err.code(), Code::FailedPrecondition); - assert_eq!(err.message(), "sandbox agent pod IP is not available"); - } - #[tokio::test] async fn reconcile_store_with_backend_applies_driver_snapshot() { let runtime = test_runtime(Arc::new(TestDriver { diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index af60897d1..8a5516c6b 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ b/crates/openshell-server/src/grpc/mod.rs @@ -14,10 +14,10 @@ use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, EditDraftChunkRequest, EditDraftChunkResponse, ExecSandboxEvent, ExecSandboxRequest, - GetDraftHistoryRequest, GetDraftHistoryResponse, GetDraftPolicyRequest, GetDraftPolicyResponse, - GetGatewayConfigRequest, GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, - GetSandboxConfigResponse, GetSandboxLogsRequest, GetSandboxLogsResponse, - GetSandboxPolicyStatusRequest, GetSandboxPolicyStatusResponse, + GatewayMessage, GetDraftHistoryRequest, GetDraftHistoryResponse, GetDraftPolicyRequest, + GetDraftPolicyResponse, GetGatewayConfigRequest, GetGatewayConfigResponse, GetProviderRequest, + GetSandboxConfigRequest, GetSandboxConfigResponse, GetSandboxLogsRequest, + GetSandboxLogsResponse, GetSandboxPolicyStatusRequest, GetSandboxPolicyStatusResponse, GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, ListSandboxPoliciesRequest, ListSandboxPoliciesResponse, ListSandboxesRequest, @@ -25,11 +25,12 @@ use openshell_core::proto::{ 
RejectDraftChunkRequest, RejectDraftChunkResponse, ReportPolicyStatusRequest, ReportPolicyStatusResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, SubmitPolicyAnalysisRequest, SubmitPolicyAnalysisResponse, - UndoDraftChunkRequest, UndoDraftChunkResponse, UpdateConfigRequest, UpdateConfigResponse, - UpdateProviderRequest, WatchSandboxRequest, open_shell_server::OpenShell, + SupervisorMessage, UndoDraftChunkRequest, UndoDraftChunkResponse, UpdateConfigRequest, + UpdateConfigResponse, UpdateProviderRequest, WatchSandboxRequest, open_shell_server::OpenShell, }; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; +use std::pin::Pin; use std::sync::Arc; use tokio_stream::wrappers::ReceiverStream; use tonic::{Request, Response, Status}; @@ -383,6 +384,18 @@ impl OpenShell for OpenShellService { ) -> Result, Status> { policy::handle_get_draft_history(&self.state, request).await } + + // --- Supervisor session --- + + type ConnectSupervisorStream = + Pin> + Send + 'static>>; + + async fn connect_supervisor( + &self, + request: Request>, + ) -> Result, Status> { + crate::supervisor_session::handle_connect_supervisor(&self.state, request).await + } } // --------------------------------------------------------------------------- diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index 8e5930826..cdc7b51dd 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -22,13 +22,11 @@ use openshell_core::proto::{ use openshell_core::proto::{Sandbox, SandboxPhase, SandboxTemplate, SshSession}; use prost::Message; use std::sync::Arc; -use tokio::io::AsyncReadExt; -use tokio::io::AsyncWriteExt; use tokio::net::{TcpListener, TcpStream}; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tonic::{Request, Response, Status}; -use tracing::{debug, info, warn}; +use tracing::{info, warn}; use 
russh::ChannelMsg; use russh::client::AuthResult; @@ -438,7 +436,13 @@ pub(super) async fn handle_exec_sandbox( return Err(Status::failed_precondition("sandbox is not ready")); } - let (target_host, target_port) = resolve_sandbox_exec_target(state, &sandbox).await?; + // Open a relay channel through the supervisor session. + let (channel_id, relay_rx) = state + .supervisor_sessions + .open_relay(&sandbox.id) + .await + .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?; + let command_str = build_remote_exec_command(&req) .map_err(|e| Status::invalid_argument(format!("command construction failed: {e}")))?; let stdin_payload = req.stdin; @@ -449,11 +453,32 @@ pub(super) async fn handle_exec_sandbox( let (tx, rx) = mpsc::channel::>(256); tokio::spawn(async move { - if let Err(err) = stream_exec_over_ssh( + // Wait for the supervisor's reverse CONNECT to deliver the relay stream. + let relay_stream = match tokio::time::timeout(std::time::Duration::from_secs(10), relay_rx) + .await + { + Ok(Ok(stream)) => stream, + Ok(Err(_)) => { + warn!(sandbox_id = %sandbox_id, channel_id = %channel_id, "ExecSandbox: relay channel dropped"); + let _ = tx + .send(Err(Status::unavailable("relay channel dropped"))) + .await; + return; + } + Err(_) => { + warn!(sandbox_id = %sandbox_id, channel_id = %channel_id, "ExecSandbox: relay open timed out"); + let _ = tx + .send(Err(Status::deadline_exceeded("relay open timed out"))) + .await; + return; + } + }; + + if let Err(err) = stream_exec_over_relay( tx.clone(), &sandbox_id, - &target_host, - target_port, + &channel_id, + relay_stream, &command_str, stdin_payload, timeout_seconds, @@ -584,16 +609,6 @@ fn resolve_gateway(config: &openshell_core::Config) -> (String, u16) { (host, port) } -async fn resolve_sandbox_exec_target( - state: &ServerState, - sandbox: &Sandbox, -) -> Result<(String, u16), Status> { - match state.compute.resolve_sandbox_endpoint(sandbox).await? 
{ - crate::compute::ResolvedEndpoint::Ip(ip, port) => Ok((ip.to_string(), port)), - crate::compute::ResolvedEndpoint::Host(host, port) => Ok((host, port)), - } -} - /// Shell-escape a value for embedding in a POSIX shell command. /// /// Wraps unsafe values in single quotes with the standard `'\''` idiom for @@ -646,34 +661,18 @@ fn build_remote_exec_command(req: &ExecSandboxRequest) -> Result Ok(result) } -/// Maximum number of attempts when establishing the SSH transport to a sandbox. -const SSH_CONNECT_MAX_ATTEMPTS: u32 = 6; - -/// Initial backoff duration between SSH connection retries. -const SSH_CONNECT_INITIAL_BACKOFF: std::time::Duration = std::time::Duration::from_millis(250); - -/// Maximum backoff duration between SSH connection retries. -const SSH_CONNECT_MAX_BACKOFF: std::time::Duration = std::time::Duration::from_secs(2); - -/// Returns `true` if the gRPC status represents a transient SSH connection error. -fn is_retryable_ssh_error(status: &Status) -> bool { - if status.code() != tonic::Code::Internal { - return false; - } - let msg = status.message(); - msg.contains("Connection reset by peer") - || msg.contains("Connection refused") - || msg.contains("failed to establish ssh transport") - || msg.contains("failed to connect to ssh proxy") - || msg.contains("failed to start ssh proxy") -} - +/// Execute a command over an SSH transport relayed through a supervisor session. +/// +/// This is the relay equivalent of `stream_exec_over_ssh`. Instead of dialing a +/// sandbox endpoint directly, the SSH transport runs over a `DuplexStream` that +/// is bridged to the supervisor's local SSH daemon via a reverse HTTP CONNECT +/// tunnel. 
#[allow(clippy::too_many_arguments)] -async fn stream_exec_over_ssh( +async fn stream_exec_over_relay( tx: mpsc::Sender>, sandbox_id: &str, - target_host: &str, - target_port: u16, + channel_id: &str, + relay_stream: tokio::io::DuplexStream, command: &str, stdin_payload: Vec, timeout_seconds: u32, @@ -683,96 +682,53 @@ async fn stream_exec_over_ssh( let command_preview: String = command.chars().take(120).collect(); info!( sandbox_id = %sandbox_id, - target_host = %target_host, - target_port, + channel_id = %channel_id, command_len = command.len(), stdin_len = stdin_payload.len(), command_preview = %command_preview, - "ExecSandbox command started" + "ExecSandbox (relay): command started" ); - let (exit_code, proxy_task) = { - let mut last_err: Option = None; - - let mut result = None; - for attempt in 0..SSH_CONNECT_MAX_ATTEMPTS { - if attempt > 0 { - let backoff = (SSH_CONNECT_INITIAL_BACKOFF * 2u32.pow(attempt - 1)) - .min(SSH_CONNECT_MAX_BACKOFF); - warn!( - sandbox_id = %sandbox_id, - attempt = attempt + 1, - backoff_ms = %backoff.as_millis(), - error = %last_err.as_ref().unwrap(), - "Retrying SSH transport establishment" - ); - tokio::time::sleep(backoff).await; - } - - let (local_proxy_port, proxy_task) = match start_single_use_ssh_proxy( - target_host, - target_port, - handshake_secret, - ) + let (local_proxy_port, proxy_task) = + start_single_use_ssh_proxy_over_relay(relay_stream, handshake_secret) .await - { - Ok(v) => v, - Err(e) => { - last_err = Some(Status::internal(format!("failed to start ssh proxy: {e}"))); - continue; - } - }; - - let exec = run_exec_with_russh( - local_proxy_port, - command, - stdin_payload.clone(), - request_tty, - tx.clone(), - ); + .map_err(|e| Status::internal(format!("failed to start relay proxy: {e}")))?; + + let exec = run_exec_with_russh( + local_proxy_port, + command, + stdin_payload, + request_tty, + tx.clone(), + ); - let exec_result = if timeout_seconds == 0 { - exec.await - } else if let Ok(r) = tokio::time::timeout( - 
std::time::Duration::from_secs(u64::from(timeout_seconds)), - exec, - ) - .await - { - r - } else { - let _ = tx - .send(Ok(ExecSandboxEvent { - payload: Some(openshell_core::proto::exec_sandbox_event::Payload::Exit( - ExecSandboxExit { exit_code: 124 }, - )), - })) - .await; - let _ = proxy_task.await; - return Ok(()); - }; - - match exec_result { - Ok(exit_code) => { - result = Some((exit_code, proxy_task)); - break; - } - Err(status) => { - let _ = proxy_task.await; - if is_retryable_ssh_error(&status) && attempt + 1 < SSH_CONNECT_MAX_ATTEMPTS { - last_err = Some(status); - continue; - } - return Err(status); - } - } - } + let exec_result = if timeout_seconds == 0 { + exec.await + } else if let Ok(r) = tokio::time::timeout( + std::time::Duration::from_secs(u64::from(timeout_seconds)), + exec, + ) + .await + { + r + } else { + let _ = tx + .send(Ok(ExecSandboxEvent { + payload: Some(openshell_core::proto::exec_sandbox_event::Payload::Exit( + ExecSandboxExit { exit_code: 124 }, + )), + })) + .await; + let _ = proxy_task.await; + return Ok(()); + }; - result.ok_or_else(|| { - last_err.unwrap_or_else(|| { - Status::internal("ssh connection failed after exhausting retries") - }) - })? + let exit_code = match exec_result { + Ok(code) => code, + Err(status) => { + let _ = proxy_task.await; + return Err(status); + } }; let _ = proxy_task.await; @@ -788,6 +744,75 @@ async fn stream_exec_over_ssh( Ok(()) } +/// Create a localhost SSH proxy that bridges to a relay DuplexStream. +/// +/// The proxy sends the NSSH1 handshake preface through the relay (which flows +/// to the supervisor and on to the embedded SSH daemon), waits for "OK", then +/// bridges the russh client connection with the relay stream. 
+async fn start_single_use_ssh_proxy_over_relay( + relay_stream: tokio::io::DuplexStream, + handshake_secret: &str, +) -> Result<(u16, tokio::task::JoinHandle<()>), Box> { + let listener = TcpListener::bind(("127.0.0.1", 0)).await?; + let port = listener.local_addr()?.port(); + let handshake_secret = handshake_secret.to_string(); + + let task = tokio::spawn(async move { + let Ok((mut client_conn, _)) = listener.accept().await else { + warn!("SSH relay proxy: failed to accept local connection"); + return; + }; + + let (mut relay_read, mut relay_write) = tokio::io::split(relay_stream); + + // Send NSSH1 handshake through the relay to the SSH daemon. + let Ok(preface) = build_preface(&uuid::Uuid::new_v4().to_string(), &handshake_secret) + else { + warn!("SSH relay proxy: failed to build handshake preface"); + return; + }; + if let Err(e) = + tokio::io::AsyncWriteExt::write_all(&mut relay_write, preface.as_bytes()).await + { + warn!(error = %e, "SSH relay proxy: failed to send handshake preface"); + return; + } + + // Read handshake response from the relay. + let mut response_buf = Vec::new(); + loop { + let mut byte = [0u8; 1]; + match tokio::io::AsyncReadExt::read(&mut relay_read, &mut byte).await { + Ok(0) => break, + Ok(_) => { + if byte[0] == b'\n' { + break; + } + response_buf.push(byte[0]); + if response_buf.len() > 1024 { + break; + } + } + Err(e) => { + warn!(error = %e, "SSH relay proxy: failed to read handshake response"); + return; + } + } + } + let response = String::from_utf8_lossy(&response_buf); + if response.trim() != "OK" { + warn!(response = %response.trim(), "SSH relay proxy: handshake rejected"); + return; + } + + // Reunite the split halves for copy_bidirectional. 
+ let mut relay = relay_read.unsplit(relay_write); + let _ = tokio::io::copy_bidirectional(&mut client_conn, &mut relay).await; + }); + + Ok((port, task)) +} + #[derive(Debug, Clone, Copy)] struct SandboxSshClientHandler; @@ -914,98 +939,6 @@ async fn run_exec_with_russh( Ok(exit_code.unwrap_or(1)) } -/// Check whether an IP address is safe to use as an SSH proxy target. -fn is_safe_ssh_proxy_target(ip: std::net::IpAddr) -> bool { - match ip { - std::net::IpAddr::V4(v4) => !v4.is_loopback() && !v4.is_link_local(), - std::net::IpAddr::V6(v6) => { - if v6.is_loopback() { - return false; - } - if let Some(v4) = v6.to_ipv4_mapped() { - return !v4.is_loopback() && !v4.is_link_local(); - } - true - } - } -} - -async fn start_single_use_ssh_proxy( - target_host: &str, - target_port: u16, - handshake_secret: &str, -) -> Result<(u16, tokio::task::JoinHandle<()>), Box> { - let listener = TcpListener::bind(("127.0.0.1", 0)).await?; - let port = listener.local_addr()?.port(); - let target_host = target_host.to_string(); - let handshake_secret = handshake_secret.to_string(); - - let task = tokio::spawn(async move { - let Ok((mut client_conn, _)) = listener.accept().await else { - warn!("SSH proxy: failed to accept local connection"); - return; - }; - - let addr_str = format!("{target_host}:{target_port}"); - let resolved = match tokio::net::lookup_host(&addr_str).await { - Ok(mut addrs) => { - if let Some(addr) = addrs.next() { - addr - } else { - warn!(target_host = %target_host, "SSH proxy: DNS resolution returned no addresses"); - return; - } - } - Err(e) => { - warn!(target_host = %target_host, error = %e, "SSH proxy: DNS resolution failed"); - return; - } - }; - - if !is_safe_ssh_proxy_target(resolved.ip()) { - warn!( - target_host = %target_host, - resolved_ip = %resolved.ip(), - "SSH proxy: target resolved to blocked IP range (loopback or link-local)" - ); - return; - } - - debug!( - target_host = %target_host, - resolved_ip = %resolved.ip(), - target_port, - "SSH proxy: 
connecting to validated target" - ); - - let Ok(mut sandbox_conn) = TcpStream::connect(resolved).await else { - warn!(target_host = %target_host, resolved_ip = %resolved.ip(), target_port, "SSH proxy: failed to connect to sandbox"); - return; - }; - let Ok(preface) = build_preface(&uuid::Uuid::new_v4().to_string(), &handshake_secret) - else { - warn!("SSH proxy: failed to build handshake preface"); - return; - }; - if let Err(e) = sandbox_conn.write_all(preface.as_bytes()).await { - warn!(error = %e, "SSH proxy: failed to send handshake preface"); - return; - } - let mut response = String::new(); - if let Err(e) = read_line(&mut sandbox_conn, &mut response).await { - warn!(error = %e, "SSH proxy: failed to read handshake response"); - return; - } - if response.trim() != "OK" { - warn!(response = %response.trim(), "SSH proxy: handshake rejected by sandbox"); - return; - } - let _ = tokio::io::copy_bidirectional(&mut client_conn, &mut sandbox_conn).await; - }); - - Ok((port, task)) -} - fn build_preface( token: &str, secret: &str, @@ -1023,29 +956,6 @@ fn build_preface( Ok(format!("NSSH1 {token} {timestamp} {nonce} {signature}\n")) } -async fn read_line( - stream: &mut TcpStream, - buf: &mut String, -) -> Result<(), Box> { - let mut bytes = Vec::new(); - loop { - let mut byte = [0_u8; 1]; - let n = stream.read(&mut byte).await?; - if n == 0 { - break; - } - if byte[0] == b'\n' { - break; - } - bytes.push(byte[0]); - if bytes.len() > 1024 { - break; - } - } - *buf = String::from_utf8_lossy(&bytes).to_string(); - Ok(()) -} - fn hmac_sha256(key: &[u8], data: &[u8]) -> String { use hmac::{Hmac, Mac}; use sha2::Sha256; @@ -1161,59 +1071,6 @@ mod tests { assert!(build_remote_exec_command(&req).is_err()); } - // ---- is_safe_ssh_proxy_target ---- - - #[test] - fn ssh_proxy_target_allows_pod_network_ips() { - use std::net::{IpAddr, Ipv4Addr}; - assert!(is_safe_ssh_proxy_target(IpAddr::V4(Ipv4Addr::new( - 10, 0, 0, 5 - )))); - 
assert!(is_safe_ssh_proxy_target(IpAddr::V4(Ipv4Addr::new( - 172, 16, 0, 1 - )))); - assert!(is_safe_ssh_proxy_target(IpAddr::V4(Ipv4Addr::new( - 192, 168, 1, 100 - )))); - } - - #[test] - fn ssh_proxy_target_blocks_loopback() { - use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; - assert!(!is_safe_ssh_proxy_target(IpAddr::V4(Ipv4Addr::new( - 127, 0, 0, 1 - )))); - assert!(!is_safe_ssh_proxy_target(IpAddr::V4(Ipv4Addr::new( - 127, 0, 0, 2 - )))); - assert!(!is_safe_ssh_proxy_target(IpAddr::V6(Ipv6Addr::LOCALHOST))); - } - - #[test] - fn ssh_proxy_target_blocks_link_local() { - use std::net::{IpAddr, Ipv4Addr}; - assert!(!is_safe_ssh_proxy_target(IpAddr::V4(Ipv4Addr::new( - 169, 254, 169, 254 - )))); - assert!(!is_safe_ssh_proxy_target(IpAddr::V4(Ipv4Addr::new( - 169, 254, 0, 1 - )))); - } - - #[test] - fn ssh_proxy_target_blocks_ipv4_mapped_ipv6_loopback() { - use std::net::IpAddr; - let ip: IpAddr = "::ffff:127.0.0.1".parse().unwrap(); - assert!(!is_safe_ssh_proxy_target(ip)); - } - - #[test] - fn ssh_proxy_target_blocks_ipv4_mapped_ipv6_link_local() { - use std::net::IpAddr; - let ip: IpAddr = "::ffff:169.254.169.254".parse().unwrap(); - assert!(!is_safe_ssh_proxy_target(ip)); - } - // ---- petname / generate_name ---- #[test] diff --git a/crates/openshell-server/src/http.rs b/crates/openshell-server/src/http.rs index afe7edc1b..aefe4181b 100644 --- a/crates/openshell-server/src/http.rs +++ b/crates/openshell-server/src/http.rs @@ -49,6 +49,7 @@ pub fn health_router() -> Router { pub fn http_router(state: Arc) -> Router { health_router() .merge(crate::ssh_tunnel::router(state.clone())) + .merge(crate::relay::router(state.clone())) .merge(crate::ws_tunnel::router(state.clone())) .merge(crate::auth::router(state)) } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index a8d820b4d..346aaa172 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -16,9 +16,11 @@ mod http; mod inference; mod multiplex; 
mod persistence; +mod relay; mod sandbox_index; mod sandbox_watch; mod ssh_tunnel; +pub(crate) mod supervisor_session; mod tls; pub mod tracing_bus; mod ws_tunnel; @@ -73,6 +75,9 @@ pub struct ServerState { /// set/delete operation, including the precedence check on sandbox /// mutations that reads global state. pub settings_mutex: tokio::sync::Mutex<()>, + + /// Registry of active supervisor sessions and pending relay channels. + pub supervisor_sessions: supervisor_session::SupervisorSessionRegistry, } fn is_benign_tls_handshake_failure(error: &std::io::Error) -> bool { @@ -103,6 +108,7 @@ impl ServerState { ssh_connections_by_token: Mutex::new(HashMap::new()), ssh_connections_by_sandbox: Mutex::new(HashMap::new()), settings_mutex: tokio::sync::Mutex::new(()), + supervisor_sessions: supervisor_session::SupervisorSessionRegistry::new(), } } } diff --git a/crates/openshell-server/src/relay.rs b/crates/openshell-server/src/relay.rs new file mode 100644 index 000000000..662fe4d99 --- /dev/null +++ b/crates/openshell-server/src/relay.rs @@ -0,0 +1,67 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! HTTP CONNECT relay endpoint for supervisor-initiated reverse tunnels. +//! +//! When the gateway sends a `RelayOpen` message over the supervisor's gRPC +//! session, the supervisor opens `CONNECT /relay/{channel_id}` back to this +//! endpoint. The gateway then bridges the supervisor's upgraded stream with +//! the client's SSH tunnel or exec proxy. 
+ +use axum::{ + Router, extract::Path, extract::State, http::Method, response::IntoResponse, routing::any, +}; +use http::StatusCode; +use hyper::upgrade::OnUpgrade; +use hyper_util::rt::TokioIo; +use std::sync::Arc; +use tokio::io::AsyncWriteExt; +use tracing::{info, warn}; + +use crate::ServerState; + +pub fn router(state: Arc) -> Router { + Router::new() + .route("/relay/{channel_id}", any(relay_connect)) + .with_state(state) +} + +async fn relay_connect( + State(state): State>, + Path(channel_id): Path, + req: hyper::Request, +) -> impl IntoResponse { + if req.method() != Method::CONNECT { + return StatusCode::METHOD_NOT_ALLOWED.into_response(); + } + + // Claim the pending relay. This consumes the entry — it cannot be reused. + let supervisor_stream = match state.supervisor_sessions.claim_relay(&channel_id) { + Ok(stream) => stream, + Err(_) => { + warn!(channel_id = %channel_id, "relay: unknown or expired channel"); + return StatusCode::NOT_FOUND.into_response(); + } + }; + + info!(channel_id = %channel_id, "relay: supervisor connected, upgrading"); + + // Upgrade the HTTP connection to a raw byte stream and bridge it to + // the DuplexStream that connects to the gateway-side waiter. 
+ let on_upgrade: OnUpgrade = hyper::upgrade::on(req); + tokio::spawn(async move { + match on_upgrade.await { + Ok(upgraded) => { + let mut upgraded = TokioIo::new(upgraded); + let mut supervisor = supervisor_stream; + let _ = tokio::io::copy_bidirectional(&mut upgraded, &mut supervisor).await; + let _ = AsyncWriteExt::shutdown(&mut upgraded).await; + } + Err(e) => { + warn!(channel_id = %channel_id, error = %e, "relay: upgrade failed"); + } + } + }); + + StatusCode::SWITCHING_PROTOCOLS.into_response() +} diff --git a/crates/openshell-server/src/ssh_tunnel.rs b/crates/openshell-server/src/ssh_tunnel.rs index 536513ccd..de14976ac 100644 --- a/crates/openshell-server/src/ssh_tunnel.rs +++ b/crates/openshell-server/src/ssh_tunnel.rs @@ -6,15 +6,12 @@ use axum::{Router, extract::State, http::Method, response::IntoResponse, routing::any}; use http::StatusCode; use hyper::Request; -use hyper::upgrade::Upgraded; use hyper_util::rt::TokioIo; use openshell_core::proto::{Sandbox, SandboxPhase, SshSession}; use prost::Message; -use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::net::TcpStream; use tracing::{info, warn}; use uuid::Uuid; @@ -23,7 +20,6 @@ use crate::persistence::{ObjectId, ObjectName, ObjectType, Store}; const HEADER_SANDBOX_ID: &str = "x-sandbox-id"; const HEADER_TOKEN: &str = "x-sandbox-token"; -const PREFACE_MAGIC: &str = "NSSH1"; /// Maximum concurrent SSH tunnel connections per session token. 
const MAX_CONNECTIONS_PER_TOKEN: u32 = 3; @@ -100,19 +96,15 @@ async fn ssh_connect( return StatusCode::PRECONDITION_FAILED.into_response(); } - let connect_target = match state.compute.resolve_sandbox_endpoint(&sandbox).await { - Ok(crate::compute::ResolvedEndpoint::Ip(ip, port)) => { - ConnectTarget::Ip(SocketAddr::new(ip, port)) - } - Ok(crate::compute::ResolvedEndpoint::Host(host, port)) => ConnectTarget::Host(host, port), - Err(status) if status.code() == tonic::Code::FailedPrecondition => { - return StatusCode::PRECONDITION_FAILED.into_response(); - } - Err(err) => { - warn!(error = %err, "Failed to resolve sandbox endpoint"); + // Open a relay channel through the supervisor session. + let (channel_id, relay_rx) = match state.supervisor_sessions.open_relay(&sandbox_id).await { + Ok(pair) => pair, + Err(status) => { + warn!(sandbox_id = %sandbox_id, error = %status.message(), "SSH tunnel: supervisor session not available"); return StatusCode::BAD_GATEWAY.into_response(); } }; + // Enforce per-token concurrent connection limit. { let mut counts = state.ssh_connections_by_token.lock().unwrap(); @@ -150,20 +142,97 @@ async fn ssh_connect( let upgrade = hyper::upgrade::on(req); tokio::spawn(async move { - match upgrade.await { - Ok(mut upgraded) => { - if let Err(err) = handle_tunnel( - &mut upgraded, - connect_target, - &token_clone, - &handshake_secret, + // Wait for the supervisor's reverse CONNECT to arrive and claim the relay. 
+ let relay_stream = match tokio::time::timeout(Duration::from_secs(10), relay_rx).await { + Ok(Ok(stream)) => stream, + Ok(Err(_)) => { + warn!(sandbox_id = %sandbox_id_clone, channel_id = %channel_id, "SSH tunnel: relay channel dropped"); + decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); + decrement_connection_count( + &state_clone.ssh_connections_by_sandbox, &sandbox_id_clone, - ) - .await - { - warn!(error = %err, "SSH tunnel failure"); + ); + return; + } + Err(_) => { + warn!(sandbox_id = %sandbox_id_clone, channel_id = %channel_id, "SSH tunnel: relay open timed out"); + decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); + decrement_connection_count( + &state_clone.ssh_connections_by_sandbox, + &sandbox_id_clone, + ); + return; + } + }; + + // Send NSSH1 handshake through the relay to the SSH daemon before + // bridging the client's SSH bytes. The relay carries bytes to the + // supervisor which bridges them to the local SSH daemon on loopback. + let (mut relay_read, mut relay_write) = tokio::io::split(relay_stream); + let preface = match build_preface(&token_clone, &handshake_secret) { + Ok(p) => p, + Err(e) => { + warn!(error = %e, "SSH tunnel: failed to build NSSH1 preface"); + decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); + decrement_connection_count( + &state_clone.ssh_connections_by_sandbox, + &sandbox_id_clone, + ); + return; + } + }; + if let Err(e) = relay_write.write_all(preface.as_bytes()).await { + warn!(error = %e, "SSH tunnel: failed to send NSSH1 preface through relay"); + decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); + decrement_connection_count(&state_clone.ssh_connections_by_sandbox, &sandbox_id_clone); + return; + } + + // Read handshake response from the SSH daemon through the relay. 
+ let mut response_buf = Vec::new(); + loop { + let mut byte = [0u8; 1]; + match relay_read.read(&mut byte).await { + Ok(0) => break, + Ok(_) => { + if byte[0] == b'\n' { + break; + } + response_buf.push(byte[0]); + if response_buf.len() > 1024 { + break; + } + } + Err(e) => { + warn!(error = %e, "SSH tunnel: failed to read NSSH1 response from relay"); + decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); + decrement_connection_count( + &state_clone.ssh_connections_by_sandbox, + &sandbox_id_clone, + ); + return; } } + } + let response = String::from_utf8_lossy(&response_buf); + if response.trim() != "OK" { + warn!(response = %response.trim(), "SSH tunnel: NSSH1 handshake rejected by sandbox"); + decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); + decrement_connection_count(&state_clone.ssh_connections_by_sandbox, &sandbox_id_clone); + return; + } + + info!(sandbox_id = %sandbox_id_clone, channel_id = %channel_id, "SSH tunnel: NSSH1 handshake OK, bridging client"); + + // Reunite the split relay halves and bridge with the client's upgraded stream. + let mut relay = relay_read.unsplit(relay_write); + + match upgrade.await { + Ok(upgraded) => { + let mut upgraded = TokioIo::new(upgraded); + let _ = tokio::io::copy_bidirectional(&mut upgraded, &mut relay).await; + let _ = AsyncWriteExt::shutdown(&mut upgraded).await; + } Err(err) => { warn!(error = %err, "SSH upgrade failed"); } @@ -177,90 +246,6 @@ async fn ssh_connect( StatusCode::OK.into_response() } -async fn handle_tunnel( - upgraded: &mut Upgraded, - target: ConnectTarget, - token: &str, - secret: &str, - sandbox_id: &str, -) -> Result<(), Box> { - // The sandbox pod may not be network-reachable immediately after the CRD - // reports Ready (DNS propagation, pod IP assignment, SSH server startup). - // Retry the TCP connection with exponential backoff. 
- let mut upstream = None; - let mut last_err = None; - let delays = [ - Duration::from_millis(100), - Duration::from_millis(250), - Duration::from_millis(500), - Duration::from_secs(1), - Duration::from_secs(2), - Duration::from_secs(5), - Duration::from_secs(10), - Duration::from_secs(15), - ]; - let target_desc = match &target { - ConnectTarget::Ip(addr) => format!("{addr}"), - ConnectTarget::Host(host, port) => format!("{host}:{port}"), - }; - info!(sandbox_id = %sandbox_id, target = %target_desc, "SSH tunnel: connecting to sandbox"); - for (attempt, delay) in std::iter::once(&Duration::ZERO) - .chain(delays.iter()) - .enumerate() - { - if !delay.is_zero() { - info!(sandbox_id = %sandbox_id, attempt = attempt + 1, delay_ms = delay.as_millis() as u64, "SSH tunnel: retrying TCP connect"); - tokio::time::sleep(*delay).await; - } - let result = match &target { - ConnectTarget::Ip(addr) => TcpStream::connect(addr).await, - ConnectTarget::Host(host, port) => TcpStream::connect((host.as_str(), *port)).await, - }; - match result { - Ok(stream) => { - info!( - sandbox_id = %sandbox_id, - attempts = attempt + 1, - "SSH tunnel: TCP connected to sandbox" - ); - upstream = Some(stream); - break; - } - Err(err) => { - info!(sandbox_id = %sandbox_id, attempt = attempt + 1, error = %err, "SSH tunnel: TCP connect failed"); - last_err = Some(err); - } - } - } - let mut upstream = upstream.ok_or_else(|| { - let err = last_err.unwrap(); - format!("failed to connect to sandbox after retries: {err}") - })?; - upstream.set_nodelay(true)?; - info!(sandbox_id = %sandbox_id, "SSH tunnel: sending NSSH1 handshake preface"); - let preface = build_preface(token, secret)?; - upstream.write_all(preface.as_bytes()).await?; - - info!(sandbox_id = %sandbox_id, "SSH tunnel: waiting for handshake response"); - let mut response = String::new(); - read_line(&mut upstream, &mut response).await?; - info!(sandbox_id = %sandbox_id, response = %response.trim(), "SSH tunnel: handshake response received"); 
- if response.trim() != "OK" { - return Err("sandbox handshake rejected".into()); - } - - info!(sandbox_id = %sandbox_id, "SSH tunnel established"); - let mut upgraded = TokioIo::new(upgraded); - // Discard the result entirely – connection-close errors are expected when - // the SSH session ends and do not represent a failure worth propagating. - let _ = tokio::io::copy_bidirectional(&mut upgraded, &mut upstream).await; - // Gracefully shut down the write-half of the upgraded connection so the - // client receives a clean EOF instead of a TCP RST. This gives SSH time - // to read any remaining protocol data (e.g. exit-status) from its buffer. - let _ = AsyncWriteExt::shutdown(&mut upgraded).await; - Ok(()) -} - fn header_value(headers: &http::HeaderMap, name: &str) -> Result { let value = headers .get(name) @@ -275,6 +260,8 @@ fn header_value(headers: &http::HeaderMap, name: &str) -> Result Result<(), Box> { - let mut bytes = Vec::new(); - loop { - let mut byte = [0u8; 1]; - let n = stream.read(&mut byte).await?; - if n == 0 { - break; - } - if byte[0] == b'\n' { - break; - } - bytes.push(byte[0]); - if bytes.len() > 1024 { - break; - } - } - *buf = String::from_utf8_lossy(&bytes).to_string(); - Ok(()) -} - fn hmac_sha256(key: &[u8], data: &[u8]) -> String { use hmac::{Hmac, Mac}; use sha2::Sha256; @@ -345,11 +309,6 @@ impl ObjectName for SshSession { } } -enum ConnectTarget { - Ip(SocketAddr), - Host(String, u16), -} - /// Decrement a connection count entry, removing it if it reaches zero. fn decrement_connection_count( counts: &std::sync::Mutex>, diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs new file mode 100644 index 000000000..ed33f8e15 --- /dev/null +++ b/crates/openshell-server/src/supervisor_session.rs @@ -0,0 +1,440 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use tokio::sync::{mpsc, oneshot}; +use tokio_stream::wrappers::ReceiverStream; +use tonic::{Request, Response, Status}; +use tracing::{info, warn}; +use uuid::Uuid; + +use openshell_core::proto::{ + GatewayMessage, RelayOpen, SessionAccepted, SupervisorMessage, gateway_message, + supervisor_message, +}; + +use crate::ServerState; + +const HEARTBEAT_INTERVAL_SECS: u32 = 15; +const RELAY_PENDING_TIMEOUT: Duration = Duration::from_secs(10); + +// --------------------------------------------------------------------------- +// Session registry +// --------------------------------------------------------------------------- + +/// A live supervisor session handle. +struct LiveSession { + #[allow(dead_code)] + sandbox_id: String, + tx: mpsc::Sender, + #[allow(dead_code)] + connected_at: Instant, +} + +/// Holds a oneshot sender that will deliver the upgraded relay stream. +type RelayStreamSender = oneshot::Sender; + +/// Registry of active supervisor sessions and pending relay channels. +#[derive(Default)] +pub struct SupervisorSessionRegistry { + /// sandbox_id -> live session handle. + sessions: Mutex>, + /// channel_id -> oneshot sender for the reverse CONNECT stream. + pending_relays: Mutex>, +} + +struct PendingRelay { + sender: RelayStreamSender, + created_at: Instant, +} + +impl std::fmt::Debug for SupervisorSessionRegistry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let session_count = self.sessions.lock().unwrap().len(); + let pending_count = self.pending_relays.lock().unwrap().len(); + f.debug_struct("SupervisorSessionRegistry") + .field("sessions", &session_count) + .field("pending_relays", &pending_count) + .finish() + } +} + +impl SupervisorSessionRegistry { + pub fn new() -> Self { + Self::default() + } + + /// Register a live supervisor session for the given sandbox. 
+ /// + /// Returns the previous session's sender (if any) so the caller can close it. + fn register( + &self, + sandbox_id: String, + tx: mpsc::Sender, + ) -> Option> { + let mut sessions = self.sessions.lock().unwrap(); + let previous = sessions.remove(&sandbox_id).map(|s| s.tx); + sessions.insert( + sandbox_id.clone(), + LiveSession { + sandbox_id, + tx, + connected_at: Instant::now(), + }, + ); + previous + } + + /// Remove the session for a sandbox. + fn remove(&self, sandbox_id: &str) { + self.sessions.lock().unwrap().remove(sandbox_id); + } + + /// Open a relay channel: sends RelayOpen to the supervisor and returns a + /// stream that will be connected once the supervisor's reverse HTTP CONNECT + /// arrives. + /// + /// Returns `(channel_id, receiver_for_relay_stream)`. + pub async fn open_relay( + &self, + sandbox_id: &str, + ) -> Result<(String, oneshot::Receiver), Status> { + let channel_id = Uuid::new_v4().to_string(); + + // Look up the session and send RelayOpen. + let tx = { + let sessions = self.sessions.lock().unwrap(); + let session = sessions + .get(sandbox_id) + .ok_or_else(|| Status::unavailable("supervisor session not connected"))?; + session.tx.clone() + }; + + // Register the pending relay before sending RelayOpen to avoid a race. + let (relay_tx, relay_rx) = oneshot::channel(); + { + let mut pending = self.pending_relays.lock().unwrap(); + pending.insert( + channel_id.clone(), + PendingRelay { + sender: relay_tx, + created_at: Instant::now(), + }, + ); + } + + let msg = GatewayMessage { + payload: Some(gateway_message::Payload::RelayOpen(RelayOpen { + channel_id: channel_id.clone(), + })), + }; + + if tx.send(msg).await.is_err() { + // Session dropped between our lookup and send. + self.pending_relays.lock().unwrap().remove(&channel_id); + return Err(Status::unavailable("supervisor session disconnected")); + } + + Ok((channel_id, relay_rx)) + } + + /// Claim a pending relay channel. 
Called by the /relay/{channel_id} HTTP handler + /// when the supervisor's reverse CONNECT arrives. + /// + /// Returns the DuplexStream half that the supervisor side should read/write. + pub fn claim_relay(&self, channel_id: &str) -> Result { + let pending = { + let mut map = self.pending_relays.lock().unwrap(); + map.remove(channel_id) + .ok_or_else(|| Status::not_found("unknown or expired relay channel"))? + }; + + if pending.created_at.elapsed() > RELAY_PENDING_TIMEOUT { + return Err(Status::deadline_exceeded("relay channel timed out")); + } + + // Create a duplex stream pair: one end for the gateway bridge, one for + // the supervisor HTTP CONNECT handler. + let (gateway_stream, supervisor_stream) = tokio::io::duplex(64 * 1024); + + // Send the gateway-side stream to the waiter (ssh_tunnel or exec handler). + if pending.sender.send(gateway_stream).is_err() { + return Err(Status::internal("relay requester dropped")); + } + + Ok(supervisor_stream) + } + + /// Remove all pending relays that have exceeded the timeout. + pub fn reap_expired_relays(&self) { + let mut map = self.pending_relays.lock().unwrap(); + map.retain(|_, pending| pending.created_at.elapsed() <= RELAY_PENDING_TIMEOUT); + } + + /// Clean up all state for a sandbox (session + pending relays). + pub fn cleanup_sandbox(&self, sandbox_id: &str) { + self.remove(sandbox_id); + } +} + +// --------------------------------------------------------------------------- +// ConnectSupervisor gRPC handler +// --------------------------------------------------------------------------- + +pub async fn handle_connect_supervisor( + state: &Arc, + request: Request>, +) -> Result< + Response< + Pin> + Send + 'static>>, + >, + Status, +> { + let mut inbound = request.into_inner(); + + // Step 1: Wait for SupervisorHello. + let hello = match inbound.message().await? 
{ + Some(msg) => match msg.payload { + Some(supervisor_message::Payload::Hello(hello)) => hello, + _ => return Err(Status::invalid_argument("expected SupervisorHello")), + }, + None => return Err(Status::invalid_argument("stream closed before hello")), + }; + + let sandbox_id = hello.sandbox_id.clone(); + if sandbox_id.is_empty() { + return Err(Status::invalid_argument("sandbox_id is required")); + } + + let session_id = Uuid::new_v4().to_string(); + info!( + sandbox_id = %sandbox_id, + session_id = %session_id, + instance_id = %hello.instance_id, + "supervisor session: accepted" + ); + + // Step 2: Create the outbound channel and register the session. + let (tx, rx) = mpsc::channel::(64); + if let Some(_previous_tx) = state + .supervisor_sessions + .register(sandbox_id.clone(), tx.clone()) + { + info!(sandbox_id = %sandbox_id, "supervisor session: superseded previous session"); + } + + // Step 3: Send SessionAccepted. + let accepted = GatewayMessage { + payload: Some(gateway_message::Payload::SessionAccepted(SessionAccepted { + session_id: session_id.clone(), + heartbeat_interval_secs: HEARTBEAT_INTERVAL_SECS, + })), + }; + if tx.send(accepted).await.is_err() { + state.supervisor_sessions.remove(&sandbox_id); + return Err(Status::internal("failed to send session accepted")); + } + + // Step 4: Spawn the session loop that reads inbound messages. + let state_clone = Arc::clone(state); + let sandbox_id_clone = sandbox_id.clone(); + tokio::spawn(async move { + run_session_loop( + &state_clone, + &sandbox_id_clone, + &session_id, + &tx, + &mut inbound, + ) + .await; + state_clone.supervisor_sessions.remove(&sandbox_id_clone); + info!(sandbox_id = %sandbox_id_clone, session_id = %session_id, "supervisor session: ended"); + }); + + // Return the outbound stream. 
+ let stream = ReceiverStream::new(rx); + let stream: Pin< + Box> + Send + 'static>, + > = Box::pin(tokio_stream::StreamExt::map(stream, Ok)); + + Ok(Response::new(stream)) +} + +async fn run_session_loop( + _state: &Arc, + sandbox_id: &str, + session_id: &str, + tx: &mpsc::Sender, + inbound: &mut tonic::Streaming, +) { + let heartbeat_interval = Duration::from_secs(u64::from(HEARTBEAT_INTERVAL_SECS)); + let mut heartbeat_timer = tokio::time::interval(heartbeat_interval); + // Skip the first immediate tick. + heartbeat_timer.tick().await; + + loop { + tokio::select! { + msg = inbound.message() => { + match msg { + Ok(Some(msg)) => { + handle_supervisor_message(sandbox_id, session_id, msg); + } + Ok(None) => { + info!(sandbox_id = %sandbox_id, session_id = %session_id, "supervisor session: stream closed by supervisor"); + break; + } + Err(e) => { + warn!(sandbox_id = %sandbox_id, session_id = %session_id, error = %e, "supervisor session: stream error"); + break; + } + } + } + _ = heartbeat_timer.tick() => { + let hb = GatewayMessage { + payload: Some(gateway_message::Payload::Heartbeat( + openshell_core::proto::GatewayHeartbeat {}, + )), + }; + if tx.send(hb).await.is_err() { + info!(sandbox_id = %sandbox_id, session_id = %session_id, "supervisor session: outbound channel closed"); + break; + } + } + } + } +} + +fn handle_supervisor_message(sandbox_id: &str, session_id: &str, msg: SupervisorMessage) { + match msg.payload { + Some(supervisor_message::Payload::Heartbeat(_)) => { + // Heartbeat received — nothing to do for now. 
+ } + Some(supervisor_message::Payload::RelayOpenResult(result)) => { + if result.success { + info!( + sandbox_id = %sandbox_id, + session_id = %session_id, + channel_id = %result.channel_id, + "supervisor session: relay opened successfully" + ); + } else { + warn!( + sandbox_id = %sandbox_id, + session_id = %session_id, + channel_id = %result.channel_id, + error = %result.error, + "supervisor session: relay open failed" + ); + } + } + Some(supervisor_message::Payload::RelayClose(close)) => { + info!( + sandbox_id = %sandbox_id, + session_id = %session_id, + channel_id = %close.channel_id, + reason = %close.reason, + "supervisor session: relay closed by supervisor" + ); + } + _ => { + warn!( + sandbox_id = %sandbox_id, + session_id = %session_id, + "supervisor session: unexpected message type" + ); + } + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn registry_register_and_lookup() { + let registry = SupervisorSessionRegistry::new(); + let (tx, _rx) = mpsc::channel(1); + + assert!(registry.register("sandbox-1".to_string(), tx).is_none()); + + // Should find the session. 
+ let sessions = registry.sessions.lock().unwrap(); + assert!(sessions.contains_key("sandbox-1")); + } + + #[test] + fn registry_supersedes_previous_session() { + let registry = SupervisorSessionRegistry::new(); + let (tx1, _rx1) = mpsc::channel(1); + let (tx2, _rx2) = mpsc::channel(1); + + assert!(registry.register("sandbox-1".to_string(), tx1).is_none()); + assert!(registry.register("sandbox-1".to_string(), tx2).is_some()); + } + + #[test] + fn registry_remove() { + let registry = SupervisorSessionRegistry::new(); + let (tx, _rx) = mpsc::channel(1); + registry.register("sandbox-1".to_string(), tx); + + registry.remove("sandbox-1"); + let sessions = registry.sessions.lock().unwrap(); + assert!(!sessions.contains_key("sandbox-1")); + } + + #[test] + fn claim_relay_unknown_channel() { + let registry = SupervisorSessionRegistry::new(); + let result = registry.claim_relay("nonexistent"); + assert!(result.is_err()); + } + + #[test] + fn claim_relay_success() { + let registry = SupervisorSessionRegistry::new(); + let (relay_tx, _relay_rx) = oneshot::channel(); + registry.pending_relays.lock().unwrap().insert( + "ch-1".to_string(), + PendingRelay { + sender: relay_tx, + created_at: Instant::now(), + }, + ); + + let result = registry.claim_relay("ch-1"); + assert!(result.is_ok()); + // Should be consumed. 
+ assert!(!registry.pending_relays.lock().unwrap().contains_key("ch-1")); + } + + #[test] + fn reap_expired_relays() { + let registry = SupervisorSessionRegistry::new(); + let (relay_tx, _relay_rx) = oneshot::channel(); + registry.pending_relays.lock().unwrap().insert( + "ch-old".to_string(), + PendingRelay { + sender: relay_tx, + created_at: Instant::now() - Duration::from_secs(60), + }, + ); + + registry.reap_expired_relays(); + assert!( + !registry + .pending_relays + .lock() + .unwrap() + .contains_key("ch-old") + ); + } +} diff --git a/crates/openshell-server/tests/auth_endpoint_integration.rs b/crates/openshell-server/tests/auth_endpoint_integration.rs index 7c6545873..cd2abe157 100644 --- a/crates/openshell-server/tests/auth_endpoint_integration.rs +++ b/crates/openshell-server/tests/auth_endpoint_integration.rs @@ -528,6 +528,9 @@ impl openshell_core::proto::open_shell_server::OpenShell for TestOpenShell { type ExecSandboxStream = tokio_stream::wrappers::ReceiverStream< Result, >; + type ConnectSupervisorStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; async fn watch_sandbox( &self, @@ -663,6 +666,13 @@ impl openshell_core::proto::open_shell_server::OpenShell for TestOpenShell { { Err(tonic::Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } /// Test 7: Plaintext server (no TLS) accepts both gRPC and HTTP. 
diff --git a/crates/openshell-server/tests/edge_tunnel_auth.rs b/crates/openshell-server/tests/edge_tunnel_auth.rs index 22f08434d..a5d6a88e9 100644 --- a/crates/openshell-server/tests/edge_tunnel_auth.rs +++ b/crates/openshell-server/tests/edge_tunnel_auth.rs @@ -37,13 +37,14 @@ use hyper_util::{ use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RevokeSshSessionRequest, - RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, - UpdateProviderRequest, WatchSandboxRequest, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, + SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, + WatchSandboxRequest, open_shell_client::OpenShellClient, open_shell_server::{OpenShell, OpenShellServer}, }; @@ -186,6 +187,7 @@ impl OpenShell for TestOpenShell { type WatchSandboxStream = ReceiverStream>; type ExecSandboxStream = ReceiverStream>; + type ConnectSupervisorStream = ReceiverStream>; async fn watch_sandbox( &self, @@ -307,6 +309,13 @@ impl 
OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } // --------------------------------------------------------------------------- diff --git a/crates/openshell-server/tests/multiplex_integration.rs b/crates/openshell-server/tests/multiplex_integration.rs index 1957c5b87..8b93b0989 100644 --- a/crates/openshell-server/tests/multiplex_integration.rs +++ b/crates/openshell-server/tests/multiplex_integration.rs @@ -11,13 +11,14 @@ use hyper_util::{ use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RevokeSshSessionRequest, - RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, - UpdateProviderRequest, WatchSandboxRequest, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, + SandboxStreamEvent, ServiceStatus, SupervisorMessage, 
UpdateProviderRequest, + WatchSandboxRequest, open_shell_client::OpenShellClient, open_shell_server::{OpenShell, OpenShellServer}, }; @@ -154,6 +155,7 @@ impl OpenShell for TestOpenShell { type WatchSandboxStream = ReceiverStream>; type ExecSandboxStream = ReceiverStream>; + type ConnectSupervisorStream = ReceiverStream>; async fn watch_sandbox( &self, @@ -275,6 +277,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } #[tokio::test] diff --git a/crates/openshell-server/tests/multiplex_tls_integration.rs b/crates/openshell-server/tests/multiplex_tls_integration.rs index 98d5d6256..4d77e8cae 100644 --- a/crates/openshell-server/tests/multiplex_tls_integration.rs +++ b/crates/openshell-server/tests/multiplex_tls_integration.rs @@ -13,13 +13,14 @@ use hyper_util::{ use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RevokeSshSessionRequest, - RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, - UpdateProviderRequest, WatchSandboxRequest, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + 
GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, + SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, + WatchSandboxRequest, open_shell_client::OpenShellClient, open_shell_server::{OpenShell, OpenShellServer}, }; @@ -167,6 +168,7 @@ impl OpenShell for TestOpenShell { type WatchSandboxStream = ReceiverStream>; type ExecSandboxStream = ReceiverStream>; + type ConnectSupervisorStream = ReceiverStream>; async fn watch_sandbox( &self, @@ -288,6 +290,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } /// PKI bundle: CA cert, server cert+key, client cert+key. 
diff --git a/crates/openshell-server/tests/ws_tunnel_integration.rs b/crates/openshell-server/tests/ws_tunnel_integration.rs index 54a7354c8..705e9de49 100644 --- a/crates/openshell-server/tests/ws_tunnel_integration.rs +++ b/crates/openshell-server/tests/ws_tunnel_integration.rs @@ -40,13 +40,14 @@ use hyper_util::{ use openshell_core::proto::{ CreateProviderRequest, CreateSandboxRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, DeleteSandboxRequest, DeleteSandboxResponse, - ExecSandboxEvent, ExecSandboxRequest, GetGatewayConfigRequest, GetGatewayConfigResponse, - GetProviderRequest, GetSandboxConfigRequest, GetSandboxConfigResponse, - GetSandboxProviderEnvironmentRequest, GetSandboxProviderEnvironmentResponse, GetSandboxRequest, - HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, - ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RevokeSshSessionRequest, - RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, - UpdateProviderRequest, WatchSandboxRequest, + ExecSandboxEvent, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest, + GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest, + GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest, + GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, + ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, + ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, + SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, + WatchSandboxRequest, open_shell_client::OpenShellClient, open_shell_server::{OpenShell, OpenShellServer}, }; @@ -180,6 +181,7 @@ impl OpenShell for TestOpenShell { type WatchSandboxStream = ReceiverStream>; type ExecSandboxStream = ReceiverStream>; + type ConnectSupervisorStream = ReceiverStream>; async fn watch_sandbox( &self, @@ 
-301,6 +303,13 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + async fn connect_supervisor( + &self, + _request: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("not implemented in test")) + } } // --------------------------------------------------------------------------- diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto index 53b0ac27d..68af695e5 100644 --- a/proto/compute_driver.proto +++ b/proto/compute_driver.proto @@ -38,10 +38,6 @@ service ComputeDriver { // Tear down platform resources for a sandbox. rpc DeleteSandbox(DeleteSandboxRequest) returns (DeleteSandboxResponse); - // Resolve the current endpoint for sandbox exec/SSH transport. - rpc ResolveSandboxEndpoint(ResolveSandboxEndpointRequest) - returns (ResolveSandboxEndpointResponse); - // Stream sandbox observations from the platform. rpc WatchSandboxes(WatchSandboxesRequest) returns (stream WatchSandboxesEvent); } @@ -238,27 +234,6 @@ message DeleteSandboxResponse { bool deleted = 1; } -message ResolveSandboxEndpointRequest { - // Sandbox to resolve for exec or SSH connectivity. - DriverSandbox sandbox = 1; -} - -message SandboxEndpoint { - oneof target { - // Direct IP address for the sandbox endpoint. - string ip = 1; - // DNS host name for the sandbox endpoint. - string host = 2; - } - // TCP port for the sandbox endpoint. - uint32 port = 3; -} - -message ResolveSandboxEndpointResponse { - // Current endpoint the gateway should use to reach the sandbox. - SandboxEndpoint endpoint = 1; -} - message WatchSandboxesRequest {} message WatchSandboxesSandboxEvent { diff --git a/proto/openshell.proto b/proto/openshell.proto index 0ee1e8904..53812c977 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -91,6 +91,14 @@ service OpenShell { // Push sandbox supervisor logs to the server (client-streaming). 
rpc PushSandboxLogs(stream PushSandboxLogsRequest) returns (PushSandboxLogsResponse); + // Persistent supervisor-to-gateway session (bidirectional streaming). + // + // The supervisor opens this stream at startup and keeps it alive for the + // sandbox lifetime. The gateway uses it to coordinate relay channels for + // SSH connect and ExecSandbox. SSH bytes flow over separate reverse HTTP + // CONNECT tunnels, not over this stream. + rpc ConnectSupervisor(stream SupervisorMessage) returns (stream GatewayMessage); + // Watch a sandbox and stream updates. // // This stream can include: @@ -704,6 +712,87 @@ message GetSandboxLogsResponse { uint32 buffer_total = 2; } +// --------------------------------------------------------------------------- +// Supervisor session messages +// --------------------------------------------------------------------------- + +// Envelope for supervisor-to-gateway messages on the ConnectSupervisor stream. +message SupervisorMessage { + oneof payload { + SupervisorHello hello = 1; + SupervisorHeartbeat heartbeat = 2; + RelayOpenResult relay_open_result = 3; + RelayClose relay_close = 4; + } +} + +// Envelope for gateway-to-supervisor messages on the ConnectSupervisor stream. +message GatewayMessage { + oneof payload { + SessionAccepted session_accepted = 1; + SessionRejected session_rejected = 2; + GatewayHeartbeat heartbeat = 3; + RelayOpen relay_open = 4; + RelayClose relay_close = 5; + } +} + +// Supervisor identifies itself and the sandbox it manages. +message SupervisorHello { + // Sandbox ID this supervisor manages. + string sandbox_id = 1; + // Supervisor instance ID (e.g. boot id or process epoch). + string instance_id = 2; +} + +// Gateway accepts the supervisor session. +message SessionAccepted { + // Gateway-assigned session ID for this connection. + string session_id = 1; + // Recommended heartbeat interval in seconds. + uint32 heartbeat_interval_secs = 2; +} + +// Gateway rejects the supervisor session. 
+message SessionRejected { + // Human-readable rejection reason. + string reason = 1; +} + +// Supervisor heartbeat. +message SupervisorHeartbeat {} + +// Gateway heartbeat. +message GatewayHeartbeat {} + +// Gateway requests the supervisor to open a relay channel. +// +// On receiving this, the supervisor should open a reverse HTTP CONNECT +// to the gateway's /relay/{channel_id} endpoint and bridge it to the +// local SSH daemon. +message RelayOpen { + // Gateway-allocated channel identifier (UUID). + string channel_id = 1; +} + +// Supervisor reports the result of a relay open request. +message RelayOpenResult { + // Channel identifier from the RelayOpen request. + string channel_id = 1; + // True if the relay was successfully established. + bool success = 2; + // Error message if success is false. + string error = 3; +} + +// Either side requests closure of a relay channel. +message RelayClose { + // Channel identifier to close. + string channel_id = 1; + // Optional reason for closure. + string reason = 2; +} + // --------------------------------------------------------------------------- // Service status // --------------------------------------------------------------------------- From 193bacc7ca5425a99873227c597ab7ab56407abe Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Wed, 15 Apr 2026 21:09:53 -0700 Subject: [PATCH 02/20] fix(server): wait for supervisor session before opening relay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a sandbox first reports Ready, the supervisor session may not have completed its gRPC handshake yet. Instead of failing immediately with 502 / "supervisor session not connected", the relay open now retries with exponential backoff (100ms → 2s) for up to 15 seconds. This fixes the race between K8s marking the pod Ready and the supervisor establishing its ConnectSupervisor session. 
--- crates/openshell-server/src/grpc/sandbox.rs | 6 ++-- crates/openshell-server/src/ssh_tunnel.rs | 10 +++++-- .../src/supervisor_session.rs | 28 +++++++++++++++++++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index cdc7b51dd..58269b46a 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -436,10 +436,12 @@ pub(super) async fn handle_exec_sandbox( return Err(Status::failed_precondition("sandbox is not ready")); } - // Open a relay channel through the supervisor session. + // Open a relay channel through the supervisor session. The session may + // not be established yet right after the sandbox reports Ready, so wait + // briefly for it to appear. let (channel_id, relay_rx) = state .supervisor_sessions - .open_relay(&sandbox.id) + .open_relay_with_wait(&sandbox.id, std::time::Duration::from_secs(15)) .await .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?; diff --git a/crates/openshell-server/src/ssh_tunnel.rs b/crates/openshell-server/src/ssh_tunnel.rs index de14976ac..8317aa7bb 100644 --- a/crates/openshell-server/src/ssh_tunnel.rs +++ b/crates/openshell-server/src/ssh_tunnel.rs @@ -96,8 +96,14 @@ async fn ssh_connect( return StatusCode::PRECONDITION_FAILED.into_response(); } - // Open a relay channel through the supervisor session. - let (channel_id, relay_rx) = match state.supervisor_sessions.open_relay(&sandbox_id).await { + // Open a relay channel through the supervisor session. The session may + // not be established yet right after the sandbox reports Ready, so wait + // briefly for it to appear. 
+ let (channel_id, relay_rx) = match state + .supervisor_sessions + .open_relay_with_wait(&sandbox_id, Duration::from_secs(15)) + .await + { Ok(pair) => pair, Err(status) => { warn!(sandbox_id = %sandbox_id, error = %status.message(), "SSH tunnel: supervisor session not available"); diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index ed33f8e15..5e3ccf852 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -94,6 +94,34 @@ impl SupervisorSessionRegistry { self.sessions.lock().unwrap().remove(sandbox_id); } + /// Open a relay channel, waiting for the supervisor session to appear. + /// + /// The supervisor session may not be established yet when the sandbox first + /// reports Ready (race between K8s readiness and gRPC session handshake). + /// This method retries the session lookup with short backoff before failing. + pub async fn open_relay_with_wait( + &self, + sandbox_id: &str, + timeout: Duration, + ) -> Result<(String, oneshot::Receiver), Status> { + let deadline = Instant::now() + timeout; + let mut backoff = Duration::from_millis(100); + + loop { + match self.open_relay(sandbox_id).await { + Ok(result) => return Ok(result), + Err(status) if status.code() == tonic::Code::Unavailable => { + if Instant::now() + backoff > deadline { + return Err(status); + } + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(Duration::from_secs(2)); + } + Err(status) => return Err(status), + } + } + } + /// Open a relay channel: sends RelayOpen to the supervisor and returns a /// stream that will be connected once the supervisor's reverse HTTP CONNECT /// arrives. 
From c698f5304530fc703d42095beb11156235c49974 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Thu, 16 Apr 2026 11:51:01 -0700 Subject: [PATCH 03/20] refactor(server): harden supervisor session relay lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three related changes: 1. Fold the session-wait into `open_relay` itself via a new `wait_for_session` helper with exponential backoff (100ms → 2s). Callers pass an explicit `session_wait_timeout`: - SSH connect uses 30s — it typically runs right after `sandbox create`, so the timeout has to cover a cold supervisor's TLS + gRPC handshake. - ExecSandbox uses 15s — during normal operation it only needs to cover a transient supervisor reconnect window. This covers both the startup race (pod Ready before the supervisor's ConnectSupervisor stream is up) and mid-lifetime reconnects after a network blip or gateway/supervisor restart — both look identical to the caller. 2. Fix a supersede cleanup race. `LiveSession` now tracks a `session_id`, and `remove_if_current(sandbox_id, session_id)` only evicts when the registered entry still matches. Previously an old session's cleanup could run after a reconnect had already registered the new session, unconditionally removing the live registration. 3. Wire up `spawn_relay_reaper` alongside the existing SSH session reaper so expired pending relay entries (supervisor acknowledged RelayOpen but never opened the reverse CONNECT) are swept every 30s instead of leaking until someone tries to claim them. Adds 12 unit tests covering: open_relay happy path, timeout, mid-wait session appearance, closed-receiver failure, supersede routing; claim_relay unknown/expired/receiver-dropped/round-trip; and the remove_if_current cleanup-race regression. 
--- crates/openshell-server/src/grpc/sandbox.rs | 9 +- crates/openshell-server/src/lib.rs | 1 + crates/openshell-server/src/ssh_tunnel.rs | 10 +- .../src/supervisor_session.rs | 425 ++++++++++++++++-- 4 files changed, 391 insertions(+), 54 deletions(-) diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index 58269b46a..bdda63d6a 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -436,12 +436,13 @@ pub(super) async fn handle_exec_sandbox( return Err(Status::failed_precondition("sandbox is not ready")); } - // Open a relay channel through the supervisor session. The session may - // not be established yet right after the sandbox reports Ready, so wait - // briefly for it to appear. + // Open a relay channel through the supervisor session. Use a 15s + // session-wait timeout — enough to cover a transient supervisor + // reconnect, but shorter than `/connect/ssh` since `ExecSandbox` is + // typically called during normal operation (not right after create). 
let (channel_id, relay_rx) = state .supervisor_sessions - .open_relay_with_wait(&sandbox.id, std::time::Duration::from_secs(15)) + .open_relay(&sandbox.id, std::time::Duration::from_secs(15)) .await .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?; diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 346aaa172..cbef28b0e 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -154,6 +154,7 @@ pub async fn run_server(config: Config, tracing_log_bus: TracingLogBus) -> Resul state.compute.spawn_watchers(); ssh_tunnel::spawn_session_reaper(store.clone(), std::time::Duration::from_secs(3600)); + supervisor_session::spawn_relay_reaper(state.clone(), std::time::Duration::from_secs(30)); // Create the multiplexed service let service = MultiplexService::new(state.clone()); diff --git a/crates/openshell-server/src/ssh_tunnel.rs b/crates/openshell-server/src/ssh_tunnel.rs index 8317aa7bb..8b7d6b48d 100644 --- a/crates/openshell-server/src/ssh_tunnel.rs +++ b/crates/openshell-server/src/ssh_tunnel.rs @@ -96,12 +96,14 @@ async fn ssh_connect( return StatusCode::PRECONDITION_FAILED.into_response(); } - // Open a relay channel through the supervisor session. The session may - // not be established yet right after the sandbox reports Ready, so wait - // briefly for it to appear. + // Open a relay channel through the supervisor session. Use a generous + // 30s session-wait timeout because `/connect/ssh` is typically called + // immediately after `sandbox create`, so we need to cover the supervisor's + // initial TLS + gRPC handshake on a cold-started pod. The old + // direct-connect path tolerated ~34s here for similar reasons. 
let (channel_id, relay_rx) = match state .supervisor_sessions - .open_relay_with_wait(&sandbox_id, Duration::from_secs(15)) + .open_relay(&sandbox_id, Duration::from_secs(30)) .await { Ok(pair) => pair, diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index 5e3ccf852..d79540d42 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -21,6 +21,10 @@ use crate::ServerState; const HEARTBEAT_INTERVAL_SECS: u32 = 15; const RELAY_PENDING_TIMEOUT: Duration = Duration::from_secs(10); +/// Initial backoff between session-availability polls in `wait_for_session`. +const SESSION_WAIT_INITIAL_BACKOFF: Duration = Duration::from_millis(100); +/// Maximum backoff between session-availability polls in `wait_for_session`. +const SESSION_WAIT_MAX_BACKOFF: Duration = Duration::from_secs(2); // --------------------------------------------------------------------------- // Session registry @@ -30,6 +34,9 @@ const RELAY_PENDING_TIMEOUT: Duration = Duration::from_secs(10); struct LiveSession { #[allow(dead_code)] sandbox_id: String, + /// Uniquely identifies this session instance. Used by cleanup to avoid + /// removing a session that has since been superseded by a reconnect. + session_id: String, tx: mpsc::Sender, #[allow(dead_code)] connected_at: Instant, @@ -74,6 +81,7 @@ impl SupervisorSessionRegistry { fn register( &self, sandbox_id: String, + session_id: String, tx: mpsc::Sender, ) -> Option> { let mut sessions = self.sessions.lock().unwrap(); @@ -82,6 +90,7 @@ impl SupervisorSessionRegistry { sandbox_id.clone(), LiveSession { sandbox_id, + session_id, tx, connected_at: Instant::now(), }, @@ -94,53 +103,87 @@ impl SupervisorSessionRegistry { self.sessions.lock().unwrap().remove(sandbox_id); } - /// Open a relay channel, waiting for the supervisor session to appear. 
+ /// Remove the session only if its `session_id` matches the one we are + /// cleaning up. Returns `true` if the entry was removed. /// - /// The supervisor session may not be established yet when the sandbox first - /// reports Ready (race between K8s readiness and gRPC session handshake). - /// This method retries the session lookup with short backoff before failing. - pub async fn open_relay_with_wait( + /// This guards against the supersede race: an old session's task may + /// finish long after a new session has taken its place. The old task's + /// cleanup must not evict the new registration. + fn remove_if_current(&self, sandbox_id: &str, session_id: &str) -> bool { + let mut sessions = self.sessions.lock().unwrap(); + let is_current = sessions + .get(sandbox_id) + .is_some_and(|s| s.session_id == session_id); + if is_current { + sessions.remove(sandbox_id); + } + is_current + } + + /// Look up the sender for a supervisor session, waiting up to `timeout` + /// for it to appear if absent. + /// + /// Uses exponential backoff (100ms → 2s) while polling the sessions map. 
+ async fn wait_for_session( &self, sandbox_id: &str, timeout: Duration, - ) -> Result<(String, oneshot::Receiver), Status> { + ) -> Result, Status> { let deadline = Instant::now() + timeout; - let mut backoff = Duration::from_millis(100); + let mut backoff = SESSION_WAIT_INITIAL_BACKOFF; loop { - match self.open_relay(sandbox_id).await { - Ok(result) => return Ok(result), - Err(status) if status.code() == tonic::Code::Unavailable => { - if Instant::now() + backoff > deadline { - return Err(status); - } - tokio::time::sleep(backoff).await; - backoff = (backoff * 2).min(Duration::from_secs(2)); - } - Err(status) => return Err(status), + if let Some(tx) = self.lookup_session(sandbox_id) { + return Ok(tx); + } + if Instant::now() + backoff > deadline { + return Err(Status::unavailable("supervisor session not connected")); } + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(SESSION_WAIT_MAX_BACKOFF); } } - /// Open a relay channel: sends RelayOpen to the supervisor and returns a - /// stream that will be connected once the supervisor's reverse HTTP CONNECT - /// arrives. + fn lookup_session(&self, sandbox_id: &str) -> Option> { + self.sessions + .lock() + .unwrap() + .get(sandbox_id) + .map(|s| s.tx.clone()) + } + + /// Open a relay channel and return a receiver for the supervisor-side + /// stream. + /// + /// Sends `RelayOpen` over the supervisor's gRPC session and returns a + /// oneshot receiver that resolves once the supervisor opens its reverse + /// HTTP CONNECT to `/relay/{channel_id}`. + /// + /// If the session is not currently registered, this method waits up to + /// `session_wait_timeout` for it to appear. A session may be temporarily + /// absent for several reasons — all of which look identical from here: /// - /// Returns `(channel_id, receiver_for_relay_stream)`. 
+ /// - startup race: the sandbox just reported Ready but the supervisor's + /// `ConnectSupervisor` gRPC handshake hasn't completed yet + /// - transient disconnect: the session was up but got dropped (network + /// blip, gateway restart, supervisor restart) and the supervisor is + /// in its reconnect backoff loop + /// + /// Callers pick the timeout based on how much patience the caller needs. + /// A first `sandbox connect` right after `sandbox create` may need to + /// wait for the supervisor's initial TLS + gRPC handshake (tens of + /// seconds on a slow cluster), while mid-lifetime calls typically just + /// need to cover a short reconnect window. pub async fn open_relay( &self, sandbox_id: &str, + session_wait_timeout: Duration, ) -> Result<(String, oneshot::Receiver), Status> { - let channel_id = Uuid::new_v4().to_string(); + let tx = self + .wait_for_session(sandbox_id, session_wait_timeout) + .await?; - // Look up the session and send RelayOpen. - let tx = { - let sessions = self.sessions.lock().unwrap(); - let session = sessions - .get(sandbox_id) - .ok_or_else(|| Status::unavailable("supervisor session not connected"))?; - session.tx.clone() - }; + let channel_id = Uuid::new_v4().to_string(); // Register the pending relay before sending RelayOpen to avoid a race. let (relay_tx, relay_rx) = oneshot::channel(); @@ -209,6 +252,23 @@ impl SupervisorSessionRegistry { } } +/// Spawn a background task that periodically reaps expired pending relay +/// entries. +/// +/// Pending entries are normally consumed either when the supervisor opens its +/// reverse CONNECT (via `claim_relay`) or by the gateway-side waiter timing +/// out. If neither happens — e.g., the supervisor crashed after acknowledging +/// `RelayOpen` but before dialing back — the entry would otherwise sit in the +/// map indefinitely. This sweeper bounds that leak. 
+pub fn spawn_relay_reaper(state: Arc, interval: Duration) { + tokio::spawn(async move { + loop { + tokio::time::sleep(interval).await; + state.supervisor_sessions.reap_expired_relays(); + } + }); +} + // --------------------------------------------------------------------------- // ConnectSupervisor gRPC handler // --------------------------------------------------------------------------- @@ -248,11 +308,16 @@ pub async fn handle_connect_supervisor( // Step 2: Create the outbound channel and register the session. let (tx, rx) = mpsc::channel::(64); - if let Some(_previous_tx) = state - .supervisor_sessions - .register(sandbox_id.clone(), tx.clone()) + if let Some(_previous_tx) = + state + .supervisor_sessions + .register(sandbox_id.clone(), session_id.clone(), tx.clone()) { - info!(sandbox_id = %sandbox_id, "supervisor session: superseded previous session"); + info!( + sandbox_id = %sandbox_id, + session_id = %session_id, + "supervisor session: superseded previous session" + ); } // Step 3: Send SessionAccepted. @@ -263,7 +328,11 @@ pub async fn handle_connect_supervisor( })), }; if tx.send(accepted).await.is_err() { - state.supervisor_sessions.remove(&sandbox_id); + // Only evict ourselves — a faster reconnect may already have + // superseded this registration. 
+ state + .supervisor_sessions + .remove_if_current(&sandbox_id, &session_id); return Err(Status::internal("failed to send session accepted")); } @@ -279,8 +348,14 @@ pub async fn handle_connect_supervisor( &mut inbound, ) .await; - state_clone.supervisor_sessions.remove(&sandbox_id_clone); - info!(sandbox_id = %sandbox_id_clone, session_id = %session_id, "supervisor session: ended"); + let still_ours = state_clone + .supervisor_sessions + .remove_if_current(&sandbox_id_clone, &session_id); + if still_ours { + info!(sandbox_id = %sandbox_id_clone, session_id = %session_id, "supervisor session: ended"); + } else { + info!(sandbox_id = %sandbox_id_clone, session_id = %session_id, "supervisor session: ended (already superseded)"); + } }); // Return the outbound stream. @@ -385,15 +460,21 @@ fn handle_supervisor_message(sandbox_id: &str, session_id: &str, msg: Supervisor #[cfg(test)] mod tests { use super::*; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + + // ---- registry: register / remove ---- #[test] fn registry_register_and_lookup() { let registry = SupervisorSessionRegistry::new(); let (tx, _rx) = mpsc::channel(1); - assert!(registry.register("sandbox-1".to_string(), tx).is_none()); + assert!( + registry + .register("sandbox-1".to_string(), "s1".to_string(), tx) + .is_none() + ); - // Should find the session. 
let sessions = registry.sessions.lock().unwrap(); assert!(sessions.contains_key("sandbox-1")); } @@ -404,26 +485,182 @@ mod tests { let (tx1, _rx1) = mpsc::channel(1); let (tx2, _rx2) = mpsc::channel(1); - assert!(registry.register("sandbox-1".to_string(), tx1).is_none()); - assert!(registry.register("sandbox-1".to_string(), tx2).is_some()); + assert!( + registry + .register("sandbox-1".to_string(), "s1".to_string(), tx1) + .is_none() + ); + assert!( + registry + .register("sandbox-1".to_string(), "s2".to_string(), tx2) + .is_some() + ); } #[test] fn registry_remove() { let registry = SupervisorSessionRegistry::new(); let (tx, _rx) = mpsc::channel(1); - registry.register("sandbox-1".to_string(), tx); + registry.register("sandbox-1".to_string(), "s1".to_string(), tx); registry.remove("sandbox-1"); let sessions = registry.sessions.lock().unwrap(); assert!(!sessions.contains_key("sandbox-1")); } + #[test] + fn remove_if_current_removes_matching_session() { + let registry = SupervisorSessionRegistry::new(); + let (tx, _rx) = mpsc::channel(1); + registry.register("sbx".to_string(), "s1".to_string(), tx); + + assert!(registry.remove_if_current("sbx", "s1")); + assert!(!registry.sessions.lock().unwrap().contains_key("sbx")); + } + + #[test] + fn remove_if_current_ignores_stale_session_id() { + let registry = SupervisorSessionRegistry::new(); + let (tx_old, _rx_old) = mpsc::channel(1); + let (tx_new, _rx_new) = mpsc::channel(1); + + // Old session registers, then is superseded by a new session. + registry.register("sbx".to_string(), "s-old".to_string(), tx_old); + registry.register("sbx".to_string(), "s-new".to_string(), tx_new); + + // Cleanup from the old session task runs late. It must NOT evict the + // newly registered session. 
+ assert!(!registry.remove_if_current("sbx", "s-old")); + let sessions = registry.sessions.lock().unwrap(); + assert!( + sessions.contains_key("sbx"), + "new session must still be registered" + ); + assert_eq!(sessions.get("sbx").unwrap().session_id, "s-new"); + } + + #[test] + fn remove_if_current_unknown_sandbox_is_noop() { + let registry = SupervisorSessionRegistry::new(); + assert!(!registry.remove_if_current("sbx-does-not-exist", "s1")); + } + + // ---- open_relay: happy path and wait semantics ---- + + #[tokio::test] + async fn open_relay_sends_relay_open_to_registered_session() { + let registry = SupervisorSessionRegistry::new(); + let (tx, mut rx) = mpsc::channel(4); + registry.register("sbx".to_string(), "s1".to_string(), tx); + + let (channel_id, _relay_rx) = registry + .open_relay("sbx", Duration::from_secs(1)) + .await + .expect("open_relay should succeed when session is live"); + + let msg = rx.recv().await.expect("relay open should be delivered"); + match msg.payload { + Some(gateway_message::Payload::RelayOpen(open)) => { + assert_eq!(open.channel_id, channel_id); + } + other => panic!("expected RelayOpen, got {other:?}"), + } + } + + #[tokio::test] + async fn open_relay_times_out_without_session() { + let registry = SupervisorSessionRegistry::new(); + let err = registry + .open_relay("missing", Duration::from_millis(50)) + .await + .expect_err("open_relay should time out"); + assert_eq!(err.code(), tonic::Code::Unavailable); + } + + #[tokio::test] + async fn open_relay_waits_for_session_to_appear() { + let registry = Arc::new(SupervisorSessionRegistry::new()); + let registry_for_register = Arc::clone(®istry); + + // Register the session after a small delay, shorter than the wait. + tokio::spawn(async move { + tokio::time::sleep(Duration::from_millis(200)).await; + let (tx, mut rx) = mpsc::channel::(4); + // Keep the receiver alive so the send in open_relay succeeds. 
+ tokio::spawn(async move { while rx.recv().await.is_some() {} }); + registry_for_register.register("sbx".to_string(), "s1".to_string(), tx); + }); + + let result = registry.open_relay("sbx", Duration::from_secs(2)).await; + assert!( + result.is_ok(), + "open_relay should succeed when session arrives mid-wait: {result:?}" + ); + } + + #[tokio::test] + async fn open_relay_fails_when_session_receiver_dropped() { + let registry = SupervisorSessionRegistry::new(); + let (tx, rx) = mpsc::channel::(4); + registry.register("sbx".to_string(), "s1".to_string(), tx); + + // Simulate the supervisor's stream going away between lookup and send: + // the receiver held by `ReceiverStream` is dropped. + drop(rx); + + let err = registry + .open_relay("sbx", Duration::from_secs(1)) + .await + .expect_err("open_relay should fail when mpsc is closed"); + assert_eq!(err.code(), tonic::Code::Unavailable); + // The pending-relay entry must have been cleaned up on failure. + assert!(registry.pending_relays.lock().unwrap().is_empty()); + } + + #[tokio::test] + async fn open_relay_uses_newest_session_after_supersede() { + let registry = SupervisorSessionRegistry::new(); + let (tx_old, mut rx_old) = mpsc::channel::(4); + let (tx_new, mut rx_new) = mpsc::channel(4); + + // Hold a clone of the old sender so supersede doesn't close the old + // channel — that way try_recv distinguishes "no message sent" from + // "channel closed". 
+ let _tx_old_alive = tx_old.clone(); + + registry.register("sbx".to_string(), "s-old".to_string(), tx_old); + registry.register("sbx".to_string(), "s-new".to_string(), tx_new); + + let (_channel_id, _relay_rx) = registry + .open_relay("sbx", Duration::from_secs(1)) + .await + .expect("open_relay should succeed"); + + let msg = rx_new + .recv() + .await + .expect("new session should receive RelayOpen"); + assert!(matches!( + msg.payload, + Some(gateway_message::Payload::RelayOpen(_)) + )); + + // The old session must have received no messages — the channel is + // still open but empty. + use tokio::sync::mpsc::error::TryRecvError; + match rx_old.try_recv() { + Err(TryRecvError::Empty) => {} + other => panic!("expected Empty on superseded session, got {other:?}"), + } + } + + // ---- claim_relay: expiry, drop, wiring ---- + #[test] fn claim_relay_unknown_channel() { let registry = SupervisorSessionRegistry::new(); - let result = registry.claim_relay("nonexistent"); - assert!(result.is_err()); + let err = registry.claim_relay("nonexistent").expect_err("should err"); + assert_eq!(err.code(), tonic::Code::NotFound); } #[test] @@ -440,12 +677,86 @@ mod tests { let result = registry.claim_relay("ch-1"); assert!(result.is_ok()); - // Should be consumed. assert!(!registry.pending_relays.lock().unwrap().contains_key("ch-1")); } #[test] - fn reap_expired_relays() { + fn claim_relay_expired_returns_deadline_exceeded() { + let registry = SupervisorSessionRegistry::new(); + let (relay_tx, _relay_rx) = oneshot::channel(); + registry.pending_relays.lock().unwrap().insert( + "ch-old".to_string(), + PendingRelay { + sender: relay_tx, + created_at: Instant::now() - Duration::from_secs(60), + }, + ); + + let err = registry + .claim_relay("ch-old") + .expect_err("expired entry must fail"); + assert_eq!(err.code(), tonic::Code::DeadlineExceeded); + // Entry must have been consumed regardless. 
+ assert!( + !registry + .pending_relays + .lock() + .unwrap() + .contains_key("ch-old") + ); + } + + #[test] + fn claim_relay_receiver_dropped_returns_internal() { + let registry = SupervisorSessionRegistry::new(); + let (relay_tx, relay_rx) = oneshot::channel::(); + drop(relay_rx); // Gateway-side waiter has given up already. + registry.pending_relays.lock().unwrap().insert( + "ch-1".to_string(), + PendingRelay { + sender: relay_tx, + created_at: Instant::now(), + }, + ); + + let err = registry + .claim_relay("ch-1") + .expect_err("should err when receiver is gone"); + assert_eq!(err.code(), tonic::Code::Internal); + } + + #[tokio::test] + async fn claim_relay_connects_both_ends() { + let registry = SupervisorSessionRegistry::new(); + let (relay_tx, relay_rx) = oneshot::channel::(); + registry.pending_relays.lock().unwrap().insert( + "ch-io".to_string(), + PendingRelay { + sender: relay_tx, + created_at: Instant::now(), + }, + ); + + let mut supervisor_side = registry.claim_relay("ch-io").expect("claim should succeed"); + let mut gateway_side = relay_rx.await.expect("gateway side should receive stream"); + + // Supervisor side writes → gateway side reads. + supervisor_side.write_all(b"hello").await.unwrap(); + let mut buf = [0u8; 5]; + gateway_side.read_exact(&mut buf).await.unwrap(); + assert_eq!(&buf, b"hello"); + + // Gateway side writes → supervisor side reads. 
+ gateway_side.write_all(b"world").await.unwrap(); + let mut buf = [0u8; 5]; + supervisor_side.read_exact(&mut buf).await.unwrap(); + assert_eq!(&buf, b"world"); + } + + // ---- reap_expired_relays ---- + + #[test] + fn reap_expired_relays_removes_old_entries() { let registry = SupervisorSessionRegistry::new(); let (relay_tx, _relay_rx) = oneshot::channel(); registry.pending_relays.lock().unwrap().insert( @@ -465,4 +776,26 @@ mod tests { .contains_key("ch-old") ); } + + #[test] + fn reap_expired_relays_keeps_fresh_entries() { + let registry = SupervisorSessionRegistry::new(); + let (relay_tx, _relay_rx) = oneshot::channel(); + registry.pending_relays.lock().unwrap().insert( + "ch-fresh".to_string(), + PendingRelay { + sender: relay_tx, + created_at: Instant::now(), + }, + ); + + registry.reap_expired_relays(); + assert!( + registry + .pending_relays + .lock() + .unwrap() + .contains_key("ch-fresh") + ); + } } From ca53a5b035be4ae4708723763065cc4b1d9187b3 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Thu, 16 Apr 2026 11:59:24 -0700 Subject: [PATCH 04/20] refactor(server,sandbox): move relay data plane onto HTTP/2 streams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the supervisor's reverse HTTP CONNECT data plane with a new `RelayStream` gRPC RPC. Each relay now rides the supervisor's existing `ConnectSupervisor` TCP+TLS+HTTP/2 connection as a new HTTP/2 stream, multiplexed natively. Removes one TLS handshake per SSH/exec session. - proto: add `RelayStream(stream RelayChunk) returns (stream RelayChunk)`; the first chunk from the supervisor carries `channel_id` and no data, matching the existing RelayOpen channel_id. Subsequent chunks are bytes-only — leaving channel_id off data frames avoids a ~36 B per-frame tax that would hurt interactive SSH. - server: add `handle_relay_stream` alongside `handle_connect_supervisor`. 
It reads the first RelayChunk for channel_id, claims the pending relay (same `SupervisorSessionRegistry::claim_relay` path as before, returning a `DuplexStream` half), then bridges that half ↔ the gRPC stream via two tasks (16 KiB chunks). Delete `relay.rs` and its `/relay/{channel_id}` HTTP endpoint. - sandbox: on `RelayOpen`, open a `RelayStream` RPC on the existing `Channel`, send `RelayChunk { channel_id, data: [] }` as the first frame, then bridge the local SSH socket. Drop `open_reverse_connect`, `send_connect_request`, `connect_tls`, and the `hyper`, `hyper-util`, `http`, `http-body-util` deps that existed solely for the reverse CONNECT. - tests: add `RelayStreamStream` type alias and `relay_stream` stub to the seven `OpenShell` mock impls in server + CLI integration tests. The registry shape (pending_relays, claim_relay, RelayOpen control message, DuplexStream bridging) is unchanged, so the existing session-wait / supersede / reaper hardening on feat/supervisor-session-relay carries over intact. 
--- Cargo.lock | 4 - .../tests/ensure_providers_integration.rs | 12 + .../openshell-cli/tests/mtls_integration.rs | 12 + .../tests/provider_commands_integration.rs | 12 + .../sandbox_create_lifecycle_integration.rs | 12 + .../sandbox_name_fallback_integration.rs | 12 + crates/openshell-sandbox/Cargo.toml | 4 - .../src/supervisor_session.rs | 236 +++++++----------- crates/openshell-server/src/grpc/mod.rs | 12 +- crates/openshell-server/src/http.rs | 1 - crates/openshell-server/src/lib.rs | 1 - crates/openshell-server/src/relay.rs | 67 ----- .../src/supervisor_session.rs | 115 ++++++++- .../tests/auth_endpoint_integration.rs | 11 + .../tests/edge_tunnel_auth.rs | 11 + .../tests/multiplex_integration.rs | 11 + .../tests/multiplex_tls_integration.rs | 11 + .../tests/ws_tunnel_integration.rs | 11 + proto/openshell.proto | 42 +++- 19 files changed, 372 insertions(+), 225 deletions(-) delete mode 100644 crates/openshell-server/src/relay.rs diff --git a/Cargo.lock b/Cargo.lock index 31144fc41..e4057f75c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3162,10 +3162,6 @@ dependencies = [ "futures", "hex", "hmac", - "http", - "http-body-util", - "hyper", - "hyper-util", "ipnet", "landlock", "libc", diff --git a/crates/openshell-cli/tests/ensure_providers_integration.rs b/crates/openshell-cli/tests/ensure_providers_integration.rs index d5e813931..1c4943414 100644 --- a/crates/openshell-cli/tests/ensure_providers_integration.rs +++ b/crates/openshell-cli/tests/ensure_providers_integration.rs @@ -433,6 +433,18 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } // ── TLS helpers ────────────────────────────────────────────────────── diff --git 
a/crates/openshell-cli/tests/mtls_integration.rs b/crates/openshell-cli/tests/mtls_integration.rs index c98b7eae4..05787d3fe 100644 --- a/crates/openshell-cli/tests/mtls_integration.rs +++ b/crates/openshell-cli/tests/mtls_integration.rs @@ -335,6 +335,18 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } fn build_ca() -> (Certificate, KeyPair) { diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index 1d1323371..1fe013169 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -387,6 +387,18 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } fn install_rustls_provider() { diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index e4c658b7b..6ffb5c501 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -412,6 +412,18 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: 
tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } fn install_rustls_provider() { diff --git a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs index 7824d141a..65dcf9f61 100644 --- a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs +++ b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs @@ -345,6 +345,18 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } // ── helpers ─────────────────────────────────────────────────────────── diff --git a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index b21b1948f..78d8ac741 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -51,10 +51,6 @@ rcgen = { workspace = true } webpki-roots = { workspace = true } # HTTP -hyper = { workspace = true } -hyper-util = { workspace = true } -http = "1" -http-body-util = "0.1" bytes = { workspace = true } # UUID diff --git a/crates/openshell-sandbox/src/supervisor_session.rs b/crates/openshell-sandbox/src/supervisor_session.rs index 2b571df08..2a45726e0 100644 --- a/crates/openshell-sandbox/src/supervisor_session.rs +++ b/crates/openshell-sandbox/src/supervisor_session.rs @@ -4,19 +4,22 @@ //! Persistent supervisor-to-gateway session. //! //! Maintains a long-lived `ConnectSupervisor` bidirectional gRPC stream to the -//! gateway. When the gateway sends `RelayOpen`, the supervisor opens a reverse -//! HTTP CONNECT tunnel back to the gateway and bridges it to the local SSH +//! gateway. 
When the gateway sends `RelayOpen`, the supervisor initiates a +//! `RelayStream` gRPC call (a new HTTP/2 stream multiplexed over the same +//! TCP+TLS connection as the control stream) and bridges it to the local SSH //! daemon. The supervisor is a dumb byte bridge — it has no protocol awareness -//! of the SSH or NSSH1 bytes flowing through the tunnel. +//! of the SSH or NSSH1 bytes flowing through. use std::time::Duration; use openshell_core::proto::open_shell_client::OpenShellClient; use openshell_core::proto::{ - GatewayMessage, SupervisorHeartbeat, SupervisorHello, SupervisorMessage, gateway_message, - supervisor_message, + GatewayMessage, RelayChunk, SupervisorHeartbeat, SupervisorHello, SupervisorMessage, + gateway_message, supervisor_message, }; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::sync::mpsc; +use tokio_stream::StreamExt; use tonic::transport::Channel; use tracing::{info, warn}; @@ -25,6 +28,11 @@ use crate::grpc_client; const INITIAL_BACKOFF: Duration = Duration::from_secs(1); const MAX_BACKOFF: Duration = Duration::from_secs(30); +/// Size of chunks read from the local SSH socket when forwarding bytes back +/// to the gateway over the gRPC response stream. 16 KiB matches the default +/// HTTP/2 frame size so each `RelayChunk` fits in one frame. +const RELAY_CHUNK_SIZE: usize = 16 * 1024; + /// Spawn the supervisor session task. /// /// The task runs for the lifetime of the sandbox process, reconnecting with @@ -69,7 +77,10 @@ async fn run_single_session( sandbox_id: &str, ssh_listen_port: u16, ) -> Result<(), Box> { - // Connect to the gateway. + // Connect to the gateway. The same `Channel` is used for both the + // long-lived control stream and all data-plane `RelayStream` calls, so + // every relay rides the same TCP+TLS+HTTP/2 connection — no new TLS + // handshake per relay. 
let channel = grpc_client::connect_channel_pub(endpoint) .await .map_err(|e| format!("connect failed: {e}"))?; @@ -131,7 +142,6 @@ async fn run_single_session( handle_gateway_message( &msg, sandbox_id, - &endpoint, ssh_listen_port, &channel, ).await; @@ -162,9 +172,8 @@ async fn run_single_session( async fn handle_gateway_message( msg: &GatewayMessage, sandbox_id: &str, - endpoint: &str, ssh_listen_port: u16, - _channel: &Channel, + channel: &Channel, ) { match &msg.payload { Some(gateway_message::Payload::Heartbeat(_)) => { @@ -172,8 +181,8 @@ async fn handle_gateway_message( } Some(gateway_message::Payload::RelayOpen(open)) => { let channel_id = open.channel_id.clone(); - let endpoint = endpoint.to_string(); let sandbox_id = sandbox_id.to_string(); + let channel = channel.clone(); info!( sandbox_id = %sandbox_id, @@ -182,7 +191,7 @@ async fn handle_gateway_message( ); tokio::spawn(async move { - if let Err(e) = handle_relay_open(&channel_id, &endpoint, ssh_listen_port).await { + if let Err(e) = handle_relay_open(&channel_id, ssh_listen_port, channel).await { warn!( sandbox_id = %sandbox_id, channel_id = %channel_id, @@ -206,146 +215,95 @@ async fn handle_gateway_message( } } -/// Handle a RelayOpen by opening a reverse HTTP CONNECT to the gateway and -/// bridging it to the local SSH daemon. +/// Handle a `RelayOpen` by initiating a `RelayStream` RPC on the gateway and +/// bridging that stream to the local SSH daemon. +/// +/// This opens a new HTTP/2 stream on the existing `Channel` — no new TCP or +/// TLS handshake. The first `RelayChunk` we send identifies the channel via +/// `channel_id`; subsequent chunks carry raw SSH bytes. async fn handle_relay_open( channel_id: &str, - endpoint: &str, ssh_listen_port: u16, + channel: Channel, ) -> Result<(), Box> { - // Build the relay URL from the gateway endpoint. - // The endpoint is like "https://gateway:8080" or "http://gateway:8080". 
- let relay_url = format!("{endpoint}/relay/{channel_id}"); + let mut client = OpenShellClient::new(channel); + + // Outbound chunks to the gateway. + let (out_tx, out_rx) = mpsc::channel::(16); + let outbound = tokio_stream::wrappers::ReceiverStream::new(out_rx); + + // First frame: identify the channel. No payload on this frame. + out_tx + .send(RelayChunk { + channel_id: channel_id.to_string(), + data: Vec::new(), + }) + .await + .map_err(|_| "outbound channel closed before init")?; - // Open a reverse HTTP CONNECT to the gateway's relay endpoint. - let mut relay_stream = open_reverse_connect(&relay_url).await?; + // Initiate the RPC. This rides the existing HTTP/2 connection. + let response = client + .relay_stream(outbound) + .await + .map_err(|e| format!("relay_stream RPC failed: {e}"))?; + let mut inbound = response.into_inner(); // Connect to the local SSH daemon on loopback. - let mut ssh_conn = tokio::net::TcpStream::connect(("127.0.0.1", ssh_listen_port)).await?; - - info!(channel_id = %channel_id, "relay bridge: connected to local SSH daemon, bridging"); - - // Bridge the relay stream to the local SSH connection. - // The gateway sends NSSH1 preface + SSH bytes through the relay. - // The SSH daemon receives them as if the gateway connected directly. - let _ = tokio::io::copy_bidirectional(&mut relay_stream, &mut ssh_conn).await; - - Ok(()) -} - -/// Open an HTTP CONNECT tunnel to the given URL and return the upgraded stream. -/// -/// This uses a raw hyper HTTP/1.1 client to send a CONNECT request and upgrade -/// the connection to a raw byte stream. 
-async fn open_reverse_connect( - url: &str, -) -> Result< - hyper_util::rt::TokioIo, - Box, -> { - let uri: http::Uri = url.parse()?; - let host = uri.host().ok_or("missing host")?; - let port = uri - .port_u16() - .unwrap_or(if uri.scheme_str() == Some("https") { - 443 - } else { - 80 - }); - let authority = format!("{host}:{port}"); - let path = uri.path().to_string(); - let use_tls = uri.scheme_str() == Some("https"); - - // Connect TCP. - let tcp = tokio::net::TcpStream::connect(&authority).await?; - tcp.set_nodelay(true)?; - - if use_tls { - // Build TLS connector using the same env-var certs as the gRPC client. - let tls_stream = connect_tls(tcp, host).await?; - send_connect_request(tls_stream, &authority, &path).await - } else { - send_connect_request(tcp, &authority, &path).await - } -} - -async fn send_connect_request( - io: IO, - authority: &str, - path: &str, -) -> Result< - hyper_util::rt::TokioIo, - Box, -> -where - IO: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin + Send + 'static, -{ - use http::Method; - - let (mut sender, conn) = - hyper::client::conn::http1::handshake(hyper_util::rt::TokioIo::new(io)).await?; - - // Spawn the connection driver. - tokio::spawn(async move { - if let Err(e) = conn.with_upgrades().await { - warn!(error = %e, "relay CONNECT connection driver error"); + let ssh = tokio::net::TcpStream::connect(("127.0.0.1", ssh_listen_port)).await?; + let (mut ssh_r, mut ssh_w) = ssh.into_split(); + + info!(channel_id = %channel_id, "relay bridge: connected to local SSH daemon"); + + // SSH → gRPC (out_tx): read local SSH, forward as `RelayChunk`s. 
+ let out_tx_writer = out_tx.clone(); + let ssh_to_grpc = tokio::spawn(async move { + let mut buf = vec![0u8; RELAY_CHUNK_SIZE]; + loop { + match ssh_r.read(&mut buf).await { + Ok(0) | Err(_) => break, + Ok(n) => { + let chunk = RelayChunk { + channel_id: String::new(), + data: buf[..n].to_vec(), + }; + if out_tx_writer.send(chunk).await.is_err() { + break; + } + } + } } }); - let req = http::Request::builder() - .method(Method::CONNECT) - .uri(path) - .header(http::header::HOST, authority) - .body(http_body_util::Empty::::new())?; - - let resp = sender.send_request(req).await?; - - if resp.status() != http::StatusCode::OK - && resp.status() != http::StatusCode::SWITCHING_PROTOCOLS - { - return Err(format!("relay CONNECT failed: {}", resp.status()).into()); - } - - let upgraded = hyper::upgrade::on(resp).await?; - Ok(hyper_util::rt::TokioIo::new(upgraded)) -} - -/// Connect TLS using the same cert env vars as the gRPC client. -async fn connect_tls( - tcp: tokio::net::TcpStream, - host: &str, -) -> Result< - tokio_rustls::client::TlsStream, - Box, -> { - use rustls::pki_types::ServerName; - use std::sync::Arc; - - let ca_path = std::env::var("OPENSHELL_TLS_CA")?; - let cert_path = std::env::var("OPENSHELL_TLS_CERT")?; - let key_path = std::env::var("OPENSHELL_TLS_KEY")?; - - let ca_pem = std::fs::read(&ca_path)?; - let cert_pem = std::fs::read(&cert_path)?; - let key_pem = std::fs::read(&key_path)?; - - let mut root_store = rustls::RootCertStore::empty(); - for cert in rustls_pemfile::certs(&mut ca_pem.as_slice()) { - root_store.add(cert?)?; + // gRPC (inbound) → SSH: drain inbound chunks into the local SSH socket. 
+ let mut inbound_err: Option = None; + while let Some(next) = inbound.next().await { + match next { + Ok(chunk) => { + if chunk.data.is_empty() { + continue; + } + if let Err(e) = ssh_w.write_all(&chunk.data).await { + inbound_err = Some(format!("write to ssh failed: {e}")); + break; + } + } + Err(e) => { + inbound_err = Some(format!("relay inbound errored: {e}")); + break; + } + } } - let certs: Vec<_> = - rustls_pemfile::certs(&mut cert_pem.as_slice()).collect::>()?; - let key = - rustls_pemfile::private_key(&mut key_pem.as_slice())?.ok_or("no private key found")?; - - let config = rustls::ClientConfig::builder() - .with_root_certificates(root_store) - .with_client_auth_cert(certs, key)?; + // Half-close the SSH socket's write side so the daemon sees EOF. + let _ = ssh_w.shutdown().await; - let connector = tokio_rustls::TlsConnector::from(Arc::new(config)); - let server_name = ServerName::try_from(host.to_string())?; - let tls_stream = connector.connect(server_name, tcp).await?; + // Dropping out_tx closes the outbound gRPC stream, letting the gateway + // observe EOF on its side too. 
+ drop(out_tx); + let _ = ssh_to_grpc.await; - Ok(tls_stream) + if let Some(e) = inbound_err { + return Err(e.into()); + } + Ok(()) } diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index 8a5516c6b..97751b72b 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ b/crates/openshell-server/src/grpc/mod.rs @@ -22,7 +22,7 @@ use openshell_core::proto::{ HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, ListSandboxPoliciesRequest, ListSandboxPoliciesResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, PushSandboxLogsRequest, PushSandboxLogsResponse, - RejectDraftChunkRequest, RejectDraftChunkResponse, ReportPolicyStatusRequest, + RejectDraftChunkRequest, RejectDraftChunkResponse, RelayChunk, ReportPolicyStatusRequest, ReportPolicyStatusResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, SubmitPolicyAnalysisRequest, SubmitPolicyAnalysisResponse, SupervisorMessage, UndoDraftChunkRequest, UndoDraftChunkResponse, UpdateConfigRequest, @@ -396,6 +396,16 @@ impl OpenShell for OpenShellService { ) -> Result, Status> { crate::supervisor_session::handle_connect_supervisor(&self.state, request).await } + + type RelayStreamStream = + Pin> + Send + 'static>>; + + async fn relay_stream( + &self, + request: Request>, + ) -> Result, Status> { + crate::supervisor_session::handle_relay_stream(&self.state, request).await + } } // --------------------------------------------------------------------------- diff --git a/crates/openshell-server/src/http.rs b/crates/openshell-server/src/http.rs index aefe4181b..afe7edc1b 100644 --- a/crates/openshell-server/src/http.rs +++ b/crates/openshell-server/src/http.rs @@ -49,7 +49,6 @@ pub fn health_router() -> Router { pub fn http_router(state: Arc) -> Router { health_router() .merge(crate::ssh_tunnel::router(state.clone())) - .merge(crate::relay::router(state.clone())) 
.merge(crate::ws_tunnel::router(state.clone())) .merge(crate::auth::router(state)) } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index cbef28b0e..ffd030db1 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -16,7 +16,6 @@ mod http; mod inference; mod multiplex; mod persistence; -mod relay; mod sandbox_index; mod sandbox_watch; mod ssh_tunnel; diff --git a/crates/openshell-server/src/relay.rs b/crates/openshell-server/src/relay.rs deleted file mode 100644 index 662fe4d99..000000000 --- a/crates/openshell-server/src/relay.rs +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! HTTP CONNECT relay endpoint for supervisor-initiated reverse tunnels. -//! -//! When the gateway sends a `RelayOpen` message over the supervisor's gRPC -//! session, the supervisor opens `CONNECT /relay/{channel_id}` back to this -//! endpoint. The gateway then bridges the supervisor's upgraded stream with -//! the client's SSH tunnel or exec proxy. - -use axum::{ - Router, extract::Path, extract::State, http::Method, response::IntoResponse, routing::any, -}; -use http::StatusCode; -use hyper::upgrade::OnUpgrade; -use hyper_util::rt::TokioIo; -use std::sync::Arc; -use tokio::io::AsyncWriteExt; -use tracing::{info, warn}; - -use crate::ServerState; - -pub fn router(state: Arc) -> Router { - Router::new() - .route("/relay/{channel_id}", any(relay_connect)) - .with_state(state) -} - -async fn relay_connect( - State(state): State>, - Path(channel_id): Path, - req: hyper::Request, -) -> impl IntoResponse { - if req.method() != Method::CONNECT { - return StatusCode::METHOD_NOT_ALLOWED.into_response(); - } - - // Claim the pending relay. This consumes the entry — it cannot be reused. 
- let supervisor_stream = match state.supervisor_sessions.claim_relay(&channel_id) { - Ok(stream) => stream, - Err(_) => { - warn!(channel_id = %channel_id, "relay: unknown or expired channel"); - return StatusCode::NOT_FOUND.into_response(); - } - }; - - info!(channel_id = %channel_id, "relay: supervisor connected, upgrading"); - - // Upgrade the HTTP connection to a raw byte stream and bridge it to - // the DuplexStream that connects to the gateway-side waiter. - let on_upgrade: OnUpgrade = hyper::upgrade::on(req); - tokio::spawn(async move { - match on_upgrade.await { - Ok(upgraded) => { - let mut upgraded = TokioIo::new(upgraded); - let mut supervisor = supervisor_stream; - let _ = tokio::io::copy_bidirectional(&mut upgraded, &mut supervisor).await; - let _ = AsyncWriteExt::shutdown(&mut upgraded).await; - } - Err(e) => { - warn!(channel_id = %channel_id, error = %e, "relay: upgrade failed"); - } - } - }); - - StatusCode::SWITCHING_PROTOCOLS.into_response() -} diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index d79540d42..8b26c3c31 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -13,7 +13,7 @@ use tracing::{info, warn}; use uuid::Uuid; use openshell_core::proto::{ - GatewayMessage, RelayOpen, SessionAccepted, SupervisorMessage, gateway_message, + GatewayMessage, RelayChunk, RelayOpen, SessionAccepted, SupervisorMessage, gateway_message, supervisor_message, }; @@ -258,8 +258,8 @@ impl SupervisorSessionRegistry { /// Pending entries are normally consumed either when the supervisor opens its /// reverse CONNECT (via `claim_relay`) or by the gateway-side waiter timing /// out. If neither happens — e.g., the supervisor crashed after acknowledging -/// `RelayOpen` but before dialing back — the entry would otherwise sit in the -/// map indefinitely. This sweeper bounds that leak. 
+/// `RelayOpen` but before initiating `RelayStream` — the entry would otherwise +/// sit in the map indefinitely. This sweeper bounds that leak. pub fn spawn_relay_reaper(state: Arc, interval: Duration) { tokio::spawn(async move { loop { @@ -269,6 +269,115 @@ pub fn spawn_relay_reaper(state: Arc, interval: Duration) { }); } +// --------------------------------------------------------------------------- +// RelayStream gRPC handler +// --------------------------------------------------------------------------- + +/// Size of chunks read from the gateway-side DuplexStream when forwarding +/// bytes back to the supervisor over the gRPC response stream. +const RELAY_STREAM_CHUNK_SIZE: usize = 16 * 1024; + +/// Handle a RelayStream RPC from a supervisor. The first inbound `RelayChunk` +/// identifies the pending relay via `channel_id`; subsequent chunks carry raw +/// bytes forward to the gateway-side waiter. Bytes flowing the other way are +/// chunked and sent as `RelayChunk` messages back over the response stream. +pub async fn handle_relay_stream( + state: &Arc, + request: Request>, +) -> Result< + Response< + Pin> + Send + 'static>>, + >, + Status, +> { + let mut inbound = request.into_inner(); + + // First chunk must identify the channel. + let first = inbound + .message() + .await? + .ok_or_else(|| Status::invalid_argument("empty RelayStream"))?; + if first.channel_id.is_empty() { + return Err(Status::invalid_argument( + "first RelayChunk must set channel_id", + )); + } + let channel_id = first.channel_id; + + // Claim the pending relay. Consumes the entry — it cannot be reused. + let supervisor_side = state.supervisor_sessions.claim_relay(&channel_id)?; + info!(channel_id = %channel_id, "relay stream: claimed pending relay, bridging"); + + let (mut read_half, mut write_half) = tokio::io::split(supervisor_side); + + // If the first chunk happened to carry payload bytes alongside the + // channel_id, forward them immediately. 
+ if !first.data.is_empty() { + if let Err(e) = tokio::io::AsyncWriteExt::write_all(&mut write_half, &first.data).await { + warn!(channel_id = %channel_id, error = %e, "relay stream: failed initial write"); + return Err(Status::internal("relay bridge write failed")); + } + } + + // Supervisor → gateway: drain `inbound` and write to the DuplexStream. + let channel_id_in = channel_id.clone(); + tokio::spawn(async move { + loop { + match inbound.message().await { + Ok(Some(chunk)) => { + if chunk.data.is_empty() { + continue; + } + if let Err(e) = + tokio::io::AsyncWriteExt::write_all(&mut write_half, &chunk.data).await + { + warn!(channel_id = %channel_id_in, error = %e, "relay stream: write to duplex failed"); + break; + } + } + Ok(None) => break, + Err(e) => { + warn!(channel_id = %channel_id_in, error = %e, "relay stream: inbound errored"); + break; + } + } + } + // Best-effort half-close on the write side so the reader sees EOF. + let _ = tokio::io::AsyncWriteExt::shutdown(&mut write_half).await; + }); + + // Gateway → supervisor: read the DuplexStream and emit RelayChunk messages. 
+ let (out_tx, out_rx) = mpsc::channel::>(16); + let channel_id_out = channel_id.clone(); + tokio::spawn(async move { + let mut buf = vec![0u8; RELAY_STREAM_CHUNK_SIZE]; + loop { + match tokio::io::AsyncReadExt::read(&mut read_half, &mut buf).await { + Ok(0) => break, + Ok(n) => { + let chunk = RelayChunk { + channel_id: String::new(), + data: buf[..n].to_vec(), + }; + if out_tx.send(Ok(chunk)).await.is_err() { + break; + } + } + Err(e) => { + warn!(channel_id = %channel_id_out, error = %e, "relay stream: read from duplex failed"); + break; + } + } + } + }); + + let stream = ReceiverStream::new(out_rx); + let stream: Pin< + Box> + Send + 'static>, + > = Box::pin(stream); + Ok(Response::new(stream)) +} + // --------------------------------------------------------------------------- // ConnectSupervisor gRPC handler // --------------------------------------------------------------------------- diff --git a/crates/openshell-server/tests/auth_endpoint_integration.rs b/crates/openshell-server/tests/auth_endpoint_integration.rs index cd2abe157..7f66be27a 100644 --- a/crates/openshell-server/tests/auth_endpoint_integration.rs +++ b/crates/openshell-server/tests/auth_endpoint_integration.rs @@ -673,6 +673,17 @@ impl openshell_core::proto::open_shell_server::OpenShell for TestOpenShell { ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } /// Test 7: Plaintext server (no TLS) accepts both gRPC and HTTP. 
diff --git a/crates/openshell-server/tests/edge_tunnel_auth.rs b/crates/openshell-server/tests/edge_tunnel_auth.rs index a5d6a88e9..adcacb92d 100644 --- a/crates/openshell-server/tests/edge_tunnel_auth.rs +++ b/crates/openshell-server/tests/edge_tunnel_auth.rs @@ -316,6 +316,17 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } // --------------------------------------------------------------------------- diff --git a/crates/openshell-server/tests/multiplex_integration.rs b/crates/openshell-server/tests/multiplex_integration.rs index 8b93b0989..203c2f3f2 100644 --- a/crates/openshell-server/tests/multiplex_integration.rs +++ b/crates/openshell-server/tests/multiplex_integration.rs @@ -284,6 +284,17 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } #[tokio::test] diff --git a/crates/openshell-server/tests/multiplex_tls_integration.rs b/crates/openshell-server/tests/multiplex_tls_integration.rs index 4d77e8cae..0d7b058d9 100644 --- a/crates/openshell-server/tests/multiplex_tls_integration.rs +++ b/crates/openshell-server/tests/multiplex_tls_integration.rs @@ -297,6 +297,17 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> 
Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } /// PKI bundle: CA cert, server cert+key, client cert+key. diff --git a/crates/openshell-server/tests/ws_tunnel_integration.rs b/crates/openshell-server/tests/ws_tunnel_integration.rs index 705e9de49..84fef2efb 100644 --- a/crates/openshell-server/tests/ws_tunnel_integration.rs +++ b/crates/openshell-server/tests/ws_tunnel_integration.rs @@ -310,6 +310,17 @@ impl OpenShell for TestOpenShell { ) -> Result, Status> { Err(Status::unimplemented("not implemented in test")) } + + type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< + Result, + >; + + async fn relay_stream( + &self, + _request: tonic::Request>, + ) -> Result, tonic::Status> { + Err(tonic::Status::unimplemented("not implemented in test")) + } } // --------------------------------------------------------------------------- diff --git a/proto/openshell.proto b/proto/openshell.proto index 53812c977..92e479906 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -95,10 +95,23 @@ service OpenShell { // // The supervisor opens this stream at startup and keeps it alive for the // sandbox lifetime. The gateway uses it to coordinate relay channels for - // SSH connect and ExecSandbox. SSH bytes flow over separate reverse HTTP - // CONNECT tunnels, not over this stream. + // SSH connect and ExecSandbox. Raw SSH bytes flow over RelayStream calls + // (separate HTTP/2 streams on the same connection), not over this stream. rpc ConnectSupervisor(stream SupervisorMessage) returns (stream GatewayMessage); + // Raw byte relay between supervisor and gateway. + // + // The supervisor initiates this call after receiving a RelayOpen message + // on its ConnectSupervisor stream. The first RelayChunk carries the + // channel_id to associate the new HTTP/2 stream with the pending relay + // slot on the gateway. 
Subsequent chunks carry raw bytes in either + // direction between the gateway-side waiter (ssh_tunnel / exec handler) + // and the supervisor-side local SSH daemon bridge. + // + // This rides the same TCP+TLS+HTTP/2 connection as ConnectSupervisor — + // no new TLS handshake, no reverse HTTP CONNECT. + rpc RelayStream(stream RelayChunk) returns (stream RelayChunk); + // Watch a sandbox and stream updates. // // This stream can include: @@ -767,14 +780,33 @@ message GatewayHeartbeat {} // Gateway requests the supervisor to open a relay channel. // -// On receiving this, the supervisor should open a reverse HTTP CONNECT -// to the gateway's /relay/{channel_id} endpoint and bridge it to the -// local SSH daemon. +// On receiving this, the supervisor should initiate a RelayStream RPC to +// the gateway, sending channel_id in the first RelayChunk to associate +// the new HTTP/2 stream with the pending relay slot. The supervisor +// bridges that stream to the local SSH daemon. message RelayOpen { // Gateway-allocated channel identifier (UUID). string channel_id = 1; } +// Raw byte chunk for the RelayStream RPC. The first chunk sent from the +// supervisor MUST set channel_id. All subsequent chunks (both directions) +// leave channel_id empty and carry payload bytes in `data`. +// +// Leaving channel_id empty on data frames is a deliberate per-frame size +// optimization: an SSH tunnel fragments into many small frames (a keystroke +// is ~50–200 bytes), where repeating a 36-byte UUID every frame would be a +// meaningful overhead. The cost is a weaker schema — the "first message is +// init-only" invariant lives in this comment rather than the proto. If this +// becomes confusing, switch to `oneof payload { string init_channel_id = 1; +// bytes data = 2; }` — behavior is equivalent, migration is cheap. +message RelayChunk { + // Only set on the very first message from the supervisor side. + string channel_id = 1; + // Raw bytes flowing in either direction. 
+ bytes data = 2; +} + // Supervisor reports the result of a relay open request. message RelayOpenResult { // Channel identifier from the RelayOpen request. From ebc72b1a501326da30d0f79a7f298ca653c46987 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Thu, 16 Apr 2026 12:10:26 -0700 Subject: [PATCH 05/20] style: apply cargo fmt to CLI test mocks --- crates/openshell-cli/tests/ensure_providers_integration.rs | 1 - crates/openshell-cli/tests/mtls_integration.rs | 1 - crates/openshell-cli/tests/provider_commands_integration.rs | 1 - .../openshell-cli/tests/sandbox_create_lifecycle_integration.rs | 1 - crates/openshell-cli/tests/sandbox_name_fallback_integration.rs | 1 - 5 files changed, 5 deletions(-) diff --git a/crates/openshell-cli/tests/ensure_providers_integration.rs b/crates/openshell-cli/tests/ensure_providers_integration.rs index 1c4943414..39044d01e 100644 --- a/crates/openshell-cli/tests/ensure_providers_integration.rs +++ b/crates/openshell-cli/tests/ensure_providers_integration.rs @@ -434,7 +434,6 @@ impl OpenShell for TestOpenShell { Err(Status::unimplemented("not implemented in test")) } - type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< Result, >; diff --git a/crates/openshell-cli/tests/mtls_integration.rs b/crates/openshell-cli/tests/mtls_integration.rs index 05787d3fe..5bf2a7fe4 100644 --- a/crates/openshell-cli/tests/mtls_integration.rs +++ b/crates/openshell-cli/tests/mtls_integration.rs @@ -336,7 +336,6 @@ impl OpenShell for TestOpenShell { Err(Status::unimplemented("not implemented in test")) } - type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< Result, >; diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index 1fe013169..34bd071a6 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -388,7 +388,6 @@ impl OpenShell for TestOpenShell { 
Err(Status::unimplemented("not implemented in test")) } - type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< Result, >; diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 6ffb5c501..a1f1b1ed1 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -413,7 +413,6 @@ impl OpenShell for TestOpenShell { Err(Status::unimplemented("not implemented in test")) } - type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< Result, >; diff --git a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs index 65dcf9f61..663451cb1 100644 --- a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs +++ b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs @@ -346,7 +346,6 @@ impl OpenShell for TestOpenShell { Err(Status::unimplemented("not implemented in test")) } - type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< Result, >; From cde52a29c67d88b6f065ae7debb8ccc2d37e5eb2 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Thu, 16 Apr 2026 12:48:54 -0700 Subject: [PATCH 06/20] perf(server,sandbox): bump HTTP/2 flow-control windows for relay data plane Default h2 initial windows are 64 KiB per stream and 64 KiB per connection. That throttles a single RelayStream SSH tunnel to ~500 Mbps on LAN, roughly 35% below the raw HTTP CONNECT baseline measured on `nemoclaw`. Bump both server (hyper-util auto::Builder via multiplex.rs) and client (tonic Endpoint in openshell-sandbox/grpc_client.rs) windows to 2 MiB / 4 MiB. This is the window size at which bulk throughput on a 50 MiB transfer matches the reverse HTTP CONNECT path. The numbers apply only to the RelayStream data plane in this branch; ConnectSupervisor and all other RPCs benefit too but are low-rate. 
--- crates/openshell-sandbox/src/grpc_client.rs | 6 +++++- crates/openshell-server/src/multiplex.rs | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index 09e7b607d..18e09ef92 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -33,7 +33,11 @@ async fn connect_channel(endpoint: &str) -> Result { .connect_timeout(Duration::from_secs(10)) .http2_keep_alive_interval(Duration::from_secs(10)) .keep_alive_while_idle(true) - .keep_alive_timeout(Duration::from_secs(20)); + .keep_alive_timeout(Duration::from_secs(20)) + // Match the gateway-side HTTP/2 windows (see `multiplex.rs`). The + // defaults throttle the RelayStream data plane to ~500 Mbps on LAN. + .initial_stream_window_size(2 * 1024 * 1024) + .initial_connection_window_size(4 * 1024 * 1024); let tls_enabled = endpoint.starts_with("https://"); diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs index 5ba44b1ec..b49a1a0dd 100644 --- a/crates/openshell-server/src/multiplex.rs +++ b/crates/openshell-server/src/multiplex.rs @@ -62,7 +62,20 @@ impl MultiplexService { let service = MultiplexedService::new(grpc_service, http_service); - Builder::new(TokioExecutor::new()) + // HTTP/2 flow-control windows. The default (64 KiB stream, 64 KiB + // connection) throttles bulk gRPC streams — notably the RelayStream + // data plane used by `sandbox connect` and `ExecSandbox`. Bumping + // these lets a single SSH tunnel reach TCP-limited throughput on + // LAN and WAN alike. Values chosen empirically on `nemoclaw` — 2 MiB + // / 4 MiB was the point at which bulk throughput matched the raw + // HTTP CONNECT baseline. 
+ let mut builder = Builder::new(TokioExecutor::new()); + builder + .http2() + .initial_stream_window_size(2 * 1024 * 1024) + .initial_connection_window_size(4 * 1024 * 1024); + + builder .serve_connection_with_upgrades(TokioIo::new(stream), service) .await?; From 1ec551a6f2dd5899269a1de748d368b3ccfb9dc3 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Thu, 16 Apr 2026 12:54:36 -0700 Subject: [PATCH 07/20] perf(server,sandbox): use adaptive HTTP/2 flow control instead of fixed windows --- crates/openshell-sandbox/src/grpc_client.rs | 8 ++++---- crates/openshell-server/src/multiplex.rs | 19 ++++++++----------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index 18e09ef92..a15b36676 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -34,10 +34,10 @@ async fn connect_channel(endpoint: &str) -> Result { .http2_keep_alive_interval(Duration::from_secs(10)) .keep_alive_while_idle(true) .keep_alive_timeout(Duration::from_secs(20)) - // Match the gateway-side HTTP/2 windows (see `multiplex.rs`). The - // defaults throttle the RelayStream data plane to ~500 Mbps on LAN. - .initial_stream_window_size(2 * 1024 * 1024) - .initial_connection_window_size(4 * 1024 * 1024); + // Match the gateway-side HTTP/2 flow control (see `multiplex.rs`). + // Adaptive sizing lets idle streams stay tiny while bulk + // RelayStream data flows get a BDP-sized window. + .http2_adaptive_window(true); let tls_enabled = endpoint.starts_with("https://"); diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs index b49a1a0dd..0f6b9449f 100644 --- a/crates/openshell-server/src/multiplex.rs +++ b/crates/openshell-server/src/multiplex.rs @@ -62,18 +62,15 @@ impl MultiplexService { let service = MultiplexedService::new(grpc_service, http_service); - // HTTP/2 flow-control windows. 
The default (64 KiB stream, 64 KiB - // connection) throttles bulk gRPC streams — notably the RelayStream - // data plane used by `sandbox connect` and `ExecSandbox`. Bumping - // these lets a single SSH tunnel reach TCP-limited throughput on - // LAN and WAN alike. Values chosen empirically on `nemoclaw` — 2 MiB - // / 4 MiB was the point at which bulk throughput matched the raw - // HTTP CONNECT baseline. + // HTTP/2 adaptive flow control. Default windows (64 KiB / 64 KiB) + // throttle the RelayStream data plane to ~500 Mbps on LAN. Instead + // of committing to a fixed large window (which worst-case pins + // `max_concurrent_streams × stream_window` bytes per connection), + // we let hyper/h2 auto-size based on the measured bandwidth-delay + // product. Idle streams stay tiny; busy bulk streams grow as + // needed. Overrides any fixed initial_*_window_size settings. let mut builder = Builder::new(TokioExecutor::new()); - builder - .http2() - .initial_stream_window_size(2 * 1024 * 1024) - .initial_connection_window_size(4 * 1024 * 1024); + builder.http2().adaptive_window(true); builder .serve_connection_with_upgrades(TokioIo::new(stream), service) From 0fd40167163c87c30862611955d43d58e6770c36 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 11:47:57 -0700 Subject: [PATCH 08/20] refactor(sandbox,server): switch supervisor SSH daemon to Unix socket, drop NSSH1 The embedded SSH daemon in openshell-sandbox no longer listens on a TCP port. Instead it binds a root-owned Unix socket (default /run/openshell/ssh.sock, 0700 parent dir, 0600 socket). The supervisor's relay bridge connects to that socket instead of 127.0.0.1:2222. With the socket gated by filesystem permissions, the NSSH1 HMAC preface is redundant and has been removed: - openshell-sandbox: drop `verify_preface`, `hmac_sha256`, the nonce cache and reaper, and the preface read/write on every SSH accept. `run_ssh_server` takes a `PathBuf` and uses `UnixListener`. 
- openshell-server/ssh_tunnel: remove the NSSH1 write + response read before bridging the client's upgraded CONNECT stream; the relay is now bridged immediately. - openshell-server/grpc/sandbox: same cleanup in the exec-path relay proxy. `stream_exec_over_relay` and `start_single_use_ssh_proxy_over_relay` stop taking a `handshake_secret`. - openshell-server lib: the K8s driver is now configured with the socket path ("/run/openshell/ssh.sock") instead of "0.0.0.0:2222". - Parent directory of the socket is created with 0700 root:root by the supervisor at startup to keep the sandbox entrypoint user out. `ssh_handshake_secret` is still accepted on the CLI / env for backwards compatibility but is no longer used for SSH. --- crates/openshell-sandbox/src/lib.rs | 36 +- crates/openshell-sandbox/src/ssh.rs | 363 +++--------------- .../src/supervisor_session.rs | 33 +- crates/openshell-server/src/grpc/sandbox.rs | 94 +---- crates/openshell-server/src/lib.rs | 6 +- crates/openshell-server/src/ssh_tunnel.rs | 102 +---- 6 files changed, 103 insertions(+), 531 deletions(-) diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index bb81b9ccf..b1fd1bf05 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -604,18 +604,17 @@ pub async fn run_sandbox( } }); + // The `ssh_listen_addr` argument now carries a filesystem path to the + // Unix socket the embedded SSH daemon listens on. Kept as an `Option` + // for backwards compatibility with the CLI flag name and env var. 
+ let ssh_socket_path: Option = + ssh_listen_addr.as_ref().map(std::path::PathBuf::from); if let Some(listen_addr) = ssh_listen_addr { - let addr: SocketAddr = listen_addr.parse().into_diagnostic()?; + let listen_path = std::path::PathBuf::from(listen_addr); let policy_clone = policy.clone(); let workdir_clone = workdir.clone(); - let secret = ssh_handshake_secret - .filter(|s| !s.is_empty()) - .ok_or_else(|| { - miette::miette!( - "OPENSHELL_SSH_HANDSHAKE_SECRET is required when SSH is enabled.\n\ - Set --ssh-handshake-secret or the OPENSHELL_SSH_HANDSHAKE_SECRET env var." - ) - })?; + let _ = ssh_handshake_secret; // retained in the signature for compat; unused + let _ = ssh_handshake_skew_secs; let proxy_url = ssh_proxy_url; let netns_fd = ssh_netns_fd; let ca_paths = ca_file_paths.clone(); @@ -625,12 +624,10 @@ pub async fn run_sandbox( tokio::spawn(async move { if let Err(err) = ssh::run_ssh_server( - addr, + listen_path, ssh_ready_tx, policy_clone, workdir_clone, - secret, - ssh_handshake_skew_secs, netns_fd, proxy_url, ca_paths, @@ -682,15 +679,12 @@ pub async fn run_sandbox( // Spawn the persistent supervisor session if we have a gateway endpoint // and sandbox identity. The session provides relay channels for SSH // connect and ExecSandbox through the gateway. - if let (Some(endpoint), Some(id)) = (openshell_endpoint.as_ref(), sandbox_id.as_ref()) { - // The SSH listen address was consumed above, so we use the configured - // SSH port (default 2222) for loopback connections from the relay. 
- let ssh_port = std::env::var("OPENSHELL_SSH_PORT") - .ok() - .and_then(|p| p.parse::().ok()) - .unwrap_or(2222); - - supervisor_session::spawn(endpoint.clone(), id.clone(), ssh_port); + if let (Some(endpoint), Some(id), Some(socket)) = ( + openshell_endpoint.as_ref(), + sandbox_id.as_ref(), + ssh_socket_path.as_ref(), + ) { + supervisor_session::spawn(endpoint.clone(), id.clone(), socket.clone()); info!("supervisor session task spawned"); } diff --git a/crates/openshell-sandbox/src/ssh.rs b/crates/openshell-sandbox/src/ssh.rs index 3c26f8061..a8a402b3a 100644 --- a/crates/openshell-sandbox/src/ssh.rs +++ b/crates/openshell-sandbox/src/ssh.rs @@ -13,8 +13,7 @@ use miette::{IntoDiagnostic, Result}; use nix::pty::{Winsize, openpty}; use nix::unistd::setsid; use openshell_ocsf::{ - ActionId, ActivityId, AuthTypeId, ConfidenceId, DetectionFindingBuilder, DispositionId, - FindingInfo, SeverityId, SshActivityBuilder, StatusId, ocsf_emit, + ActionId, ActivityId, DispositionId, SeverityId, SshActivityBuilder, StatusId, ocsf_emit, }; use rand_core::OsRng; use russh::keys::{Algorithm, PrivateKey}; @@ -22,33 +21,22 @@ use russh::server::{Auth, Handle, Session}; use russh::{ChannelId, CryptoVec}; use std::collections::HashMap; use std::io::{Read, Write}; -use std::net::SocketAddr; use std::os::fd::{AsRawFd, RawFd}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; -use std::sync::{Arc, Mutex, mpsc}; -use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::net::TcpListener; +use std::sync::{Arc, mpsc}; +use std::time::Duration; +use tokio::net::UnixListener; use tracing::warn; -const PREFACE_MAGIC: &str = "NSSH1"; -#[cfg(test)] -const SSH_HANDSHAKE_SECRET_ENV: &str = "OPENSHELL_SSH_HANDSHAKE_SECRET"; - -/// A time-bounded set of nonces used to detect replayed NSSH1 handshakes. 
-/// Each entry records the `Instant` it was inserted; a background reaper task -/// periodically evicts entries older than the handshake skew window. -type NonceCache = Arc>>; - /// Perform SSH server initialization: generate a host key, build the config, -/// and bind the TCP listener. Extracted so that startup errors can be forwarded -/// through the readiness channel rather than being silently logged. +/// and bind the Unix socket listener. Extracted so that startup errors can be +/// forwarded through the readiness channel rather than being silently logged. async fn ssh_server_init( - listen_addr: SocketAddr, + listen_path: &Path, ca_file_paths: &Option<(PathBuf, PathBuf)>, ) -> Result<( - TcpListener, + UnixListener, Arc, Option>, )> { @@ -63,14 +51,43 @@ async fn ssh_server_init( let config = Arc::new(config); let ca_paths = ca_file_paths.as_ref().map(|p| Arc::new(p.clone())); - let listener = TcpListener::bind(listen_addr).await.into_diagnostic()?; + + // Ensure the parent directory exists and is root-owned with 0700 + // permissions. The sandbox entrypoint runs as an unprivileged user; it + // must not be able to enter this directory and connect to the socket. + if let Some(parent) = listen_path.parent() { + std::fs::create_dir_all(parent).into_diagnostic()?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let perms = std::fs::Permissions::from_mode(0o700); + std::fs::set_permissions(parent, perms).into_diagnostic()?; + } + } + + // Remove any stale socket from a previous run before binding. + if listen_path.exists() { + std::fs::remove_file(listen_path).into_diagnostic()?; + } + let listener = UnixListener::bind(listen_path).into_diagnostic()?; + + // Tighten permissions so only the supervisor (root) can connect. The + // sandbox entrypoint runs as an unprivileged user and must not be able to + // dial the SSH daemon directly — all access goes through the relay from + // the gateway. 
+ #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let perms = std::fs::Permissions::from_mode(0o600); + std::fs::set_permissions(listen_path, perms).into_diagnostic()?; + } + ocsf_emit!( SshActivityBuilder::new(crate::ocsf_ctx()) .activity(ActivityId::Listen) .severity(SeverityId::Informational) .status(StatusId::Success) - .src_endpoint_addr(listen_addr.ip(), listen_addr.port()) - .message(format!("SSH server listening on {listen_addr}")) + .message(format!("SSH server listening on {}", listen_path.display())) .build() ); @@ -79,18 +96,16 @@ async fn ssh_server_init( #[allow(clippy::too_many_arguments)] pub async fn run_ssh_server( - listen_addr: SocketAddr, + listen_path: PathBuf, ready_tx: tokio::sync::oneshot::Sender>, policy: SandboxPolicy, workdir: Option, - handshake_secret: String, - handshake_skew_secs: u64, netns_fd: Option, proxy_url: Option, ca_file_paths: Option<(PathBuf, PathBuf)>, provider_env: HashMap, ) -> Result<()> { - let (listener, config, ca_paths) = match ssh_server_init(listen_addr, &ca_file_paths).await { + let (listener, config, ca_paths) = match ssh_server_init(&listen_path, &ca_file_paths).await { Ok(v) => { // Signal that the SSH server has bound the socket and is ready to // accept connections. The parent task awaits this before spawning @@ -105,49 +120,25 @@ pub async fn run_ssh_server( } }; - // Nonce cache for replay detection. Entries are evicted by a background - // reaper once they exceed the handshake skew window. - let nonce_cache: NonceCache = Arc::new(Mutex::new(HashMap::new())); - - // Background task that periodically purges expired nonces. 
- let reaper_cache = nonce_cache.clone(); - let ttl = Duration::from_secs(handshake_skew_secs); - tokio::spawn(async move { - let mut interval = tokio::time::interval(Duration::from_secs(60)); - loop { - interval.tick().await; - if let Ok(mut cache) = reaper_cache.lock() { - cache.retain(|_, inserted| inserted.elapsed() < ttl); - } - } - }); - loop { - let (stream, peer) = listener.accept().await.into_diagnostic()?; - stream.set_nodelay(true).into_diagnostic()?; + let (stream, _peer) = listener.accept().await.into_diagnostic()?; let config = config.clone(); let policy = policy.clone(); let workdir = workdir.clone(); - let secret = handshake_secret.clone(); let proxy_url = proxy_url.clone(); let ca_paths = ca_paths.clone(); let provider_env = provider_env.clone(); - let nonce_cache = nonce_cache.clone(); tokio::spawn(async move { if let Err(err) = handle_connection( stream, - peer, config, policy, workdir, - &secret, - handshake_skew_secs, netns_fd, proxy_url, ca_paths, provider_env, - &nonce_cache, ) .await { @@ -166,41 +157,18 @@ pub async fn run_ssh_server( #[allow(clippy::too_many_arguments)] async fn handle_connection( - mut stream: tokio::net::TcpStream, - peer: SocketAddr, + stream: tokio::net::UnixStream, config: Arc, policy: SandboxPolicy, workdir: Option, - secret: &str, - handshake_skew_secs: u64, netns_fd: Option, proxy_url: Option, ca_file_paths: Option>, provider_env: HashMap, - nonce_cache: &NonceCache, ) -> Result<()> { - tracing::debug!(peer = %peer, "SSH connection: reading handshake preface"); - let mut line = String::new(); - read_line(&mut stream, &mut line).await?; - tracing::debug!(peer = %peer, preface_len = line.len(), "SSH connection: preface received, verifying"); - if !verify_preface(&line, secret, handshake_skew_secs, nonce_cache)? 
{ - ocsf_emit!( - SshActivityBuilder::new(crate::ocsf_ctx()) - .activity(ActivityId::Open) - .action(ActionId::Denied) - .disposition(DispositionId::Blocked) - .severity(SeverityId::Medium) - .status(StatusId::Failure) - .src_endpoint_addr(peer.ip(), peer.port()) - .message(format!( - "SSH connection: handshake verification failed from {peer}" - )) - .build() - ); - let _ = stream.write_all(b"ERR\n").await; - return Ok(()); - } - stream.write_all(b"OK\n").await.into_diagnostic()?; + // Access is gated by the Unix-socket filesystem permissions (root-only), + // not by an application-level preface. The supervisor bridges the + // gateway's RelayStream directly into this socket. ocsf_emit!( SshActivityBuilder::new(crate::ocsf_ctx()) .activity(ActivityId::Open) @@ -208,9 +176,7 @@ async fn handle_connection( .disposition(DispositionId::Allowed) .severity(SeverityId::Informational) .status(StatusId::Success) - .src_endpoint_addr(peer.ip(), peer.port()) - .auth_type(AuthTypeId::Other, "NSSH1") - .message(format!("SSH handshake accepted from {peer}")) + .message("SSH connection accepted on supervisor Unix socket") .build() ); @@ -228,107 +194,6 @@ async fn handle_connection( Ok(()) } -async fn read_line(stream: &mut tokio::net::TcpStream, buf: &mut String) -> Result<()> { - let mut bytes = Vec::new(); - loop { - let mut byte = [0u8; 1]; - let n = stream.read(&mut byte).await.into_diagnostic()?; - if n == 0 { - break; - } - if byte[0] == b'\n' { - break; - } - bytes.push(byte[0]); - if bytes.len() > 1024 { - break; - } - } - *buf = String::from_utf8_lossy(&bytes).to_string(); - Ok(()) -} - -fn verify_preface( - line: &str, - secret: &str, - handshake_skew_secs: u64, - nonce_cache: &NonceCache, -) -> Result { - let parts: Vec<&str> = line.split_whitespace().collect(); - if parts.len() != 5 || parts[0] != PREFACE_MAGIC { - return Ok(false); - } - let token = parts[1]; - let timestamp: i64 = parts[2].parse().unwrap_or(0); - let nonce = parts[3]; - let signature = parts[4]; - 
- let now = i64::try_from( - SystemTime::now() - .duration_since(UNIX_EPOCH) - .into_diagnostic()? - .as_secs(), - ) - .into_diagnostic()?; - let skew = (now - timestamp).unsigned_abs(); - if skew > handshake_skew_secs { - return Ok(false); - } - - let payload = format!("{token}|{timestamp}|{nonce}"); - let expected = hmac_sha256(secret.as_bytes(), payload.as_bytes()); - if signature != expected { - return Ok(false); - } - - // Reject replayed nonces. The cache is bounded by the reaper task which - // evicts entries older than `handshake_skew_secs`. - let mut cache = nonce_cache - .lock() - .map_err(|_| miette::miette!("nonce cache lock poisoned"))?; - if cache.contains_key(nonce) { - ocsf_emit!( - SshActivityBuilder::new(crate::ocsf_ctx()) - .activity(ActivityId::Other) - .action(ActionId::Denied) - .disposition(DispositionId::Blocked) - .severity(SeverityId::High) - .auth_type(AuthTypeId::Other, "NSSH1") - .message(format!("NSSH1 nonce replay detected: {nonce}")) - .build() - ); - ocsf_emit!( - DetectionFindingBuilder::new(crate::ocsf_ctx()) - .activity(ActivityId::Open) - .action(ActionId::Denied) - .disposition(DispositionId::Blocked) - .severity(SeverityId::High) - .is_alert(true) - .confidence(ConfidenceId::High) - .finding_info(FindingInfo::new( - "nssh1-nonce-replay", - "NSSH1 Nonce Replay Attack" - )) - .evidence("nonce", nonce) - .build() - ); - return Ok(false); - } - cache.insert(nonce.to_string(), Instant::now()); - - Ok(true) -} - -fn hmac_sha256(key: &[u8], data: &[u8]) -> String { - use hmac::{Hmac, Mac}; - use sha2::Sha256; - - let mut mac = Hmac::::new_from_slice(key).expect("hmac key"); - mac.update(data); - let result = mac.finalize().into_bytes(); - hex::encode(result) -} - /// Per-channel state for tracking PTY resources and I/O senders. 
/// /// Each SSH channel gets its own PTY master (if a PTY was requested) and input @@ -1424,136 +1289,6 @@ mod tests { ); } - // ----------------------------------------------------------------------- - // verify_preface tests - // ----------------------------------------------------------------------- - - /// Build a valid NSSH1 preface line with the given parameters. - fn build_preface(token: &str, secret: &str, nonce: &str, timestamp: i64) -> String { - let payload = format!("{token}|{timestamp}|{nonce}"); - let signature = hmac_sha256(secret.as_bytes(), payload.as_bytes()); - format!("{PREFACE_MAGIC} {token} {timestamp} {nonce} {signature}") - } - - fn fresh_nonce_cache() -> NonceCache { - Arc::new(Mutex::new(HashMap::new())) - } - - fn current_timestamp() -> i64 { - i64::try_from( - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs(), - ) - .unwrap() - } - - #[test] - fn verify_preface_accepts_valid_preface() { - let secret = "test-secret-key"; - let nonce = "unique-nonce-1"; - let ts = current_timestamp(); - let line = build_preface("tok1", secret, nonce, ts); - let cache = fresh_nonce_cache(); - - assert!(verify_preface(&line, secret, 300, &cache).unwrap()); - } - - #[test] - fn verify_preface_rejects_replayed_nonce() { - let secret = "test-secret-key"; - let nonce = "replay-nonce"; - let ts = current_timestamp(); - let line = build_preface("tok1", secret, nonce, ts); - let cache = fresh_nonce_cache(); - - // First attempt should succeed. - assert!(verify_preface(&line, secret, 300, &cache).unwrap()); - // Second attempt with the same nonce should be rejected. - assert!(!verify_preface(&line, secret, 300, &cache).unwrap()); - } - - #[test] - fn verify_preface_rejects_expired_timestamp() { - let secret = "test-secret-key"; - let nonce = "expired-nonce"; - // Timestamp 600 seconds in the past, with a 300-second skew window. 
- let ts = current_timestamp() - 600; - let line = build_preface("tok1", secret, nonce, ts); - let cache = fresh_nonce_cache(); - - assert!(!verify_preface(&line, secret, 300, &cache).unwrap()); - } - - #[test] - fn verify_preface_rejects_invalid_hmac() { - let secret = "test-secret-key"; - let nonce = "hmac-nonce"; - let ts = current_timestamp(); - // Build with the correct secret, then verify with the wrong one. - let line = build_preface("tok1", secret, nonce, ts); - let cache = fresh_nonce_cache(); - - assert!(!verify_preface(&line, "wrong-secret", 300, &cache).unwrap()); - } - - #[test] - fn verify_preface_rejects_malformed_input() { - let cache = fresh_nonce_cache(); - - // Too few parts. - assert!(!verify_preface("NSSH1 tok1 123", "s", 300, &cache).unwrap()); - // Wrong magic. - assert!(!verify_preface("NSSH2 tok1 123 nonce sig", "s", 300, &cache).unwrap()); - // Empty string. - assert!(!verify_preface("", "s", 300, &cache).unwrap()); - } - - #[test] - fn verify_preface_distinct_nonces_both_accepted() { - let secret = "test-secret-key"; - let ts = current_timestamp(); - let cache = fresh_nonce_cache(); - - let line1 = build_preface("tok1", secret, "nonce-a", ts); - let line2 = build_preface("tok1", secret, "nonce-b", ts); - - assert!(verify_preface(&line1, secret, 300, &cache).unwrap()); - assert!(verify_preface(&line2, secret, 300, &cache).unwrap()); - } - - #[test] - fn apply_child_env_keeps_handshake_secret_out_of_ssh_children() { - let mut cmd = Command::new("/usr/bin/env"); - cmd.env(SSH_HANDSHAKE_SECRET_ENV, "should-not-leak") - .stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::null()); - - let provider_env = std::iter::once(( - "ANTHROPIC_API_KEY".to_string(), - "openshell:resolve:env:ANTHROPIC_API_KEY".to_string(), - )) - .collect(); - - apply_child_env( - &mut cmd, - "/sandbox", - "sandbox", - "dumb", - None, - None, - &provider_env, - ); - - let output = cmd.output().expect("spawn env"); - let stdout = 
String::from_utf8(output.stdout).expect("utf8"); - - assert!(!stdout.contains(SSH_HANDSHAKE_SECRET_ENV)); - assert!(stdout.contains("ANTHROPIC_API_KEY=openshell:resolve:env:ANTHROPIC_API_KEY")); - } - // ----------------------------------------------------------------------- // SEC-007: is_loopback_host tests // ----------------------------------------------------------------------- diff --git a/crates/openshell-sandbox/src/supervisor_session.rs b/crates/openshell-sandbox/src/supervisor_session.rs index 2a45726e0..8877e67ce 100644 --- a/crates/openshell-sandbox/src/supervisor_session.rs +++ b/crates/openshell-sandbox/src/supervisor_session.rs @@ -40,19 +40,23 @@ const RELAY_CHUNK_SIZE: usize = 16 * 1024; pub fn spawn( endpoint: String, sandbox_id: String, - ssh_listen_port: u16, + ssh_socket_path: std::path::PathBuf, ) -> tokio::task::JoinHandle<()> { - tokio::spawn(run_session_loop(endpoint, sandbox_id, ssh_listen_port)) + tokio::spawn(run_session_loop(endpoint, sandbox_id, ssh_socket_path)) } -async fn run_session_loop(endpoint: String, sandbox_id: String, ssh_listen_port: u16) { +async fn run_session_loop( + endpoint: String, + sandbox_id: String, + ssh_socket_path: std::path::PathBuf, +) { let mut backoff = INITIAL_BACKOFF; let mut attempt: u64 = 0; loop { attempt += 1; - match run_single_session(&endpoint, &sandbox_id, ssh_listen_port).await { + match run_single_session(&endpoint, &sandbox_id, &ssh_socket_path).await { Ok(()) => { info!(sandbox_id = %sandbox_id, "supervisor session ended cleanly"); break; @@ -75,7 +79,7 @@ async fn run_session_loop(endpoint: String, sandbox_id: String, ssh_listen_port: async fn run_single_session( endpoint: &str, sandbox_id: &str, - ssh_listen_port: u16, + ssh_socket_path: &std::path::Path, ) -> Result<(), Box> { // Connect to the gateway. 
The same `Channel` is used for both the // long-lived control stream and all data-plane `RelayStream` calls, so @@ -142,7 +146,7 @@ async fn run_single_session( handle_gateway_message( &msg, sandbox_id, - ssh_listen_port, + ssh_socket_path, &channel, ).await; } @@ -172,7 +176,7 @@ async fn run_single_session( async fn handle_gateway_message( msg: &GatewayMessage, sandbox_id: &str, - ssh_listen_port: u16, + ssh_socket_path: &std::path::Path, channel: &Channel, ) { match &msg.payload { @@ -183,6 +187,7 @@ async fn handle_gateway_message( let channel_id = open.channel_id.clone(); let sandbox_id = sandbox_id.to_string(); let channel = channel.clone(); + let ssh_socket_path = ssh_socket_path.to_path_buf(); info!( sandbox_id = %sandbox_id, @@ -191,7 +196,7 @@ async fn handle_gateway_message( ); tokio::spawn(async move { - if let Err(e) = handle_relay_open(&channel_id, ssh_listen_port, channel).await { + if let Err(e) = handle_relay_open(&channel_id, &ssh_socket_path, channel).await { warn!( sandbox_id = %sandbox_id, channel_id = %channel_id, @@ -223,7 +228,7 @@ async fn handle_gateway_message( /// `channel_id`; subsequent chunks carry raw SSH bytes. async fn handle_relay_open( channel_id: &str, - ssh_listen_port: u16, + ssh_socket_path: &std::path::Path, channel: Channel, ) -> Result<(), Box> { let mut client = OpenShellClient::new(channel); @@ -248,11 +253,15 @@ async fn handle_relay_open( .map_err(|e| format!("relay_stream RPC failed: {e}"))?; let mut inbound = response.into_inner(); - // Connect to the local SSH daemon on loopback. - let ssh = tokio::net::TcpStream::connect(("127.0.0.1", ssh_listen_port)).await?; + // Connect to the local SSH daemon on its Unix socket. 
+ let ssh = tokio::net::UnixStream::connect(ssh_socket_path).await?; let (mut ssh_r, mut ssh_w) = ssh.into_split(); - info!(channel_id = %channel_id, "relay bridge: connected to local SSH daemon"); + info!( + channel_id = %channel_id, + socket = %ssh_socket_path.display(), + "relay bridge: connected to local SSH daemon" + ); // SSH → gRPC (out_tx): read local SSH, forward as `RelayChunk`s. let out_tx_writer = out_tx.clone(); diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index bdda63d6a..6b01b28f4 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -452,7 +452,6 @@ pub(super) async fn handle_exec_sandbox( let timeout_seconds = req.timeout_seconds; let request_tty = req.tty; let sandbox_id = sandbox.id; - let handshake_secret = state.config.ssh_handshake_secret.clone(); let (tx, rx) = mpsc::channel::>(256); tokio::spawn(async move { @@ -486,7 +485,6 @@ pub(super) async fn handle_exec_sandbox( stdin_payload, timeout_seconds, request_tty, - &handshake_secret, ) .await { @@ -680,7 +678,6 @@ async fn stream_exec_over_relay( stdin_payload: Vec, timeout_seconds: u32, request_tty: bool, - handshake_secret: &str, ) -> Result<(), Status> { let command_preview: String = command.chars().take(120).collect(); info!( @@ -692,10 +689,9 @@ async fn stream_exec_over_relay( "ExecSandbox (relay): command started" ); - let (local_proxy_port, proxy_task) = - start_single_use_ssh_proxy_over_relay(relay_stream, handshake_secret) - .await - .map_err(|e| Status::internal(format!("failed to start relay proxy: {e}")))?; + let (local_proxy_port, proxy_task) = start_single_use_ssh_proxy_over_relay(relay_stream) + .await + .map_err(|e| Status::internal(format!("failed to start relay proxy: {e}")))?; let exec = run_exec_with_russh( local_proxy_port, @@ -749,68 +745,21 @@ async fn stream_exec_over_relay( /// Create a localhost SSH proxy that bridges to a relay DuplexStream. 
/// -/// The proxy sends the NSSH1 handshake preface through the relay (which flows -/// to the supervisor and on to the embedded SSH daemon), waits for "OK", then -/// bridges the russh client connection with the relay stream. +/// The proxy forwards raw SSH bytes between the `russh` client and the relay. +/// The supervisor bridges the relay to its Unix-socket SSH daemon; filesystem +/// permissions on that socket are the only access-control boundary. async fn start_single_use_ssh_proxy_over_relay( - relay_stream: tokio::io::DuplexStream, - handshake_secret: &str, + mut relay_stream: tokio::io::DuplexStream, ) -> Result<(u16, tokio::task::JoinHandle<()>), Box> { let listener = TcpListener::bind(("127.0.0.1", 0)).await?; let port = listener.local_addr()?.port(); - let handshake_secret = handshake_secret.to_string(); let task = tokio::spawn(async move { let Ok((mut client_conn, _)) = listener.accept().await else { warn!("SSH relay proxy: failed to accept local connection"); return; }; - - let (mut relay_read, mut relay_write) = tokio::io::split(relay_stream); - - // Send NSSH1 handshake through the relay to the SSH daemon. - let Ok(preface) = build_preface(&uuid::Uuid::new_v4().to_string(), &handshake_secret) - else { - warn!("SSH relay proxy: failed to build handshake preface"); - return; - }; - if let Err(e) = - tokio::io::AsyncWriteExt::write_all(&mut relay_write, preface.as_bytes()).await - { - warn!(error = %e, "SSH relay proxy: failed to send handshake preface"); - return; - } - - // Read handshake response from the relay. 
- let mut response_buf = Vec::new(); - loop { - let mut byte = [0u8; 1]; - match tokio::io::AsyncReadExt::read(&mut relay_read, &mut byte).await { - Ok(0) => break, - Ok(_) => { - if byte[0] == b'\n' { - break; - } - response_buf.push(byte[0]); - if response_buf.len() > 1024 { - break; - } - } - Err(e) => { - warn!(error = %e, "SSH relay proxy: failed to read handshake response"); - return; - } - } - } - let response = String::from_utf8_lossy(&response_buf); - if response.trim() != "OK" { - warn!(response = %response.trim(), "SSH relay proxy: handshake rejected"); - return; - } - - // Reunite the split halves for copy_bidirectional. - let mut relay = relay_read.unsplit(relay_write); - let _ = tokio::io::copy_bidirectional(&mut client_conn, &mut relay).await; + let _ = tokio::io::copy_bidirectional(&mut client_conn, &mut relay_stream).await; }); Ok((port, task)) @@ -942,33 +891,6 @@ async fn run_exec_with_russh( Ok(exit_code.unwrap_or(1)) } -fn build_preface( - token: &str, - secret: &str, -) -> Result> { - let timestamp = i64::try_from( - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map_err(|_| "time error")? 
- .as_secs(), - ) - .map_err(|_| "time error")?; - let nonce = uuid::Uuid::new_v4().to_string(); - let payload = format!("{token}|{timestamp}|{nonce}"); - let signature = hmac_sha256(secret.as_bytes(), payload.as_bytes()); - Ok(format!("NSSH1 {token} {timestamp} {nonce} {signature}\n")) -} - -fn hmac_sha256(key: &[u8], data: &[u8]) -> String { - use hmac::{Hmac, Mac}; - use sha2::Sha256; - - let mut mac = Hmac::::new_from_slice(key).expect("hmac key"); - mac.update(data); - let result = mac.finalize().into_bytes(); - hex::encode(result) -} - // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 728aeb14f..b4426c3d1 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -253,7 +253,11 @@ async fn build_compute_runtime( default_image: config.sandbox_image.clone(), image_pull_policy: config.sandbox_image_pull_policy.clone(), grpc_endpoint: config.grpc_endpoint.clone(), - ssh_listen_addr: format!("0.0.0.0:{}", config.sandbox_ssh_port), + // Filesystem path to the supervisor's Unix-socket SSH daemon. + // The path lives in a root-only directory so only the + // supervisor can connect; the gateway reaches it through the + // RelayStream bridge, not directly. 
+ ssh_listen_addr: "/run/openshell/ssh.sock".to_string(), ssh_port: config.sandbox_ssh_port, ssh_handshake_secret: config.ssh_handshake_secret.clone(), ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, diff --git a/crates/openshell-server/src/ssh_tunnel.rs b/crates/openshell-server/src/ssh_tunnel.rs index 8b7d6b48d..fbef77ef1 100644 --- a/crates/openshell-server/src/ssh_tunnel.rs +++ b/crates/openshell-server/src/ssh_tunnel.rs @@ -11,9 +11,8 @@ use openshell_core::proto::{Sandbox, SandboxPhase, SshSession}; use prost::Message; use std::sync::Arc; use std::time::Duration; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::io::AsyncWriteExt; use tracing::{info, warn}; -use uuid::Uuid; use crate::ServerState; use crate::persistence::{ObjectId, ObjectName, ObjectType, Store}; @@ -143,15 +142,15 @@ async fn ssh_connect( *count += 1; } - let handshake_secret = state.config.ssh_handshake_secret.clone(); let sandbox_id_clone = sandbox_id.clone(); let token_clone = token.clone(); let state_clone = state.clone(); let upgrade = hyper::upgrade::on(req); tokio::spawn(async move { - // Wait for the supervisor's reverse CONNECT to arrive and claim the relay. - let relay_stream = match tokio::time::timeout(Duration::from_secs(10), relay_rx).await { + // Wait for the supervisor to open its `RelayStream` and deliver the + // bridge half of the relay. + let mut relay = match tokio::time::timeout(Duration::from_secs(10), relay_rx).await { Ok(Ok(stream)) => stream, Ok(Err(_)) => { warn!(sandbox_id = %sandbox_id_clone, channel_id = %channel_id, "SSH tunnel: relay channel dropped"); @@ -173,67 +172,7 @@ async fn ssh_connect( } }; - // Send NSSH1 handshake through the relay to the SSH daemon before - // bridging the client's SSH bytes. The relay carries bytes to the - // supervisor which bridges them to the local SSH daemon on loopback. 
- let (mut relay_read, mut relay_write) = tokio::io::split(relay_stream); - let preface = match build_preface(&token_clone, &handshake_secret) { - Ok(p) => p, - Err(e) => { - warn!(error = %e, "SSH tunnel: failed to build NSSH1 preface"); - decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); - decrement_connection_count( - &state_clone.ssh_connections_by_sandbox, - &sandbox_id_clone, - ); - return; - } - }; - if let Err(e) = relay_write.write_all(preface.as_bytes()).await { - warn!(error = %e, "SSH tunnel: failed to send NSSH1 preface through relay"); - decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); - decrement_connection_count(&state_clone.ssh_connections_by_sandbox, &sandbox_id_clone); - return; - } - - // Read handshake response from the SSH daemon through the relay. - let mut response_buf = Vec::new(); - loop { - let mut byte = [0u8; 1]; - match relay_read.read(&mut byte).await { - Ok(0) => break, - Ok(_) => { - if byte[0] == b'\n' { - break; - } - response_buf.push(byte[0]); - if response_buf.len() > 1024 { - break; - } - } - Err(e) => { - warn!(error = %e, "SSH tunnel: failed to read NSSH1 response from relay"); - decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); - decrement_connection_count( - &state_clone.ssh_connections_by_sandbox, - &sandbox_id_clone, - ); - return; - } - } - } - let response = String::from_utf8_lossy(&response_buf); - if response.trim() != "OK" { - warn!(response = %response.trim(), "SSH tunnel: NSSH1 handshake rejected by sandbox"); - decrement_connection_count(&state_clone.ssh_connections_by_token, &token_clone); - decrement_connection_count(&state_clone.ssh_connections_by_sandbox, &sandbox_id_clone); - return; - } - - info!(sandbox_id = %sandbox_id_clone, channel_id = %channel_id, "SSH tunnel: NSSH1 handshake OK, bridging client"); - - // Reunite the split relay halves and bridge with the client's upgraded stream. 
- let mut relay = relay_read.unsplit(relay_write); + info!(sandbox_id = %sandbox_id_clone, channel_id = %channel_id, "SSH tunnel: relay established, bridging client"); match upgrade.await { Ok(upgraded) => { @@ -268,37 +207,6 @@ fn header_value(headers: &http::HeaderMap, name: &str) -> Result Result> { - let timestamp = i64::try_from( - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map_err(|_| "time error")? - .as_secs(), - ) - .map_err(|_| "time error")?; - let nonce = Uuid::new_v4().to_string(); - let payload = format!("{token}|{timestamp}|{nonce}"); - let signature = hmac_sha256(secret.as_bytes(), payload.as_bytes()); - Ok(format!( - "{PREFACE_MAGIC} {token} {timestamp} {nonce} {signature}\n" - )) -} - -fn hmac_sha256(key: &[u8], data: &[u8]) -> String { - use hmac::{Hmac, Mac}; - use sha2::Sha256; - - let mut mac = Hmac::::new_from_slice(key).expect("hmac key"); - mac.update(data); - let result = mac.finalize().into_bytes(); - hex::encode(result) -} - impl ObjectType for SshSession { fn object_type() -> &'static str { "ssh_session" From a0e8391dd209e20cb2a277f0550e9c1ca69700b9 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 12:05:28 -0700 Subject: [PATCH 09/20] chore: exclude rfc/0002 from this PR (will land separately) --- rfc/0002-supervisor-initiated-connectivity.md | 775 ------------------ 1 file changed, 775 deletions(-) delete mode 100644 rfc/0002-supervisor-initiated-connectivity.md diff --git a/rfc/0002-supervisor-initiated-connectivity.md b/rfc/0002-supervisor-initiated-connectivity.md deleted file mode 100644 index 3705fe3d0..000000000 --- a/rfc/0002-supervisor-initiated-connectivity.md +++ /dev/null @@ -1,775 +0,0 @@ ---- -authors: - - "@pimlock" -state: draft -links: - - https://linear.app/nvidia/document/rfc-0001-core-architecture-c3a58267fd9c - - https://linear.app/nvidia/issue/OS-31/openshell-long-running-agentservice-primitives - - 
https://linear.app/nvidia/issue/OS-83/implement-initial-supervisor-gateway-session-stream - - https://linear.app/nvidia/issue/OS-86/implement-reverse-connect-sshconnect-relay-over-supervisor-sessions - - https://linear.app/nvidia/document/plan-sshexec-over-supervisor-sessions-960d5719e61c ---- - -# RFC 0002 - Supervisor-Initiated Connectivity for Connect and Exec - -## Summary - -This RFC proposes making the supervisor-initiated gateway session the only control and interactive connectivity path between sandboxes and the gateway. Instead of the gateway resolving and dialing a sandbox SSH endpoint, each supervisor maintains a long-lived authenticated session to the gateway, and the gateway uses that session for configuration delivery, SSH-compatible connect flows, and `ExecSandbox`. The public client contract remains stable: `CreateSshSession` plus `CONNECT /connect/ssh` stays the entrypoint for OpenSSH-compatible access, while `ExecSandbox` remains the typed gRPC API for remote command execution. In the initial implementation, `ExecSandbox` continues to use SSH internally, but that SSH transport runs through the supervisor session instead of requiring direct gateway-to-sandbox reachability. - -## Motivation - -RFC-0001 established the architectural direction that supervisors should connect outbound to the gateway and that the gateway should not depend on direct reachability into sandbox runtime addresses. 
The current implementation still relies on the opposite assumption for interactive access: - -- `/connect/ssh` validates a token and then dials the sandbox's SSH port directly -- `ExecSandbox` resolves the sandbox endpoint and establishes an SSH transport from the gateway to the sandbox -- the compute layer and driver are responsible for returning a dialable sandbox address - -That model creates several problems: - -- it ties the gateway to infrastructure-specific address resolution and reachability rules -- it makes NATed, private, or otherwise non-addressable sandbox environments harder to support -- it forces every compute backend to solve "how can the gateway reach the sandbox?" instead of only "how can the supervisor reach the gateway?" -- it splits sandbox-facing traffic between a supervisor->gateway control path and a gateway->sandbox interactive path - -We want to invert that relationship. The supervisor should be the only component that needs network reachability to the gateway. Once that session exists, all higher-level operations can be layered on top of it. - -This RFC also captures the next step after the initial supervisor session work tracked in `OS-83`. That issue establishes the session primitive and preserves today's config and log behavior. This RFC extends that model to interactive access and exec, while keeping the direct typed-exec redesign as a follow-on after the initial transport inversion lands. - -## Non-goals - -- Exposing long-running services from a sandbox to external clients. That is a separate RFC under the broader `OS-31` effort. -- Designing the full multi-replica gateway session ownership and cross-replica forwarding protocol. This RFC calls out the requirement but defers the details. -- Preserving the current gateway->sandbox direct-connect implementation as a long-lived fallback path. -- Replacing SSH as the public transport for `sandbox connect`, editor integrations, file transfer, or port forwarding. 
-- Redesigning the public `CreateSshSession`, `/connect/ssh`, or `ExecSandbox` APIs. -- Finalizing every supervisor/gateway session message in wire-level detail. - -## Proposal - -### Overview - -Introduce a persistent supervisor-to-gateway session primitive and make it the transport foundation for both control-plane synchronization and interactive access. The gateway no longer dials a sandbox network endpoint. Instead: - -- the supervisor establishes and maintains a long-lived authenticated session to the gateway -- the gateway records which live session owns each sandbox -- SSH-compatible connect flows and `ExecSandbox` are relayed through supervisor-initiated reverse tunnels coordinated by that session - -The resulting architecture is: - -```mermaid -flowchart LR - CLI["CLI / OpenSSH client"] - GW["Gateway"] - SUP["Supervisor"] - SSHD["Embedded SSH daemon"] - PROC["Sandbox process launcher"] - - CLI -- "gRPC" --> GW - CLI -- "CONNECT /connect/ssh" --> GW - SUP -- "ConnectSupervisor (gRPC control)" --> GW - SUP -- "CONNECT /relay/{channel_id} (data)" --> GW - SUP -- "local loopback TCP" --> SSHD - SSHD --> PROC -``` - -### Persistent supervisor session - -The gateway exposes a long-lived bidirectional RPC, referred to in this RFC as `ConnectSupervisor`. - -At a high level: - -```proto -rpc ConnectSupervisor(stream SupervisorToGatewayMessage) - returns (stream GatewayToSupervisorMessage); -``` - -The supervisor opens this session outbound to the gateway after sandbox startup and keeps it alive for the lifetime of the sandbox. The session is the authoritative live connectivity channel between the gateway and that sandbox. 
- -The session carries control-plane traffic: - -- supervisor registration and sandbox identity -- heartbeat and liveness traffic -- desired-state delivery for config, policy, provider environment, and inference routes -- desired-state application acknowledgements and failure reporting -- log batches -- policy analysis submissions -- relay channel lifecycle coordination (open, accept/reject, close) for SSH-compatible connect flows and `ExecSandbox` - -The actual SSH byte traffic for connect and exec flows over separate supervisor-initiated reverse HTTP CONNECT tunnels, not over the gRPC stream. The gRPC session coordinates when to open and close those tunnels. See the [OS-86 plan](https://linear.app/nvidia/document/plan-sshexec-over-supervisor-sessions-960d5719e61c) for the detailed data-plane design and alternatives considered. - -Although this is a single bidirectional RPC, the two stream directions should use different envelope types. - -At a high level: - -```proto -message SupervisorToGatewayMessage { - oneof payload { - SupervisorHello hello = 1; - Heartbeat heartbeat = 2; - ConfigApplied config_applied = 3; - LogBatch log_batch = 4; - PolicyAnalysisReport policy_analysis = 5; - PolicySyncRequest policy_sync = 6; - RelayOpenResult relay_open_result = 10; - RelayClose relay_close = 11; - } -} - -message GatewayToSupervisorMessage { - oneof payload { - SessionAccepted session_accepted = 1; - SessionRejected session_rejected = 2; - Heartbeat heartbeat = 3; - ConfigSnapshot config_snapshot = 4; - ConfigUpdated config_updated = 5; - InferenceRoutesSnapshot inference_snapshot = 6; - InferenceRoutesUpdated inference_updated = 7; - RelayOpen relay_open = 10; - RelayClose relay_close = 11; - } -} -``` - -The envelopes differ because the allowed actions differ by direction: - -- only the supervisor should send logs, desired-state application results, and policy analysis -- only the gateway should send desired-state snapshots or updates -- both sides may carry heartbeat 
traffic and relay lifecycle messages, but those can reuse shared payload message types inside direction-specific envelopes - -This gives us better schema clarity, simpler validation, and fewer impossible states. With a single shared envelope type, it becomes easy to accidentally model nonsensical cases such as the gateway receiving `ConfigUpdated` or the supervisor receiving `LogBatch`. - -This RFC still does not lock down every wire-level field. The important design choice is that the stream is bidirectional but directionally typed. If direct typed exec traffic is introduced later, those messages or RPCs should preserve this same directional separation. - -We should also prefer semantically specific message names over a generic `...Event` suffix. In practice: - -- use `...Snapshot` for full current desired state sent on attach -- use `...Updated` for pushed changes to that state -- use `...Applied` or `...Result` for acknowledgements and outcomes -- use `...Open`, `...Result`, and `...Close` for relay channel lifecycle on the control stream - -That naming makes it much easier to reason about message direction and whether a given payload is declarative state, a command, or an outcome. - -### Example session messages - -The following examples are intentionally partial. They are here to make the target message vocabulary concrete, not to lock down every field. 
- -```proto -message SupervisorHello { - string sandbox_id = 1; - string supervisor_instance_id = 2; - string supervisor_version = 3; - repeated string capabilities = 4; -} - -message SessionAccepted { - string session_id = 1; - string gateway_replica_id = 2; - uint32 heartbeat_interval_secs = 3; -} - -message ConfigSnapshot { - uint64 config_revision = 1; - openshell.sandbox.v1.SandboxPolicy policy = 2; - map settings = 3; - map provider_environment = 4; - PolicySource policy_source = 5; - uint32 policy_version = 6; - string policy_hash = 7; -} - -message ConfigUpdated { - ConfigSnapshot snapshot = 1; -} - -message ConfigApplied { - uint64 config_revision = 1; - bool success = 2; - string error = 3; - uint32 policy_version = 4; -} - -message InferenceRoutesSnapshot { - string revision = 1; - repeated ResolvedRoute routes = 2; -} - -message InferenceRoutesUpdated { - InferenceRoutesSnapshot snapshot = 1; -} - -message PolicyAnalysisReport { - repeated DenialSummary summaries = 1; - repeated PolicyChunk proposed_chunks = 2; - string analysis_mode = 3; -} - -message PolicySyncRequest { - openshell.sandbox.v1.SandboxPolicy policy = 1; - string reason = 2; -} - -message RelayOpen { - string channel_id = 1; - string kind = 2; -} - -message RelayOpenResult { - string channel_id = 1; - bool accepted = 2; - string error = 3; -} - -message RelayClose { - string channel_id = 1; - string reason = 2; -} - -``` - -These examples also show one of the intended simplifications of the target design: - -- provider environment is folded into config delivery instead of needing a separate startup fetch -- inference bundle delivery is pushed from the gateway instead of polled -- policy load reporting is generalized into desired-state application acknowledgement -- relay channel lifecycle messages on the control stream coordinate both `sandbox connect` and the initial `ExecSandbox` transport, with actual SSH bytes flowing over separate reverse HTTP CONNECT tunnels - -### Multiplexing and 
request correlation - -Once multiple operations share a single long-lived supervisor session, the protocol needs explicit application-level correlation for relay channel lifecycle. - -The gRPC control stream carries only relay lifecycle messages (`RelayOpen`, `RelayOpenResult`, `RelayClose`). The actual SSH byte traffic for each relay flows over a separate supervisor-initiated reverse HTTP CONNECT tunnel, one TCP connection per relay channel. This means the gRPC stream is not a data-plane multiplexer — it coordinates the lifecycle of independent data-plane connections. - -Each concurrent relay channel has its own `channel_id`, unique within a live `session_id`. The gateway allocates `channel_id` values for gateway-initiated relay operations. That keeps collision handling simple and makes ownership of cleanup clear. - -At a high level, the gateway keeps: - -- `channel_id -> active relay state` (the bridged HTTP CONNECT streams, cancellation handles, timeout state, metrics) -- a pending-channel map for relay channels that have been opened via gRPC but whose reverse HTTP CONNECT has not yet arrived - -At a high level, the supervisor keeps: - -- `channel_id -> local bridge state` (the reverse HTTP CONNECT stream paired with a loopback TCP connection to the embedded SSH daemon) - -This state is intentionally in-memory and tied to the live session. It should not be persisted as durable control-plane data. - -Each relay channel should have a simple local state machine: `opening -> open -> closing -> closed`. Out-of-order or invalid transitions should be treated as protocol errors for that channel and cleaned up eagerly. - -If the parent session is lost: - -- all active relay channels close (both the gRPC-coordinated lifecycle and the underlying HTTP CONNECT tunnels) -- all in-flight exec requests fail -- the corresponding in-memory maps are dropped - -The design should not attempt to migrate relay channels across session reconnects. 
- -See the [OS-86 plan](https://linear.app/nvidia/document/plan-sshexec-over-supervisor-sessions-960d5719e61c) for the detailed data-plane transport design and alternatives considered. - -`ConnectSupervisor` is the preferred working name because it accurately describes both the initial attach and every later reconnect. This is not a one-time registration ceremony. A supervisor may reconnect many times across gateway restarts, network interruptions, and supervisor restarts, and the same RPC should cover all of those cases. Names such as `RegisterSupervisor` imply a first-time, mostly one-shot side effect and fit a unary API better than a long-lived session stream. - -If we later decide we want the name to emphasize the session resource more explicitly, the strongest alternatives are: - -- `StartSupervisorSession`, which is straightforward and lighter-weight -- `OpenSupervisorSession`, which is shorter but slightly more generic - -At the moment, `ConnectSupervisor` strikes the best balance between clarity, brevity, and reconnect semantics. - -### Session lifecycle and failure handling - -The persistent supervisor session is reconnectable and must be treated as live state rather than permanent registration state. - -At a minimum, the protocol should distinguish: - -- the stable sandbox identity -- the stable supervisor identity for the current sandbox process instance, such as a boot id or process epoch -- the current live session id or session epoch assigned to this particular connection - -This allows the gateway to reason about duplicate sessions, stale sessions, reconnects, and ownership changes without conflating them. 
- -The gateway needs to track the following state: - -- sandbox id -> owning gateway replica -- current live session id or epoch for that sandbox -- connected / stale / disconnected live session state -- `connected_at` and `last_heartbeat_at` -- `last_disconnect_at` and `last_disconnect_reason` -- optionally, the last config or policy revision acknowledged by the supervisor - -Ephemeral interactive state, such as active SSH relay channels or running exec requests, should remain replica-local to the owning gateway and should not be treated as durable shared state. - -The supervisor also needs a small amount of reconnect state for correctness and observability: - -- the gateway endpoint or gateway replica it is currently attempting to reach -- current reconnect attempt and backoff state -- last successful connection time -- last connection failure reason, if known - -#### Gateway restart - -If the gateway process or replica restarts: - -- the supervisor's stream terminates or transport keepalives fail -- the supervisor reconnects using exponential backoff -- the new connection is treated as a fresh live session for an existing sandbox, not a new sandbox registration -- the gateway replays authoritative state on reconnect, including the latest policy, settings, and any other required runtime configuration - -Outstanding interactive operations fail closed: - -- active SSH-compatible sessions disconnect -- in-flight `ExecSandbox` requests fail and the client may retry if appropriate - -The system should not attempt to transparently preserve interactive byte streams across a gateway restart. 
- -#### Supervisor restart or crash - -If the supervisor process restarts while the sandbox is otherwise still provisioned: - -- the old live session disappears -- any active SSH relay channels terminate -- any in-flight exec requests fail -- the restarted supervisor reconnects using the same sandbox identity and a new supervisor or session epoch - -This should be handled the same way as any other reconnect, with the newer session superseding the older one. - -#### Heartbeats and keepalives - -The session should use both transport-level keepalives and application-level heartbeats: - -- gRPC keepalives detect half-open transport failures -- explicit heartbeat messages give the gateway and supervisor an application-visible liveness signal - -If heartbeats are missed: - -- the session first becomes stale -- after a timeout window it becomes disconnected -- connect and exec requests must fail rather than being silently queued behind a dead session - -If the transport remains open but application heartbeats stop making forward progress, the gateway should still treat that session as unhealthy. This protects against half-broken cases where the TCP or HTTP/2 connection exists but the supervisor event loop is wedged. - -#### Timeouts - -The session model needs explicit timeout behavior for at least: - -- supervisor session establishment -- heartbeat expiry -- SSH relay channel open -- exec startup -- exec inactivity or total runtime, when requested by the caller - -Timeout handling should be fail-fast and explicit. The gateway should not wait indefinitely for a disconnected or non-responsive supervisor. - -#### Network partitions and reconnect behavior - -Transient network failures between supervisor and gateway should be treated as normal operating conditions rather than exceptional cases. 
The reconnect loop should therefore: - -- use bounded exponential backoff with jitter -- continue retrying for the lifetime of the sandbox unless the sandbox is shutting down -- reset backoff after a successful connection -- record the reason for prolonged disconnects so operators can distinguish "sandbox stopped" from "sandbox exists but cannot reach gateway" - -The design should not assume that disconnects are rare. - -#### Duplicate and stale sessions - -Reconnects and race conditions can produce overlapping connection attempts from what claims to be the same sandbox. The protocol therefore needs a clear winner rule: - -- the gateway accepts one live session as authoritative for a sandbox at a time -- newer sessions supersede older ones -- superseded sessions are closed and their relay or exec traffic is terminated - -This is especially important once multi-replica ownership exists. - -#### Relay and exec cleanup - -Interactive sub-operations should be tied to the lifetime of the owning supervisor session: - -- relay channels close when either side closes them or when the parent session is lost -- exec requests are cancelled when the parent session is lost unless a future design explicitly introduces detached execution -- the gateway and supervisor should both treat orphaned relay or exec state as a bug and clean it up eagerly - -This prevents session churn from leaking local resources or leaving misleading in-memory state behind. - -#### Surfacing live session state to users and operators - -Sandbox provisioning readiness and supervisor live session state are related but not identical. A sandbox can still exist at the compute layer while its live session is down. - -The system should therefore surface supervisor connectivity separately from raw provisioning state. 
At a high level, operators and clients need to know: - -- whether the sandbox exists -- whether the sandbox is expected to have a live supervisor session -- whether that session is currently connected, stale, or disconnected - -This may be represented as a dedicated condition or status field rather than overloading the existing sandbox phase model. - -### Data model - -The data model should separate the stable sandbox resource from the live connectivity state used to reach it. - -`Sandbox` should remain the canonical long-lived, user-facing resource: - -- desired spec stays on `Sandbox.spec` -- provisioning and user-facing readiness stay on `Sandbox.status` and `Sandbox.phase` -- sandbox identity and lifecycle continue to exist even when no live supervisor session is connected - -The live reverse-connect attachment should be represented as a separate gateway-owned entity, referred to here as `SupervisorSession`. - -For the initial implementation, this should be modeled as one current session record per sandbox, updated in place, rather than an append-only list of every historical reconnect. In practice that means: - -- `SupervisorSession` has a `sandbox_id` -- there is at most one authoritative current `SupervisorSession` record per sandbox -- reconnects replace or update that record rather than creating an unbounded number of durable rows -- the record carries the current `session_id` or session epoch for the live connection - -This avoids growth concerns while still giving the gateway a first-class place to store session-specific state. 
- -At a high level, `SupervisorSession` should contain: - -- `sandbox_id` -- current live `session_id` or session epoch -- supervisor instance identity, such as boot id or process epoch -- owning gateway replica id -- live session state such as connected, stale, or disconnected -- `connected_at`, `last_heartbeat_at`, and `last_disconnect_at` -- `last_disconnect_reason` -- optionally, last acknowledged config or policy revision - -By contrast, sub-operations under a live session should remain ephemeral and in-memory on the owning gateway replica: - -- active SSH relay channels -- active exec requests -- per-channel byte counters, flow-control state, and cancellation handles - -Those objects are tied to a live transport and do not benefit from durable storage in the initial design. If the gateway restarts, they are lost and fail closed. - -This leads to a deliberate hybrid model: - -- durable store: stable sandbox resources plus the latest per-sandbox supervisor-session summary -- in-memory registry: the actual live stream handle and all active relay or exec sub-state - -Routing decisions for connect and exec should use the in-memory registry on the owning gateway replica. The persisted `SupervisorSession` record is for coordination, recovery, and observability, not as a substitute for a live stream handle. - -To keep the public API simple, the gateway should also project a small derived view of supervisor connectivity onto the sandbox itself, likely as a dedicated condition or status summary such as `SupervisorConnected`. That gives clients a straightforward way to answer "is this sandbox currently reachable?" without forcing them to query a separate session object for common cases. - -The system should not initially create durable child records for every relay channel, every reconnect attempt, or every exec request. If historical analysis is needed, it should come from logs and telemetry rather than from making the primary control-plane data model append-only. 
- -### Authentication and trust model - -This RFC preserves the current client-facing auth model: - -- CLI->gateway auth remains whatever the gateway already supports, such as mTLS or edge-authenticated access -- `CreateSshSession` remains the authorization/bootstrap step for SSH-compatible connect flows -- `/connect/ssh` continues to require the sandbox id and session token - -The supervisor session introduces a second trust boundary: - -- the supervisor establishes its identity when establishing `ConnectSupervisor` -- the gateway binds the live session to a specific sandbox identity -- connect and exec requests are routed to that already-authenticated live session - -Under this model, the gateway no longer needs a remote network handshake like today's gateway->sandbox NSSH1 exchange in order to prove that it reached the intended sandbox over the network. NSSH1 therefore stops being a gateway-to-sandbox network trust boundary. However, the supervisor continues to send the NSSH1 preface on local loopback connections to the embedded SSH daemon. This preserves compatibility with the existing SSH daemon implementation and the `ExecSandbox` path, which relies on the same SSH handshake flow. Whether NSSH1 is eventually removed from the loopback path is a future cleanup decision, not a blocker for this RFC. - -### `sandbox connect` and SSH-compatible relay - -The public connect contract remains unchanged: - -1. CLI resolves the sandbox and calls `CreateSshSession` -2. CLI launches OpenSSH with `ssh-proxy` as `ProxyCommand` -3. `ssh-proxy` opens `CONNECT /connect/ssh` and presents the sandbox id and token - -What changes is the backend implementation of `/connect/ssh`. - -Instead of resolving a dialable sandbox endpoint and opening a TCP connection from the gateway to port 2222, the gateway: - -1. validates the SSH session token -2. resolves the owning live supervisor session for the target sandbox -3. sends `RelayOpen` over the gRPC control stream -4. 
waits for the supervisor to open a reverse HTTP CONNECT tunnel back to the gateway -5. bridges the client's upgraded CONNECT stream with the supervisor's reverse CONNECT stream - -The supervisor handles that relay-open request by: - -1. opening a reverse HTTP CONNECT to the gateway's `/relay/{channel_id}` endpoint -2. connecting locally to the embedded SSH daemon on loopback (with NSSH1 preface) -3. bridging bytes between the reverse HTTP CONNECT stream and the local SSH connection - -Sequence: - -```mermaid -sequenceDiagram - participant CLI as CLI / OpenSSH - participant GW as Gateway - participant SUP as Supervisor - participant SSHD as Embedded SSH daemon - - CLI->>GW: GetSandbox + CreateSshSession - CLI->>GW: CONNECT /connect/ssh + token - GW->>GW: Validate token and sandbox identity - GW->>SUP: RelayOpen(kind=ssh, channel_id) [gRPC control] - SUP->>GW: CONNECT /relay/{channel_id} [reverse HTTP tunnel] - SUP->>SSHD: Local loopback TCP connect + NSSH1 preface - GW->>GW: Bridge client CONNECT stream with supervisor CONNECT stream - GW-->>CLI: CONNECT established - - Note over CLI,SSHD: Opaque SSH bytes flow: CLI ↔ GW bridge ↔ SUP ↔ SSHD -``` - -Because the relay is intentionally opaque after setup, this preserves compatibility with: - -- interactive shell sessions -- editor integrations that rely on `ProxyCommand` -- SFTP and modern `scp` -- SSH `direct-tcpip` port forwarding - -This is an important design constraint. We are not introducing a new public interactive protocol here; we are relocating where the bytes are bridged. - -### `ExecSandbox` - -`ExecSandbox` remains the public typed gRPC API exposed by the gateway. In the initial implementation, its transport changes from "gateway runs an SSH client against a sandbox-resolved endpoint" to "gateway runs the same SSH client over a relay channel on the supervisor session." - -The proposed flow is: - -1. CLI calls `ExecSandbox` on the gateway exactly as it does today -2. 
gateway validates the sandbox id and finds the owning live supervisor session -3. gateway sends `RelayOpen` over the gRPC control stream -4. supervisor opens a reverse HTTP CONNECT tunnel back to the gateway and bridges it to the local SSH daemon -5. gateway runs the existing per-request SSH exec flow over that reverse tunnel -6. gateway forwards stdout, stderr, and exit status to the existing `ExecSandbox` response stream - -Sequence: - -```mermaid -sequenceDiagram - participant CLI as CLI - participant GW as Gateway - participant SUP as Supervisor - participant SSHD as Embedded SSH daemon - participant PROC as Local process launcher - - CLI->>GW: ExecSandbox(sandbox_id, command, stdin, tty) - GW->>SUP: RelayOpen(channel_id, kind=ssh) [gRPC control] - SUP->>GW: CONNECT /relay/{channel_id} [reverse HTTP tunnel] - SUP->>SSHD: Local loopback TCP connect + NSSH1 preface - GW->>SSHD: SSH handshake + exec over reverse tunnel - SSHD->>PROC: Spawn command locally - CLI-->>GW: gRPC stream remains open - SSHD-->>GW: SSH stdout / stderr / exit over reverse tunnel - GW-->>CLI: ExecSandboxEvent stream -``` - -Today, `ExecSandbox` is implemented as a fresh SSH transport per request. The gateway resolves the sandbox endpoint, starts a single-use local SSH proxy, opens a new SSH connection, authenticates, opens one session channel, optionally requests a PTY, sends one exec request, sends the full stdin payload, waits for output and exit, and then tears the SSH connection down. It is not a shared long-lived SSH control connection with many exec channels underneath it. - -For the initial implementation, this RFC intentionally keeps SSH as the internal exec transport to minimize behavior changes and reduce the amount of new protocol surface required in `OS-86`. The gateway-side `russh` client path can remain largely intact, as long as it is pointed at the reverse HTTP CONNECT tunnel instead of a sandbox-resolved network endpoint. 
- -With SSH bytes flowing over dedicated reverse HTTP CONNECT tunnels rather than the gRPC stream, the practical motivation for replacing the internal SSH transport with typed exec messages is weaker than originally anticipated. See the alternatives section and implementation step 4 for further discussion. - -To preserve behavior and avoid divergence, the supervisor should reuse the same local process-launch logic and policy enforcement path currently used by the embedded SSH daemon for shell and exec requests: - -- same run-as-user behavior -- same PTY allocation semantics when `tty=true` -- same environment and provider injection behavior -- same workdir handling -- same network namespace, privilege dropping, and sandbox enforcement - -Where current PTY semantics inherently merge stdout and stderr, the session-based exec path should preserve that behavior rather than inventing new stream semantics. - -It is also important to be clear about the current interaction model. `ExecSandbox` is not a general-purpose interactive terminal protocol today: - -- stdin is sent as a single upfront payload, not as a live stream -- there is no resize channel in the public API -- there is no user-visible signal forwarding model beyond timeout-driven termination - -In this API, `tty=true` should therefore be read as "allocate PTY semantics for the child process" rather than "provide a full interactive terminal session." PTY still matters because many programs change behavior based on whether they are attached to a terminal, and because PTY mode changes output and stream semantics. For example: - -- the child sees a terminal-like environment instead of plain pipe mode -- stdout and stderr may no longer remain meaningfully separate -- terminal-oriented formatting, prompts, and buffering behavior can change - -That means truly interactive workloads such as full-screen terminal applications are already better served by `sandbox connect`. 
As long as the typed session-based exec path preserves the current `ExecSandbox` semantics, moving off SSH should not create a large new compatibility break for those workloads because they are not fully supported by the current API either. - -### Impact on compute drivers - -This RFC removes "return a gateway-dialable interactive endpoint" from the critical path for connect and exec. - -Compute still needs to provision sandboxes and identify them, but it no longer needs to satisfy "the gateway must be able to open a TCP connection to this sandbox for interactive access." That simplifies the transport assumptions for future compute backends. - -This does not mean endpoint resolution disappears everywhere immediately. Existing code may still need sandbox addresses for other operational purposes, and the compute API may continue to expose them. The key change is that connect and exec stop depending on that capability. - -In particular, the current compute-driver RPC: - -```proto -rpc ResolveSandboxEndpoint(ResolveSandboxEndpointRequest) - returns (ResolveSandboxEndpointResponse); -``` - -exists specifically to support gateway-initiated exec and SSH reachability. Once the reverse-connect design in this RFC is fully implemented and the remaining call sites are removed, this RPC should be removable from the compute-driver contract rather than preserved indefinitely as dead compatibility surface. - -### Multi-replica note - -As described in RFC-0001, multi-replica gateways introduce a session ownership problem. This RFC adopts the same constraint without fully designing the solution: - -- each live supervisor session is owned by exactly one gateway replica at a time -- connect and exec requests may land on a different replica -- the system therefore needs a way to route or forward those requests to the owning replica - -This RFC requires that capability but defers its detailed design. 
The reverse-connect transport must not assume a single gateway replica forever, but it also does not need to solve that problem in this document. - -## Implementation plan - -### 1. Introduce the persistent supervisor session - -Implement `ConnectSupervisor` as a long-lived bidirectional RPC between supervisor and gateway. - -The first implementation (`OS-86`) introduces the session primitive with: - -- supervisor identity binding and session establishment (hello/accept/reject) -- heartbeat and live session state tracking -- relay channel lifecycle coordination (open/result/close) -- gateway-side in-memory session registry - -SSH connect and `ExecSandbox` are the first consumers. The remaining control-traffic migrations (config delivery, log push, policy status, inference routes) are handled separately under `OS-83`. - -### 2. Move SSH-compatible connect onto the session - -Replace the backend implementation of `/connect/ssh` so it: - -- validates tokens exactly as today -- finds the owning live supervisor session -- sends `RelayOpen` over the gRPC control stream -- waits for the supervisor's reverse HTTP CONNECT on `/relay/{channel_id}` -- bridges the client's upgraded CONNECT stream with the supervisor's reverse CONNECT stream - -On the supervisor side, on receiving `RelayOpen`, open a reverse HTTP CONNECT to the gateway and bridge it to the local embedded SSH daemon via loopback TCP with NSSH1 preface. - -We intentionally keep the external contract unchanged: - -- `CreateSshSession` stays -- `/connect/ssh` stays -- `ssh-proxy` stays -- OpenSSH compatibility stays - -### 3. Move `ExecSandbox` onto the session while keeping SSH internally - -Replace the gateway-side direct sandbox dial path with a reverse HTTP CONNECT tunnel through the supervisor session, but keep the existing gateway-side SSH exec transport for the first implementation. 
- -The supervisor implementation reuses the same relay bridge as `sandbox connect`—both open a reverse HTTP CONNECT tunnel and bridge it to the local embedded SSH daemon. - -### 4. Possible future optimization: direct typed exec over the session - -It is theoretically possible to replace the gateway-side SSH client with direct typed exec messages over the supervisor session, removing the per-exec SSH handshake overhead. However, with SSH bytes flowing over dedicated reverse HTTP CONNECT tunnels rather than the gRPC stream, the practical motivation for this is weaker than originally anticipated: - -- the reverse HTTP CONNECT tunnel already provides a clean, zero-framing-overhead byte path—there is no gRPC data-plane bottleneck to eliminate -- the embedded SSH daemon already handles PTY allocation, environment injection, user switching, workdir, and process lifecycle—replacing it with typed gRPC messages would mean reimplementing that surface -- the per-exec SSH handshake cost (NSSH1 + key exchange + auth) is real but small relative to typical command execution time - -This optimization should only be pursued if profiling shows the SSH handshake overhead is a meaningful bottleneck. It is not a planned next step. - -### 5. Consolidate the remaining supervisor-gateway control traffic - -Replace the remaining ad hoc sandbox-side gRPC traffic with session messages: - -- replace `GetSandboxConfig` polling with pushed `ConfigSnapshot` and `ConfigUpdated` -- fold `GetSandboxProviderEnvironment` into config delivery -- replace `ReportPolicyStatus` with `ConfigApplied` -- replace `PushSandboxLogs` with `LogBatch` -- replace `SubmitPolicyAnalysis` with `PolicyAnalysisReport` -- replace `Inference.GetInferenceBundle` polling with `InferenceRoutesSnapshot` and `InferenceRoutesUpdated` - -The only intentionally conditional piece is bootstrap policy sync from the sandbox back to the gateway. 
If that behavior remains, it should become a typed session message such as `PolicySyncRequest`. - -### 6. Remove the direct gateway->sandbox interactive path - -This RFC proposes a hard switch rather than maintaining both paths in parallel. - -Rationale: - -- OpenShell is still alpha -- consumers pin versions -- maintaining two interactive transports would increase code complexity and testing burden -- the old path actively works against the architecture we want to converge on - -This means that once the new session-based path lands, `/connect/ssh` and `ExecSandbox` should stop relying on direct sandbox dialing entirely. - -As part of that cutover, the now-obsolete `ResolveSandboxEndpoint` path should also be deleted from the gateway and compute-driver APIs. - -### 7. Update observability and docs - -The implementation should emit clear telemetry for: - -- supervisor session connect and disconnect -- relay channel open and close -- exec request lifecycle -- config and inference snapshot delivery -- routing failures when the owning supervisor session is unavailable - -Once implemented, the living architecture docs should be updated so they describe the supervisor-initiated model rather than the current gateway-initiated one. - -## Risks - -- The supervisor session becomes the single coordination point for both control-plane synchronization and interactive relay lifecycle. Although SSH bytes flow over separate HTTP CONNECT tunnels (not the gRPC stream), the gRPC control stream is still on the critical path for relay setup latency. -- Removing the direct-connect path immediately means there is no transport fallback if the new session-based path regresses. -- Each relay channel requires a new TCP/HTTP connection from supervisor to gateway, and the gateway must bridge two upgraded streams per active tunnel. Under high concurrency this increases connection count and file descriptor usage. 
-- A race window exists between the gRPC `RelayOpen` message and the supervisor's reverse HTTP CONNECT arriving. Network delays or supervisor slowness could cause correlation failures or timeouts. -- Multi-replica ownership and forwarding are intentionally deferred, which creates integration risk for future HA work. - -## Alternatives - -### Keep gateway-initiated connectivity - -Rejected. This keeps the current infrastructure coupling and directly conflicts with the supervisor-initiated architecture from RFC-0001. - -### Run both direct-connect and reverse-connect paths in parallel - -Rejected for the initial implementation. While a dual-path rollout would reduce migration risk, it would also increase maintenance cost and prolong the life of the architecture we are trying to remove. Given the project's current alpha stage, a clean cutover is preferred. - -### Preserve SSH internally for `ExecSandbox` - -This is the chosen approach. The gateway continues to run a `russh` client per exec request, but the SSH connection runs over a reverse HTTP CONNECT tunnel through the supervisor instead of a direct TCP connection to sandbox:2222. - -This preserves behavior: the existing SSH exec semantics (PTY handling, stdout/stderr, exit codes, NSSH1 handshake) remain identical. The supervisor bridges each relay tunnel to the same embedded SSH daemon that serves `sandbox connect`. Both paths share the same relay mechanism, which keeps the implementation simple. - -With SSH bytes flowing over dedicated HTTP CONNECT tunnels rather than the gRPC stream, there is no data-plane framing overhead to justify replacing SSH with typed gRPC exec messages. The embedded SSH daemon already handles process lifecycle, PTY allocation, environment injection, and sandbox enforcement. Replacing it would mean reimplementing that protocol surface for a marginal reduction in per-exec handshake cost. See implementation step 4 for further discussion. 
- -### Replace `/connect/ssh` with a new public protocol - -Rejected for now. SSH compatibility is valuable for OpenSSH itself, editor tooling, SFTP/scp, and port forwarding. The gateway can preserve that public contract indefinitely if it continues to act as an SSH-byte relay. - -That contract would only need to change if a future product decision prefers a different public session abstraction than "opaque SSH over a tokenized CONNECT tunnel." - -## Prior art - -- **RFC-0001 Core Architecture**: establishes the supervisor-initiated connectivity direction, active-active gateway model, and the principle that the gateway should not need to dial sandboxes directly. -- **Kubernetes exec/attach/port-forward**: the API server relays typed or byte-stream traffic into workloads without exposing pods directly as public interactive endpoints. -- **Cloudflare Tunnel and similar reverse tunnels**: a private agent maintains an outbound session to a public control plane, which then relays user traffic over that session. -- **OpenSSH `ProxyCommand`**: demonstrates the practical value of preserving the SSH client contract while changing the transport under it. - -## Open questions - -- Should relay lifecycle coordination remain on the main supervisor session RPC, or should it move onto a dedicated RPC once the initial design is proven? -- What exact authentication handshake should the supervisor use when establishing `ConnectSupervisor`, and how should that tie into the long-term identity driver model from RFC-0001? -- How should non-owning gateway replicas forward connect and exec requests to the owning replica in a multi-replica deployment? - -## Appendix: Current and Target Gateway-Sandbox Connections - -Today there is no single persistent gateway-sandbox session. Instead, the supervisor opens several independent outbound gRPC connections to the gateway, while interactive connect and exec cause the gateway to open separate direct connections back into the sandbox. 
- -In the target design, those flows collapse onto a single long-lived `ConnectSupervisor` gRPC stream for control-plane traffic, plus per-relay reverse HTTP CONNECT tunnels for SSH byte traffic, plus local supervisor-owned loopback connections to the embedded SSH daemon. - -| Flow | Current behavior | Target behavior | -| --- | --- | --- | -| Session establishment and liveness | No single session exists today. Each sandbox-side client creates its own outbound gRPC connection to the gateway using tonic over HTTP/2, with transport keepalives enabled. | A single long-lived `ConnectSupervisor` bidirectional gRPC stream. Typical messages: `SupervisorHello`, `SessionAccepted`, `Heartbeat`, `SessionRejected`. | -| Effective config and policy delivery | Supervisor does an initial `GetSandboxConfig` unary gRPC call at startup, then polls `GetSandboxConfig` every `OPENSHELL_POLICY_POLL_INTERVAL_SECS` with a default of 10 seconds. This carries effective policy, settings, `config_revision`, `policy_hash`, and source metadata. | Gateway pushes `ConfigSnapshot` when the session is established and `ConfigUpdated` whenever the effective config changes. Supervisor replies with `ConfigApplied` after applying or rejecting the new desired state. | -| Provider environment delivery | Supervisor does a one-time `GetSandboxProviderEnvironment` unary gRPC call at startup after loading policy. | Fold provider environment into `ConfigSnapshot` and `ConfigUpdated`, eliminating the separate startup fetch. | -| Bootstrap policy sync from sandbox to gateway | Conditional. If the gateway returns no sandbox policy, the supervisor discovers a local policy and sends `UpdateConfig`, then re-fetches via `GetSandboxConfig`. If the fetched policy is locally enriched with baseline paths, the supervisor may also send `UpdateConfig` again. | If this behavior is retained, represent it as a typed sandbox->gateway message such as `PolicySyncRequest` over the supervisor session. 
Longer-term, this path could be eliminated if the gateway becomes the sole author of effective policy. | -| Policy load status reporting | After a policy change is applied or fails to apply, the supervisor sends unary `ReportPolicyStatus` gRPC requests back to the gateway. This is only reported for sandbox-scoped policy revisions. | Fold this into `ConfigApplied`, keyed by `config_revision` and carrying success or failure plus error details. | -| Sandbox log ingestion | Supervisor opens a long-lived client-streaming `PushSandboxLogs` gRPC call. Logs are batched roughly every 500 ms, with reconnect and backoff when the stream breaks. | Send `LogBatch` messages over the existing `ConnectSupervisor` session instead of maintaining a separate streaming RPC. | -| Denial summary and draft-policy analysis submission | Conditional. When the denial aggregator is active, the supervisor sends unary `SubmitPolicyAnalysis` gRPC requests every `OPENSHELL_DENIAL_FLUSH_INTERVAL_SECS` with a default of 10 seconds, carrying summaries and proposed chunks. | Send `PolicyAnalysisReport` messages over the supervisor session. | -| Inference route bundle delivery | Conditional. In cluster inference mode, the supervisor calls `Inference.GetInferenceBundle` once at startup and then refreshes it every `OPENSHELL_ROUTE_REFRESH_INTERVAL_SECS` with a default of 5 seconds using a separate gRPC service. | Gateway pushes `InferenceRoutesSnapshot` when the session attaches and `InferenceRoutesUpdated` whenever the bundle revision changes. No polling loop is needed. | -| SSH-compatible `sandbox connect` transport | User-triggered. The client gets a token via `CreateSshSession`, then the gateway handles `CONNECT /connect/ssh` by resolving the sandbox endpoint, opening a direct TCP connection to sandbox port 2222, sending the NSSH1 preface, and then relaying opaque SSH bytes. | Client contract stays the same externally, but the gateway no longer dials the sandbox. 
Instead it sends `RelayOpen(kind=ssh)` over the gRPC control stream. The supervisor opens a reverse HTTP CONNECT tunnel to the gateway's `/relay/{channel_id}` endpoint, and the gateway bridges the client's CONNECT stream with the supervisor's reverse CONNECT stream. The supervisor bridges its end to a local loopback connection to the embedded SSH daemon with NSSH1 preface. SSH bytes flow over the HTTP tunnels, not the gRPC stream. | -| `ExecSandbox` transport | User-triggered. The client calls `ExecSandbox` on the gateway, and the gateway resolves the sandbox endpoint, opens a direct TCP connection to port 2222, performs NSSH1 plus SSH, runs the command, and maps SSH output back into the typed gRPC response stream. | The public `ExecSandbox` API remains, but the gateway no longer dials the sandbox directly. Instead it sends `RelayOpen` over the gRPC control stream, the supervisor opens a reverse HTTP CONNECT tunnel, and the gateway runs the existing per-request SSH exec flow over that tunnel. No gateway->sandbox direct network path is required. A later follow-on may replace this internal SSH transport with direct typed exec messages. | - -Notes: - -- The policy example that motivated this appendix is slightly broader than "policy updates." The current supervisor uses `GetSandboxConfig` both for the initial fetch and for periodic polling, and that RPC returns the effective config, not just policy changes. -- `GetGatewayConfig` exists today but is not currently used by the sandbox supervisor, so it is not included in this inventory. -- All current sandbox->gateway RPCs listed above use gRPC over HTTP/2 on a tonic channel. Depending on deployment, that channel is either plaintext `http://` or mTLS `https://`. -- The two rows that currently rely on gateway->sandbox reachability are SSH-compatible connect and `ExecSandbox`. Those are the flows this RFC most directly inverts. 
-- The compute-driver RPC `ResolveSandboxEndpoint` exists to serve those gateway->sandbox transport rows. Once those rows are migrated, that RPC becomes removable. From fe62b16418eb7f1f73a9ce85c71ed590e2b16eca Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 12:05:43 -0700 Subject: [PATCH 10/20] feat(core,server): make sandbox SSH socket path configurable Adds `sandbox_ssh_socket_path` to `Config` (default `/run/openshell/ssh.sock`). The K8s driver is now wired with the configured value instead of a hard-coded path. K8s and VM drivers already isolate the socket via per-pod / per-VM filesystems, so the default is safe there. This makes it easy to override in local dev when multiple supervisors share a filesystem, matching the prior `OPENSHELL_SSH_LISTEN_ADDR` knob on the supervisor side. --- crates/openshell-core/src/config.rs | 22 ++++++++++++++++++++++ crates/openshell-server/src/lib.rs | 6 ++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 01b5c2372..f2dfe3941 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -108,9 +108,26 @@ pub struct Config { pub ssh_connect_path: String, /// SSH listen port inside sandbox pods. + /// + /// Retained for the K8s driver's dead `ResolveSandboxEndpoint` path and + /// any external tooling that still references the port. The supervisor's + /// embedded SSH daemon itself no longer listens on TCP; it binds a Unix + /// socket at [`Self::sandbox_ssh_socket_path`]. #[serde(default = "default_sandbox_ssh_port")] pub sandbox_ssh_port: u16, + /// Filesystem path where the sandbox supervisor binds its SSH Unix + /// socket. The supervisor is passed this path via + /// `OPENSHELL_SSH_LISTEN_ADDR` / `--ssh-listen-addr` and connects its + /// relay bridge to the same path. 
+ /// + /// When the gateway orchestrates sandboxes that each live in their own + /// filesystem (K8s pod, libkrun VM, etc.), the default is safe. For + /// local dev where multiple supervisors share `/run`, override this to + /// something unique per sandbox. + #[serde(default = "default_sandbox_ssh_socket_path")] + pub sandbox_ssh_socket_path: String, + /// Shared secret for gateway-to-sandbox SSH handshake. #[serde(default)] pub ssh_handshake_secret: String, @@ -180,6 +197,7 @@ impl Config { ssh_gateway_port: default_ssh_gateway_port(), ssh_connect_path: default_ssh_connect_path(), sandbox_ssh_port: default_sandbox_ssh_port(), + sandbox_ssh_socket_path: default_sandbox_ssh_socket_path(), ssh_handshake_secret: String::new(), ssh_handshake_skew_secs: default_ssh_handshake_skew_secs(), ssh_session_ttl_secs: default_ssh_session_ttl_secs(), @@ -343,6 +361,10 @@ const fn default_sandbox_ssh_port() -> u16 { 2222 } +fn default_sandbox_ssh_socket_path() -> String { + "/run/openshell/ssh.sock".to_string() +} + const fn default_ssh_handshake_skew_secs() -> u64 { 300 } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index b4426c3d1..dd384a02b 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -256,8 +256,10 @@ async fn build_compute_runtime( // Filesystem path to the supervisor's Unix-socket SSH daemon. // The path lives in a root-only directory so only the // supervisor can connect; the gateway reaches it through the - // RelayStream bridge, not directly. - ssh_listen_addr: "/run/openshell/ssh.sock".to_string(), + // RelayStream bridge, not directly. Override via + // `sandbox_ssh_socket_path` in the config for deployments + // where multiple supervisors share a filesystem. 
+ ssh_listen_addr: config.sandbox_ssh_socket_path.clone(), ssh_port: config.sandbox_ssh_port, ssh_handshake_secret: config.ssh_handshake_secret.clone(), ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, From 28427f6cfbbfcb57219ba3185f0dd2140d560012 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 13:07:43 -0700 Subject: [PATCH 11/20] refactor(server,sandbox): use typed relay init frames --- .../tests/ensure_providers_integration.rs | 4 +- .../openshell-cli/tests/mtls_integration.rs | 4 +- .../tests/provider_commands_integration.rs | 4 +- .../sandbox_create_lifecycle_integration.rs | 4 +- .../sandbox_name_fallback_integration.rs | 4 +- .../src/supervisor_session.rs | 41 ++++++----- crates/openshell-server/src/grpc/mod.rs | 6 +- .../src/supervisor_session.rs | 71 ++++++++++--------- .../tests/auth_endpoint_integration.rs | 4 +- .../tests/edge_tunnel_auth.rs | 4 +- .../tests/multiplex_integration.rs | 4 +- .../tests/multiplex_tls_integration.rs | 4 +- .../tests/ws_tunnel_integration.rs | 4 +- proto/openshell.proto | 39 +++++----- 14 files changed, 105 insertions(+), 92 deletions(-) diff --git a/crates/openshell-cli/tests/ensure_providers_integration.rs b/crates/openshell-cli/tests/ensure_providers_integration.rs index 39044d01e..a3dd6826f 100644 --- a/crates/openshell-cli/tests/ensure_providers_integration.rs +++ b/crates/openshell-cli/tests/ensure_providers_integration.rs @@ -435,12 +435,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-cli/tests/mtls_integration.rs b/crates/openshell-cli/tests/mtls_integration.rs index 5bf2a7fe4..e78c91578 100644 --- a/crates/openshell-cli/tests/mtls_integration.rs +++ b/crates/openshell-cli/tests/mtls_integration.rs @@ 
-337,12 +337,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index 34bd071a6..dc6ec9d4c 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -389,12 +389,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index a1f1b1ed1..50a4fa651 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -414,12 +414,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs index 663451cb1..2cfa38bde 100644 --- a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs +++ b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs @@ -347,12 +347,12 @@ impl OpenShell for TestOpenShell { } type 
RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-sandbox/src/supervisor_session.rs b/crates/openshell-sandbox/src/supervisor_session.rs index 8877e67ce..90056ed4a 100644 --- a/crates/openshell-sandbox/src/supervisor_session.rs +++ b/crates/openshell-sandbox/src/supervisor_session.rs @@ -14,7 +14,7 @@ use std::time::Duration; use openshell_core::proto::open_shell_client::OpenShellClient; use openshell_core::proto::{ - GatewayMessage, RelayChunk, SupervisorHeartbeat, SupervisorHello, SupervisorMessage, + GatewayMessage, RelayFrame, RelayInit, SupervisorHeartbeat, SupervisorHello, SupervisorMessage, gateway_message, supervisor_message, }; use tokio::io::{AsyncReadExt, AsyncWriteExt}; @@ -30,7 +30,7 @@ const MAX_BACKOFF: Duration = Duration::from_secs(30); /// Size of chunks read from the local SSH socket when forwarding bytes back /// to the gateway over the gRPC response stream. 16 KiB matches the default -/// HTTP/2 frame size so each `RelayChunk` fits in one frame. +/// HTTP/2 frame size so each `RelayFrame::data` fits in one frame. const RELAY_CHUNK_SIZE: usize = 16 * 1024; /// Spawn the supervisor session task. @@ -224,8 +224,8 @@ async fn handle_gateway_message( /// bridging that stream to the local SSH daemon. /// /// This opens a new HTTP/2 stream on the existing `Channel` — no new TCP or -/// TLS handshake. The first `RelayChunk` we send identifies the channel via -/// `channel_id`; subsequent chunks carry raw SSH bytes. +/// TLS handshake. The first `RelayFrame` we send is a `RelayInit`; subsequent +/// frames carry raw SSH bytes in `data`. 
async fn handle_relay_open( channel_id: &str, ssh_socket_path: &std::path::Path, @@ -234,14 +234,17 @@ async fn handle_relay_open( let mut client = OpenShellClient::new(channel); // Outbound chunks to the gateway. - let (out_tx, out_rx) = mpsc::channel::(16); + let (out_tx, out_rx) = mpsc::channel::(16); let outbound = tokio_stream::wrappers::ReceiverStream::new(out_rx); - // First frame: identify the channel. No payload on this frame. + // First frame: identify the channel. out_tx - .send(RelayChunk { - channel_id: channel_id.to_string(), - data: Vec::new(), + .send(RelayFrame { + payload: Some(openshell_core::proto::relay_frame::Payload::Init( + RelayInit { + channel_id: channel_id.to_string(), + }, + )), }) .await .map_err(|_| "outbound channel closed before init")?; @@ -263,7 +266,7 @@ async fn handle_relay_open( "relay bridge: connected to local SSH daemon" ); - // SSH → gRPC (out_tx): read local SSH, forward as `RelayChunk`s. + // SSH → gRPC (out_tx): read local SSH, forward as `RelayFrame::data`. 
let out_tx_writer = out_tx.clone(); let ssh_to_grpc = tokio::spawn(async move { let mut buf = vec![0u8; RELAY_CHUNK_SIZE]; @@ -271,9 +274,10 @@ async fn handle_relay_open( match ssh_r.read(&mut buf).await { Ok(0) | Err(_) => break, Ok(n) => { - let chunk = RelayChunk { - channel_id: String::new(), - data: buf[..n].to_vec(), + let chunk = RelayFrame { + payload: Some(openshell_core::proto::relay_frame::Payload::Data( + buf[..n].to_vec(), + )), }; if out_tx_writer.send(chunk).await.is_err() { break; @@ -287,11 +291,16 @@ async fn handle_relay_open( let mut inbound_err: Option = None; while let Some(next) = inbound.next().await { match next { - Ok(chunk) => { - if chunk.data.is_empty() { + Ok(frame) => { + let Some(openshell_core::proto::relay_frame::Payload::Data(data)) = frame.payload + else { + inbound_err = Some("relay inbound received non-data frame".to_string()); + break; + }; + if data.is_empty() { continue; } - if let Err(e) = ssh_w.write_all(&chunk.data).await { + if let Err(e) = ssh_w.write_all(&data).await { inbound_err = Some(format!("write to ssh failed: {e}")); break; } diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index 97751b72b..2ffdf5068 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ b/crates/openshell-server/src/grpc/mod.rs @@ -22,7 +22,7 @@ use openshell_core::proto::{ HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, ListSandboxPoliciesRequest, ListSandboxPoliciesResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, PushSandboxLogsRequest, PushSandboxLogsResponse, - RejectDraftChunkRequest, RejectDraftChunkResponse, RelayChunk, ReportPolicyStatusRequest, + RejectDraftChunkRequest, RejectDraftChunkResponse, RelayFrame, ReportPolicyStatusRequest, ReportPolicyStatusResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, SubmitPolicyAnalysisRequest, SubmitPolicyAnalysisResponse, 
SupervisorMessage, UndoDraftChunkRequest, UndoDraftChunkResponse, UpdateConfigRequest, @@ -398,11 +398,11 @@ impl OpenShell for OpenShellService { } type RelayStreamStream = - Pin> + Send + 'static>>; + Pin> + Send + 'static>>; async fn relay_stream( &self, - request: Request>, + request: Request>, ) -> Result, Status> { crate::supervisor_session::handle_relay_stream(&self.state, request).await } diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index 8b26c3c31..09734e7d7 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -13,8 +13,8 @@ use tracing::{info, warn}; use uuid::Uuid; use openshell_core::proto::{ - GatewayMessage, RelayChunk, RelayOpen, SessionAccepted, SupervisorMessage, gateway_message, - supervisor_message, + GatewayMessage, RelayFrame, RelayInit, RelayOpen, SessionAccepted, SupervisorMessage, + gateway_message, supervisor_message, }; use crate::ServerState; @@ -277,32 +277,39 @@ pub fn spawn_relay_reaper(state: Arc, interval: Duration) { /// bytes back to the supervisor over the gRPC response stream. const RELAY_STREAM_CHUNK_SIZE: usize = 16 * 1024; -/// Handle a RelayStream RPC from a supervisor. The first inbound `RelayChunk` -/// identifies the pending relay via `channel_id`; subsequent chunks carry raw -/// bytes forward to the gateway-side waiter. Bytes flowing the other way are -/// chunked and sent as `RelayChunk` messages back over the response stream. +/// Handle a RelayStream RPC from a supervisor. The first inbound `RelayFrame` +/// must carry a `RelayInit` identifying the pending relay; subsequent frames +/// carry raw bytes forward to the gateway-side waiter. Bytes flowing the other +/// way are chunked and sent as `RelayFrame::data` messages back over the +/// response stream. 
pub async fn handle_relay_stream( state: &Arc, - request: Request>, + request: Request>, ) -> Result< Response< - Pin> + Send + 'static>>, + Pin> + Send + 'static>>, >, Status, > { let mut inbound = request.into_inner(); - // First chunk must identify the channel. + // First frame must identify the channel. let first = inbound .message() .await? .ok_or_else(|| Status::invalid_argument("empty RelayStream"))?; - if first.channel_id.is_empty() { - return Err(Status::invalid_argument( - "first RelayChunk must set channel_id", - )); - } - let channel_id = first.channel_id; + let channel_id = match first.payload { + Some(openshell_core::proto::relay_frame::Payload::Init(RelayInit { channel_id })) + if !channel_id.is_empty() => + { + channel_id + } + _ => { + return Err(Status::invalid_argument( + "first RelayFrame must be init with non-empty channel_id", + )); + } + }; // Claim the pending relay. Consumes the entry — it cannot be reused. let supervisor_side = state.supervisor_sessions.claim_relay(&channel_id)?; @@ -310,26 +317,23 @@ pub async fn handle_relay_stream( let (mut read_half, mut write_half) = tokio::io::split(supervisor_side); - // If the first chunk happened to carry payload bytes alongside the - // channel_id, forward them immediately. - if !first.data.is_empty() { - if let Err(e) = tokio::io::AsyncWriteExt::write_all(&mut write_half, &first.data).await { - warn!(channel_id = %channel_id, error = %e, "relay stream: failed initial write"); - return Err(Status::internal("relay bridge write failed")); - } - } - // Supervisor → gateway: drain `inbound` and write to the DuplexStream. 
let channel_id_in = channel_id.clone(); tokio::spawn(async move { loop { match inbound.message().await { - Ok(Some(chunk)) => { - if chunk.data.is_empty() { + Ok(Some(frame)) => { + let Some(openshell_core::proto::relay_frame::Payload::Data(data)) = + frame.payload + else { + warn!(channel_id = %channel_id_in, "relay stream: received non-data frame after init"); + break; + }; + if data.is_empty() { continue; } if let Err(e) = - tokio::io::AsyncWriteExt::write_all(&mut write_half, &chunk.data).await + tokio::io::AsyncWriteExt::write_all(&mut write_half, &data).await { warn!(channel_id = %channel_id_in, error = %e, "relay stream: write to duplex failed"); break; @@ -346,8 +350,8 @@ pub async fn handle_relay_stream( let _ = tokio::io::AsyncWriteExt::shutdown(&mut write_half).await; }); - // Gateway → supervisor: read the DuplexStream and emit RelayChunk messages. - let (out_tx, out_rx) = mpsc::channel::>(16); + // Gateway → supervisor: read the DuplexStream and emit RelayFrame::data messages. 
+ let (out_tx, out_rx) = mpsc::channel::>(16); let channel_id_out = channel_id.clone(); tokio::spawn(async move { let mut buf = vec![0u8; RELAY_STREAM_CHUNK_SIZE]; @@ -355,9 +359,10 @@ pub async fn handle_relay_stream( match tokio::io::AsyncReadExt::read(&mut read_half, &mut buf).await { Ok(0) => break, Ok(n) => { - let chunk = RelayChunk { - channel_id: String::new(), - data: buf[..n].to_vec(), + let chunk = RelayFrame { + payload: Some(openshell_core::proto::relay_frame::Payload::Data( + buf[..n].to_vec(), + )), }; if out_tx.send(Ok(chunk)).await.is_err() { break; @@ -373,7 +378,7 @@ pub async fn handle_relay_stream( let stream = ReceiverStream::new(out_rx); let stream: Pin< - Box> + Send + 'static>, + Box> + Send + 'static>, > = Box::pin(stream); Ok(Response::new(stream)) } diff --git a/crates/openshell-server/tests/auth_endpoint_integration.rs b/crates/openshell-server/tests/auth_endpoint_integration.rs index 7f66be27a..12f302b63 100644 --- a/crates/openshell-server/tests/auth_endpoint_integration.rs +++ b/crates/openshell-server/tests/auth_endpoint_integration.rs @@ -675,12 +675,12 @@ impl openshell_core::proto::open_shell_server::OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-server/tests/edge_tunnel_auth.rs b/crates/openshell-server/tests/edge_tunnel_auth.rs index adcacb92d..e8c7e0038 100644 --- a/crates/openshell-server/tests/edge_tunnel_auth.rs +++ b/crates/openshell-server/tests/edge_tunnel_auth.rs @@ -318,12 +318,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { 
Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-server/tests/multiplex_integration.rs b/crates/openshell-server/tests/multiplex_integration.rs index 203c2f3f2..561ea2ba7 100644 --- a/crates/openshell-server/tests/multiplex_integration.rs +++ b/crates/openshell-server/tests/multiplex_integration.rs @@ -286,12 +286,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-server/tests/multiplex_tls_integration.rs b/crates/openshell-server/tests/multiplex_tls_integration.rs index 0d7b058d9..dc51e6118 100644 --- a/crates/openshell-server/tests/multiplex_tls_integration.rs +++ b/crates/openshell-server/tests/multiplex_tls_integration.rs @@ -299,12 +299,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/crates/openshell-server/tests/ws_tunnel_integration.rs b/crates/openshell-server/tests/ws_tunnel_integration.rs index 84fef2efb..584f09281 100644 --- a/crates/openshell-server/tests/ws_tunnel_integration.rs +++ b/crates/openshell-server/tests/ws_tunnel_integration.rs @@ -312,12 +312,12 @@ impl OpenShell for TestOpenShell { } type RelayStreamStream = tokio_stream::wrappers::ReceiverStream< - Result, + Result, >; async fn relay_stream( &self, - _request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { Err(tonic::Status::unimplemented("not implemented in test")) } diff --git a/proto/openshell.proto b/proto/openshell.proto index 92e479906..8109c3e80 100644 
--- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -102,15 +102,15 @@ service OpenShell { // Raw byte relay between supervisor and gateway. // // The supervisor initiates this call after receiving a RelayOpen message - // on its ConnectSupervisor stream. The first RelayChunk carries the - // channel_id to associate the new HTTP/2 stream with the pending relay - // slot on the gateway. Subsequent chunks carry raw bytes in either + // on its ConnectSupervisor stream. The first RelayFrame carries a + // RelayInit with the channel_id to associate the new HTTP/2 stream with + // the pending relay slot on the gateway. Subsequent frames carry raw bytes in either // direction between the gateway-side waiter (ssh_tunnel / exec handler) // and the supervisor-side local SSH daemon bridge. // // This rides the same TCP+TLS+HTTP/2 connection as ConnectSupervisor — // no new TLS handshake, no reverse HTTP CONNECT. - rpc RelayStream(stream RelayChunk) returns (stream RelayChunk); + rpc RelayStream(stream RelayFrame) returns (stream RelayFrame); // Watch a sandbox and stream updates. // @@ -781,7 +781,7 @@ message GatewayHeartbeat {} // Gateway requests the supervisor to open a relay channel. // // On receiving this, the supervisor should initiate a RelayStream RPC to -// the gateway, sending channel_id in the first RelayChunk to associate +// the gateway, sending a RelayInit in the first RelayFrame to associate // the new HTTP/2 stream with the pending relay slot. The supervisor // bridges that stream to the local SSH daemon. message RelayOpen { @@ -789,22 +789,21 @@ message RelayOpen { string channel_id = 1; } -// Raw byte chunk for the RelayStream RPC. The first chunk sent from the -// supervisor MUST set channel_id. All subsequent chunks (both directions) -// leave channel_id empty and carry payload bytes in `data`. 
-// -// Leaving channel_id empty on data frames is a deliberate per-frame size -// optimization: an SSH tunnel fragments into many small frames (a keystroke -// is ~50–200 bytes), where repeating a 36-byte UUID every frame would be a -// meaningful overhead. The cost is a weaker schema — the "first message is -// init-only" invariant lives in this comment rather than the proto. If this -// becomes confusing, switch to `oneof payload { string init_channel_id = 1; -// bytes data = 2; }` — behavior is equivalent, migration is cheap. -message RelayChunk { - // Only set on the very first message from the supervisor side. +// Initial RelayStream frame sent by the supervisor to claim a pending relay. +message RelayInit { + // Gateway-allocated channel identifier (UUID). string channel_id = 1; - // Raw bytes flowing in either direction. - bytes data = 2; +} + +// A single frame on the RelayStream RPC. +// +// The supervisor MUST send `init` as the first frame. All subsequent frames +// in either direction carry raw bytes in `data`. +message RelayFrame { + oneof payload { + RelayInit init = 1; + bytes data = 2; + } } // Supervisor reports the result of a relay open request. From 3e8a245a89825bd66d9e73fc030e29d3f0360091 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 13:29:15 -0700 Subject: [PATCH 12/20] test(server): add relay gRPC integration tests Adds tests/supervisor_relay_integration.rs covering the RelayStream wire contract, handshake frame, bridging, and claim timing. Five cases: happy-path echo, gateway drop, supervisor drop, no-session timeout, and concurrent multiplexed relays on one session. Narrows handle_relay_stream to take &SupervisorSessionRegistry directly so the test can exercise the real handler without standing up a full ServerState. Adds register_for_test for the same reason. 
--- crates/openshell-server/src/grpc/mod.rs | 3 +- crates/openshell-server/src/lib.rs | 2 +- .../src/supervisor_session.rs | 6 +- .../tests/supervisor_relay_integration.rs | 507 ++++++++++++++++++ 4 files changed, 513 insertions(+), 5 deletions(-) create mode 100644 crates/openshell-server/tests/supervisor_relay_integration.rs diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index 2ffdf5068..9eab56d47 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ b/crates/openshell-server/src/grpc/mod.rs @@ -404,7 +404,8 @@ impl OpenShell for OpenShellService { &self, request: Request>, ) -> Result, Status> { - crate::supervisor_session::handle_relay_stream(&self.state, request).await + crate::supervisor_session::handle_relay_stream(&self.state.supervisor_sessions, request) + .await } } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index dd384a02b..2ff4ae34b 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -30,7 +30,7 @@ mod persistence; mod sandbox_index; mod sandbox_watch; mod ssh_tunnel; -pub(crate) mod supervisor_session; +pub mod supervisor_session; mod tls; pub mod tracing_bus; mod ws_tunnel; diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index 09734e7d7..dcf272879 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -78,7 +78,7 @@ impl SupervisorSessionRegistry { /// Register a live supervisor session for the given sandbox. /// /// Returns the previous session's sender (if any) so the caller can close it. - fn register( + pub fn register( &self, sandbox_id: String, session_id: String, @@ -283,7 +283,7 @@ const RELAY_STREAM_CHUNK_SIZE: usize = 16 * 1024; /// way are chunked and sent as `RelayFrame::data` messages back over the /// response stream. 
pub async fn handle_relay_stream( - state: &Arc, + registry: &SupervisorSessionRegistry, request: Request>, ) -> Result< Response< @@ -312,7 +312,7 @@ pub async fn handle_relay_stream( }; // Claim the pending relay. Consumes the entry — it cannot be reused. - let supervisor_side = state.supervisor_sessions.claim_relay(&channel_id)?; + let supervisor_side = registry.claim_relay(&channel_id)?; info!(channel_id = %channel_id, "relay stream: claimed pending relay, bridging"); let (mut read_half, mut write_half) = tokio::io::split(supervisor_side); diff --git a/crates/openshell-server/tests/supervisor_relay_integration.rs b/crates/openshell-server/tests/supervisor_relay_integration.rs new file mode 100644 index 000000000..7f976c80e --- /dev/null +++ b/crates/openshell-server/tests/supervisor_relay_integration.rs @@ -0,0 +1,507 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for the supervisor relay gRPC path. +//! +//! Stands up an in-process tonic server hosting the real `handle_relay_stream` +//! handler, plus a mock "supervisor" client that calls `relay_stream` over a +//! real `Channel`. Exercises the wire contract (typed `RelayFrame { Init | Data }`), +//! `SupervisorSessionRegistry::open_relay` → `claim_relay` pairing, and the +//! bidirectional byte bridge inside the handler. +//! +//! These tests complement the unit tests in `supervisor_session.rs` (which +//! exercise registry state only) and the live cluster tests (which exercise +//! the full CLI → gateway → sandbox path). They catch regressions in the gRPC +//! wire layer that unit tests can't see and that are expensive to catch in +//! E2E. 
+ +use std::sync::Arc; +use std::time::Duration; + +use hyper_util::{ + rt::{TokioExecutor, TokioIo}, + server::conn::auto::Builder, +}; +use openshell_core::proto::{ + GatewayMessage, RelayFrame, RelayInit, SupervisorMessage, + open_shell_client::OpenShellClient, + open_shell_server::{OpenShell, OpenShellServer}, +}; +use openshell_server::supervisor_session::SupervisorSessionRegistry; +use openshell_server::{MultiplexedService, health_router}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::net::TcpListener; +use tokio::sync::mpsc; +use tokio_stream::StreamExt; +use tokio_stream::wrappers::ReceiverStream; +use tonic::transport::{Channel, Endpoint}; +use tonic::{Response, Status}; + +// --------------------------------------------------------------------------- +// Gateway service: only relay_stream does real work; everything else stubs. +// --------------------------------------------------------------------------- + +#[derive(Clone)] +struct RelayGateway { + registry: Arc, +} + +#[tonic::async_trait] +impl OpenShell for RelayGateway { + type RelayStreamStream = std::pin::Pin< + Box> + Send + 'static>, + >; + + async fn relay_stream( + &self, + request: tonic::Request>, + ) -> Result, Status> { + openshell_server::supervisor_session::handle_relay_stream(&self.registry, request).await + } + + // ------ unused stubs ------ + + type ConnectSupervisorStream = ReceiverStream>; + async fn connect_supervisor( + &self, + _: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + + type WatchSandboxStream = + ReceiverStream>; + async fn watch_sandbox( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + + type ExecSandboxStream = + ReceiverStream>; + async fn exec_sandbox( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + + async fn health( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + 
} + async fn create_sandbox( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_sandbox( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn list_sandboxes( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn delete_sandbox( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_sandbox_config( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_gateway_config( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_sandbox_provider_environment( + &self, + _: tonic::Request, + ) -> Result, Status> + { + Err(Status::unimplemented("unused")) + } + async fn create_ssh_session( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn revoke_ssh_session( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn create_provider( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn update_provider( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_provider( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn list_providers( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn delete_provider( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn update_config( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_sandbox_policy_status( + &self, + _: tonic::Request, + ) -> Result, 
Status> { + Err(Status::unimplemented("unused")) + } + async fn list_sandbox_policies( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn report_policy_status( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_sandbox_logs( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn push_sandbox_logs( + &self, + _: tonic::Request>, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn submit_policy_analysis( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_draft_policy( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn approve_draft_chunk( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn reject_draft_chunk( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn approve_all_draft_chunks( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn edit_draft_chunk( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn undo_draft_chunk( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn clear_draft_chunks( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn get_draft_history( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } +} + +// --------------------------------------------------------------------------- +// Test harness +// --------------------------------------------------------------------------- + +async fn spawn_gateway(registry: Arc) -> Channel { + let listener = 
TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let grpc = OpenShellServer::new(RelayGateway { registry }); + let service = MultiplexedService::new(grpc, health_router()); + + tokio::spawn(async move { + loop { + let Ok((stream, _)) = listener.accept().await else { + continue; + }; + let svc = service.clone(); + tokio::spawn(async move { + let _ = Builder::new(TokioExecutor::new()) + .serve_connection(TokioIo::new(stream), svc) + .await; + }); + } + }); + + Endpoint::from_shared(format!("http://{addr}")) + .unwrap() + .connect() + .await + .expect("client connect") +} + +fn register_session( + registry: &SupervisorSessionRegistry, + sandbox_id: &str, +) -> mpsc::Receiver { + let (tx, rx) = mpsc::channel(8); + registry.register(sandbox_id.to_string(), "sess-1".to_string(), tx); + rx +} + +/// Mock supervisor that opens a `RelayStream`, sends `Init`, then echoes every +/// data frame it receives. Returns when the gateway drops the stream or when +/// the supervisor's own outbound channel closes. 
+async fn run_echo_supervisor(channel: Channel, channel_id: String) { + let mut client = OpenShellClient::new(channel); + let (out_tx, out_rx) = mpsc::channel::(16); + let outbound = ReceiverStream::new(out_rx); + + out_tx + .send(RelayFrame { + payload: Some(openshell_core::proto::relay_frame::Payload::Init( + RelayInit { channel_id }, + )), + }) + .await + .expect("send init"); + + let response = client + .relay_stream(outbound) + .await + .expect("relay_stream rpc"); + let mut inbound = response.into_inner(); + + while let Some(msg) = inbound.next().await { + let Ok(frame) = msg else { break }; + let Some(openshell_core::proto::relay_frame::Payload::Data(data)) = frame.payload else { + continue; + }; + let echoed = RelayFrame { + payload: Some(openshell_core::proto::relay_frame::Payload::Data(data)), + }; + if out_tx.send(echoed).await.is_err() { + break; + } + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[tokio::test] +async fn relay_round_trips_bytes() { + let registry = Arc::new(SupervisorSessionRegistry::new()); + let channel = spawn_gateway(Arc::clone(®istry)).await; + let mut session_rx = register_session(®istry, "sbx"); + + let (channel_id, relay_rx) = registry + .open_relay("sbx", Duration::from_secs(2)) + .await + .expect("open_relay"); + + let opened = match session_rx.recv().await.expect("RelayOpen").payload { + Some(openshell_core::proto::gateway_message::Payload::RelayOpen(r)) => r.channel_id, + other => panic!("expected RelayOpen, got {other:?}"), + }; + assert_eq!(opened, channel_id); + + tokio::spawn(run_echo_supervisor(channel, channel_id)); + + let relay = relay_rx.await.expect("relay duplex"); + let (mut read_half, mut write_half) = tokio::io::split(relay); + + write_half.write_all(b"hello relay").await.expect("write"); + write_half.flush().await.expect("flush"); + + let mut buf = [0u8; 11]; + 
read_half.read_exact(&mut buf).await.expect("read echoed"); + assert_eq!(&buf, b"hello relay"); +} + +#[tokio::test] +async fn relay_closes_cleanly_when_gateway_drops() { + let registry = Arc::new(SupervisorSessionRegistry::new()); + let channel = spawn_gateway(Arc::clone(®istry)).await; + let mut session_rx = register_session(®istry, "sbx"); + + let (channel_id, relay_rx) = registry + .open_relay("sbx", Duration::from_secs(2)) + .await + .expect("open_relay"); + let _ = session_rx.recv().await.expect("RelayOpen"); + + let supervisor = tokio::spawn(run_echo_supervisor(channel, channel_id)); + + let relay = relay_rx.await.expect("relay duplex"); + drop(relay); + + // The supervisor's inbound stream should terminate shortly after the + // gateway side drops — its echo loop exits and the task finishes. + tokio::time::timeout(Duration::from_secs(5), supervisor) + .await + .expect("supervisor should terminate after gateway drop") + .expect("supervisor task"); +} + +#[tokio::test] +async fn relay_sees_eof_when_supervisor_closes() { + let registry = Arc::new(SupervisorSessionRegistry::new()); + let channel = spawn_gateway(Arc::clone(®istry)).await; + let mut session_rx = register_session(®istry, "sbx"); + + let (channel_id, relay_rx) = registry + .open_relay("sbx", Duration::from_secs(2)) + .await + .expect("open_relay"); + let _ = session_rx.recv().await.expect("RelayOpen"); + + // Supervisor sends init, then drops its outbound sender → gateway reader + // should see EOF. 
+ let supervisor = { + let channel_id = channel_id.clone(); + tokio::spawn(async move { + let mut client = OpenShellClient::new(channel); + let (out_tx, out_rx) = mpsc::channel::(4); + let outbound = ReceiverStream::new(out_rx); + out_tx + .send(RelayFrame { + payload: Some(openshell_core::proto::relay_frame::Payload::Init( + RelayInit { channel_id }, + )), + }) + .await + .expect("send init"); + let _response = client.relay_stream(outbound).await.expect("rpc"); + drop(out_tx); + tokio::time::sleep(Duration::from_millis(200)).await; + }) + }; + + let relay = relay_rx.await.expect("relay duplex"); + let (mut read_half, _write_half) = tokio::io::split(relay); + let mut buf = [0u8; 16]; + let n = tokio::time::timeout(Duration::from_secs(5), read_half.read(&mut buf)) + .await + .expect("read should complete") + .expect("read ok"); + assert_eq!(n, 0, "gateway-side read should see EOF"); + + supervisor.await.expect("supervisor task"); +} + +#[tokio::test] +async fn open_relay_times_out_when_no_session() { + let registry = Arc::new(SupervisorSessionRegistry::new()); + let _channel = spawn_gateway(Arc::clone(®istry)).await; + + let err = registry + .open_relay("missing", Duration::from_millis(100)) + .await + .expect_err("should time out"); + assert_eq!(err.code(), tonic::Code::Unavailable); +} + +#[tokio::test] +async fn concurrent_relays_multiplex_independently() { + let registry = Arc::new(SupervisorSessionRegistry::new()); + let channel = spawn_gateway(Arc::clone(®istry)).await; + let mut session_rx = register_session(®istry, "sbx"); + + let (id_a, rx_a) = registry + .open_relay("sbx", Duration::from_secs(2)) + .await + .expect("open_relay a"); + let _ = session_rx.recv().await.expect("RelayOpen a"); + + let (id_b, rx_b) = registry + .open_relay("sbx", Duration::from_secs(2)) + .await + .expect("open_relay b"); + let _ = session_rx.recv().await.expect("RelayOpen b"); + assert_ne!(id_a, id_b); + + tokio::spawn(run_echo_supervisor(channel.clone(), id_a)); + 
tokio::spawn(run_echo_supervisor(channel, id_b)); + + let relay_a = rx_a.await.expect("relay a"); + let relay_b = rx_b.await.expect("relay b"); + + let (mut ra, mut wa) = tokio::io::split(relay_a); + let (mut rb, mut wb) = tokio::io::split(relay_b); + + wa.write_all(b"stream-A").await.unwrap(); + wb.write_all(b"stream-B").await.unwrap(); + wa.flush().await.unwrap(); + wb.flush().await.unwrap(); + + let mut buf_a = [0u8; 8]; + let mut buf_b = [0u8; 8]; + ra.read_exact(&mut buf_a).await.unwrap(); + rb.read_exact(&mut buf_b).await.unwrap(); + assert_eq!(&buf_a, b"stream-A"); + assert_eq!(&buf_b, b"stream-B"); +} From e7ea5c3342e90c721236516dc396b0890782a912 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 13:51:17 -0700 Subject: [PATCH 13/20] feat(sandbox): add OCSF telemetry for supervisor session and relay events Emits NetworkActivity events for session open/close/fail and relay open/close/fail from the sandbox side. Keeps plain tracing for internal plumbing (SSH socket connect, gateway stream close observation). Event shapes are extracted into pure builder fns so unit tests can assert activity/severity/status without wiring up a tracing subscriber. Gateway endpoint is parsed into host + port for dst_endpoint. 
--- .../src/supervisor_session.rs | 307 +++++++++++++++--- 1 file changed, 270 insertions(+), 37 deletions(-) diff --git a/crates/openshell-sandbox/src/supervisor_session.rs b/crates/openshell-sandbox/src/supervisor_session.rs index 90056ed4a..f24d333f8 100644 --- a/crates/openshell-sandbox/src/supervisor_session.rs +++ b/crates/openshell-sandbox/src/supervisor_session.rs @@ -17,17 +17,123 @@ use openshell_core::proto::{ GatewayMessage, RelayFrame, RelayInit, SupervisorHeartbeat, SupervisorHello, SupervisorMessage, gateway_message, supervisor_message, }; +use openshell_ocsf::{ + ActivityId, Endpoint, NetworkActivityBuilder, OcsfEvent, SandboxContext, SeverityId, StatusId, + ocsf_emit, +}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::sync::mpsc; use tokio_stream::StreamExt; use tonic::transport::Channel; -use tracing::{info, warn}; +use tracing::{debug, warn}; use crate::grpc_client; const INITIAL_BACKOFF: Duration = Duration::from_secs(1); const MAX_BACKOFF: Duration = Duration::from_secs(30); +/// Parse a gRPC endpoint URI into an OCSF `Endpoint` (host + port). Falls back +/// to treating the whole string as a domain if parsing fails. 
+fn ocsf_gateway_endpoint(endpoint: &str) -> Endpoint { + let without_scheme = endpoint + .split_once("://") + .map_or(endpoint, |(_, rest)| rest); + let host_and_port = without_scheme.split('/').next().unwrap_or(without_scheme); + if let Some((host, port)) = host_and_port.rsplit_once(':') + && let Ok(port) = port.parse::() + { + return Endpoint::from_domain(host, port); + } + Endpoint::from_domain(host_and_port, 0) +} + +fn session_established_event( + ctx: &SandboxContext, + endpoint: &str, + session_id: &str, + heartbeat_secs: u32, +) -> OcsfEvent { + NetworkActivityBuilder::new(ctx) + .activity(ActivityId::Open) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .dst_endpoint(ocsf_gateway_endpoint(endpoint)) + .message(format!( + "supervisor session established (session_id={session_id}, heartbeat_secs={heartbeat_secs})" + )) + .build() +} + +fn session_closed_event(ctx: &SandboxContext, endpoint: &str, sandbox_id: &str) -> OcsfEvent { + NetworkActivityBuilder::new(ctx) + .activity(ActivityId::Close) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .dst_endpoint(ocsf_gateway_endpoint(endpoint)) + .message(format!("supervisor session ended cleanly ({sandbox_id})")) + .build() +} + +fn session_failed_event( + ctx: &SandboxContext, + endpoint: &str, + attempt: u64, + error: &str, +) -> OcsfEvent { + NetworkActivityBuilder::new(ctx) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(ocsf_gateway_endpoint(endpoint)) + .message(format!( + "supervisor session failed, reconnecting (attempt {attempt}): {error}" + )) + .build() +} + +fn relay_open_event(ctx: &SandboxContext, channel_id: &str) -> OcsfEvent { + NetworkActivityBuilder::new(ctx) + .activity(ActivityId::Open) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .message(format!("relay open (channel_id={channel_id})")) + .build() +} + +fn relay_closed_event(ctx: &SandboxContext, channel_id: &str) -> 
OcsfEvent { + NetworkActivityBuilder::new(ctx) + .activity(ActivityId::Close) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .message(format!("relay closed (channel_id={channel_id})")) + .build() +} + +fn relay_failed_event(ctx: &SandboxContext, channel_id: &str, error: &str) -> OcsfEvent { + NetworkActivityBuilder::new(ctx) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .message(format!( + "relay bridge failed (channel_id={channel_id}): {error}" + )) + .build() +} + +fn relay_close_from_gateway_event( + ctx: &SandboxContext, + channel_id: &str, + reason: &str, +) -> OcsfEvent { + NetworkActivityBuilder::new(ctx) + .activity(ActivityId::Close) + .severity(SeverityId::Informational) + .message(format!( + "relay close from gateway (channel_id={channel_id}, reason={reason})" + )) + .build() +} + /// Size of chunks read from the local SSH socket when forwarding bytes back /// to the gateway over the gRPC response stream. 16 KiB matches the default /// HTTP/2 frame size so each `RelayFrame::data` fits in one frame. 
@@ -58,17 +164,14 @@ async fn run_session_loop( match run_single_session(&endpoint, &sandbox_id, &ssh_socket_path).await { Ok(()) => { - info!(sandbox_id = %sandbox_id, "supervisor session ended cleanly"); + let event = session_closed_event(crate::ocsf_ctx(), &endpoint, &sandbox_id); + ocsf_emit!(event); break; } Err(e) => { - warn!( - sandbox_id = %sandbox_id, - attempt = attempt, - backoff_ms = backoff.as_millis() as u64, - error = %e, - "supervisor session failed, reconnecting" - ); + let event = + session_failed_event(crate::ocsf_ctx(), &endpoint, attempt, &e.to_string()); + ocsf_emit!(event); tokio::time::sleep(backoff).await; backoff = (backoff * 2).min(MAX_BACKOFF); } @@ -125,13 +228,13 @@ async fn run_single_session( }; let heartbeat_secs = accepted.heartbeat_interval_secs.max(5); - info!( - sandbox_id = %sandbox_id, - session_id = %accepted.session_id, - instance_id = %instance_id, - heartbeat_secs = heartbeat_secs, - "supervisor session established" + let event = session_established_event( + crate::ocsf_ctx(), + endpoint, + &accepted.session_id, + heartbeat_secs, ); + ocsf_emit!(event); // Main loop: receive gateway messages + send heartbeats. 
let mut heartbeat_interval = @@ -148,10 +251,10 @@ async fn run_single_session( sandbox_id, ssh_socket_path, &channel, - ).await; + ); } Ok(None) => { - info!(sandbox_id = %sandbox_id, "supervisor session: gateway closed stream"); + debug!(sandbox_id = %sandbox_id, "supervisor session: gateway closed stream"); return Ok(()); } Err(e) => { @@ -173,7 +276,7 @@ async fn run_single_session( } } -async fn handle_gateway_message( +fn handle_gateway_message( msg: &GatewayMessage, sandbox_id: &str, ssh_socket_path: &std::path::Path, @@ -189,30 +292,33 @@ async fn handle_gateway_message( let channel = channel.clone(); let ssh_socket_path = ssh_socket_path.to_path_buf(); - info!( - sandbox_id = %sandbox_id, - channel_id = %channel_id, - "supervisor session: relay open request, spawning bridge" - ); + let event = relay_open_event(crate::ocsf_ctx(), &channel_id); + ocsf_emit!(event); tokio::spawn(async move { - if let Err(e) = handle_relay_open(&channel_id, &ssh_socket_path, channel).await { - warn!( - sandbox_id = %sandbox_id, - channel_id = %channel_id, - error = %e, - "supervisor session: relay bridge failed" - ); + match handle_relay_open(&channel_id, &ssh_socket_path, channel).await { + Ok(()) => { + let event = relay_closed_event(crate::ocsf_ctx(), &channel_id); + ocsf_emit!(event); + } + Err(e) => { + let event = + relay_failed_event(crate::ocsf_ctx(), &channel_id, &e.to_string()); + ocsf_emit!(event); + warn!( + sandbox_id = %sandbox_id, + channel_id = %channel_id, + error = %e, + "supervisor session: relay bridge failed" + ); + } } }); } Some(gateway_message::Payload::RelayClose(close)) => { - info!( - sandbox_id = %sandbox_id, - channel_id = %close.channel_id, - reason = %close.reason, - "supervisor session: relay close from gateway" - ); + let event = + relay_close_from_gateway_event(crate::ocsf_ctx(), &close.channel_id, &close.reason); + ocsf_emit!(event); } _ => { warn!(sandbox_id = %sandbox_id, "supervisor session: unexpected gateway message"); @@ -260,7 +366,7 
@@ async fn handle_relay_open( let ssh = tokio::net::UnixStream::connect(ssh_socket_path).await?; let (mut ssh_r, mut ssh_w) = ssh.into_split(); - info!( + debug!( channel_id = %channel_id, socket = %ssh_socket_path.display(), "relay bridge: connected to local SSH daemon" @@ -325,3 +431,130 @@ async fn handle_relay_open( } Ok(()) } + +#[cfg(test)] +mod ocsf_event_tests { + use super::*; + + fn ctx() -> SandboxContext { + SandboxContext { + sandbox_id: "sbx-1".into(), + sandbox_name: "sandbox".into(), + container_image: "img".into(), + hostname: "host".into(), + product_version: "0.0.1".into(), + proxy_ip: "127.0.0.1".parse().unwrap(), + proxy_port: 3128, + } + } + + #[test] + fn gateway_endpoint_parses_https_with_port() { + let e = ocsf_gateway_endpoint("https://gateway.openshell:8443"); + assert_eq!(e.domain.as_deref(), Some("gateway.openshell")); + assert_eq!(e.port, Some(8443)); + } + + #[test] + fn gateway_endpoint_parses_http_with_port_and_path() { + let e = ocsf_gateway_endpoint("http://gw:7000/grpc"); + assert_eq!(e.domain.as_deref(), Some("gw")); + assert_eq!(e.port, Some(7000)); + } + + #[test] + fn gateway_endpoint_falls_back_without_port() { + let e = ocsf_gateway_endpoint("gateway.openshell"); + assert_eq!(e.domain.as_deref(), Some("gateway.openshell")); + assert_eq!(e.port, Some(0)); + } + + fn network_activity(event: &OcsfEvent) -> &openshell_ocsf::NetworkActivityEvent { + match event { + OcsfEvent::NetworkActivity(n) => n, + other => panic!("expected NetworkActivity, got {other:?}"), + } + } + + #[test] + fn session_established_emits_network_open_success() { + let event = session_established_event(&ctx(), "https://gw:443", "sess-1", 30); + let na = network_activity(&event); + assert_eq!(na.base.activity_id, ActivityId::Open.as_u8()); + assert_eq!(na.base.severity, SeverityId::Informational); + assert_eq!(na.base.status, Some(StatusId::Success)); + assert_eq!( + na.dst_endpoint.as_ref().and_then(|e| e.domain.as_deref()), + Some("gw") + ); + let msg = 
na.base.message.as_deref().unwrap_or_default(); + assert!(msg.contains("sess-1"), "message missing session_id: {msg}"); + assert!(msg.contains("heartbeat_secs=30"), "message: {msg}"); + } + + #[test] + fn session_closed_emits_network_close_success() { + let event = session_closed_event(&ctx(), "https://gw:443", "sbx-1"); + let na = network_activity(&event); + assert_eq!(na.base.activity_id, ActivityId::Close.as_u8()); + assert_eq!(na.base.severity, SeverityId::Informational); + assert_eq!(na.base.status, Some(StatusId::Success)); + } + + #[test] + fn session_failed_emits_network_fail_low() { + let event = session_failed_event(&ctx(), "https://gw:443", 3, "connect refused"); + let na = network_activity(&event); + assert_eq!(na.base.activity_id, ActivityId::Fail.as_u8()); + assert_eq!(na.base.severity, SeverityId::Low); + assert_eq!(na.base.status, Some(StatusId::Failure)); + let msg = na.base.message.as_deref().unwrap_or_default(); + assert!(msg.contains("attempt 3"), "message: {msg}"); + assert!(msg.contains("connect refused"), "message: {msg}"); + } + + #[test] + fn relay_open_emits_network_open_success() { + let event = relay_open_event(&ctx(), "ch-42"); + let na = network_activity(&event); + assert_eq!(na.base.activity_id, ActivityId::Open.as_u8()); + assert_eq!(na.base.severity, SeverityId::Informational); + assert!( + na.base + .message + .as_deref() + .unwrap_or_default() + .contains("ch-42") + ); + } + + #[test] + fn relay_closed_emits_network_close_success() { + let event = relay_closed_event(&ctx(), "ch-42"); + let na = network_activity(&event); + assert_eq!(na.base.activity_id, ActivityId::Close.as_u8()); + assert_eq!(na.base.status, Some(StatusId::Success)); + } + + #[test] + fn relay_failed_emits_network_fail_low() { + let event = relay_failed_event(&ctx(), "ch-42", "write to ssh failed"); + let na = network_activity(&event); + assert_eq!(na.base.activity_id, ActivityId::Fail.as_u8()); + assert_eq!(na.base.severity, SeverityId::Low); + 
assert_eq!(na.base.status, Some(StatusId::Failure)); + let msg = na.base.message.as_deref().unwrap_or_default(); + assert!(msg.contains("ch-42"), "message: {msg}"); + assert!(msg.contains("write to ssh failed"), "message: {msg}"); + } + + #[test] + fn relay_close_from_gateway_is_network_close_informational() { + let event = relay_close_from_gateway_event(&ctx(), "ch-42", "sandbox deleted"); + let na = network_activity(&event); + assert_eq!(na.base.activity_id, ActivityId::Close.as_u8()); + assert_eq!(na.base.severity, SeverityId::Informational); + let msg = na.base.message.as_deref().unwrap_or_default(); + assert!(msg.contains("sandbox deleted"), "message: {msg}"); + } +} From 4bd88f563613d52f858d4ae28189074777b211f5 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 14:36:07 -0700 Subject: [PATCH 14/20] fix(cli): detect dead relay via SSH keepalives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ServerAliveInterval=15 and ServerAliveCountMax=3 to both the rendered ssh-config block and the direct ssh invocation used by `openshell sandbox connect`. Without this, a client-side SSH session hangs indefinitely when the gateway or supervisor dies mid-session: the relay transport's TCP connection can't signal EOF to the client because the peer process is gone, not cleanly closing. Detection now takes ~45s instead of the TCP keepalive default of 2 hours. Verified on a live cluster by deleting the gateway pod and the sandbox pod mid-session — SSH exits with "Broken pipe" after one missed ServerAlive reply. 
--- crates/openshell-cli/src/ssh.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index ebcbbeb4f..709732bc9 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -142,7 +142,15 @@ fn ssh_base_command(proxy_command: &str) -> Command { .arg("-o") .arg("GlobalKnownHostsFile=/dev/null") .arg("-o") - .arg("LogLevel=ERROR"); + .arg("LogLevel=ERROR") + // Detect a dead relay within ~45s. The relay rides on a TCP connection + // that the client has no way to observe silently dropping (gateway + // restart, supervisor restart, cluster failover), so fall back to + // SSH-level keepalives instead of hanging forever. + .arg("-o") + .arg("ServerAliveInterval=15") + .arg("-o") + .arg("ServerAliveCountMax=3"); command } @@ -870,7 +878,7 @@ fn render_ssh_config(gateway: &str, name: &str) -> String { let proxy_cmd = format!("{exe} ssh-proxy --gateway-name {gateway} --name {name}"); let host_alias = host_alias(name); format!( - "Host {host_alias}\n User sandbox\n StrictHostKeyChecking no\n UserKnownHostsFile /dev/null\n GlobalKnownHostsFile /dev/null\n LogLevel ERROR\n ProxyCommand {proxy_cmd}\n" + "Host {host_alias}\n User sandbox\n StrictHostKeyChecking no\n UserKnownHostsFile /dev/null\n GlobalKnownHostsFile /dev/null\n LogLevel ERROR\n ServerAliveInterval 15\n ServerAliveCountMax 3\n ProxyCommand {proxy_cmd}\n" ) } From 1b56c01f4a0885ebd58d0d9eedf31755ad5f4137 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 14:47:08 -0700 Subject: [PATCH 15/20] refactor(proto,server,drivers): drop ResolveSandboxEndpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RPC was used by the direct gateway→sandbox SSH/exec path, which is gone — connect/ssh and ExecSandbox both ride the supervisor session relay now. 
Removes the RPC, SandboxEndpoint/ResolveSandboxEndpoint* messages, and the now-dead ssh_port / sandbox_ssh_port config fields across openshell-core, openshell-server, openshell-driver-kubernetes, and openshell-driver-vm. The k8s driver's standalone binary also stops synthesizing a TCP listen address ("0.0.0.0:") and reads the Unix socket path directly from OPENSHELL_SANDBOX_SSH_SOCKET_PATH. --- crates/openshell-core/src/config.rs | 21 ------ .../openshell-driver-kubernetes/src/config.rs | 1 - .../openshell-driver-kubernetes/src/driver.rs | 67 +------------------ .../openshell-driver-kubernetes/src/grpc.rs | 18 +---- .../openshell-driver-kubernetes/src/main.rs | 11 +-- crates/openshell-driver-vm/src/driver.rs | 35 +--------- crates/openshell-server/src/cli.rs | 5 -- crates/openshell-server/src/compute/mod.rs | 21 +----- crates/openshell-server/src/lib.rs | 1 - proto/compute_driver.proto | 25 ------- 10 files changed, 15 insertions(+), 190 deletions(-) diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index f2dfe3941..7c12ea463 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -107,15 +107,6 @@ pub struct Config { #[serde(default = "default_ssh_connect_path")] pub ssh_connect_path: String, - /// SSH listen port inside sandbox pods. - /// - /// Retained for the K8s driver's dead `ResolveSandboxEndpoint` path and - /// any external tooling that still references the port. The supervisor's - /// embedded SSH daemon itself no longer listens on TCP; it binds a Unix - /// socket at [`Self::sandbox_ssh_socket_path`]. - #[serde(default = "default_sandbox_ssh_port")] - pub sandbox_ssh_port: u16, - /// Filesystem path where the sandbox supervisor binds its SSH Unix /// socket. 
The supervisor is passed this path via /// `OPENSHELL_SSH_LISTEN_ADDR` / `--ssh-listen-addr` and connects its @@ -196,7 +187,6 @@ impl Config { ssh_gateway_host: default_ssh_gateway_host(), ssh_gateway_port: default_ssh_gateway_port(), ssh_connect_path: default_ssh_connect_path(), - sandbox_ssh_port: default_sandbox_ssh_port(), sandbox_ssh_socket_path: default_sandbox_ssh_socket_path(), ssh_handshake_secret: String::new(), ssh_handshake_skew_secs: default_ssh_handshake_skew_secs(), @@ -286,13 +276,6 @@ impl Config { self } - /// Create a new configuration with the sandbox SSH port. - #[must_use] - pub const fn with_sandbox_ssh_port(mut self, port: u16) -> Self { - self.sandbox_ssh_port = port; - self - } - /// Create a new configuration with the SSH handshake secret. #[must_use] pub fn with_ssh_handshake_secret(mut self, secret: impl Into) -> Self { @@ -357,10 +340,6 @@ fn default_ssh_connect_path() -> String { "/connect/ssh".to_string() } -const fn default_sandbox_ssh_port() -> u16 { - 2222 -} - fn default_sandbox_ssh_socket_path() -> String { "/run/openshell/ssh.sock".to_string() } diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 3ce98eae8..4c8c4a0f2 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -8,7 +8,6 @@ pub struct KubernetesComputeConfig { pub image_pull_policy: String, pub grpc_endpoint: String, pub ssh_listen_addr: String, - pub ssh_port: u16, pub ssh_handshake_secret: String, pub ssh_handshake_skew_secs: u64, pub client_tls_secret_name: String, diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 440703af5..cc9fde68c 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -15,12 +15,10 @@ use openshell_core::proto::compute::v1::{ DriverCondition as SandboxCondition, DriverPlatformEvent 
as PlatformEvent, DriverSandbox as Sandbox, DriverSandboxSpec as SandboxSpec, DriverSandboxStatus as SandboxStatus, DriverSandboxTemplate as SandboxTemplate, - GetCapabilitiesResponse, ResolveSandboxEndpointResponse, SandboxEndpoint, - WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesPlatformEvent, - WatchSandboxesSandboxEvent, sandbox_endpoint, watch_sandboxes_event, + GetCapabilitiesResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent, + WatchSandboxesPlatformEvent, WatchSandboxesSandboxEvent, watch_sandboxes_event, }; use std::collections::BTreeMap; -use std::net::IpAddr; use std::pin::Pin; use std::time::Duration; use tokio::sync::mpsc; @@ -271,21 +269,6 @@ impl KubernetesComputeDriver { &self.config.ssh_handshake_secret } - async fn agent_pod_ip(&self, pod_name: &str) -> Result, KubeError> { - let api: Api = Api::namespaced(self.client.clone(), &self.config.namespace); - match api.get(pod_name).await { - Ok(pod) => { - let ip = pod - .status - .and_then(|status| status.pod_ip) - .and_then(|ip| ip.parse().ok()); - Ok(ip) - } - Err(KubeError::Api(err)) if err.code == 404 => Ok(None), - Err(err) => Err(err), - } - } - pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result<(), KubernetesDriverError> { let name = sandbox.name.as_str(); info!( @@ -407,52 +390,6 @@ impl KubernetesComputeDriver { } } - pub async fn resolve_sandbox_endpoint( - &self, - sandbox: &Sandbox, - ) -> Result { - if let Some(status) = sandbox.status.as_ref() - && !status.instance_id.is_empty() - { - match self.agent_pod_ip(&status.instance_id).await { - Ok(Some(ip)) => { - return Ok(ResolveSandboxEndpointResponse { - endpoint: Some(SandboxEndpoint { - target: Some(sandbox_endpoint::Target::Ip(ip.to_string())), - port: u32::from(self.config.ssh_port), - }), - }); - } - Ok(None) => { - return Err(KubernetesDriverError::Precondition( - "sandbox agent pod IP is not available".to_string(), - )); - } - Err(err) => { - return Err(KubernetesDriverError::Message(format!( - 
"failed to resolve agent pod IP: {err}" - ))); - } - } - } - - if sandbox.name.is_empty() { - return Err(KubernetesDriverError::Precondition( - "sandbox has no name".to_string(), - )); - } - - Ok(ResolveSandboxEndpointResponse { - endpoint: Some(SandboxEndpoint { - target: Some(sandbox_endpoint::Target::Host(format!( - "{}.{}.svc.cluster.local", - sandbox.name, self.config.namespace - ))), - port: u32::from(self.config.ssh_port), - }), - }) - } - pub async fn watch_sandboxes(&self) -> Result { let namespace = self.config.namespace.clone(); let sandbox_api = self.api(); diff --git a/crates/openshell-driver-kubernetes/src/grpc.rs b/crates/openshell-driver-kubernetes/src/grpc.rs index 2c5a94467..75e131d41 100644 --- a/crates/openshell-driver-kubernetes/src/grpc.rs +++ b/crates/openshell-driver-kubernetes/src/grpc.rs @@ -5,8 +5,7 @@ use futures::{Stream, StreamExt}; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse, - ListSandboxesRequest, ListSandboxesResponse, ResolveSandboxEndpointRequest, - ResolveSandboxEndpointResponse, StopSandboxRequest, StopSandboxResponse, + ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_server::ComputeDriver, }; @@ -128,21 +127,6 @@ impl ComputeDriver for ComputeDriverService { Ok(Response::new(DeleteSandboxResponse { deleted })) } - async fn resolve_sandbox_endpoint( - &self, - request: Request, - ) -> Result, Status> { - let sandbox = request - .into_inner() - .sandbox - .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; - self.driver - .resolve_sandbox_endpoint(&sandbox) - .await - .map(Response::new) - .map_err(status_from_driver_error) - } - type WatchSandboxesStream = Pin> + Send + 'static>>; diff 
--git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index 76c567f59..244083465 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -39,8 +39,12 @@ struct Args { #[arg(long, env = "OPENSHELL_GRPC_ENDPOINT")] grpc_endpoint: Option, - #[arg(long, env = "OPENSHELL_SANDBOX_SSH_PORT", default_value_t = 2222)] - sandbox_ssh_port: u16, + #[arg( + long, + env = "OPENSHELL_SANDBOX_SSH_SOCKET_PATH", + default_value = "/run/openshell/ssh.sock" + )] + sandbox_ssh_socket_path: String, #[arg(long, env = "OPENSHELL_SSH_HANDSHAKE_SECRET")] ssh_handshake_secret: String, @@ -69,8 +73,7 @@ async fn main() -> Result<()> { default_image: args.sandbox_image.unwrap_or_default(), image_pull_policy: args.sandbox_image_pull_policy.unwrap_or_default(), grpc_endpoint: args.grpc_endpoint.unwrap_or_default(), - ssh_listen_addr: format!("0.0.0.0:{}", args.sandbox_ssh_port), - ssh_port: args.sandbox_ssh_port, + ssh_listen_addr: args.sandbox_ssh_socket_path, ssh_handshake_secret: args.ssh_handshake_secret, ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, client_tls_secret_name: args.client_tls_secret_name.unwrap_or_default(), diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 3d3fbf4b6..9189b5309 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -14,11 +14,10 @@ use openshell_core::proto::compute::v1::{ DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, DriverSandbox as Sandbox, DriverSandboxStatus as SandboxStatus, GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse, ListSandboxesRequest, - ListSandboxesResponse, ResolveSandboxEndpointRequest, ResolveSandboxEndpointResponse, - SandboxEndpoint, StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateRequest, + ListSandboxesResponse, StopSandboxRequest, 
StopSandboxResponse, ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesPlatformEvent, WatchSandboxesRequest, WatchSandboxesSandboxEvent, - compute_driver_server::ComputeDriver, sandbox_endpoint, watch_sandboxes_event, + compute_driver_server::ComputeDriver, watch_sandboxes_event, }; use std::collections::{HashMap, HashSet}; use std::net::{Ipv4Addr, SocketAddr, TcpListener}; @@ -385,25 +384,6 @@ impl VmDriver { Ok(DeleteSandboxResponse { deleted: true }) } - pub async fn resolve_endpoint( - &self, - sandbox: &Sandbox, - ) -> Result { - let registry = self.registry.lock().await; - let record = registry.get(&sandbox.id).or_else(|| { - registry - .values() - .find(|record| record.snapshot.name == sandbox.name) - }); - let record = record.ok_or_else(|| Status::not_found("sandbox not found"))?; - Ok(ResolveSandboxEndpointResponse { - endpoint: Some(SandboxEndpoint { - target: Some(sandbox_endpoint::Target::Host("127.0.0.1".to_string())), - port: u32::from(record.ssh_port), - }), - }) - } - pub async fn get_sandbox( &self, sandbox_id: &str, @@ -649,17 +629,6 @@ impl ComputeDriver for VmDriver { Ok(Response::new(response)) } - async fn resolve_sandbox_endpoint( - &self, - request: Request, - ) -> Result, Status> { - let sandbox = request - .into_inner() - .sandbox - .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; - Ok(Response::new(self.resolve_endpoint(&sandbox).await?)) - } - type WatchSandboxesStream = Pin> + Send + 'static>>; diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index ba9425036..c1ccabd38 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -91,10 +91,6 @@ struct Args { )] ssh_connect_path: String, - /// SSH port inside sandbox pods. - #[arg(long, env = "OPENSHELL_SANDBOX_SSH_PORT", default_value_t = 2222)] - sandbox_ssh_port: u16, - /// Shared secret for gateway-to-sandbox SSH handshake. 
#[arg(long, env = "OPENSHELL_SSH_HANDSHAKE_SECRET")] ssh_handshake_secret: Option, @@ -233,7 +229,6 @@ async fn run_from_args(args: Args) -> Result<()> { .with_ssh_gateway_host(args.ssh_gateway_host) .with_ssh_gateway_port(args.ssh_gateway_port) .with_ssh_connect_path(args.ssh_connect_path) - .with_sandbox_ssh_port(args.sandbox_ssh_port) .with_ssh_handshake_skew_secs(args.ssh_handshake_skew_secs); if let Some(image) = args.sandbox_image { diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 09b35d5cd..95ffbfaa4 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -17,9 +17,9 @@ use openshell_core::proto::compute::v1::{ CreateSandboxRequest, DeleteSandboxRequest, DriverCondition, DriverPlatformEvent, DriverResourceRequirements, DriverSandbox, DriverSandboxSpec, DriverSandboxStatus, DriverSandboxTemplate, GetCapabilitiesRequest, GetSandboxRequest, ListSandboxesRequest, - ResolveSandboxEndpointRequest, ResolveSandboxEndpointResponse, ValidateSandboxCreateRequest, - WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_client::ComputeDriverClient, - compute_driver_server::ComputeDriver, watch_sandboxes_event, + ValidateSandboxCreateRequest, WatchSandboxesEvent, WatchSandboxesRequest, + compute_driver_client::ComputeDriverClient, compute_driver_server::ComputeDriver, + watch_sandboxes_event, }; use openshell_core::proto::{ PlatformEvent, Sandbox, SandboxCondition, SandboxPhase, SandboxSpec, SandboxStatus, @@ -166,14 +166,6 @@ impl ComputeDriver for RemoteComputeDriver { client.delete_sandbox(request).await } - async fn resolve_sandbox_endpoint( - &self, - request: Request, - ) -> Result, Status> { - let mut client = self.client(); - client.resolve_sandbox_endpoint(request).await - } - async fn watch_sandboxes( &self, request: Request, @@ -1149,13 +1141,6 @@ mod tests { })) } - async fn resolve_sandbox_endpoint( - &self, - _request: Request, - ) -> Result, 
Status> { - Err(Status::unimplemented("not used by these tests")) - } - async fn watch_sandboxes( &self, _request: Request, diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 2ff4ae34b..526485e58 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -260,7 +260,6 @@ async fn build_compute_runtime( // `sandbox_ssh_socket_path` in the config for deployments // where multiple supervisors share a filesystem. ssh_listen_addr: config.sandbox_ssh_socket_path.clone(), - ssh_port: config.sandbox_ssh_port, ssh_handshake_secret: config.ssh_handshake_secret.clone(), ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, client_tls_secret_name: config.client_tls_secret_name.clone(), diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto index 53b0ac27d..68af695e5 100644 --- a/proto/compute_driver.proto +++ b/proto/compute_driver.proto @@ -38,10 +38,6 @@ service ComputeDriver { // Tear down platform resources for a sandbox. rpc DeleteSandbox(DeleteSandboxRequest) returns (DeleteSandboxResponse); - // Resolve the current endpoint for sandbox exec/SSH transport. - rpc ResolveSandboxEndpoint(ResolveSandboxEndpointRequest) - returns (ResolveSandboxEndpointResponse); - // Stream sandbox observations from the platform. rpc WatchSandboxes(WatchSandboxesRequest) returns (stream WatchSandboxesEvent); } @@ -238,27 +234,6 @@ message DeleteSandboxResponse { bool deleted = 1; } -message ResolveSandboxEndpointRequest { - // Sandbox to resolve for exec or SSH connectivity. - DriverSandbox sandbox = 1; -} - -message SandboxEndpoint { - oneof target { - // Direct IP address for the sandbox endpoint. - string ip = 1; - // DNS host name for the sandbox endpoint. - string host = 2; - } - // TCP port for the sandbox endpoint. - uint32 port = 3; -} - -message ResolveSandboxEndpointResponse { - // Current endpoint the gateway should use to reach the sandbox. 
- SandboxEndpoint endpoint = 1; -} - message WatchSandboxesRequest {} message WatchSandboxesSandboxEvent { From 861c8a6974d307df5ae7c91dd19e62d67e9fe22a Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 16:00:20 -0700 Subject: [PATCH 16/20] =?UTF-8?q?refactor(sandbox,drivers):=20migrate=20VM?= =?UTF-8?q?=20driver=20to=20supervisor=20relay=20and=20rename=20ssh-listen?= =?UTF-8?q?-addr=20=E2=86=92=20ssh-socket-path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames the sandbox binary's `--ssh-listen-addr` / `OPENSHELL_SSH_LISTEN_ADDR` / `ssh_listen_addr` to `--ssh-socket-path` / `OPENSHELL_SSH_SOCKET_PATH` / `ssh_socket_path` so the flag name matches its sole accepted form (a Unix socket filesystem path) after the supervisor-initiated relay migration. Migrates the VM compute driver to the same supervisor-initiated model used by the K8s driver: the in-guest sandbox now binds `/run/openshell/ssh.sock` and opens its own outbound `ConnectSupervisor` session to the gateway, so the host→guest SSH port-forward is no longer needed. Drops `--vm-port` plumbing, the `ssh_port` allocation path, the `port_is_ready` TCP probe, and the now- unused `GUEST_SSH_PORT` import from `driver.rs`. Readiness falls back to the existing console-log marker from `guest_ssh_ready`. Remaining `ssh_port` / `GUEST_SSH_PORT` residue in `openshell-driver-vm/src/runtime.rs` (gvproxy port-mapping plan) is dead but left for OS-102, which already covers NSSH1/handshake plumbing removal across crates. 
--- crates/openshell-core/src/config.rs | 2 +- .../openshell-driver-kubernetes/src/config.rs | 2 +- .../openshell-driver-kubernetes/src/driver.rs | 28 +++++----- .../openshell-driver-kubernetes/src/main.rs | 2 +- crates/openshell-driver-vm/src/driver.rs | 55 +++---------------- crates/openshell-sandbox/src/lib.rs | 10 +--- crates/openshell-sandbox/src/main.rs | 10 ++-- crates/openshell-server/src/lib.rs | 2 +- 8 files changed, 35 insertions(+), 76 deletions(-) diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 7c12ea463..7a9141c52 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -109,7 +109,7 @@ pub struct Config { /// Filesystem path where the sandbox supervisor binds its SSH Unix /// socket. The supervisor is passed this path via - /// `OPENSHELL_SSH_LISTEN_ADDR` / `--ssh-listen-addr` and connects its + /// `OPENSHELL_SSH_SOCKET_PATH` / `--ssh-socket-path` and connects its /// relay bridge to the same path. 
/// /// When the gateway orchestrates sandboxes that each live in their own diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 4c8c4a0f2..be3666130 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -7,7 +7,7 @@ pub struct KubernetesComputeConfig { pub default_image: String, pub image_pull_policy: String, pub grpc_endpoint: String, - pub ssh_listen_addr: String, + pub ssh_socket_path: String, pub ssh_handshake_secret: String, pub ssh_handshake_skew_secs: u64, pub client_tls_secret_name: String, diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index cc9fde68c..f70054805 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -5,7 +5,7 @@ use crate::config::KubernetesComputeConfig; use futures::{Stream, StreamExt, TryStreamExt}; -use k8s_openapi::api::core::v1::{Event as KubeEventObj, Node, Pod}; +use k8s_openapi::api::core::v1::{Event as KubeEventObj, Node}; use kube::api::{Api, ApiResource, DeleteParams, ListParams, PostParams}; use kube::core::gvk::GroupVersionKind; use kube::core::{DynamicObject, ObjectMeta}; @@ -147,8 +147,8 @@ impl KubernetesComputeDriver { &self.config.namespace } - pub fn ssh_listen_addr(&self) -> &str { - &self.config.ssh_listen_addr + pub fn ssh_socket_path(&self) -> &str { + &self.config.ssh_socket_path } pub const fn ssh_handshake_skew_secs(&self) -> u64 { @@ -294,7 +294,7 @@ impl KubernetesComputeDriver { &sandbox.id, &sandbox.name, &self.config.grpc_endpoint, - self.ssh_listen_addr(), + self.ssh_socket_path(), self.ssh_handshake_secret(), self.ssh_handshake_skew_secs(), &self.config.client_tls_secret_name, @@ -862,7 +862,7 @@ fn sandbox_to_k8s_spec( sandbox_id: &str, sandbox_name: &str, grpc_endpoint: &str, - ssh_listen_addr: &str, + ssh_socket_path: &str, 
ssh_handshake_secret: &str, ssh_handshake_skew_secs: u64, client_tls_secret_name: &str, @@ -902,7 +902,7 @@ fn sandbox_to_k8s_spec( sandbox_id, sandbox_name, grpc_endpoint, - ssh_listen_addr, + ssh_socket_path, ssh_handshake_secret, ssh_handshake_skew_secs, &spec.environment, @@ -948,7 +948,7 @@ fn sandbox_to_k8s_spec( sandbox_id, sandbox_name, grpc_endpoint, - ssh_listen_addr, + ssh_socket_path, ssh_handshake_secret, ssh_handshake_skew_secs, spec_env, @@ -973,7 +973,7 @@ fn sandbox_template_to_k8s( sandbox_id: &str, sandbox_name: &str, grpc_endpoint: &str, - ssh_listen_addr: &str, + ssh_socket_path: &str, ssh_handshake_secret: &str, ssh_handshake_skew_secs: u64, spec_environment: &std::collections::HashMap, @@ -1026,7 +1026,7 @@ fn sandbox_template_to_k8s( sandbox_id, sandbox_name, grpc_endpoint, - ssh_listen_addr, + ssh_socket_path, ssh_handshake_secret, ssh_handshake_skew_secs, !client_tls_secret_name.is_empty(), @@ -1176,7 +1176,7 @@ fn build_env_list( sandbox_id: &str, sandbox_name: &str, grpc_endpoint: &str, - ssh_listen_addr: &str, + ssh_socket_path: &str, ssh_handshake_secret: &str, ssh_handshake_skew_secs: u64, tls_enabled: bool, @@ -1189,7 +1189,7 @@ fn build_env_list( sandbox_id, sandbox_name, grpc_endpoint, - ssh_listen_addr, + ssh_socket_path, ssh_handshake_secret, ssh_handshake_skew_secs, tls_enabled, @@ -1211,7 +1211,7 @@ fn apply_required_env( sandbox_id: &str, sandbox_name: &str, grpc_endpoint: &str, - ssh_listen_addr: &str, + ssh_socket_path: &str, ssh_handshake_secret: &str, ssh_handshake_skew_secs: u64, tls_enabled: bool, @@ -1220,8 +1220,8 @@ fn apply_required_env( upsert_env(env, "OPENSHELL_SANDBOX", sandbox_name); upsert_env(env, "OPENSHELL_ENDPOINT", grpc_endpoint); upsert_env(env, "OPENSHELL_SANDBOX_COMMAND", "sleep infinity"); - if !ssh_listen_addr.is_empty() { - upsert_env(env, "OPENSHELL_SSH_LISTEN_ADDR", ssh_listen_addr); + if !ssh_socket_path.is_empty() { + upsert_env(env, "OPENSHELL_SSH_SOCKET_PATH", ssh_socket_path); } 
upsert_env(env, "OPENSHELL_SSH_HANDSHAKE_SECRET", ssh_handshake_secret); upsert_env( diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index 244083465..4b871d77f 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -73,7 +73,7 @@ async fn main() -> Result<()> { default_image: args.sandbox_image.unwrap_or_default(), image_pull_policy: args.sandbox_image_pull_policy.unwrap_or_default(), grpc_endpoint: args.grpc_endpoint.unwrap_or_default(), - ssh_listen_addr: args.sandbox_ssh_socket_path, + ssh_socket_path: args.sandbox_ssh_socket_path, ssh_handshake_secret: args.ssh_handshake_secret, ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, client_tls_secret_name: args.client_tls_secret_name.unwrap_or_default(), diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 9189b5309..8237ba03c 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -1,10 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -use crate::{ - GUEST_SSH_PORT, - rootfs::{extract_sandbox_rootfs_to, sandbox_guest_init_path}, -}; +use crate::rootfs::{extract_sandbox_rootfs_to, sandbox_guest_init_path}; use futures::Stream; use nix::errno::Errno; use nix::sys::signal::{Signal, kill}; @@ -20,14 +17,12 @@ use openshell_core::proto::compute::v1::{ compute_driver_server::ComputeDriver, watch_sandboxes_event, }; use std::collections::{HashMap, HashSet}; -use std::net::{Ipv4Addr, SocketAddr, TcpListener}; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::process::Stdio; use std::sync::Arc; use std::time::Duration; -use tokio::net::TcpStream; use tokio::process::{Child, Command}; use tokio::sync::{Mutex, broadcast, mpsc}; use tokio_stream::wrappers::ReceiverStream; @@ -38,6 +33,7 @@ const DRIVER_NAME: &str = "openshell-driver-vm"; const WATCH_BUFFER: usize = 256; const DEFAULT_VCPUS: u8 = 2; const DEFAULT_MEM_MIB: u32 = 2048; +const GUEST_SSH_SOCKET_PATH: &str = "/run/openshell/ssh.sock"; const GUEST_TLS_DIR: &str = "/opt/openshell/tls"; const GUEST_TLS_CA_PATH: &str = "/opt/openshell/tls/ca.crt"; const GUEST_TLS_CERT_PATH: &str = "/opt/openshell/tls/tls.crt"; @@ -167,7 +163,6 @@ struct VmProcess { #[derive(Debug)] struct SandboxRecord { snapshot: Sandbox, - ssh_port: u16, state_dir: PathBuf, process: Arc>, } @@ -235,7 +230,6 @@ impl VmDriver { return Err(Status::already_exists("sandbox already exists")); } - let ssh_port = allocate_local_port()?; let state_dir = sandbox_state_dir(&self.config.state_dir, &sandbox.id); let rootfs = state_dir.join("rootfs"); @@ -278,9 +272,6 @@ impl VmDriver { .arg("--vm-krun-log-level") .arg(self.config.krun_log_level.to_string()); command.arg("--vm-console-output").arg(&console_output); - command - .arg("--vm-port") - .arg(format!("{ssh_port}:{GUEST_SSH_PORT}")); for env in build_guest_environment(sandbox, &self.config) { command.arg("--vm-env").arg(env); } @@ -307,7 +298,6 @@ 
impl VmDriver { sandbox.id.clone(), SandboxRecord { snapshot: snapshot.clone(), - ssh_port, state_dir: state_dir.clone(), process: process.clone(), }, @@ -417,16 +407,12 @@ impl VmDriver { let mut ready_emitted = false; loop { - let (process, ssh_port, state_dir) = { + let (process, state_dir) = { let registry = self.registry.lock().await; let Some(record) = registry.get(&sandbox_id) else { return; }; - ( - record.process.clone(), - record.ssh_port, - record.state_dir.clone(), - ) + (record.process.clone(), record.state_dir.clone()) }; let exit_status = { @@ -483,8 +469,7 @@ impl VmDriver { return; } - if !ready_emitted && port_is_ready(ssh_port).await && guest_ssh_ready(&state_dir).await - { + if !ready_emitted && guest_ssh_ready(&state_dir).await { if let Some(snapshot) = self .set_snapshot_condition(&sandbox_id, ready_condition(), false) .await @@ -763,16 +748,8 @@ fn build_guest_environment(sandbox: &Sandbox, config: &VmDriverConfig) -> Vec Result<(), std::io::Error> { } } -fn allocate_local_port() -> Result { - let listener = TcpListener::bind((Ipv4Addr::LOCALHOST, 0)) - .map_err(|err| Status::internal(format!("failed to allocate local ssh port: {err}")))?; - listener - .local_addr() - .map(|addr| addr.port()) - .map_err(|err| Status::internal(format!("failed to inspect local ssh port: {err}"))) -} - -async fn port_is_ready(port: u16) -> bool { - TcpStream::connect(SocketAddr::new(Ipv4Addr::LOCALHOST.into(), port)) - .await - .is_ok() -} - async fn guest_ssh_ready(state_dir: &Path) -> bool { let console_log = state_dir.join("rootfs-console.log"); let Ok(contents) = tokio::fs::read_to_string(console_log).await else { @@ -1071,7 +1033,7 @@ mod tests { assert!(env.contains(&"OPENSHELL_ENDPOINT=http://192.168.127.1:8080/".to_string())); assert!(env.contains(&"OPENSHELL_SANDBOX_ID=sandbox-123".to_string())); assert!(env.contains(&format!( - "OPENSHELL_SSH_LISTEN_ADDR=0.0.0.0:{GUEST_SSH_PORT}" + "OPENSHELL_SSH_SOCKET_PATH={GUEST_SSH_SOCKET_PATH}" ))); } @@ -1323,7 
+1285,6 @@ mod tests { sandbox_id.to_string(), SandboxRecord { snapshot: sandbox, - ssh_port: 2222, state_dir, process, }, diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index b1fd1bf05..226cccb57 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -209,7 +209,7 @@ pub async fn run_sandbox( openshell_endpoint: Option, policy_rules: Option, policy_data: Option, - ssh_listen_addr: Option, + ssh_socket_path: Option, ssh_handshake_secret: Option, ssh_handshake_skew_secs: u64, _health_check: bool, @@ -604,13 +604,9 @@ pub async fn run_sandbox( } }); - // The `ssh_listen_addr` argument now carries a filesystem path to the - // Unix socket the embedded SSH daemon listens on. Kept as an `Option` - // for backwards compatibility with the CLI flag name and env var. let ssh_socket_path: Option = - ssh_listen_addr.as_ref().map(std::path::PathBuf::from); - if let Some(listen_addr) = ssh_listen_addr { - let listen_path = std::path::PathBuf::from(listen_addr); + ssh_socket_path.map(std::path::PathBuf::from); + if let Some(listen_path) = ssh_socket_path.clone() { let policy_clone = policy.clone(); let workdir_clone = workdir.clone(); let _ = ssh_handshake_secret; // retained in the signature for compat; unused diff --git a/crates/openshell-sandbox/src/main.rs b/crates/openshell-sandbox/src/main.rs index a37dce0e4..74efb3d3a 100644 --- a/crates/openshell-sandbox/src/main.rs +++ b/crates/openshell-sandbox/src/main.rs @@ -69,9 +69,11 @@ struct Args { #[arg(long, default_value = "warn", env = "OPENSHELL_LOG_LEVEL")] log_level: String, - /// SSH listen address for sandbox access. - #[arg(long, env = "OPENSHELL_SSH_LISTEN_ADDR")] - ssh_listen_addr: Option, + /// Filesystem path to the Unix socket the embedded SSH daemon binds. + /// The supervisor bridges `RelayStream` traffic from the gateway onto + /// this socket; nothing else should connect to it. 
+ #[arg(long, env = "OPENSHELL_SSH_SOCKET_PATH")] + ssh_socket_path: Option, /// Shared secret for gateway-to-sandbox SSH handshake. #[arg(long, env = "OPENSHELL_SSH_HANDSHAKE_SECRET")] @@ -219,7 +221,7 @@ async fn main() -> Result<()> { args.openshell_endpoint, args.policy_rules, args.policy_data, - args.ssh_listen_addr, + args.ssh_socket_path, args.ssh_handshake_secret, args.ssh_handshake_skew_secs, args.health_check, diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 526485e58..0894e5342 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -259,7 +259,7 @@ async fn build_compute_runtime( // RelayStream bridge, not directly. Override via // `sandbox_ssh_socket_path` in the config for deployments // where multiple supervisors share a filesystem. - ssh_listen_addr: config.sandbox_ssh_socket_path.clone(), + ssh_socket_path: config.sandbox_ssh_socket_path.clone(), ssh_handshake_secret: config.ssh_handshake_secret.clone(), ssh_handshake_skew_secs: config.ssh_handshake_skew_secs, client_tls_secret_name: config.client_tls_secret_name.clone(), From 2e6b96dd665640bcfba1245b95b0d5ac8d2c03f5 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 16:00:30 -0700 Subject: [PATCH 17/20] docs(architecture): describe supervisor-initiated relay model and drop historical prose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates `sandbox-connect.md`, `gateway.md`, `sandbox.md`, `gateway-security.md`, and `system-architecture.md` to describe the current supervisor-initiated model forward-facing: two-plane `ConnectSupervisor` + `RelayStream` design, the registry's `open_relay` / `claim_relay` / reaper behaviour, Unix-socket sshd access control, and the sandbox-side OCSF event surface. Strips historical framing that describes what was removed — the "Earlier designs..." 
paragraph, the "Historical: NSSH1 Handshake (removed)" subsection, retained-for-compat config/env table rows, and scattered "no longer X" prose — in favour of clean current-state descriptions. Syncs env- var and flag names to the renamed `--ssh-socket-path` / `OPENSHELL_SSH_SOCKET_PATH`. --- architecture/gateway-security.md | 46 +- architecture/gateway.md | 359 +++++++++------ architecture/sandbox-connect.md | 671 ++++++++++++++-------------- architecture/sandbox.md | 143 ++++-- architecture/system-architecture.md | 40 +- 5 files changed, 735 insertions(+), 524 deletions(-) diff --git a/architecture/gateway-security.md b/architecture/gateway-security.md index 319800c08..6baaee88b 100644 --- a/architecture/gateway-security.md +++ b/architecture/gateway-security.md @@ -269,20 +269,28 @@ The gateway enforces two concurrent connection limits to bound the impact of cre These limits are tracked in-memory and decremented when tunnels close. Exceeding either limit returns HTTP 429 (Too Many Requests). -### NSSH1 Handshake +### Supervisor-Initiated Relay Model -After the gateway connects to the sandbox pod's SSH port, it performs a cryptographic handshake: +The gateway never dials the sandbox. Instead, the sandbox supervisor opens an outbound `ConnectSupervisor` bidirectional gRPC stream to the gateway on startup and keeps it alive for the sandbox lifetime. SSH traffic for `/connect/ssh` (and exec traffic for `ExecSandbox`) rides this same TCP+TLS+HTTP/2 connection as separate multiplexed HTTP/2 streams. The gateway-side registry and `RelayStream` handler live in `crates/openshell-server/src/supervisor_session.rs`; the supervisor-side bridge lives in `crates/openshell-sandbox/src/supervisor_session.rs`. -``` -NSSH1 \n -``` +Per-connection flow: + +1. CLI presents `x-sandbox-id` + `x-sandbox-token` at `/connect/ssh` and passes gateway token validation. +2. 
Gateway calls `SupervisorSessionRegistry::open_relay(sandbox_id, ...)`, which allocates a `channel_id` (UUID) and sends a `RelayOpen` message to the supervisor over the already-established `ConnectSupervisor` stream. If no session is registered yet, it polls with exponential backoff up to a bounded timeout (30 s for `/connect/ssh`, 15 s for `ExecSandbox`). +3. The supervisor opens a new `RelayStream` RPC on the same `Channel` — a new HTTP/2 stream, no new TCP connection and no new TLS handshake. The first `RelayFrame` is a `RelayInit { channel_id }` that claims the pending slot on the gateway. +4. `claim_relay` pairs the gateway-side waiter with the supervisor-side RPC via a `tokio::io::duplex(64 KiB)` pair. Subsequent `RelayFrame::data` frames carry raw SSH bytes in both directions. The supervisor is a dumb byte bridge: it has no protocol awareness of the SSH bytes flowing through. +5. Inside the sandbox pod, the supervisor connects the relay to sshd over a Unix domain socket at `/run/openshell/ssh.sock` (see `crates/openshell-driver-kubernetes/src/main.rs`). + +Security properties of this model: -- **HMAC**: `HMAC-SHA256(secret, "{token}|{timestamp}|{nonce}")`, hex-encoded. -- **Secret**: shared via `OPENSHELL_SSH_HANDSHAKE_SECRET` env var, set on both the gateway and sandbox. -- **Clock skew tolerance**: configurable via `OPENSHELL_SSH_HANDSHAKE_SKEW_SECS` (default 300 seconds). -- **Expected response**: `OK\n` from the sandbox. +- **One auth boundary.** mTLS on the `ConnectSupervisor` stream is the only identity check between gateway and sandbox. Every relay rides that same authenticated HTTP/2 connection. +- **No inbound network path into the sandbox.** The sandbox exposes no TCP port for gateway ingress; all relays are supervisor-initiated. The pod only needs egress to the gateway. 
+- **In-pod access control is filesystem permissions on the Unix socket.** sshd listens on `/run/openshell/ssh.sock` with the parent directory at `0700` and the socket itself at `0600`, both owned by the supervisor (root). The sandbox entrypoint runs as an unprivileged user and cannot open either. Any process in the supervisor's filesystem view that can open the socket can reach sshd — same trust model as any local Unix socket with `0600` permissions. See `crates/openshell-sandbox/src/ssh.rs:55-83`. +- **Supersede race is closed.** A supervisor reconnect registers a new `session_id` for the same sandbox id. Cleanup on the old session's task uses `remove_if_current(sandbox_id, session_id)` so a late-finishing old task cannot evict the new registration or serve relays meant for the new instance. See `SupervisorSessionRegistry::remove_if_current` in `crates/openshell-server/src/supervisor_session.rs`. +- **Pending-relay reaper.** A background task sweeps `pending_relays` entries older than 10 s (`RELAY_PENDING_TIMEOUT`). If the supervisor acknowledges `RelayOpen` but never initiates `RelayStream` — crash, deadlock, or adversarial stall — the gateway-side slot does not pin indefinitely. +- **Client-side keepalives.** The CLI's `ssh` invocation sets `ServerAliveInterval=15` / `ServerAliveCountMax=3` (`crates/openshell-cli/src/ssh.rs:150`), so a silently-dropped relay (gateway restart, supervisor restart, or adversarial TCP drop) surfaces to the user within roughly 45 s rather than hanging. -This handshake prevents direct connections to sandbox SSH ports from within the cluster, even from pods that share the network. +Observability (sandbox side, OCSF): `session_established`, `session_closed`, `session_failed`, `relay_open`, `relay_closed`, `relay_failed`, `relay_close_from_gateway` — all emitted as `NetworkActivity` events. Gateway-side OCSF emission for the same lifecycle is a tracked follow-up. ## Port Configuration @@ -325,8 +333,8 @@ graph LR CLI -- "mTLS
(cluster CA)" --> TLS SDK -- "mTLS
(cluster CA)" --> TLS TLS --> API - SBX -- "mTLS
(cluster CA)" --> TLS - API -- "SSH + NSSH1
handshake" --> SBX + SBX -- "mTLS + ConnectSupervisor
(supervisor-initiated)" --> TLS + API -- "RelayStream
(HTTP/2 on same mTLS conn)" --> SBX SBX -- "OPA policy +
process identity" --> HOSTS ``` @@ -335,8 +343,9 @@ graph LR | Boundary | Mechanism | |---|---| | External → Gateway | mTLS with cluster CA by default, or trusted reverse-proxy/Cloudflare boundary in edge mode | -| Sandbox → Gateway | mTLS with shared client cert | -| Gateway → Sandbox (SSH) | Session token + HMAC-SHA256 handshake (NSSH1) | +| Sandbox → Gateway | mTLS with shared client cert (supervisor-initiated `ConnectSupervisor` stream) | +| Gateway → Sandbox (SSH/exec) | Rides the supervisor's mTLS `ConnectSupervisor` HTTP/2 connection as a `RelayStream` — no separate gateway-to-pod connection | +| Supervisor → in-pod sshd | Unix-socket filesystem permissions (`/run/openshell/ssh.sock`, 0700 parent / 0600 socket) | | Sandbox → External (network) | OPA policy + process identity binding via `/proc` | ### What Is Not Authenticated (by Design) @@ -387,8 +396,11 @@ This section defines the primary attacker profiles, what the current design prot |---|---|---| | MITM or passive interception of gateway traffic | Mandatory mTLS with cluster CA, or trusted reverse-proxy boundary in Cloudflare mode | Default mode is direct mTLS; reverse-proxy mode shifts the outer trust boundary upstream | | Unauthenticated API/health access | mTLS by default, or Cloudflare/reverse-proxy auth in edge mode | `/health*` are direct-mTLS only in the default deployment mode | -| Forged SSH tunnel connection to sandbox | Session token validation + NSSH1 HMAC handshake | Requires token and shared handshake secret | -| Direct access to sandbox SSH port from cluster peers | NSSH1 challenge-response | Connection denied without valid signature | +| Forged SSH tunnel connection to sandbox | Session token validation at the gateway; only the supervisor's authenticated mTLS `ConnectSupervisor` stream can carry a `RelayStream` to its sandbox | Forging a relay requires stealing a valid mTLS client identity | +| Direct access to sandbox sshd from cluster peers | sshd listens on a Unix socket (`0700` 
parent / `0600` socket) inside the pod | No network path exists to sshd from cluster peers | +| Stale or reconnecting supervisor serves relays for a new instance | `session_id`-scoped `remove_if_current` on the registry | Old session cleanup cannot evict a newer registration | +| Supervisor acknowledges `RelayOpen` but never initiates `RelayStream` | Gateway-side pending-relay reaper (10 s timeout) | Prevents indefinite resource pinning by a buggy or malicious supervisor | +| Silent TCP drop of an in-flight relay | CLI `ServerAliveInterval=15` / `ServerAliveCountMax=3` | Client detects a dead relay within ~45 s instead of hanging | | Unauthorized outbound internet access from sandbox | OPA policy + process identity checks | Applies to sandbox egress policy layer | ### Residual Risks and Current Tradeoffs @@ -414,7 +426,7 @@ This section defines the primary attacker profiles, what the current design prot - The cluster CA is generated and distributed without interception during bootstrap. - Kubernetes secret access is restricted to intended workloads and operators. - Gateway and sandbox container images are trusted and not tampered with. -- System clocks are reasonably synchronized for timestamp-based SSH handshake checks. +- The sandbox pod's filesystem is trusted: only the supervisor process (root) can open `/run/openshell/ssh.sock`, which is enforced by the `0700` parent directory and `0600` socket permissions set at sshd start. ## Sandbox Outbound TLS (L7 Inspection) diff --git a/architecture/gateway.md b/architecture/gateway.md index 02f487050..29fbaf5d6 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -2,7 +2,9 @@ ## Overview -`openshell-server` is the gateway -- the central control plane for a cluster. It exposes two gRPC services (OpenShell and Inference) and HTTP endpoints on a single multiplexed port, manages sandbox lifecycle through Kubernetes CRDs, persists state in SQLite or Postgres, and provides SSH tunneling into sandbox pods. 
The gateway coordinates all interactions between clients, the Kubernetes cluster, and the persistence layer. +`openshell-server` is the gateway -- the central control plane for a cluster. It exposes two gRPC services (OpenShell and Inference) and HTTP endpoints on a single multiplexed port, manages sandbox lifecycle through a pluggable compute driver, persists state in SQLite or Postgres, and brokers SSH access into sandboxes through supervisor-initiated relay streams. The gateway coordinates all interactions between clients, the compute backend, and the persistence layer. + +Each sandbox supervisor opens a persistent inbound gRPC session (`ConnectSupervisor`); the gateway multiplexes per-invocation `RelayStream` RPCs onto the same HTTP/2 connection to move bytes between clients and the in-sandbox SSH Unix socket. The gateway does not need to know, resolve, or reach the sandbox's network address. ## Architecture Diagram @@ -11,25 +13,27 @@ The following diagram shows the major components inside the gateway process and ```mermaid graph TD Client["gRPC / HTTP Client"] + Supervisor["Sandbox Supervisor
(inbound gRPC)"] TCP["TCP Listener"] TLS["TLS Acceptor
(optional)"] - MUX["MultiplexedService"] + MUX["MultiplexedService
(HTTP/2 adaptive window)"] GRPC_ROUTER["GrpcRouter"] NAV["OpenShellServer
(OpenShell service)"] INF["InferenceServer
(Inference service)"] HTTP["HTTP Router
(Axum)"] HEALTH["Health Endpoints"] SSH_TUNNEL["SSH Tunnel
(/connect/ssh)"] + SUP_REG["SupervisorSessionRegistry"] STORE["Store
(SQLite / Postgres)"] - K8S["Kubernetes API"] - WATCHER["Sandbox Watcher"] - EVENT_TAILER["Kube Event Tailer"] + COMPUTE["ComputeRuntime"] + DRIVER["ComputeDriver
(kubernetes / vm)"] WATCH_BUS["SandboxWatchBus"] LOG_BUS["TracingLogBus"] PLAT_BUS["PlatformEventBus"] INDEX["SandboxIndex"] Client --> TCP + Supervisor --> TCP TCP --> TLS TLS --> MUX MUX -->|"content-type: application/grpc"| GRPC_ROUTER @@ -39,17 +43,16 @@ graph TD HTTP --> HEALTH HTTP --> SSH_TUNNEL NAV --> STORE - NAV --> K8S - INF --> STORE + NAV --> COMPUTE + NAV --> SUP_REG SSH_TUNNEL --> STORE - SSH_TUNNEL --> K8S - WATCHER --> K8S - WATCHER --> STORE - WATCHER --> WATCH_BUS - WATCHER --> INDEX - EVENT_TAILER --> K8S - EVENT_TAILER --> PLAT_BUS - EVENT_TAILER --> INDEX + SSH_TUNNEL --> SUP_REG + INF --> STORE + COMPUTE --> DRIVER + COMPUTE --> STORE + COMPUTE --> WATCH_BUS + COMPUTE --> INDEX + COMPUTE --> PLAT_BUS LOG_BUS --> PLAT_BUS ``` @@ -57,22 +60,23 @@ graph TD | Module | File | Purpose | |--------|------|---------| -| Entry point | `crates/openshell-server/src/main.rs` | CLI argument parsing, config assembly, tracing setup, calls `run_server` | +| Entry point | `crates/openshell-server/src/main.rs` | Thin binary wrapper that calls `cli::run_cli` | +| CLI | `crates/openshell-server/src/cli.rs` | `Args` parser, config assembly, tracing setup, calls `run_server` | | Gateway runtime | `crates/openshell-server/src/lib.rs` | `ServerState` struct, `run_server()` accept loop | -| Protocol mux | `crates/openshell-server/src/multiplex.rs` | `MultiplexService`, `MultiplexedService`, `GrpcRouter`, `BoxBody` | -| gRPC: OpenShell | `crates/openshell-server/src/grpc.rs` | `OpenShellService` -- sandbox CRUD, provider CRUD, watch, exec, SSH sessions, policy delivery | -| gRPC: Inference | `crates/openshell-server/src/inference.rs` | `InferenceService` -- cluster inference config (set/get) and sandbox inference bundle delivery | +| Protocol mux | `crates/openshell-server/src/multiplex.rs` | `MultiplexService`, `MultiplexedService`, `GrpcRouter`, `BoxBody`, HTTP/2 adaptive-window tuning | +| gRPC: OpenShell | `crates/openshell-server/src/grpc/mod.rs` | 
`OpenShellService` trait impl -- dispatches to per-concern handlers | +| gRPC: Sandbox/Exec | `crates/openshell-server/src/grpc/sandbox.rs` | Sandbox CRUD, `ExecSandbox`, SSH session handlers, relay-backed exec proxy | +| gRPC: Inference | `crates/openshell-server/src/inference.rs` | `InferenceService` -- cluster inference config and sandbox bundle delivery | +| Supervisor sessions | `crates/openshell-server/src/supervisor_session.rs` | `SupervisorSessionRegistry`, `handle_connect_supervisor`, `handle_relay_stream`, reaper | | HTTP | `crates/openshell-server/src/http.rs` | Health endpoints, merged with SSH tunnel router | | Browser auth | `crates/openshell-server/src/auth.rs` | Cloudflare browser login relay at `/auth/connect` | -| SSH tunnel | `crates/openshell-server/src/ssh_tunnel.rs` | HTTP CONNECT handler at `/connect/ssh` | +| SSH tunnel | `crates/openshell-server/src/ssh_tunnel.rs` | HTTP CONNECT handler at `/connect/ssh` backed by `open_relay` | | WS tunnel | `crates/openshell-server/src/ws_tunnel.rs` | WebSocket tunnel handler at `/_ws_tunnel` for Cloudflare-fronted clients | | TLS | `crates/openshell-server/src/tls.rs` | `TlsAcceptor` wrapping rustls with ALPN | | Persistence | `crates/openshell-server/src/persistence/mod.rs` | `Store` enum (SQLite/Postgres), generic object CRUD, protobuf codec | -| Persistence: SQLite | `crates/openshell-server/src/persistence/sqlite.rs` | `SqliteStore` with sqlx | -| Persistence: Postgres | `crates/openshell-server/src/persistence/postgres.rs` | `PostgresStore` with sqlx | | Compute runtime | `crates/openshell-server/src/compute/mod.rs` | `ComputeRuntime`, gateway-owned sandbox lifecycle orchestration over a compute backend | -| Compute driver: Kubernetes | `crates/openshell-driver-kubernetes/src/driver.rs` | Kubernetes CRD create/delete, endpoint resolution, watch stream, pod template translation | -| Compute driver: VM | `crates/openshell-driver-vm/src/driver.rs` | Per-sandbox microVM create/delete, localhost endpoint 
resolution, watch stream, supervisor-only guest boot | +| Compute driver: Kubernetes | `crates/openshell-driver-kubernetes/src/driver.rs` | Kubernetes CRD create/delete/watch, pod template translation | +| Compute driver: VM | `crates/openshell-driver-vm/src/driver.rs` | Per-sandbox microVM create/delete/watch, supervisor-only guest boot | | Sandbox index | `crates/openshell-server/src/sandbox_index.rs` | `SandboxIndex` -- in-memory name/pod-to-id correlation | | Watch bus | `crates/openshell-server/src/sandbox_watch.rs` | `SandboxWatchBus` -- in-memory broadcast for persisted sandbox updates | | Tracing bus | `crates/openshell-server/src/tracing_bus.rs` | `TracingLogBus` -- captures tracing events keyed by `sandbox_id` | @@ -81,28 +85,30 @@ Proto definitions consumed by the gateway: | Proto file | Package | Defines | |------------|---------|---------| -| `proto/openshell.proto` | `openshell.v1` | `OpenShell` service, public sandbox resource model, provider/SSH/watch messages | -| `proto/compute_driver.proto` | `openshell.compute.v1` | Internal `ComputeDriver` service, driver-native sandbox observations, endpoint resolution, compute watch stream envelopes | +| `proto/openshell.proto` | `openshell.v1` | `OpenShell` service, public sandbox resource model, provider/SSH/watch/policy messages, supervisor session messages (`ConnectSupervisor`, `RelayStream`, `RelayFrame`) | +| `proto/compute_driver.proto` | `openshell.compute.v1` | Internal `ComputeDriver` service, driver-native sandbox observations, compute watch stream envelopes | | `proto/inference.proto` | `openshell.inference.v1` | `Inference` service: `SetClusterInference`, `GetClusterInference`, `GetInferenceBundle` | | `proto/datamodel.proto` | `openshell.datamodel.v1` | `Provider` | | `proto/sandbox.proto` | `openshell.sandbox.v1` | Sandbox supervisor policy, settings, and config messages | ## Startup Sequence -The gateway boots in `main()` (`crates/openshell-server/src/main.rs`) and proceeds through these 
steps: +The gateway boots in `cli::run_cli` (`crates/openshell-server/src/cli.rs`) and proceeds through these steps: -1. **Install rustls crypto provider** -- `aws_lc_rs::default_provider().install_default()`. +1. **Install rustls crypto provider** -- `rustls::crypto::ring::default_provider().install_default()`. 2. **Parse CLI arguments** -- `Args::parse()` via `clap`. Every flag has a corresponding environment variable (see [Configuration](#configuration)). 3. **Initialize tracing** -- Creates a `TracingLogBus` and installs a tracing subscriber that writes to stdout and publishes log events keyed by `sandbox_id` into the bus. -4. **Build `Config`** -- Assembles a `openshell_core::Config` from the parsed arguments. +4. **Build `Config`** -- Assembles an `openshell_core::Config` from the parsed arguments. 5. **Call `run_server()`** (`crates/openshell-server/src/lib.rs`): 1. Connect to the persistence store (`Store::connect`), which auto-detects SQLite vs Postgres from the URL prefix and runs migrations. 2. Create `ComputeRuntime` with a `ComputeDriver` implementation selected by `OPENSHELL_DRIVERS`: - `kubernetes` wraps `KubernetesComputeDriver` in `ComputeDriverService`, so the gateway uses the `openshell.compute.v1.ComputeDriver` RPC surface even without transport. - `vm` spawns the sibling `openshell-driver-vm` binary as a local compute-driver process, connects to it over a Unix domain socket, and keeps the libkrun/rootfs runtime out of the gateway binary. - 3. Build `ServerState` (shared via `Arc` across all handlers). + 3. Build `ServerState` (shared via `Arc` across all handlers), including a fresh `SupervisorSessionRegistry`. 4. **Spawn background tasks**: - - `ComputeRuntime::spawn_watchers` -- consumes the compute-driver watch stream, republishes platform events, and runs a periodic `ListSandboxes` snapshot reconcile so the store-backed public sandbox reads stay aligned with the compute driver. 
+ - `ComputeRuntime::spawn_watchers` -- consumes the compute-driver watch stream, republishes platform events, and runs a periodic `ListSandboxes` snapshot reconcile.
+ - `ssh_tunnel::spawn_session_reaper` -- sweeps expired or revoked SSH session tokens from the store hourly.
+ - `supervisor_session::spawn_relay_reaper` -- sweeps orphaned pending relay channels every 30 seconds.
 5. Create `MultiplexService`.
 6. Bind `TcpListener` on `config.bind_address`.
 7. Optionally create `TlsAcceptor` from cert/key files.
@@ -110,7 +116,7 @@ The gateway boots in `main()` (`crates/openshell-server/src/main.rs`) and procee

 ## Configuration

-All configuration is via CLI flags with environment variable fallbacks. The `--db-url` flag is the only required argument.
+All configuration is via CLI flags with environment variable fallbacks. The `--db-url` flag is the only required argument.

 | Flag | Env Var | Default | Description |
 |------|---------|---------|-------------|
@@ -125,7 +131,7 @@ All configuration is via CLI flags with environment variable fallbacks. The `--d
 | `--db-url` | `OPENSHELL_DB_URL` | *required* | Database URL (`sqlite:...` or `postgres://...`). The Helm chart defaults to `sqlite:/var/openshell/openshell.db` (persistent volume). In-memory SQLite (`sqlite::memory:?cache=shared`) works for ephemeral/test environments but data is lost on restart. |
 | `--sandbox-namespace` | `OPENSHELL_SANDBOX_NAMESPACE` | `default` | Kubernetes namespace for sandbox CRDs |
 | `--sandbox-image` | `OPENSHELL_SANDBOX_IMAGE` | None | Default container image for sandbox pods |
-| `--grpc-endpoint` | `OPENSHELL_GRPC_ENDPOINT` | None | gRPC endpoint reachable from within the cluster (for sandbox callbacks) |
+| `--grpc-endpoint` | `OPENSHELL_GRPC_ENDPOINT` | None | gRPC endpoint reachable from within the cluster (for supervisor callbacks) |
 | `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes` and `vm`. 
| | `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, and runtime state | | `--vm-compute-driver-bin` | `OPENSHELL_VM_COMPUTE_DRIVER_BIN` | sibling `openshell-driver-vm` binary | Local VM compute-driver process spawned by the gateway | @@ -138,9 +144,8 @@ All configuration is via CLI flags with environment variable fallbacks. The `--d | `--ssh-gateway-host` | `OPENSHELL_SSH_GATEWAY_HOST` | `127.0.0.1` | Public hostname returned in SSH session responses | | `--ssh-gateway-port` | `OPENSHELL_SSH_GATEWAY_PORT` | `8080` | Public port returned in SSH session responses | | `--ssh-connect-path` | `OPENSHELL_SSH_CONNECT_PATH` | `/connect/ssh` | HTTP path for SSH CONNECT/upgrade | -| `--sandbox-ssh-port` | `OPENSHELL_SANDBOX_SSH_PORT` | `2222` | SSH listen port inside sandbox pods | -| `--ssh-handshake-secret` | `OPENSHELL_SSH_HANDSHAKE_SECRET` | None | Shared HMAC-SHA256 secret for gateway-to-sandbox handshake | -| `--ssh-handshake-skew-secs` | `OPENSHELL_SSH_HANDSHAKE_SKEW_SECS` | `300` | Allowed clock skew (seconds) for SSH handshake timestamps | + +The sandbox-side SSH listener is a Unix domain socket inside the sandbox. The path defaults to `/run/openshell/ssh.sock` and is configured on the compute driver (e.g. `openshell-driver-kubernetes --sandbox-ssh-socket-path`). The gateway never dials this socket itself; the supervisor bridges it onto a `RelayStream` when asked. ## Shared State @@ -157,15 +162,17 @@ pub struct ServerState { pub ssh_connections_by_token: Mutex>, pub ssh_connections_by_sandbox: Mutex>, pub settings_mutex: tokio::sync::Mutex<()>, + pub supervisor_sessions: SupervisorSessionRegistry, } ``` - **`store`** -- persistence backend (SQLite or Postgres) for all object types. -- **`compute`** -- gateway-owned compute orchestration. 
Persists sandbox lifecycle transitions, validates create requests through the compute backend, resolves exec/SSH endpoints, consumes the backend watch stream, and periodically reconciles the store against `ComputeDriver/ListSandboxes` snapshots. +- **`compute`** -- gateway-owned compute orchestration. Persists sandbox lifecycle transitions, validates create requests through the compute backend, consumes the backend watch stream, and periodically reconciles the store against `ComputeDriver/ListSandboxes` snapshots. - **`sandbox_index`** -- in-memory bidirectional index mapping sandbox names and agent pod names to sandbox IDs. Updated from compute-driver sandbox snapshots. - **`sandbox_watch_bus`** -- `broadcast`-based notification bus keyed by sandbox ID. Producers call `notify(&id)` when the persisted sandbox record changes; consumers in `WatchSandbox` streams receive `()` signals and re-read the record. - **`tracing_log_bus`** -- captures `tracing` events that include a `sandbox_id` field and republishes them as `SandboxLogLine` messages. Maintains a per-sandbox tail buffer (default 200 entries). Also contains a nested `PlatformEventBus` for compute-driver platform events. -- **`settings_mutex`** -- serializes settings mutations (global and sandbox) to prevent read-modify-write races. Held for the duration of any setting set/delete or global policy set/delete operation. See [Gateway Settings Channel](gateway-settings.md#global-policy-lifecycle). +- **`supervisor_sessions`** -- tracks the live `ConnectSupervisor` session per sandbox and the set of pending relay channels awaiting the supervisor's `RelayStream` dial-back. See [Supervisor Sessions](#supervisor-sessions). +- **`settings_mutex`** -- serializes settings mutations (global and sandbox) to prevent read-modify-write races. See [Gateway Settings Channel](gateway-settings.md#global-policy-lifecycle). ## Protocol Multiplexing @@ -176,8 +183,9 @@ All traffic (gRPC and HTTP) shares a single TCP port. 
Multiplexing happens at th `MultiplexService::serve()` (`crates/openshell-server/src/multiplex.rs`) creates per-connection service instances: 1. Each accepted TCP stream (optionally TLS-wrapped) is passed to `hyper_util::server::conn::auto::Builder`, which auto-negotiates HTTP/1.1 or HTTP/2. -2. The builder calls `serve_connection_with_upgrades()`, which supports HTTP upgrades (needed for the SSH tunnel's CONNECT method). -3. For each request, `MultiplexedService` inspects the `content-type` header: +2. The HTTP/2 side is built with `adaptive_window(true)`. Hyper/h2 auto-sizes the per-stream flow-control window based on measured bandwidth-delay product, so bulk byte transfers on `RelayStream` (and `ExecSandbox` / `PushSandboxLogs`) are not throttled by the default 64 KiB window. Idle streams stay cheap; active streams grow as needed. +3. The builder calls `serve_connection_with_upgrades()`, which supports HTTP upgrades (needed for the SSH tunnel's CONNECT method). +4. For each request, `MultiplexedService` inspects the `content-type` header: - **Starts with `application/grpc`** -- routes to `GrpcRouter`. - **Anything else** -- routes to the Axum HTTP router. @@ -202,32 +210,147 @@ When TLS is enabled (`crates/openshell-server/src/tls.rs`): - Supports PKCS#1, PKCS#8, and SEC1 private key formats. - The TLS handshake happens before the stream reaches Hyper's auto builder, so ALPN negotiation and HTTP version detection work together transparently. - Certificates are generated at cluster bootstrap time by the `openshell-bootstrap` crate using `rcgen`, not by a Helm Job. The bootstrap reconciles three K8s secrets: `openshell-server-tls` (server cert+key), `openshell-server-client-ca` (CA cert), and `openshell-client-tls` (client cert+key+CA, shared by CLI and sandbox pods). -- **Certificate lifetime**: Certificates use `rcgen` defaults (effectively never expire), which is appropriate for an internal dev-cluster PKI where certs are ephemeral to the cluster's lifetime. 
-**Redeploy behavior**: On redeploy, existing cluster TLS secrets are loaded and reused if they are complete and valid PEM. If secrets are missing, incomplete, or malformed, fresh PKI is generated. If rotation occurs and the openshell workload is already running, the bootstrap performs a rollout restart and waits for completion before persisting CLI-side credentials.
+- Sandbox supervisors reuse the shared client cert to authenticate their `ConnectSupervisor` and `RelayStream` calls over the same mTLS channel.
+
+## Supervisor Sessions
+
+The gateway brokers all byte-level access into a sandbox through a two-plane design on a single HTTP/2 connection initiated by the supervisor:
+
+1. **Control plane** -- `ConnectSupervisor(stream SupervisorMessage) returns (stream GatewayMessage)`. Long-lived, one per sandbox. Carries `SupervisorHello`, `SessionAccepted`/`SessionRejected`, heartbeats, and `RelayOpen`/`RelayClose` control messages.
+2. **Data plane** -- `RelayStream(stream RelayFrame) returns (stream RelayFrame)`. One short-lived call per SSH or exec invocation. The first inbound frame is a `RelayInit { channel_id }`; subsequent frames carry raw bytes in `RelayFrame.data` in either direction.
+
+Both RPCs are defined in `proto/openshell.proto` and ride the same TCP + TLS + HTTP/2 connection from the supervisor. No new TLS handshake, no reverse HTTP CONNECT, no direct gateway-to-pod dial.
+
+### `SupervisorSessionRegistry`
+
+`crates/openshell-server/src/supervisor_session.rs` defines `SupervisorSessionRegistry`, a single instance of which lives on `ServerState.supervisor_sessions`. It holds two maps guarded by `std::sync::Mutex`:
+
+- `sessions: HashMap<String, LiveSession>` -- one entry per connected supervisor, keyed by sandbox ID. Each `LiveSession` carries a unique `session_id`, the `mpsc::Sender<GatewayMessage>` for the outbound stream, and a connection timestamp.
+- `pending_relays: HashMap<String, PendingRelay>` -- one entry per in-flight `open_relay` call, keyed by channel ID, awaiting the supervisor's `RelayStream` dial-back. 
Each `PendingRelay` wraps a `oneshot::Sender<DuplexStream>` and a creation timestamp.
+
+Core operations:
+
+| Method | Purpose |
+|--------|---------|
+| `register(sandbox_id, session_id, tx)` | Insert a live session; returns the previous session's sender (if any) so the caller can close it. Used by `handle_connect_supervisor` when a supervisor reconnects. |
+| `remove_if_current(sandbox_id, session_id)` | Remove the session only if its `session_id` still matches. Guards against the supersede race where an old session's cleanup task fires after a newer session already registered. |
+| `open_relay(sandbox_id, session_wait_timeout)` | Wait up to `session_wait_timeout` for a live session, allocate a fresh `channel_id` (UUID v4), insert the pending slot, send `RelayOpen { channel_id }` to the supervisor, and return `(channel_id, oneshot::Receiver<DuplexStream>)`. The receiver resolves once the supervisor's `RelayStream` arrives and `claim_relay` pairs them up. |
+| `claim_relay(channel_id)` | Consume the pending slot, construct a `tokio::io::duplex(64 KiB)` pair, hand the gateway-side half to the waiter via the oneshot, and return the supervisor-side half to `handle_relay_stream`. |
+| `reap_expired_relays()` | Drop pending relays older than 10 s. Called by `spawn_relay_reaper` on a 30 s cadence. |
+
+Session wait uses exponential backoff from 100 ms to 2 s while polling the sessions map. Pending-relay expiry is fixed at `RELAY_PENDING_TIMEOUT = 10 s`.
+
+### `handle_connect_supervisor`
+
+Lifecycle of a supervisor session:
+
+1. Read the first `SupervisorMessage`; require `payload = Hello { sandbox_id, instance_id }` and a non-empty `sandbox_id`.
+2. Allocate a fresh `session_id` (UUID v4) and create an `mpsc::channel::<GatewayMessage>(64)` for the outbound stream.
+3. Call `registry.register(...)`. If it returns a previous sender, log that the previous session was superseded (dropping the previous `tx` closes the old outbound stream).
+4. Send `SessionAccepted { session_id, heartbeat_interval_secs: 15 }`. 
If the send fails, call `remove_if_current` (so a concurrent reconnect isn't evicted) and return `Internal`. +5. Spawn a session loop that `select!`s between inbound messages and a 15 s heartbeat timer. Inbound heartbeats are silent; `RelayOpenResult` is logged; `RelayClose` is logged; unknown payloads are logged as warnings. +6. When the loop exits (inbound EOF, inbound error, or outbound channel closed), `remove_if_current` drops the registration -- unless a newer session has already replaced it. + +### `handle_relay_stream` + +Lifecycle of one relay call: + +1. Read the first inbound `RelayFrame`; require `payload = Init { channel_id }` with a non-empty `channel_id`. Reject anything else with `InvalidArgument`. +2. Call `registry.claim_relay(channel_id)`. Returns `NotFound` if the channel is unknown or already expired, `DeadlineExceeded` if older than 10 s, or `Internal` if the waiter has dropped the oneshot receiver. +3. Split the supervisor-side `DuplexStream` into read and write halves and spawn two tasks: + - **Supervisor → gateway**: pull `RelayFrame`s from the inbound stream, accept `Data(bytes)`, write to the duplex write-half. On non-data frames, warn and break. Best-effort `shutdown()` on exit so the reader sees EOF. + - **Gateway → supervisor**: read up to `RELAY_STREAM_CHUNK_SIZE = 16 KiB` at a time from the duplex read-half and emit `RelayFrame { Data }` messages on an outbound `mpsc::channel(16)`. +4. Return the outbound receiver as the RPC response stream. + +### Connect Flow (SSH Tunnel) + +```mermaid +sequenceDiagram + participant Client as SSH client + participant GW as Gateway
(/connect/ssh) + participant Reg as SupervisorSessionRegistry + participant Sup as Sandbox Supervisor + participant Daemon as In-sandbox sshd
(Unix socket) + + Client->>GW: CONNECT /connect/ssh
x-sandbox-id, x-sandbox-token + GW->>GW: validate session + sandbox Ready + GW->>Reg: open_relay(sandbox_id, 30s) + Reg->>Sup: GatewayMessage::RelayOpen { channel_id } + Note over Reg: waits for RelayStream on channel_id + Sup->>Daemon: connect to Unix socket + Sup->>GW: RelayStream(RelayFrame::Init { channel_id }) + GW->>Reg: claim_relay(channel_id) + Reg-->>Sup: supervisor-side DuplexStream + Reg-->>GW: gateway-side DuplexStream + GW-->>Client: 200 OK + HTTP upgrade + Client<<->>GW: copy_bidirectional(upgraded, duplex) + GW<<->>Sup: RelayFrame::Data in both directions + Sup<<->>Daemon: raw SSH bytes +``` + +Timeouts on the tunnel path: + +- `open_relay` session wait: **30 s**. A first `sandbox connect` immediately after `sandbox create` must cover the supervisor's initial TLS + gRPC handshake on a cold pod. +- `relay_rx` delivery timeout: 10 s. Covers the round-trip from the `RelayOpen` message to the supervisor's `RelayStream` dial-back. + +Per-token and per-sandbox concurrent-tunnel limits (3 and 20 respectively) are still enforced before the upgrade. + +### Exec Flow + +`ExecSandbox` reuses the same machinery from `grpc/sandbox.rs`: + +1. Validate the request (`sandbox_id`, `command`, env-key format, other field rules), fetch the sandbox, require `Ready` phase. +2. `state.supervisor_sessions.open_relay(&sandbox.id, 15s)` -- shorter timeout than SSH connect, because exec is typically called mid-lifetime after the supervisor session is already established. +3. Wait up to 10 s for the relay `DuplexStream`. +4. `stream_exec_over_relay`: bind an ephemeral localhost TCP listener, bridge that single-use TCP socket to the relay duplex, and drive a `russh` client through the local port. The `russh` session opens a channel, executes the shell-escaped command, and streams `ExecSandboxStdout`/`ExecSandboxStderr` chunks to the caller. On completion, send `ExecSandboxExit { exit_code }`. +5. On timeout (if `timeout_seconds > 0`), emit exit code 124 (matching `timeout(1)`). 
+ +The supervisor-side SSH daemon is an SSH server bound to a Unix domain socket inside the sandbox's filesystem. Filesystem permissions on that socket are the only access-control boundary between the supervisor bridge and the daemon; all higher-level authorization is enforced at `CreateSshSession` / `ExecSandbox` in the gateway. + +### Regression Coverage + +`crates/openshell-server/tests/supervisor_relay_integration.rs` is the regression guard for the `RelayStream` wire protocol. It stands up an in-process tonic server that mounts the real `handle_relay_stream` behind `MultiplexedService`, connects a mock supervisor client over a real tonic `Channel`, and exercises the registry's `open_relay` → `claim_relay` pairing end to end with `tokio::io::duplex` bridging. The five test cases cover: + +- Round-trip bytes from gateway to supervisor and back (echo loop). +- Clean close when the gateway drops the relay. +- EOF propagation when the supervisor closes its outbound sender. +- `Unavailable` when `open_relay` is called without a registered session. +- Concurrent `RelayStream` calls multiplexed independently on the same connection. + +These complement the unit tests inside `supervisor_session.rs` (registry-only behavior) and the live cluster tests (full CLI → gateway → sandbox path). ## gRPC Services ### OpenShell Service -Defined in `proto/openshell.proto`, implemented in `crates/openshell-server/src/grpc.rs` as `OpenShellService`. +Defined in `proto/openshell.proto`, implemented in `crates/openshell-server/src/grpc/mod.rs` as `OpenShellService`. Per-concern handlers live in `crates/openshell-server/src/grpc/` submodules. 
#### Sandbox Management | RPC | Description | Key behavior | |-----|-------------|--------------| | `Health` | Returns service status and version | Always returns `HEALTHY` with `CARGO_PKG_VERSION` | -| `CreateSandbox` | Create a new sandbox | Validates spec and policy, validates provider names exist (fail-fast), persists to store, creates Kubernetes CRD. On K8s 409 conflict or error, rolls back the store record and index entry. | +| `CreateSandbox` | Create a new sandbox | Validates spec and policy, validates provider names exist (fail-fast), persists to store, creates the compute-driver sandbox. On driver failure, rolls back the store record and index entry. | | `GetSandbox` | Fetch sandbox by name | Looks up by name via `store.get_message_by_name()` | | `ListSandboxes` | List sandboxes | Paginated (default limit 100), decodes protobuf payloads from store records | -| `DeleteSandbox` | Delete sandbox by name | Sets phase to `Deleting`, persists, notifies watch bus, then deletes the Kubernetes CRD. Cleans up store if the CRD was already gone. | +| `DeleteSandbox` | Delete sandbox by name | Sets phase to `Deleting`, persists, notifies watch bus, then deletes via the compute driver. Cleans up store if the sandbox was already gone. | | `WatchSandbox` | Stream sandbox updates | Server-streaming RPC. See [Watch Sandbox Stream](#watch-sandbox-stream) below. | -| `ExecSandbox` | Execute command in sandbox | Server-streaming RPC. See [Remote Exec via SSH](#remote-exec-via-ssh) below. | +| `ExecSandbox` | Execute command in sandbox | Server-streaming RPC; data plane runs through `SupervisorSessionRegistry::open_relay`. See [Exec Flow](#exec-flow). | + +#### Supervisor Session + +| RPC | Description | +|-----|-------------| +| `ConnectSupervisor` | Persistent bidi stream from the sandbox supervisor. Carries hello/accept/heartbeat/`RelayOpen`/`RelayClose`. One session per sandbox; reconnects supersede. | +| `RelayStream` | Per-invocation bidi byte bridge. 
Supervisor initiates after receiving `RelayOpen`; first frame is `RelayInit { channel_id }`; subsequent frames carry raw bytes. | + +Neither RPC is called by end users. They are the private control/data plane between the gateway and each sandbox supervisor. #### SSH Session Management | RPC | Description | |-----|-------------| -| `CreateSshSession` | Creates a session token for a `Ready` sandbox. Persists an `SshSession` record and returns gateway connection details (host, port, scheme, connect path). | +| `CreateSshSession` | Creates a session token for a `Ready` sandbox. Persists an `SshSession` record and returns gateway connection details (host, port, scheme, connect path). The resulting token is presented on the `/connect/ssh` HTTP CONNECT request. | | `RevokeSshSession` | Marks a session as revoked by setting `session.revoked = true` in the store. | #### Provider Management @@ -244,17 +367,17 @@ Full CRUD for `Provider` objects, which store typed credentials (e.g., API keys #### Policy, Settings, and Provider Environment Delivery -These RPCs are called by sandbox pods at startup and during runtime polling. +These RPCs are called by sandbox supervisors at startup and during runtime polling. | RPC | Description | |-----|-------------| -| `GetSandboxSettings` | Returns effective sandbox config looked up by sandbox ID: policy payload, policy metadata (version, hash, source, `global_policy_version`), merged effective settings, and a `config_revision` fingerprint for change detection. Two-tier resolution: registered keys start unset, sandbox values overlay, global values override. The reserved `policy` key in global settings can override the sandbox's own policy. When a global policy is active, `policy_source` is `GLOBAL` and `global_policy_version` carries the active revision number. See [Gateway Settings Channel](gateway-settings.md). | -| `GetGatewaySettings` | Returns gateway-global settings only (excluding the reserved `policy` key). 
Returns registered keys with empty values when unconfigured, and a monotonic `settings_revision`. | +| `GetSandboxConfig` | Returns effective sandbox config looked up by sandbox ID: policy payload, policy metadata (version, hash, source, `global_policy_version`), merged effective settings, and a `config_revision` fingerprint for change detection. Two-tier resolution: registered keys start unset, sandbox values overlay, global values override. The reserved `policy` key in global settings can override the sandbox's own policy. When a global policy is active, `policy_source` is `GLOBAL` and `global_policy_version` carries the active revision number. See [Gateway Settings Channel](gateway-settings.md). | +| `GetGatewayConfig` | Returns gateway-global settings only (excluding the reserved `policy` key). Returns registered keys with empty values when unconfigured, and a monotonic `settings_revision`. | | `GetSandboxProviderEnvironment` | Resolves provider credentials into environment variables for a sandbox. Iterates the sandbox's `spec.providers` list, fetches each `Provider`, and collects credential key-value pairs. First provider wins on duplicate keys. Skips credential keys that do not match `^[A-Za-z_][A-Za-z0-9_]*$`. | #### Policy Recommendation (Network Rules) -These RPCs support the sandbox-initiated policy recommendation pipeline. The sandbox generates proposals via its mechanistic mapper and submits them; the gateway validates, persists, and manages the approval workflow. See [architecture/policy-advisor.md](policy-advisor.md) for the full pipeline design. +These RPCs support the sandbox-initiated policy recommendation pipeline. The sandbox generates proposals via its mechanistic mapper and submits them; the gateway validates, persists, and manages the approval workflow. See [policy-advisor.md](policy-advisor.md) for the full pipeline design. 
| RPC | Description | |-----|-------------| @@ -327,9 +450,9 @@ The HTTP router (`crates/openshell-server/src/http.rs`) merges two sub-routers: | Path | Method | Response | |------|--------|----------| -| `/connect/ssh` | CONNECT | Upgrades the connection to a bidirectional TCP tunnel to a sandbox pod's SSH port | +| `/connect/ssh` | CONNECT | Upgrades the connection to a bidirectional byte bridge tunneled through `SupervisorSessionRegistry::open_relay` | -See [SSH Tunnel Gateway](#ssh-tunnel-gateway) for details. +See [Connect Flow (SSH Tunnel)](#connect-flow-ssh-tunnel) for details. ### Cloudflare Endpoints @@ -340,7 +463,7 @@ See [SSH Tunnel Gateway](#ssh-tunnel-gateway) for details. ## Watch Sandbox Stream -The `WatchSandbox` RPC (`crates/openshell-server/src/grpc.rs`) provides a multiplexed server-streaming response that can include sandbox status snapshots, gateway log lines, and platform events. +The `WatchSandbox` RPC (`crates/openshell-server/src/grpc/`) provides a multiplexed server-streaming response that can include sandbox status snapshots, gateway log lines, and platform events. ### Request Options @@ -348,7 +471,7 @@ The `WatchSandboxRequest` controls what the stream includes: - `follow_status` -- subscribe to `SandboxWatchBus` notifications and re-read the sandbox record on each change. - `follow_logs` -- subscribe to `TracingLogBus` for gateway log lines correlated by `sandbox_id`. -- `follow_events` -- subscribe to `PlatformEventBus` for Kubernetes events correlated to the sandbox. +- `follow_events` -- subscribe to `PlatformEventBus` for compute-driver platform events correlated to the sandbox. - `log_tail_lines` -- replay the last N log lines before following (default 200). - `stop_on_terminal` -- end the stream when the sandbox reaches the `Ready` phase. Note: `Error` phase does not stop the stream because it may be transient (e.g., `ReconcilerError`). 
@@ -367,8 +490,8 @@ The `WatchSandboxRequest` controls what the stream includes: ```mermaid graph LR - SW["spawn_sandbox_watcher"] - ET["spawn_kube_event_tailer"] + CW["ComputeRuntime watcher"] + PE["Platform events
(driver watch)"] TL["SandboxLogLayer
(tracing layer)"] WB["SandboxWatchBus
(broadcast per ID)"] @@ -377,9 +500,9 @@ graph LR WS["WatchSandbox stream"] - SW -->|"notify(id)"| WB + CW -->|"notify(id)"| WB TL -->|"publish(id, log_event)"| LB - ET -->|"publish(id, platform_event)"| PB + PE -->|"publish(id, platform_event)"| PB WB -->|"subscribe(id)"| WS LB -->|"subscribe(id)"| WS @@ -397,52 +520,6 @@ Broadcast lag is translated to `Status::resource_exhausted` via `broadcast_to_st **Validation:** `WatchSandbox` validates that the sandbox exists before subscribing to any bus, preventing entries from being created for non-existent IDs. `PushSandboxLogs` validates sandbox existence once on the first batch of the stream. -## Remote Exec via SSH - -The `ExecSandbox` RPC (`crates/openshell-server/src/grpc.rs`) executes a command inside a sandbox pod over SSH and streams stdout/stderr/exit back to the client. - -### Execution Flow - -1. Validate request: `sandbox_id`, `command`, and environment key format (`^[A-Za-z_][A-Za-z0-9_]*$`). -2. Verify sandbox exists and is in `Ready` phase. -3. Resolve target: prefer agent pod IP, fall back to Kubernetes service DNS (`..svc.cluster.local`). If the sandbox is not connectable yet (for example the pod exists but has no IP), the gateway returns `FAILED_PRECONDITION` instead of surfacing the condition as an internal server fault. -4. Build the remote command string: sort environment variables, shell-escape all values, prepend `cd &&` if `workdir` is set. -5. **Start a single-use SSH proxy**: binds an ephemeral local TCP port, accepts one connection, performs the NSSH1 handshake with the sandbox, and bidirectionally copies data. -6. **Connect via `russh`**: establishes an SSH connection through the local proxy, authenticates with `none` auth as user `sandbox`, opens a session channel, and executes the command. -7. Stream `ExecSandboxStdout`, `ExecSandboxStderr` chunks as they arrive, then send `ExecSandboxExit` with the exit code. -8. 
On timeout (if `timeout_seconds > 0`), send exit code 124 (matching the `timeout(1)` convention). - -### NSSH1 Handshake Protocol - -The single-use SSH proxy and the SSH tunnel endpoint both use the same handshake: - -``` -NSSH1 \n -``` - -- `token` -- session token or a one-time UUID. -- `timestamp` -- Unix epoch seconds. -- `nonce` -- UUID v4. -- `hmac_signature` -- `HMAC-SHA256(secret, "{token}|{timestamp}|{nonce}")`, hex-encoded. -- Expected response: `OK\n` from the sandbox. - -The `ssh_handshake_skew_secs` configuration controls how much clock skew is tolerated. - -## SSH Tunnel Gateway - -The SSH tunnel endpoint (`crates/openshell-server/src/ssh_tunnel.rs`) allows external SSH clients to reach sandbox pods through the gateway using HTTP CONNECT upgrades. - -### Request Flow - -1. Client sends `CONNECT /connect/ssh` with headers `x-sandbox-id` and `x-sandbox-token`. -2. Handler validates the method is CONNECT, extracts headers. -3. Fetches the `SshSession` from the store by token; rejects if revoked or if `sandbox_id` does not match. -4. Fetches the `Sandbox`; rejects if not in `Ready` phase. -5. Resolves the connect target: agent pod IP if available, otherwise Kubernetes service DNS. -6. Returns `200 OK`, then upgrades the connection via `hyper::upgrade::on()`. -7. In a spawned task: connects to the sandbox's SSH port, performs the NSSH1 handshake, then bidirectionally copies bytes between the upgraded HTTP connection and the sandbox TCP stream. -8. On completion, gracefully shuts down the write-half of the upgraded connection for clean EOF handling. - ## Persistence Layer ### Store Architecture @@ -508,28 +585,33 @@ The Helm chart template is at `deploy/helm/openshell/templates/statefulset.yaml` - **Get / Delete**: Operate by primary key (`id`), filtered by `object_type`. - **List**: Pages by `limit` + `offset` with deterministic ordering: `ORDER BY created_at_ms ASC, name ASC`. 
The secondary sort on `name` prevents unstable ordering when rows share the same millisecond timestamp. -## Kubernetes Integration +## Compute Driver Integration -### Sandbox CRD Management +### Kubernetes Driver `KubernetesComputeDriver` (`crates/openshell-driver-kubernetes/src/driver.rs`) manages `agents.x-k8s.io/v1alpha1/Sandbox` CRDs behind the gateway's compute interface. The gateway binds to that driver through `ComputeDriverService` (`crates/openshell-driver-kubernetes/src/grpc.rs`) in-process, so the same `openshell.compute.v1.ComputeDriver` request and response types are exercised whether the driver is invoked locally or served over gRPC. - **Get**: `GetSandbox` looks up a sandbox CRD by name and returns a driver-native platform observation (`openshell.compute.v1.DriverSandbox`) with raw status and condition data from the object. - **List**: `ListSandboxes` enumerates sandbox CRDs and returns driver-native platform observations for each, sorted by name for stable results. -- **Create**: Translates an internal `openshell.compute.v1.DriverSandbox` message into a Kubernetes `DynamicObject` with labels (`openshell.ai/sandbox-id`, `openshell.ai/managed-by: openshell`) and a spec that includes the pod template, environment variables, and gateway-required env vars (`OPENSHELL_SANDBOX_ID`, `OPENSHELL_ENDPOINT`, `OPENSHELL_SSH_LISTEN_ADDR`, etc.). When callers do not provide custom `volumeClaimTemplates`, the driver injects a default `workspace` PVC and mounts it at `/sandbox` so the default sandbox home/workdir survives pod rescheduling. +- **Create**: Translates an internal `openshell.compute.v1.DriverSandbox` message into a Kubernetes `DynamicObject` with labels (`openshell.ai/sandbox-id`, `openshell.ai/managed-by: openshell`) and a spec that includes the pod template, environment variables, and gateway-required env vars (`OPENSHELL_SANDBOX_ID`, `OPENSHELL_ENDPOINT`, `OPENSHELL_SSH_SOCKET_PATH`, etc.). 
`OPENSHELL_SSH_SOCKET_PATH` is set from the driver's `--sandbox-ssh-socket-path` flag (default `/run/openshell/ssh.sock`) so the in-sandbox SSH daemon binds a Unix socket rather than a TCP port. When callers do not provide custom `volumeClaimTemplates`, the driver injects a default `workspace` PVC and mounts it at `/sandbox` so the default sandbox home/workdir survives pod rescheduling. - **Delete**: Calls the Kubernetes API to delete the CRD by name. Returns `false` if already gone (404). -- **Stop**: `proto/compute_driver.proto` now reserves `StopSandbox` for a non-destructive lifecycle transition. Resume is intentionally not a dedicated compute-driver RPC; the gateway is expected to auto-resume a stopped sandbox when a client connects or executes into it. -- **Pod IP resolution**: `agent_pod_ip()` fetches the agent pod and reads `status.podIP`. +- **Stop**: `proto/compute_driver.proto` reserves `StopSandbox` for a non-destructive lifecycle transition. Resume is intentionally not a dedicated compute-driver RPC; the gateway auto-resumes a stopped sandbox when a client connects or executes into it. -### Sandbox Watcher +The gateway reaches the sandbox exclusively through the supervisor-initiated `ConnectSupervisor` session, so the driver never returns sandbox network endpoints. -The Kubernetes driver emits `WatchSandboxes` events through `proto/compute_driver.proto`. `ComputeRuntime` consumes that stream, translates the driver-native snapshots into public `openshell.v1.Sandbox` resources, derives the public phase, and applies the results to the store. +### VM Driver + +`VmDriver` (`crates/openshell-driver-vm/src/driver.rs`) is served by the standalone `openshell-driver-vm` process. The gateway spawns that binary on demand and talks to it over the internal `openshell.compute.v1.ComputeDriver` gRPC contract via a Unix domain socket. 
+ +- **Create**: The VM driver process allocates a sandbox-specific rootfs from its own embedded `rootfs.tar.zst`, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots the supervisor. +- **Networking**: The helper starts an embedded `gvproxy`, wires it into libkrun as virtio-net, and gives the guest outbound connectivity. No inbound TCP listener is needed — the supervisor reaches the gateway over its outbound `ConnectSupervisor` stream. +- **Gateway callback**: The guest init script configures `eth0` for gvproxy networking, prefers the configured `OPENSHELL_GRPC_ENDPOINT`, and falls back to host aliases or the gvproxy gateway IP (`192.168.127.1`) when local hostname resolution is unavailable on macOS. +- **Guest boot**: The sandbox guest runs a minimal init script that starts `openshell-sandbox` directly as PID 1 inside the VM. +- **Watch stream**: Emits provisioning, ready, error, deleting, deleted, and platform-event updates so the gateway store remains the durable source of truth. -- **Applied**: Extracts the sandbox ID from labels (or falls back to name prefix stripping), reads the CRD status, emits a driver-native snapshot, and lets the gateway translate that into the stored public sandbox record. Notifies the watch bus. -- **Deleted**: Removes the sandbox record from the store and the index. Notifies the watch bus. -- **Restarted**: Re-processes all objects (full resync). +### Compute Runtime -In addition to the watch stream, `ComputeRuntime` periodically calls `ComputeDriver/ListSandboxes` through the in-process `ComputeDriverService` and reconciles the store to that full driver snapshot. Public `GetSandbox` and `ListSandboxes` handlers remain store-backed, but the store is refreshed from the driver on a timer so the gateway still exercises the compute-driver RPC surface for reconciliation. 
+`ComputeRuntime` consumes the driver-native watch stream from `WatchSandboxes`, translates the snapshots into public `openshell.v1.Sandbox` resources, derives the public phase, and applies the results to the store. In parallel, it periodically calls `ListSandboxes` and reconciles the store to the full driver snapshot; public `GetSandbox` and `ListSandboxes` handlers remain store-backed but are refreshed from the driver on a timer. ### Gateway Phase Derivation @@ -546,7 +628,7 @@ In addition to the watch stream, `ComputeRuntime` periodically calls `ComputeDri **Transient reasons** (will retry, stay in `Provisioning`): `ReconcilerError`, `DependenciesNotReady`. All other `Ready=False` reasons are treated as terminal failures (`Error` phase). -### Kubernetes Event Tailer +### Kubernetes Event Correlation The Kubernetes driver also watches namespace-scoped Kubernetes `Event` objects and correlates them to sandbox IDs before emitting them as compute-driver platform events: @@ -556,17 +638,6 @@ The Kubernetes driver also watches namespace-scoped Kubernetes `Event` objects a Matched events are published to the `PlatformEventBus` as `SandboxStreamEvent::Event` payloads. -## VM Driver - -`VmDriver` (`crates/openshell-driver-vm/src/driver.rs`) is served by the standalone `openshell-driver-vm` process. The gateway spawns that binary on demand, talks to it over the internal `openshell.compute.v1.ComputeDriver` gRPC contract via a Unix domain socket, and keeps VM runtime dependencies out of `openshell-server`. - -- **Create**: The VM driver process allocates a localhost SSH port, prepares a sandbox-specific rootfs from its own embedded `rootfs.tar.zst`, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots `/srv/openshell-vm-sandbox-init.sh`. 
-- **Networking**: The helper starts an embedded `gvproxy`, wires it into libkrun as virtio-net, and exposes the single inbound SSH port (`host_port:2222`) through gvproxy’s forwarder API. This keeps VM launch inside `openshell-driver-vm` without depending on the `openshell-vm` binary. -- **Gateway callback**: The guest init script configures `eth0` for gvproxy networking, prefers the configured `OPENSHELL_GRPC_ENDPOINT`, and falls back to host aliases or the gvproxy gateway IP (`192.168.127.1`) when local hostname resolution is unavailable on macOS. -- **Guest boot**: The sandbox guest runs a minimal init script that skips k3s and starts `openshell-sandbox` directly as PID 1 inside the VM. -- **Endpoint resolution**: Returns `127.0.0.1:` for SSH/exec transport. -- **Watch stream**: Emits provisioning, ready, error, deleting, deleted, and platform-event updates so the gateway store remains the durable source of truth. - ## Sandbox Index `SandboxIndex` (`crates/openshell-server/src/sandbox_index.rs`) maintains two in-memory maps protected by an `RwLock`: @@ -574,29 +645,35 @@ Matched events are published to the `PlatformEventBus` as `SandboxStreamEvent::E - `sandbox_name_to_id: HashMap` - `agent_pod_to_id: HashMap` -Updated by the sandbox watcher on every Applied event and by gRPC handlers during sandbox creation. Used by the event tailer to map Kubernetes event objects back to sandbox IDs. +Updated by the compute watcher on every driver observation and by gRPC handlers during sandbox creation. Used by the compute-driver event correlator to map platform events back to sandbox IDs. + +## Observability + +Supervisor session telemetry is currently emitted as plain `tracing` events from `supervisor_session.rs` (accepted, superseded, ended, relay opened/claimed). 
OCSF structured logging on the gateway side is a tracked follow-up -- the `openshell-ocsf` crate needs a `GatewayContext` equivalent to the sandbox's `SandboxContext` before events like `network.activity` or `app.lifecycle` can be emitted here. Sandbox-side OCSF already covers SSH authentication, network decisions, and supervisor lifecycle. ## Error Handling - **gRPC errors**: All gRPC handlers return `tonic::Status` with appropriate codes: - - `InvalidArgument` for missing/malformed fields - - `NotFound` for nonexistent objects - - `AlreadyExists` for duplicate creation - - `FailedPrecondition` for state violations (e.g., exec on non-Ready sandbox, missing provider) - - `Internal` for store/decode/Kubernetes failures - - `PermissionDenied` for policy violations - - `ResourceExhausted` for broadcast lag (missed messages) - - `Cancelled` for closed broadcast channels - -- **HTTP errors**: The SSH tunnel handler returns HTTP status codes directly (`401`, `404`, `405`, `412`, `500`, `502`). + - `InvalidArgument` for missing/malformed fields, including a non-`Init` first frame on `RelayStream`. + - `NotFound` for nonexistent objects, including unknown or expired relay channels on `claim_relay`. + - `AlreadyExists` for duplicate creation. + - `FailedPrecondition` for state violations (e.g., exec on non-Ready sandbox, missing provider). + - `Unavailable` when the supervisor session for a sandbox is not connected within `open_relay`'s wait window, or when the supervisor's outbound channel has closed between lookup and send. + - `DeadlineExceeded` when a pending relay slot is claimed past `RELAY_PENDING_TIMEOUT`, or when `relay_rx` fails to deliver in time. + - `Internal` for store/decode/driver failures and for the `claim_relay` case where the waiter has dropped the oneshot receiver. + - `PermissionDenied` for policy violations. + - `ResourceExhausted` for broadcast lag (missed messages). + - `Cancelled` for closed broadcast channels. 
+ +- **HTTP errors**: The SSH tunnel handler returns HTTP status codes directly (`401`, `404`, `405`, `412`, `429`, `500`, `502`). `502` indicates the supervisor relay could not be opened; `429` indicates a per-token or per-sandbox concurrent-tunnel limit. - **Connection errors**: Logged at `error` level but do not crash the gateway. TLS handshake failures and individual connection errors are caught and logged per-connection. -- **Background task errors**: The sandbox watcher and event tailer log warnings for individual processing failures but continue running. If the watcher stream ends, it logs a warning and the task exits (no automatic restart). +- **Background task errors**: The compute watcher and relay reaper log warnings for individual processing failures but continue running. If the watcher stream ends, it logs a warning and the task exits (no automatic restart). ## Cross-References -- [Sandbox Architecture](sandbox.md) -- sandbox-side policy enforcement, proxy, and isolation details +- [Sandbox Architecture](sandbox.md) -- sandbox-side policy enforcement, supervisor, and the local SSH daemon on the other end of the relay - [Gateway Settings Channel](gateway-settings.md) -- runtime settings channel, two-tier resolution, CLI/TUI commands - [Inference Routing](inference-routing.md) -- end-to-end inference interception flow, sandbox-side proxy logic, and route resolution - [Container Management](build-containers.md) -- how sandbox container images are built and configured diff --git a/architecture/sandbox-connect.md b/architecture/sandbox-connect.md index 9abb0383a..88505c7f1 100644 --- a/architecture/sandbox-connect.md +++ b/architecture/sandbox-connect.md @@ -8,9 +8,23 @@ Sandbox connect provides secure remote access into running sandbox environments. 2. **Command execution** (`sandbox create -- `) -- runs a command over SSH with stdout/stderr piped back 3. 
**File sync** (`sandbox create --upload`) -- uploads local files into the sandbox before command execution -All three modes tunnel SSH traffic through the gateway's multiplexed port using HTTP CONNECT. The gateway authenticates each connection with a short-lived session token, then performs a custom NSSH1 handshake with the sandbox's embedded SSH daemon before bridging raw bytes between client and sandbox. +Gateway connectivity is **supervisor-initiated**: the gateway never dials the sandbox pod. On startup, each sandbox's supervisor opens a long-lived bidirectional gRPC stream (`ConnectSupervisor`) to the gateway and holds it for the sandbox's lifetime. When a client asks the gateway for SSH, the gateway sends a `RelayOpen` message over that stream; the supervisor responds by initiating a `RelayStream` gRPC call that rides the same TCP+TLS+HTTP/2 connection as a new multiplexed stream. The supervisor bridges the bytes of that stream into a root-owned Unix socket where the embedded SSH daemon listens. -There is also a gateway-side `ExecSandbox` gRPC RPC that executes commands inside sandboxes without requiring an external SSH client. This is used for programmatic execution. +There is also a gateway-side `ExecSandbox` gRPC RPC that executes commands inside sandboxes without requiring an external SSH client. It uses the same relay mechanism. + +## Two-Plane Architecture + +The supervisor and gateway maintain two logical planes over **one TCP+TLS connection**, multiplexed by HTTP/2 streams: + +- **Control plane** -- the `ConnectSupervisor` bidirectional gRPC stream. Carries `SupervisorHello`, heartbeats, `RelayOpen`/`RelayClose` requests from the gateway, and `RelayOpenResult`/`RelayClose` replies from the supervisor. Lives for the lifetime of the sandbox supervisor process. +- **Data plane** -- one `RelayStream` bidirectional gRPC call per SSH connect or exec invocation. Each call is a new HTTP/2 stream on the same connection. 
Frames are opaque bytes except for the first frame from the supervisor, which is a typed `RelayInit { channel_id }` used to pair the stream with a pending relay slot on the gateway. + +Running both planes over one HTTP/2 connection means each relay avoids a fresh TLS handshake and benefits from a single authenticated transport boundary. Hyper/h2 `adaptive_window(true)` is enabled on both sides so bulk transfers (large file uploads, long exec stdout) aren't pinned to the default 64 KiB stream window. + +The supervisor-initiated direction gives the model two properties: + +1. The sandbox pod exposes no ingress surface. Network reachability is whatever the supervisor itself can reach outward. +2. Authentication reduces to one place: the existing gateway mTLS channel. There is no second application-layer handshake to design, rotate, or replay-protect. ## Components @@ -18,70 +32,126 @@ There is also a gateway-side `ExecSandbox` gRPC RPC that executes commands insid **File**: `crates/openshell-cli/src/ssh.rs` -Contains the client-side SSH and editor-launch helpers for sandbox connectivity: +Client-side SSH and editor-launch helpers: - `sandbox_connect()` -- interactive SSH shell session - `sandbox_exec()` -- non-interactive command execution via SSH -- `sandbox_rsync()` -- file synchronization via rsync over SSH +- `sandbox_rsync()` -- file synchronization via tar-over-SSH - `sandbox_ssh_proxy()` -- the `ProxyCommand` process that bridges stdin/stdout to the gateway -- OpenShell-managed SSH config helpers -- install a single `Include` entry in - `~/.ssh/config` and maintain generated `Host openshell-` blocks in a - separate OpenShell-owned config file for editor workflows +- OpenShell-managed SSH config helpers -- install a single `Include` entry in `~/.ssh/config` and maintain generated `Host openshell-` blocks in a separate OpenShell-owned config file for editor workflows -These are re-exported from `crates/openshell-cli/src/run.rs` for backward compatibility. 
+Every generated SSH invocation and every entry in the OpenShell-managed `~/.ssh/config` includes `ServerAliveInterval=15` and `ServerAliveCountMax=3`. SSH has no other way to observe that the underlying relay (not the end-to-end TCP socket) has silently dropped, so the client falls back to SSH-level keepalives to surface dead connections within ~45 seconds. + +These helpers are re-exported from `crates/openshell-cli/src/run.rs` for backward compatibility. ### CLI `ssh-proxy` subcommand -**File**: `crates/openshell-cli/src/main.rs` (line ~139, `Commands::SshProxy`) +**File**: `crates/openshell-cli/src/main.rs` (`Commands::SshProxy`) -A top-level CLI subcommand (`ssh-proxy`) that the SSH `ProxyCommand` invokes. It receives `--gateway`, `--sandbox-id`, and `--token` flags, then delegates to `sandbox_ssh_proxy()`. This process has no TTY of its own -- it pipes stdin/stdout directly to the gateway tunnel. +A top-level CLI subcommand (`ssh-proxy`) that the SSH `ProxyCommand` invokes. It receives `--gateway`, `--sandbox-id`, `--token`, and `--gateway-name` flags, then delegates to `sandbox_ssh_proxy()`. This process has no TTY of its own -- it pipes stdin/stdout directly to the gateway tunnel. ### gRPC session bootstrap -**Files**: `proto/openshell.proto`, `crates/openshell-server/src/grpc.rs` +**Files**: `proto/openshell.proto`, `crates/openshell-server/src/grpc/sandbox.rs` Two RPCs manage SSH session tokens: -- `CreateSshSession(sandbox_id)` -- validates the sandbox exists and is `Ready`, generates a UUID token, persists an `SshSession` record, and returns the token plus gateway connection details (host, port, scheme, connect path). +- `CreateSshSession(sandbox_id)` -- validates the sandbox exists and is `Ready`, generates a UUID token, persists an `SshSession` record, and returns the token plus gateway connection details (host, port, scheme, connect path, optional TTL).
- `RevokeSshSession(token)` -- marks the session's `revoked` flag to `true` in the persistence layer. +### Supervisor session registry + +**File**: `crates/openshell-server/src/supervisor_session.rs` + +`SupervisorSessionRegistry` holds: + +- `sessions: HashMap` -- the active `ConnectSupervisor` stream sender for each sandbox, plus a `session_id` that uniquely identifies each registration. +- `pending_relays: HashMap` -- one entry per `RelayOpen` waiting for the supervisor's `RelayStream` to arrive. + +Key operations: + +- `register(sandbox_id, session_id, tx)` -- inserts a new session and returns the previous sender if it superseded one. Used by `handle_connect_supervisor` to accept a new stream. +- `remove_if_current(sandbox_id, session_id)` -- removes only if the stored `session_id` matches. Guards against the supersede race where an old session's cleanup runs after a newer session has already registered. +- `open_relay(sandbox_id, timeout)` -- called by the gateway tunnel and exec handlers. Waits up to `timeout` for a supervisor session to appear (with exponential backoff 100 ms → 2 s), registers a pending relay slot keyed by a fresh `channel_id`, sends `RelayOpen` to the supervisor, and returns a `oneshot::Receiver` that resolves when the supervisor claims the slot. +- `claim_relay(channel_id)` -- called by `handle_relay_stream` when the supervisor's first `RelayFrame::Init` arrives. Removes the pending entry, enforces a 10-second staleness bound (`RELAY_PENDING_TIMEOUT`), creates a 64 KiB `tokio::io::duplex` pair, hands the gateway-side half to the waiter, and returns the supervisor-side half to be bridged against the inbound/outbound `RelayFrame` streams. +- `reap_expired_relays()` -- bounds leaks from pending slots the supervisor never claimed (e.g., supervisor crashed between `RelayOpen` and `RelayStream`). Scheduled every 30 s by `spawn_relay_reaper()` during server startup. 
+ +The `ConnectSupervisor` handler (`handle_connect_supervisor`) validates `SupervisorHello`, assigns a fresh `session_id`, sends `SessionAccepted { heartbeat_interval_secs: 15 }`, spawns a loop that processes inbound messages (`Heartbeat`, `RelayOpenResult`, `RelayClose`), and emits a `GatewayHeartbeat` every 15 seconds. + +### RelayStream handler + +**File**: `crates/openshell-server/src/supervisor_session.rs` (`handle_relay_stream`) + +Accepts one inbound `RelayFrame` to extract `channel_id` from `RelayInit`, claims the pending relay, then runs two concurrent forwarding tasks: + +- **Supervisor → gateway**: drains `RelayFrame::Data` frames and writes the bytes to the supervisor-side end of the duplex pair. +- **Gateway → supervisor**: reads the duplex in `RELAY_STREAM_CHUNK_SIZE` (16 KiB) chunks and emits `RelayFrame::Data` messages back. + +The first frame that isn't `RelayInit` is rejected (`invalid_argument`). Any non-data frame after init closes the relay. + ### Gateway tunnel handler **File**: `crates/openshell-server/src/ssh_tunnel.rs` An Axum route at `/connect/ssh` on the shared gateway port. Handles HTTP CONNECT requests by: -1. Validating the session token and sandbox readiness -2. Resolving the sandbox pod's network address -3. Opening a TCP connection to the sandbox SSH port -4. Performing the NSSH1 handshake -5. Bridging bytes bidirectionally between the HTTP-upgraded connection and the sandbox TCP stream + +1. Validating the session token (present, not revoked, bound to the sandbox id in `X-Sandbox-Id`, not expired). +2. Confirming the sandbox is in `Ready` phase. +3. Enforcing per-token (max 3) and per-sandbox (max 20) concurrent connection limits. +4. Calling `supervisor_sessions.open_relay(sandbox_id, 30s)` -- the 30-second wait covers the supervisor's initial mTLS + `ConnectSupervisor` handshake on a freshly-scheduled pod. +5. Waiting up to 10 seconds for the supervisor to open its `RelayStream` and deliver the gateway-side `DuplexStream`. +6. 
Performing the HTTP CONNECT upgrade on the client connection and calling `copy_bidirectional` between the upgraded client socket and the relay stream. + +There is no gateway-to-sandbox TCP dial, handshake preface, or pod-IP resolution in this path. ### Gateway multiplexing **File**: `crates/openshell-server/src/multiplex.rs` -The gateway runs a single listener that multiplexes gRPC and HTTP on the same port. `MultiplexedService` routes based on the `content-type` header: requests with `application/grpc` go to the gRPC router; all others (including HTTP CONNECT) go to the HTTP router. The HTTP router (`crates/openshell-server/src/http.rs`) merges health endpoints with the SSH tunnel router. +The gateway runs a single listener that multiplexes gRPC and HTTP on the same port. `MultiplexedService` routes based on the `content-type` header: requests with `application/grpc` go to the gRPC router; all others (including HTTP CONNECT) go to the HTTP router. The HTTP router (`crates/openshell-server/src/http.rs`) merges health endpoints with the SSH tunnel router. Hyper is configured with `http2().adaptive_window(true)` so the HTTP/2 stream windows grow under load rather than throttling `RelayStream` to the default 64 KiB window. + +### Sandbox supervisor session + +**File**: `crates/openshell-sandbox/src/supervisor_session.rs` + +`spawn(endpoint, sandbox_id, ssh_socket_path)` starts a background task that: + +1. Opens a gRPC `Channel` to the gateway (`http2_adaptive_window(true)`). The same channel multiplexes the control stream and every relay. +2. Sends `SupervisorHello { sandbox_id, instance_id }` as the first outbound message. +3. Waits for `SessionAccepted` (or fails fast on `SessionRejected`). +4. Runs a loop that reads inbound `GatewayMessage` values and emits `SupervisorHeartbeat` at the accepted interval (min 5 s, usually 15 s). +5. 
On `RelayOpen`, spawns `handle_relay_open()` which opens a new `RelayStream` RPC on the existing channel, sends `RelayInit { channel_id }` as the first frame, dials the local SSH Unix socket, and bridges bytes in both directions in 16 KiB chunks. + +Reconnect policy: the session loop wraps `run_single_session()` with exponential backoff (1 s → 30 s) on any error. A `session_established` / `session_failed` OCSF event is emitted on each attempt. + +The supervisor is a dumb byte bridge with no awareness of the SSH protocol flowing through it. ### Sandbox SSH daemon **File**: `crates/openshell-sandbox/src/ssh.rs` An embedded SSH server built on `russh` that runs inside each sandbox pod. It: -- Generates an ephemeral Ed25519 host key on startup (no persistent key material) -- Validates the NSSH1 handshake preface before starting the SSH protocol -- Accepts any SSH authentication (none or public key) since authorization is handled by the gateway -- Spawns shell processes on a PTY with full sandbox policy enforcement (Landlock, seccomp, network namespace, privilege dropping) -- Supports interactive shells, exec commands, PTY resize, and window change events + +- Generates an ephemeral Ed25519 host key on startup (no persistent key material). +- Listens on a Unix socket (default `/run/openshell/ssh.sock`, see [Unix socket access control](#unix-socket-access-control)). +- Accepts any SSH authentication (none or public key) because authorization is handled upstream by the gateway session token and by filesystem permissions on the socket. +- Spawns shell processes on a PTY with full sandbox policy enforcement (Landlock, seccomp, network namespace, privilege dropping). +- Supports interactive shells, exec commands, PTY resize, window-change events, and loopback-only `direct-tcpip` channels for port forwarding. 
### Gateway-side exec (gRPC) -**File**: `crates/openshell-server/src/grpc.rs` (functions `stream_exec_over_ssh`, `start_single_use_ssh_proxy`, `run_exec_with_russh`) +**File**: `crates/openshell-server/src/grpc/sandbox.rs` (`handle_exec_sandbox`, `stream_exec_over_relay`, `start_single_use_ssh_proxy_over_relay`, `run_exec_with_russh`) The `ExecSandbox` gRPC RPC provides programmatic command execution without requiring an external SSH client. It: -1. Spins up a single-use local TCP proxy that performs the NSSH1 handshake -2. Connects a `russh` client through that proxy -3. Authenticates with `none` auth, opens a channel, sends the command -4. Streams stdout/stderr chunks and exit status back to the gRPC caller + +1. Validates `sandbox_id`, `command`, env keys, and field sizes; confirms the sandbox is `Ready`. +2. Calls `supervisor_sessions.open_relay(sandbox_id, 15s)` -- a shorter wait than connect because exec runs in steady state, not on cold start. +3. Waits up to 10 seconds for the relay `DuplexStream` to arrive. +4. Starts a single-use localhost TCP listener on `127.0.0.1:0` and spawns a task that bridges a single accept to the `DuplexStream` with `copy_bidirectional`. This adapts the `DuplexStream` to something `russh::client::connect_stream` can dial. +5. Connects `russh` to the local proxy, authenticates `none` as user `sandbox`, opens a channel, optionally requests a PTY, and executes the shell-escaped command. +6. Streams `stdout`/`stderr`/`exit` events back to the gRPC caller. + +If `timeout_seconds > 0`, the exec is wrapped in `tokio::time::timeout`. On timeout, exit code 124 is sent (matching the `timeout` command convention). ## Connection Flows @@ -93,104 +163,106 @@ The `sandbox connect` command opens an interactive SSH session. 
sequenceDiagram participant User as User Terminal participant CLI as CLI (sandbox connect) - participant gRPC as Gateway (gRPC) - participant Proxy as CLI (ssh-proxy) - participant GW as Gateway (/connect/ssh) - participant K8s as Pod Resolver - participant SSHD as Sandbox SSH Daemon - - CLI->>gRPC: GetSandbox(name) -> sandbox.id - CLI->>gRPC: CreateSshSession(sandbox_id) - gRPC-->>CLI: token, gateway_host, gateway_port, scheme, connect_path - - Note over CLI: Builds ProxyCommand string
exec()s into ssh process - - User->>Proxy: ssh spawns ProxyCommand subprocess - Proxy->>GW: CONNECT /connect/ssh HTTP/1.1
X-Sandbox-Id, X-Sandbox-Token - GW->>GW: Validate token + sandbox phase - GW->>K8s: Resolve pod IP (or service DNS) - GW->>SSHD: TCP connect to port 2222 - GW->>SSHD: NSSH1 preface (token, ts, nonce, hmac) - SSHD-->>GW: OK - GW-->>Proxy: 200 OK (upgrade) - - Note over Proxy,SSHD: Bidirectional byte stream (SSH protocol) - - Proxy->>SSHD: SSH handshake + auth_none - SSHD-->>Proxy: Auth accepted - Proxy->>SSHD: channel_open + shell_request + participant GW as Gateway + participant Reg as SessionRegistry + participant Sup as Supervisor (sandbox) + participant Sock as SSH Unix socket + participant SSHD as russh daemon + + Note over Sup,GW: On sandbox startup (persistent): + Sup->>GW: ConnectSupervisor stream + SupervisorHello + GW-->>Sup: SessionAccepted{session_id, heartbeat=15s} + + User->>CLI: openshell sandbox connect foo + CLI->>GW: GetSandbox(name) -> sandbox.id + CLI->>GW: CreateSshSession(sandbox_id) + GW-->>CLI: token, gateway_host, gateway_port, scheme, connect_path + + Note over CLI: Builds ProxyCommand string; exec()s ssh + + User->>CLI: ssh spawns ssh-proxy subprocess + CLI->>GW: CONNECT /connect/ssh
X-Sandbox-Id, X-Sandbox-Token + GW->>GW: Validate token + sandbox Ready + GW->>Reg: open_relay(sandbox_id, 30s) + Reg-->>GW: (channel_id, relay_rx) + GW->>Sup: RelayOpen{channel_id} (over ConnectSupervisor) + + Sup->>GW: RelayStream RPC (new HTTP/2 stream) + Sup->>GW: RelayFrame::Init{channel_id} + GW->>Reg: claim_relay(channel_id) -> DuplexStream pair + Reg-->>GW: gateway-side DuplexStream (via relay_rx) + Sup->>Sock: UnixStream::connect(/run/openshell/ssh.sock) + Sock-->>SSHD: connection accepted + + GW-->>CLI: 200 OK (upgrade) + + Note over CLI,SSHD: SSH protocol over:
CLI↔GW (HTTP CONNECT) ↔ RelayStream frames ↔ Sup ↔ Unix socket ↔ SSHD + + CLI->>SSHD: SSH handshake + auth_none + SSHD-->>CLI: Auth accepted + CLI->>SSHD: channel_open + shell_request SSHD->>SSHD: openpty() + spawn /bin/bash -i
(with sandbox policy applied) User<<->>SSHD: Interactive PTY session ``` **Code trace for `sandbox connect`:** -1. `crates/openshell-cli/src/main.rs` -- `SandboxCommands::Connect { name }` dispatches to `run::sandbox_connect()` +1. `crates/openshell-cli/src/main.rs` -- `SandboxCommands::Connect { name }` dispatches to `run::sandbox_connect()`. 2. `crates/openshell-cli/src/ssh.rs` -- `sandbox_connect()` calls `ssh_session_config()`: - - Resolves sandbox name to ID via `GetSandbox` gRPC - - Creates an SSH session via `CreateSshSession` gRPC - - Builds a `ProxyCommand` string: `<openshell-binary> ssh-proxy --gateway <gateway-url> --sandbox-id <sandbox-id> --token <token>` - - If the gateway host is loopback but the cluster endpoint is not, `resolve_ssh_gateway()` overrides the host with the cluster endpoint's host + - Resolves sandbox name to ID via `GetSandbox` gRPC. + - Creates an SSH session via `CreateSshSession` gRPC. + - Builds a `ProxyCommand` string: `<openshell-binary> ssh-proxy --gateway <gateway-url> --sandbox-id <sandbox-id> --token <token> --gateway-name <gateway-name>`. + - If the gateway host is loopback but the cluster endpoint is not, `resolve_ssh_gateway()` overrides the host with the cluster endpoint's host. 3. `sandbox_connect()` builds an `ssh` command with: - - `-o ProxyCommand=...` (the proxy command from step 2) + - `-o ProxyCommand=...` - `-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o GlobalKnownHostsFile=/dev/null` (ephemeral host keys) + - `-o ServerAliveInterval=15 -o ServerAliveCountMax=3` (surface silently-dropped relays in ~45 s) - `-tt -o RequestTTY=force` (force PTY allocation) - - `-o SetEnv=TERM=xterm-256color` (terminal type) + - `-o SetEnv=TERM=xterm-256color` - `sandbox` as the SSH user -4. If stdin is a terminal (interactive), the CLI calls `exec()` (Unix) to replace itself with the `ssh` process, giving SSH direct terminal ownership. Otherwise it spawns and waits. -5. When SSH starts, it spawns the `ssh-proxy` subprocess as its `ProxyCommand`. -6.
`crates/openshell-cli/src/ssh.rs` -- `sandbox_ssh_proxy()`: - - Parses the gateway URL, connects via TCP (plain) or TLS (mTLS) - - Sends a raw HTTP CONNECT request with `X-Sandbox-Id` and `X-Sandbox-Token` headers - - Reads the response status line; proceeds if 200 - - Spawns two `tokio::spawn` tasks for bidirectional copy between stdin/stdout and the gateway stream - - When the remote-to-stdout direction completes, aborts the stdin-to-remote task (SSH has all the data it needs) +4. If stdin is a terminal (interactive), the CLI calls `exec()` (Unix) to replace itself with the `ssh` process. Otherwise it spawns and waits. +5. `sandbox_ssh_proxy()` connects via TCP (plain) or TLS (mTLS) to the gateway, sends a raw HTTP CONNECT request with `X-Sandbox-Id` and `X-Sandbox-Token` headers, and on a 200 response spawns two tasks to copy bytes between stdin/stdout and the tunnel. +6. Gateway-side: `ssh_connect()` in `ssh_tunnel.rs` authorizes the request, opens a relay, waits for the supervisor's `RelayStream`, and bridges the upgraded HTTP connection to the relay with `tokio::io::copy_bidirectional`. +7. Supervisor-side: on `RelayOpen`, `handle_relay_open()` in `crates/openshell-sandbox/src/supervisor_session.rs` opens a `RelayStream` RPC, sends `RelayInit`, dials `/run/openshell/ssh.sock`, and bridges the frames to the Unix socket. ### Command Execution (CLI) The `sandbox exec` path is identical to interactive connect except: -- The SSH command uses `-T -o RequestTTY=no` (no PTY) when `tty=false` -- The command string is passed as the final SSH argument -- The sandbox daemon routes it through `exec_request()` instead of `shell_request()`, spawning `/bin/bash -lc ` + +- The SSH command uses `-T -o RequestTTY=no` (no PTY) when `tty=false`. +- The command string is passed as the final SSH argument. +- The sandbox daemon routes it through `exec_request()` instead of `shell_request()`, spawning `/bin/bash -lc `. 
When `openshell sandbox create` launches a `--no-keep` command or shell, it keeps the CLI process alive instead of `exec()`-ing into SSH so it can delete the sandbox after SSH exits. The default create flow, along with `--forward`, keeps the sandbox running. ### Port Forwarding (`forward start`) -`openshell forward start ` opens a local SSH tunnel so connections to `127.0.0.1:` -on the host are forwarded to `127.0.0.1:` inside the sandbox. +`openshell forward start ` opens a local SSH tunnel so connections to `127.0.0.1:` on the host are forwarded to `127.0.0.1:` inside the sandbox. Because SSH runs over the same relay as interactive connect, no additional proxying machinery is needed. #### CLI - Reuses the same `ProxyCommand` path as `sandbox connect`. - Invokes OpenSSH with `-N -o ExitOnForwardFailure=yes -L :127.0.0.1: sandbox`. -- By default stays attached in foreground until interrupted (Ctrl+C), and prints an early startup - confirmation after SSH stays up through its initial forward-setup checks. -- With `-d`/`--background`, SSH forks after auth and the CLI exits. The PID is - tracked in `~/.config/openshell/forwards/-.pid` along with sandbox id metadata. +- By default stays attached in foreground until interrupted (Ctrl+C), and prints an early startup confirmation after SSH stays up through its initial forward-setup checks. +- With `-d`/`--background`, SSH forks after auth and the CLI exits. The PID is tracked in `~/.config/openshell/forwards/-.pid` along with sandbox id metadata. - `openshell forward stop ` validates PID ownership and then kills a background forward. - `openshell forward list` shows all tracked forwards. -- `openshell forward stop` and `openshell forward list` are local operations and do not require - resolving an active cluster. -- `openshell sandbox create --forward ` starts a background forward before connect/exec, including - when no trailing command is provided. 
+- `openshell forward stop` and `openshell forward list` are local operations and do not require resolving an active cluster.
+- `openshell sandbox create --forward <port>` starts a background forward before connect/exec, including when no trailing command is provided.
 - `openshell sandbox delete` auto-stops any active forwards for the deleted sandbox.
 
 #### TUI
 
-The TUI (`crates/openshell-tui/`) supports port forwarding through the create sandbox modal. Users
-specify comma-separated ports in the **Ports** field. After sandbox creation:
+The TUI (`crates/openshell-tui/`) supports port forwarding through the create sandbox modal. Users specify comma-separated ports in the **Ports** field. After sandbox creation:
 
 1. The TUI polls for `Ready` state (up to 30 attempts at 2-second intervals).
 2. Creates an SSH session via `CreateSshSession` gRPC.
 3. Spawns background SSH tunnels (`ssh -N -f -L <port>:127.0.0.1:<port>`) for each port.
 4. Sends a `ForwardResult` event back to the main loop with the outcome.
 
-Active forwards are displayed in the sandbox table's NOTES column (e.g., `fwd:8080,3000`) and in
-the sandbox detail view's Forwards row.
+Active forwards are displayed in the sandbox table's NOTES column (e.g., `fwd:8080,3000`) and in the sandbox detail view's Forwards row.
 
-When deleting a sandbox, the TUI calls `stop_forwards_for_sandbox()` before sending the delete
-request. PID tracking uses the same `~/.config/openshell/forwards/` directory as the CLI.
+When deleting a sandbox, the TUI calls `stop_forwards_for_sandbox()` before sending the delete request. PID tracking uses the same `~/.config/openshell/forwards/` directory as the CLI.
#### Shared forward module @@ -202,81 +274,55 @@ Port forwarding PID management and SSH utility functions are shared between the - `save_forward_pid()` / `read_forward_pid()` / `remove_forward_pid()` -- PID file I/O - `list_forwards()` -- lists all active forwards from PID files - `stop_forward()` / `stop_forwards_for_sandbox()` -- kills forwarding processes by PID -- `resolve_ssh_gateway()` -- loopback gateway resolution (see Gateway Loopback Resolution) +- `resolve_ssh_gateway()` -- loopback gateway resolution (see [Gateway Loopback Resolution](#gateway-loopback-resolution)) - `shell_escape()` -- safe shell argument escaping for SSH commands - `build_sandbox_notes()` -- builds notes strings (e.g., `fwd:8080,3000`) from active forwards #### Supervisor `direct-tcpip` handling -The sandbox SSH server (`crates/openshell-sandbox/src/ssh.rs`) implements -`channel_open_direct_tcpip` from the russh `Handler` trait. - -- **Loopback-only**: only `127.0.0.1`, `localhost`, and `::1` destinations are accepted. - Non-loopback destinations are rejected (`Ok(false)`) to prevent the sandbox from being - used as a generic proxy. -- **Bridge**: accepted channels spawn a tokio task that connects a `TcpStream` to the - target address and uses `copy_bidirectional` between the SSH channel stream and the - TCP stream. -- No additional state is stored on `SshHandler` — the `Channel` object from russh is - self-contained, so forwarding channels are fully independent of session channels. +The sandbox SSH server (`crates/openshell-sandbox/src/ssh.rs`) implements `channel_open_direct_tcpip` from the russh `Handler` trait. 
-#### Flow - -```mermaid -sequenceDiagram - participant App as Local Application - participant SSH as OpenSSH Client - participant GW as Gateway (CONNECT) - participant SSHD as Sandbox SSH - participant SVC as Service in Sandbox - - SSH->>GW: CONNECT /connect/ssh - GW->>SSHD: TCP + Preface handshake - SSH->>SSHD: direct-tcpip channel (127.0.0.1:port) - SSHD->>SVC: TcpStream::connect(127.0.0.1:port) - App->>SSH: connect to 127.0.0.1:port (local) - SSH->>SSHD: channel data - SSHD->>SVC: TCP data - SVC-->>SSHD: TCP response - SSHD-->>SSH: channel data - SSH-->>App: response -``` +- **Loopback-only**: only `127.0.0.1`, `localhost`, and `::1` destinations are accepted. Non-loopback destinations are rejected (`Ok(false)`) to prevent the sandbox from being used as a generic proxy. +- **Bridge**: accepted channels spawn a tokio task that connects a `TcpStream` to the target address and uses `copy_bidirectional` between the SSH channel stream and the TCP stream. ### Gateway-side Exec (gRPC) -The `ExecSandbox` gRPC RPC bypasses the external SSH client entirely. +The `ExecSandbox` gRPC RPC bypasses the external SSH client entirely while using the same relay plumbing. 
```mermaid sequenceDiagram participant Client as gRPC Client - participant Server as Gateway (gRPC) - participant Proxy as Single-Use TCP Proxy - participant SSHD as Sandbox SSH Daemon - - Client->>Server: ExecSandbox(sandbox_id, command, stdin, timeout) - Server->>Server: Validate sandbox exists + Ready - Server->>Server: Resolve target host:port - Server->>Proxy: Bind 127.0.0.1:0 (ephemeral port) - Proxy->>SSHD: TCP connect + NSSH1 handshake - SSHD-->>Proxy: OK - - Server->>Proxy: russh client connects to 127.0.0.1: - Proxy<<->>SSHD: Bridge bytes bidirectionally - Server->>SSHD: SSH auth_none + channel_open + exec(command) - Server->>SSHD: stdin payload + EOF + participant GW as Gateway + participant Reg as SessionRegistry + participant Sup as Supervisor + participant SSHD as SSH daemon (Unix socket) + + Client->>GW: ExecSandbox(sandbox_id, command, stdin, timeout) + GW->>GW: Validate sandbox exists + Ready + GW->>Reg: open_relay(sandbox_id, 15s) + Reg-->>GW: (channel_id, relay_rx) + GW->>Sup: RelayOpen{channel_id} + + Sup->>GW: RelayStream + RelayInit{channel_id} + GW->>Reg: claim_relay -> DuplexStream + Sup->>SSHD: connect /run/openshell/ssh.sock + + Note over GW: start_single_use_ssh_proxy_over_relay
(127.0.0.1:ephemeral -> DuplexStream)
+
+    GW->>GW: russh client dials 127.0.0.1:<ephemeral_port>
+    GW->>SSHD: SSH auth_none + channel_open + exec(command)
+    GW->>SSHD: stdin payload + EOF
 
     loop Stream output
-        SSHD-->>Server: stdout/stderr chunks
-        Server-->>Client: ExecSandboxEvent (Stdout/Stderr)
+        SSHD-->>GW: stdout/stderr chunks
+        GW-->>Client: ExecSandboxEvent (Stdout/Stderr)
     end
 
-    SSHD-->>Server: ExitStatus
-    Server-->>Client: ExecSandboxEvent (Exit)
+    SSHD-->>GW: ExitStatus
+    GW-->>Client: ExecSandboxEvent (Exit)
 ```
 
-The `start_single_use_ssh_proxy()` function creates a one-shot TCP listener on localhost, accepts a single connection, performs the NSSH1 handshake with the sandbox, then bridges bytes. The `run_exec_with_russh()` function connects through this local proxy, authenticates, executes the command, and streams channel messages to the gRPC response stream.
-
-If `timeout_seconds > 0`, the exec is wrapped in `tokio::time::timeout`. On timeout, exit code 124 is sent (matching the `timeout` command convention).
+`start_single_use_ssh_proxy_over_relay()` exists only as an adapter so `russh::client::connect_stream` can consume the relay `DuplexStream` through an ephemeral TCP listener on `127.0.0.1:0`. It never reaches the network.
 
 ### File Sync
 
@@ -288,10 +334,10 @@ File sync uses **tar-over-SSH**: the CLI streams a tar archive through the exist
 
 When `--upload` is passed to `sandbox create`, the CLI pushes local files into `/sandbox` (or a specified destination) after the sandbox reaches `Ready` and before any command runs.
 
-1. `git_repo_root()` determines the repository root via `git rev-parse --show-toplevel`
-2. `git_sync_files()` lists files with `git ls-files -co --exclude-standard -z` (tracked + untracked, respecting gitignore, null-delimited)
-3. `sandbox_sync_up_files()` creates an SSH session config, spawns `ssh sandbox "tar xf - -C /sandbox"`, and streams a tar archive of the file list to the SSH child's stdin using the `tar` crate
-4.
Files land in `/sandbox` inside the container
+1. `git_repo_root()` determines the repository root via `git rev-parse --show-toplevel`.
+2. `git_sync_files()` lists files with `git ls-files -co --exclude-standard -z` (tracked + untracked, respecting gitignore, null-delimited).
+3. `sandbox_sync_up_files()` creates an SSH session config, spawns `ssh sandbox "tar xf - -C /sandbox"`, and streams a tar archive of the file list to the SSH child's stdin using the `tar` crate.
+4. Files land in `/sandbox` inside the container.
 
 #### `openshell sandbox upload` / `openshell sandbox download`
 
@@ -307,151 +353,95 @@ openshell sandbox download <path> [<dest>]
 
 - **Upload**: `sandbox_upload()` streams a tar archive of the local path to `ssh ... tar xf - -C <dest>` on the sandbox side. Default destination: `/sandbox`.
 - **Download**: `sandbox_download()` runs `ssh ... tar cf - -C <src>` on the sandbox side and extracts the output locally via `tar::Archive`. Default destination: `.` (current directory).
-- No compression for v1 — the SSH tunnel is local-network; compression adds CPU cost with marginal bandwidth savings.
-
-#### Why tar-over-SSH instead of rsync
-
-| | tar-over-SSH | rsync |
-|---|---|---|
-| **Client dependency** | None — `tar` crate is compiled into the CLI | Requires `rsync` installed on the client machine |
-| **Sandbox dependency** | GNU `tar` (present in every base image) | Requires `rsync` installed in the container |
-| **Bidirectional** | Same pipe pattern reversed for push/pull | Needs different invocation or rsync daemon for pull |
-| **Transport complexity** | Single process (`ssh ...
tar xf -`) | Two processes coordinating a delta-transfer protocol through the proxy tunnel | -| **Incremental sync** | Re-sends everything every time | Only transfers changed blocks (faster for repeated syncs of large repos) | -| **Compression** | Uncompressed (can add gzip via `flate2` later) | Built-in `-z` flag | +- No compression for v1 -- the SSH tunnel rides the already-TLS-encrypted gateway connection; compression adds CPU cost with marginal bandwidth savings. -For OpenShell's use case — one-shot or on-demand pushes of project files over a local network tunnel — the incremental sync advantage of rsync is marginal. Eliminating the external dependency and getting clean bidirectional support outweigh the delta-transfer benefit. If repeated rapid re-syncs of large repos become a need (e.g., a watch mode), revisit by adding content-hash-based skip lists or gzip compression. +## Supervisor Session Lifecycle -## NSSH1 Handshake Protocol +Each sandbox has at most one live `ConnectSupervisor` stream at a time. The registry enforces this via `register()`, which overwrites any previous entry. -The NSSH1 ("OpenShell SSH v1") handshake authenticates the gateway to the sandbox daemon, preventing direct pod access from outside the gateway. +### States -### Wire Format - -A single newline-terminated text line: - -``` -NSSH1 \n +```mermaid +stateDiagram-v2 + [*] --> Connecting: spawn() + Connecting --> Rejected: SessionRejected + Connecting --> Live: SessionAccepted + Live --> Live: Heartbeats
RelayOpen/Result
RelayClose + Live --> Disconnected: stream closed / error + Disconnected --> Connecting: backoff (1s..30s) + Rejected --> Connecting: backoff (1s..30s) + Live --> [*]: sandbox exits ``` -| Field | Type | Description | -|-------------|--------|-------------| -| `NSSH1` | string | Magic prefix (protocol version identifier) | -| `token` | string | UUID session token (from `CreateSshSession` for interactive; freshly generated for gateway-side exec) | -| `timestamp` | i64 | Unix epoch seconds at time of generation | -| `nonce` | string | UUID v4, unique per handshake attempt | -| `hmac` | string | Hex-encoded HMAC-SHA256 of `token\|timestamp\|nonce` keyed on the shared secret | +### Hello and accept -### Validation (sandbox side) +The supervisor sends `SupervisorHello { sandbox_id, instance_id }` (where `instance_id` is a fresh UUID per process start) as the first message. The gateway: -**File**: `crates/openshell-sandbox/src/ssh.rs` -- `verify_preface()` +1. Assigns `session_id = Uuid::new_v4()`. +2. Registers the session; any existing entry is evicted and its sender is dropped. +3. Replies with `SessionAccepted { session_id, heartbeat_interval_secs: 15 }`. +4. Spawns `run_session_loop` to process inbound messages and emit gateway heartbeats. -1. Split line on whitespace; reject if not exactly 5 fields or magic is not `NSSH1` -2. Parse timestamp; compute absolute clock skew `|now - timestamp|` -3. Reject if skew exceeds `ssh_handshake_skew_secs` (default: 300 seconds) -4. Recompute HMAC-SHA256 over `token|timestamp|nonce` with the shared secret -5. Compare computed signature against the received signature (constant-time via `hmac` crate) -6. Check nonce against the replay cache; reject if the nonce has been seen before within the skew window -7. Insert the nonce into the replay cache on success -8. 
Respond with `OK\n` on success or `ERR\n` on failure +On any registration failure (e.g., the supervisor's mpsc receiver was already dropped), `remove_if_current` is called with the assigned `session_id` so the cleanup does not evict a newer successful registration. -### Nonce replay detection +### Heartbeats -The SSH server maintains a per-process `NonceCache` (`HashMap` behind `Arc>`) that tracks nonces seen within the handshake skew window. A background tokio task reaps expired entries every 60 seconds. If a valid preface is presented with a previously-seen nonce, the handshake is rejected. This prevents replay attacks within the timestamp validity window. - -### HMAC computation - -Both the gateway (`crates/openshell-server/src/ssh_tunnel.rs` -- `build_preface()`) and the gRPC exec path (`crates/openshell-server/src/grpc.rs` -- `build_preface()`) use identical logic: - -```rust -let payload = format!("{token}|{timestamp}|{nonce}"); -let signature = hmac_sha256(secret.as_bytes(), payload.as_bytes()); -// hmac_sha256 returns hex::encode(Hmac::::finalize()) -``` +Both directions emit heartbeats at the negotiated interval (15 s). Heartbeats are strictly informational -- their purpose is to keep the HTTP/2 connection warm and let each side detect a half-open transport quickly. There is no explicit application-level timeout that kills the session if heartbeats stop; failures are detected when a send fails or when the stream reports EOF / error. -### Read-line safety +### Supersede semantics -Both sides cap the preface line at 1024 bytes and stop reading at `\n` or EOF. This prevents a misbehaving peer from consuming unbounded memory. +If a supervisor restarts (or a network blip forces a new `ConnectSupervisor` call), the gateway sees a second `SupervisorHello` for the same `sandbox_id`. `register()` inserts the new session and returns the old `tx`. 
The old session's `run_session_loop` continues to poll its inbound stream until it errors out, at which point its cleanup calls `remove_if_current(sandbox_id, old_session_id)` -- which does nothing because the stored entry now has the new `session_id`. The newer session stays live. -## Sandbox SSH Daemon Internals +Tests in `supervisor_session.rs` pin this behavior: -### Startup +- `registry_supersedes_previous_session` -- confirms that `register()` returns the prior sender. +- `remove_if_current_ignores_stale_session_id` -- confirms a late cleanup does not evict a newer registration. +- `open_relay_uses_newest_session_after_supersede` -- confirms `RelayOpen` is delivered to the newest session only. -`run_ssh_server()` in `crates/openshell-sandbox/src/ssh.rs`: +### Pending-relay reaper -1. Generates an ephemeral Ed25519 host key using `OsRng` -2. Configures `russh::server::Config` with 1-second auth rejection delay -3. Binds a `TcpListener` on the configured address (default: `0.0.0.0:2222`) -4. Enters an accept loop; each connection is handled in a `tokio::spawn` task +`spawn_relay_reaper(state, 30s)` sweeps `pending_relays` every 30 seconds and removes entries older than `RELAY_PENDING_TIMEOUT` (10 s). This bounds the leak if a supervisor acknowledges `RelayOpen` but crashes before initiating `RelayStream`. -### Connection handling +## Authentication and Security Model -`handle_connection()`: +### Transport authentication -1. Reads and validates the NSSH1 preface (rejects with `ERR\n` on failure) -2. Responds `OK\n` on success -3. Hands the TCP stream to `russh::server::run_stream()` with an `SshHandler` +All gRPC traffic (control plane + data plane + other RPCs) rides one mTLS-authenticated TCP+TLS+HTTP/2 connection from the supervisor to the gateway. Client certificates prove the supervisor's identity; the server certificate proves the gateway's. Nothing sits between the supervisor and the SSH daemon except the Unix socket's filesystem permissions. 
-### Authentication +The CLI continues to authenticate to the gateway with its own mTLS credentials (or Cloudflare bearer token in reverse-proxy deployments) and a per-session token returned by `CreateSshSession`. The session token is enforced at the gateway: token scope (sandbox id), revocation state, and optional expiry are all checked in `ssh_connect()` before `open_relay()` is called. -The `SshHandler` implements `russh::server::Handler`: +### Unix socket access control -- `auth_none()` returns `Auth::Accept` -- any user is accepted -- `auth_publickey()` returns `Auth::Accept` -- any key is accepted +The supervisor creates `/run/openshell/ssh.sock` (path is configurable via the gateway's `sandbox_ssh_socket_path` / supervisor's `--ssh-socket-path` / `OPENSHELL_SSH_SOCKET_PATH`) and: -Authorization is performed by the gateway (token validation + sandbox readiness check) before the SSH protocol starts. The NSSH1 handshake proves the connection came through an authorized gateway. +1. Creates the parent directory if missing and sets it to mode `0700` (root-owned). +2. Removes any stale socket from a previous run. +3. Binds a `UnixListener` on the path. +4. Sets the socket to mode `0600`. -### Shell and exec +The supervisor runs as root; the sandbox workload runs as an unprivileged user. Only the supervisor can connect to the socket. The workload inside the sandbox has no filesystem path by which it can reach the SSH daemon directly. All ingress goes through the relay bridge, which only the supervisor can open (because only the supervisor holds the gateway session). 
-- `shell_request()` calls `start_shell(channel, handle, None)` -- spawns `/bin/bash -i` -- `exec_request()` calls `start_shell(channel, handle, Some(command))` -- spawns `/bin/bash -lc ` -- `pty_request()` stores the PTY dimensions for use when spawning the shell -- `window_change_request()` calls `TIOCSWINSZ` ioctl on the PTY master fd +`handle_connection()` in `crates/openshell-sandbox/src/ssh.rs` hands the Unix stream directly to `russh::server::run_stream` with no preface or handshake layer in between. -### PTY and process management +### Kubernetes NetworkPolicy -`spawn_pty_shell()`: +The sandbox pod needs no gateway-to-sandbox ingress rule; the SSH daemon has no TCP listener. Helm ships an egress policy that constrains what the pod can reach outward -- see [Gateway Security](gateway-security.md). -1. Calls `nix::pty::openpty()` with the requested window size -2. Clones the master fd for reading and writing -3. Configures the shell command with environment variables: - - `OPENSHELL_SANDBOX=1`, `HOME=/sandbox`, `USER=sandbox`, `TERM=` - - Proxy vars: `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY=127.0.0.1,localhost,::1`, `http_proxy`, `https_proxy`, `grpc_proxy`, `no_proxy=127.0.0.1,localhost,::1`, `NODE_USE_ENV_PROXY=1` so Node.js `fetch` honors the proxy env while localhost stays direct - - TLS trust vars: `NODE_EXTRA_CA_CERTS`, `SSL_CERT_FILE`, `REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE` - - Provider credential env vars (from the provider registry) -4. Installs a `pre_exec` hook that: - - Calls `setsid()` to create a new session - - Calls `TIOCSCTTY` to set the slave PTY as the controlling terminal - - Enters the network namespace (`setns(fd, CLONE_NEWNET)`) if configured (Linux only) - - Drops privileges (`initgroups` + `setgid` + `setuid`) per the sandbox policy - - Applies sandbox restrictions (Landlock, seccomp) via `sandbox::apply()` -5. 
Spawns the child process +### What SSH auth does NOT enforce -### I/O threading +The embedded SSH daemon accepts all authentication attempts. This is intentional: -Three threads handle the PTY I/O: +- The gateway already validated the session token and sandbox readiness. +- Unix socket permissions already restrict who can connect to the daemon to the supervisor, and the supervisor only opens the socket in response to a gateway `RelayOpen`. +- SSH key management would add complexity without additional security value in this architecture. -1. **Writer thread** (std::thread) -- receives bytes from `SshHandler::data()` via an `mpsc::channel` and writes them to the PTY master -2. **Reader thread** (std::thread) -- reads from PTY master in 4096-byte chunks, dispatches each chunk to the SSH channel via `handle.data()` on the tokio runtime. Sends EOF when the master returns 0 or errors. Signals completion via a `reader_done_tx` channel. -3. **Exit thread** (std::thread) -- waits for `child.wait()`, then waits for the reader thread to finish (via `reader_done_rx`), then sends `exit_status_request` and `close` on the SSH channel +### Ephemeral host keys -The reader-done synchronization ensures correct SSH protocol ordering: data -> EOF -> exit-status -> close. +The sandbox generates a fresh Ed25519 host key on every startup. The CLI disables `StrictHostKeyChecking` and sets `UserKnownHostsFile=/dev/null` and `GlobalKnownHostsFile=/dev/null` to avoid known-hosts conflicts. ## Sandbox Target Resolution -The gateway and the gRPC exec path both resolve the sandbox's network address using the same logic. - -**File**: `crates/openshell-server/src/ssh_tunnel.rs` (gateway), `crates/openshell-server/src/grpc.rs` (exec) - -Resolution order: -1. If the sandbox has a `status.agent_pod` field, resolve the pod IP via the Kubernetes API (`agent_pod_ip()`) -2. 
Otherwise, construct a cluster-internal DNS name: `..svc.cluster.local` - -The target port is always `config.sandbox_ssh_port` (default: 2222). - -The `ConnectTarget` enum in `ssh_tunnel.rs` encodes both cases: -- `ConnectTarget::Ip(SocketAddr)` -- direct IP from pod resolution -- `ConnectTarget::Host(String, u16)` -- DNS hostname fallback +The gateway does not resolve a sandbox's network address or port. The only identifier that matters is `sandbox_id`, which keys into the supervisor session registry. ## API and Persistence @@ -460,9 +450,11 @@ The `ConnectTarget` enum in `ssh_tunnel.rs` encodes both cases: **Proto**: `proto/openshell.proto` -- `CreateSshSessionRequest` / `CreateSshSessionResponse` Request: + - `sandbox_id` (string) -- the sandbox to connect to Response: + - `sandbox_id` (string) - `token` (string) -- UUID session token - `gateway_host` (string) -- resolved from `Config::ssh_gateway_host` (defaults to bind address if empty) @@ -470,13 +462,16 @@ Response: - `gateway_scheme` (string) -- `"https"` if TLS is configured, otherwise `"http"` - `connect_path` (string) -- from `Config::ssh_connect_path` (default: `/connect/ssh`) - `host_key_fingerprint` (string) -- currently unused (empty) +- `expires_at_ms` (int64) -- session expiry; 0 disables expiry ### RevokeSshSession Request: + - `token` (string) -- session token to revoke Response: + - `revoked` (bool) -- true if a session was found and revoked ### SshSession persistence @@ -493,25 +488,53 @@ Stored in the gateway's persistence layer (SQLite or Postgres) as object type `" | `created_at_ms` | int64 | Creation time (ms since epoch) | | `revoked` | bool | Whether the session has been revoked | | `name` | string | Auto-generated human-friendly name | +| `expires_at_ms` | int64 | Expiry timestamp; 0 means no expiry | + +A background reaper (`spawn_session_reaper`) deletes revoked and expired rows every hour. 
+
+### ConnectSupervisor / RelayStream
+
+**Proto**: `proto/openshell.proto`
+
+- `ConnectSupervisor(stream SupervisorMessage) returns (stream GatewayMessage)`
+- `RelayStream(stream RelayFrame) returns (stream RelayFrame)`
+
+Key messages:
+
+| Message | Direction | Fields |
+|---|---|---|
+| `SupervisorHello` | sup → gw | `sandbox_id`, `instance_id` |
+| `SessionAccepted` | gw → sup | `session_id`, `heartbeat_interval_secs` |
+| `SessionRejected` | gw → sup | `reason` |
+| `SupervisorHeartbeat` | sup → gw | (empty) |
+| `GatewayHeartbeat` | gw → sup | (empty) |
+| `RelayOpen` | gw → sup | `channel_id` (UUID) |
+| `RelayOpenResult` | sup → gw | `channel_id`, `success`, `error` |
+| `RelayClose` | either | `channel_id`, `reason` |
+| `RelayInit` | sup → gw (first `RelayFrame`) | `channel_id` |
+| `RelayFrame` | either | `oneof { RelayInit init, bytes data }` |
 
 ### ExecSandbox
 
 **Proto**: `proto/openshell.proto` -- `ExecSandboxRequest` / `ExecSandboxEvent`
 
 Request:
+
 - `sandbox_id` (string)
 - `command` (repeated string) -- command and arguments
 - `workdir` (string) -- optional working directory
 - `environment` (map<string, string>) -- optional env var overrides (keys validated against `^[A-Za-z_][A-Za-z0-9_]*$`)
 - `timeout_seconds` (uint32) -- 0 means no timeout
 - `stdin` (bytes) -- optional stdin payload
+- `tty` (bool) -- request a PTY
 
 Response stream (`ExecSandboxEvent`):
+
 - `Stdout(data)` -- stdout chunk
 - `Stderr(data)` -- stderr chunk
 - `Exit(exit_code)` -- final exit status (124 on timeout)
 
-The gateway builds the remote command by shell-escaping arguments, prepending sorted env var assignments, and optionally wrapping in `cd <workdir> && ...`.
+The gateway builds the remote command by shell-escaping arguments, prepending sorted env var assignments, and optionally wrapping in `cd <workdir> && ...`. The assembled command is capped at 256 KiB.
## Gateway Loopback Resolution @@ -523,98 +546,98 @@ The override only applies if the cluster endpoint itself is not also a loopback This function is shared between the CLI and TUI via the `openshell-core::forward` module. -## Authentication and Security Model - -### Layered authentication - -1. **mTLS (transport layer)** -- when TLS is configured, the CLI authenticates to the gateway using client certificates. The `ssh-proxy` subprocess inherits TLS options from the parent CLI process. -2. **Session token (application layer)** -- the gateway validates the session token against the persistence layer. Tokens are scoped to a specific sandbox and can be revoked. -3. **NSSH1 handshake (gateway-to-sandbox)** -- the shared handshake secret proves the connection originated from an authorized gateway. The timestamp + nonce prevent replay attacks within the skew window. The nonce replay cache rejects duplicates. -4. **Kubernetes NetworkPolicy** -- a Helm-managed `NetworkPolicy` restricts ingress to sandbox pods on port 2222 to only the gateway pod, preventing lateral movement from other in-cluster workloads. Controlled by `networkPolicy.enabled` in the Helm values (default: `true`). - -### Mandatory handshake secret - -The NSSH1 handshake secret (`OPENSHELL_SSH_HANDSHAKE_SECRET`) is required. Both the server and sandbox will refuse to start if the secret is empty or unset. For cluster deployments the secret is auto-generated by the entrypoint script (`deploy/docker/cluster-entrypoint.sh`) via `openssl rand -hex 32` and injected into the Helm values. - -### What SSH auth does NOT enforce - -The embedded SSH daemon accepts all authentication attempts. 
This is intentional: -- The NSSH1 handshake already proved the connection came through the gateway -- The gateway already validated the session token and sandbox readiness -- SSH key management would add complexity without additional security value in this architecture - -### Ephemeral host keys - -The sandbox generates a fresh Ed25519 host key on every startup. The CLI disables `StrictHostKeyChecking` and sets `UserKnownHostsFile=/dev/null` and `GlobalKnownHostsFile=/dev/null` to avoid known-hosts conflicts. - -## Configuration Reference - -### Gateway configuration - -**File**: `crates/openshell-core/src/config.rs` -- `Config` struct - -| Field | Default | Description | -|----------------------------|------------------|-------------| -| `ssh_gateway_host` | `127.0.0.1` | Public hostname/IP for gateway connections | -| `ssh_gateway_port` | `8080` | Public port for gateway connections (0 = use bind port) | -| `ssh_connect_path` | `/connect/ssh` | HTTP path for CONNECT requests | -| `sandbox_ssh_port` | `2222` | SSH listen port inside sandbox pods | -| `ssh_handshake_secret` | (required) | Shared HMAC key for NSSH1 handshake (server fails to start if empty) | -| `ssh_handshake_skew_secs` | `300` | Maximum allowed clock skew (seconds) | - -### Sandbox environment variables - -These are injected into sandbox pods by the gateway: +## Timeouts -| Variable | Description | -|--------------------------------------|-------------| -| `OPENSHELL_SSH_LISTEN_ADDR` | Address for the embedded SSH server to bind | -| `OPENSHELL_SSH_HANDSHAKE_SECRET` | Shared secret for NSSH1 handshake validation | -| `OPENSHELL_SSH_HANDSHAKE_SKEW_SECS` | Allowed clock skew for handshake timestamp | - -### CLI TLS options - -| Flag / Env Var | Description | -|-----------------------------|-------------| -| `--tls-ca` / `OPENSHELL_TLS_CA` | CA certificate for gateway verification | -| `--tls-cert` / `OPENSHELL_TLS_CERT` | Client certificate for mTLS | -| `--tls-key` / `OPENSHELL_TLS_KEY` | Client 
private key for mTLS | +| Stage | Duration | Where | +|---|---|---| +| Supervisor session wait (SSH connect) | 30 s | `ssh_tunnel::ssh_connect` -> `open_relay` | +| Supervisor session wait (ExecSandbox) | 15 s | `handle_exec_sandbox` -> `open_relay` | +| Wait for supervisor to claim relay | 10 s | `relay_rx` wrapped in `tokio::time::timeout` | +| Pending-relay TTL (reaper) | 10 s | `RELAY_PENDING_TIMEOUT` in registry | +| Session-wait backoff | 100 ms → 2 s | `wait_for_session` | +| Supervisor reconnect backoff | 1 s → 30 s | `run_session_loop` in sandbox supervisor | +| SSH-level keepalive | 15 s × 3 | CLI / managed ssh-config | +| Supervisor heartbeat | 15 s | `HEARTBEAT_INTERVAL_SECS` | +| SSH session reaper sweep | 1 h | `spawn_session_reaper` | +| Pending-relay reaper sweep | 30 s | `spawn_relay_reaper` | ## Failure Modes | Scenario | Status / Behavior | Source | -|----------|-------------------|--------| +|---|---|---| | Missing `X-Sandbox-Id` or `X-Sandbox-Token` header | `401 Unauthorized` | `ssh_tunnel.rs` -- `header_value()` | | Empty header value | `400 Bad Request` | `ssh_tunnel.rs` -- `header_value()` | | Non-CONNECT method on `/connect/ssh` | `405 Method Not Allowed` | `ssh_tunnel.rs` -- `ssh_connect()` | | Token not found in persistence | `401 Unauthorized` | `ssh_tunnel.rs` -- `ssh_connect()` | | Token revoked or sandbox ID mismatch | `401 Unauthorized` | `ssh_tunnel.rs` -- `ssh_connect()` | +| Token expired | `401 Unauthorized` | `ssh_tunnel.rs` -- `ssh_connect()` | | Sandbox not found | `404 Not Found` | `ssh_tunnel.rs` -- `ssh_connect()` | | Sandbox not in `Ready` phase | `412 Precondition Failed` | `ssh_tunnel.rs` -- `ssh_connect()` | -| Pod IP resolution fails | `502 Bad Gateway` | `ssh_tunnel.rs` -- `ssh_connect()` | -| No pod IP and no sandbox name | `412 Precondition Failed` | `ssh_tunnel.rs` -- `ssh_connect()` | -| Persistence read error | `500 Internal Server Error` | `ssh_tunnel.rs` -- `ssh_connect()` | -| NSSH1 handshake rejected by 
sandbox | Tunnel closed; `"sandbox handshake rejected"` logged | `ssh_tunnel.rs` -- `handle_tunnel()` | -| HTTP upgrade failure | `"SSH upgrade failed"` logged; tunnel not established | `ssh_tunnel.rs` -- `ssh_connect()` | -| TCP connection to sandbox fails | Tunnel error logged and closed | `ssh_tunnel.rs` -- `handle_tunnel()` | -| SSH exec timeout | Exit code 124 returned | `grpc.rs` -- `stream_exec_over_ssh()` | +| Per-token or per-sandbox concurrency limit hit | `429 Too Many Requests` | `ssh_tunnel.rs` -- `ssh_connect()` | +| Supervisor session not connected after 30 s | `502 Bad Gateway` | `ssh_tunnel.rs` -- `ssh_connect()` | +| Supervisor failed to claim relay within 10 s | Tunnel closed; `"relay open timed out"` logged | `ssh_tunnel.rs` -- spawned tunnel task | +| Relay channel oneshot dropped | Tunnel closed; `"relay channel dropped"` logged | `ssh_tunnel.rs` -- spawned tunnel task | +| First `RelayFrame` not `RelayInit` or empty `channel_id` | `invalid_argument` on `RelayStream` | `supervisor_session.rs` -- `handle_relay_stream` | +| `RelayStream` arrives after pending entry expired (>10 s) | `deadline_exceeded` | `supervisor_session.rs` -- `claim_relay` | +| Gateway restart during live relay | CLI SSH detects via keepalive within ~45 s; relays are torn down with the TCP connection | CLI `ServerAliveInterval=15`, `ServerAliveCountMax=3` | +| Supervisor restart | Gateway sends on stale mpsc fails; client sees same behavior as gateway restart; supervisor's reconnect loop re-registers | `run_session_loop`, `open_relay` | +| Silently-dropped relay (half-open TCP) | CLI-side SSH keepalives probe every 15 s; session exits with `Broken pipe` after 3 missed probes | SSH client keepalives | +| ExecSandbox timeout | Exit code 124 returned to caller | `stream_exec_over_relay` | +| Command exceeds 256 KiB assembled length | `invalid_argument` | `build_remote_exec_command` | ## Graceful Shutdown ### Gateway tunnel teardown -After `copy_bidirectional` completes (either 
side closes), `handle_tunnel()` calls `AsyncWriteExt::shutdown()` on the upgraded connection to send a clean EOF to the client. This avoids TCP RST and gives SSH time to read remaining protocol data (e.g., exit-status) from its buffer. +After `copy_bidirectional` completes on either side, `ssh_connect()` calls `AsyncWriteExt::shutdown()` on the upgraded client connection so SSH sees a clean EOF and can read any remaining protocol data (e.g., exit-status) before exiting. + +### RelayStream teardown + +The `handle_relay_stream` task half-closes the supervisor-side duplex on inbound EOF so the gateway-side reader sees EOF and terminates its own forwarding task. On the supervisor side, `handle_relay_open` does the symmetric shutdown on the Unix socket after inbound EOF, then drops the outbound mpsc so the gateway observes EOF on the response stream too. -### SSH proxy teardown +### Supervisor session teardown -The `sandbox_ssh_proxy()` function spawns two copy tasks. When the remote-to-stdout task completes, the stdin-to-remote task is aborted. This ensures the proxy exits promptly when the SSH session ends without waiting for the user to type something. +When the sandbox exits, the supervisor process ends, the HTTP/2 connection closes, and all multiplexed streams fail with `stream error`. The gateway's `run_session_loop` observes the error, logs `supervisor session: ended`, and calls `remove_if_current` to deregister. Pending relay slots that never got claimed are swept by `reap_expired_relays` within 30 s. ### PTY reader-exit ordering The sandbox SSH daemon's exit thread waits for the reader thread to finish forwarding all PTY output before sending `exit_status_request` and `close`. This prevents a race where the channel closes before all output has been delivered. 
+## Configuration Reference + +### Gateway configuration + +**File**: `crates/openshell-core/src/config.rs` -- `Config` struct + +| Field | Default | Description | +|---|---|---| +| `ssh_gateway_host` | `127.0.0.1` | Public hostname/IP advertised in `CreateSshSessionResponse` | +| `ssh_gateway_port` | `8080` | Public port for gateway connections (0 = use bind port) | +| `ssh_connect_path` | `/connect/ssh` | HTTP path for CONNECT requests | +| `sandbox_ssh_socket_path` | `/run/openshell/ssh.sock` | Path the supervisor binds its Unix socket on; passed to the sandbox as `OPENSHELL_SSH_SOCKET_PATH` | +| `ssh_session_ttl_secs` | (default in code) | Default TTL applied to new `SshSession` rows; 0 disables expiry | + +### Sandbox environment variables + +These are injected into sandbox pods by the Kubernetes driver (`crates/openshell-driver-kubernetes/src/driver.rs`): + +| Variable | Description | +|---|---| +| `OPENSHELL_SSH_SOCKET_PATH` | Filesystem path for the embedded SSH server's Unix socket (default `/run/openshell/ssh.sock`) | +| `OPENSHELL_ENDPOINT` | Gateway gRPC endpoint; the supervisor uses this to open `ConnectSupervisor` | +| `OPENSHELL_SANDBOX_ID` | Identifier reported in `SupervisorHello` | + +### CLI TLS options + +| Flag / Env Var | Description | +|---|---| +| `--tls-ca` / `OPENSHELL_TLS_CA` | CA certificate for gateway verification | +| `--tls-cert` / `OPENSHELL_TLS_CERT` | Client certificate for mTLS | +| `--tls-key` / `OPENSHELL_TLS_KEY` | Client private key for mTLS | + ## Cross-References - [Gateway Architecture](gateway.md) -- gateway multiplexing, persistence layer, gRPC service details +- [Gateway Security](gateway-security.md) -- mTLS, session tokens, network policy - [Sandbox Architecture](sandbox.md) -- sandbox lifecycle, policy enforcement, network isolation, proxy - [Providers](sandbox-providers.md) -- provider credential injection into SSH shell processes diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 
c7e789cae..571a01569 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -15,7 +15,8 @@ All paths are relative to `crates/openshell-sandbox/src/`. | `opa.rs` | OPA/Rego policy engine using `regorus` crate -- network evaluation, sandbox config queries, L7 endpoint queries | | `process.rs` | `ProcessHandle` for spawning child processes, privilege dropping, signal handling | | `proxy.rs` | HTTP CONNECT proxy with OPA evaluation, process-identity binding, inference interception, and L7 dispatch | -| `ssh.rs` | Embedded SSH server (`russh` crate) with PTY support and handshake verification | +| `ssh.rs` | Embedded SSH server (`russh` crate) listening on a Unix socket, with PTY support | +| `supervisor_session.rs` | Persistent outbound `ConnectSupervisor` gRPC session to the gateway; bridges `RelayStream` calls to the local SSH daemon's Unix socket | | `identity.rs` | `BinaryIdentityCache` -- SHA256 trust-on-first-use binary integrity | | `procfs.rs` | `/proc` filesystem reading for TCP peer identity resolution and ancestor chain walking | | `grpc_client.rs` | gRPC client for fetching policy, provider environment, inference route bundles, policy polling/status reporting, proposal submission, and log push (`CachedOpenShellClient`) | @@ -64,9 +65,12 @@ flowchart TD L --> L2[Spawn bypass monitor] L2 --> N{SSH enabled?} M --> N - N -- Yes --> O[Spawn SSH server task] - N -- No --> P[Spawn child process] - O --> P + N -- Yes --> O[Spawn SSH server task on Unix socket] + N -- No --> P0{gRPC mode + socket?} + O --> P0 + P0 -- Yes --> P1[Spawn supervisor session task] + P0 -- No --> P[Spawn child process] + P1 --> P P --> Q[Store entrypoint PID] Q --> R{gRPC mode?} R -- Yes --> T[Spawn policy poll task] @@ -109,7 +113,9 @@ flowchart TD - Build `InferenceContext` via `build_inference_context()` which resolves routes from one of two sources (see [Inference routing context](#inference-routing-context) below) - `ProxyHandle::start_with_bind_addr()` binds a 
`TcpListener` and spawns an accept loop, passing the inference context to each connection handler -8. **SSH server** (optional): If `--ssh-listen-addr` is provided, spawn an async task running `ssh::run_ssh_server()` with the policy, workdir, netns FD, proxy URL, CA paths, and provider env. +8. **SSH server** (optional): If `--ssh-socket-path` is provided, spawn an async task running `ssh::run_ssh_server()` with the policy, workdir, netns FD, proxy URL, CA paths, and provider env. The value is a filesystem path to the Unix socket the embedded sshd binds. The supervisor waits on a readiness `oneshot` channel before proceeding so that exec requests arriving immediately after pod-ready cannot race against socket bind. + +8a. **Supervisor session** (gRPC mode + SSH socket only): If `--sandbox-id`, `--openshell-endpoint`, and an SSH socket path are all set, spawn `supervisor_session::spawn()`. This task opens a persistent outbound bidirectional gRPC stream to the gateway and bridges inbound relay requests to the local SSH daemon. See [Supervisor Session](#supervisor-session) for the full protocol. 9. **Child process spawning** (`ProcessHandle::spawn()`): - Build `tokio::process::Command` with inherited stdio and `kill_on_drop(true)` @@ -1313,30 +1319,24 @@ Exit code is `code` if the process exited normally, or `128 + signal` if killed **File:** `crates/openshell-sandbox/src/ssh.rs` -The embedded SSH server provides remote shell access to the sandbox. It uses the `russh` crate and allocates PTYs for interactive sessions. +The embedded SSH server provides remote shell access to the sandbox. It uses the `russh` crate and allocates PTYs for interactive sessions. The daemon listens on a **Unix domain socket** rather than a TCP port -- the gateway never dials the sandbox pod directly. All SSH traffic arrives through the [supervisor session](#supervisor-session)'s `RelayStream` RPC, which the supervisor bridges into the socket. ### Startup -`run_ssh_server()`: -1. 
Generate an ephemeral Ed25519 host key via `russh::keys::PrivateKey::random()` -2. Bind a `TcpListener` to the configured address -3. Accept connections in a loop, spawning per-connection handlers - -### Handshake verification +`ssh_server_init()` (called from `run_ssh_server()`): -Before the SSH protocol begins, the server reads a preface line: +1. Generate an ephemeral Ed25519 host key via `russh::keys::PrivateKey::random()`. +2. Ensure the socket's parent directory exists and is owned by root with mode `0700`. The sandbox entrypoint runs as an unprivileged user, so it cannot enter this directory. +3. Remove any stale socket file from a prior run, then `UnixListener::bind(listen_path)`. +4. Set the socket file's mode to `0600` so only the supervisor (root) can connect to it. +5. Signal readiness back to `lib.rs` via a `oneshot` channel. +6. Accept connections in a loop and spawn `handle_connection()` per connection. -``` -NSSH1 {token} {timestamp} {nonce} {hmac_hex}\n -``` +The socket path is taken from `--ssh-socket-path` / `OPENSHELL_SSH_SOCKET_PATH`. The Kubernetes compute driver sets this to `/run/openshell/ssh.sock` by default (see `crates/openshell-driver-kubernetes/src/main.rs`); the VM driver pins it to the same path inside the guest. -`verify_preface()`: -1. Verify magic is `NSSH1` and exactly 5 fields -2. Verify `|now - timestamp|` is within `--ssh-handshake-skew-secs` (default 300s) -3. Compute `HMAC-SHA256(secret, "{token}|{timestamp}|{nonce}")` and compare with `{hmac_hex}` -4. Send `OK\n` on success, `ERR\n` on failure +### Access control -This pre-SSH handshake authenticates the gateway-to-sandbox tunnel. After it succeeds, the SSH session uses permissive authentication (`auth_none` and `auth_publickey` both return `Accept`) since the transport is already verified. +The filesystem permissions on the parent directory (`0700`) and the socket itself (`0600`) are the sole authentication boundary. 
Only the supervisor, which runs as root inside the container, can open the socket. The sandboxed entrypoint process -- dropped to the unprivileged `sandbox` user and further constrained by Landlock -- cannot reach `/run/openshell/` at all. Consequently, the SSH session handler's `auth_none` and `auth_publickey` callbacks both return `Auth::Accept` unconditionally; any byte stream that reaches the daemon has already passed the trust check via the socket's permission bits. ### Shell/exec handling @@ -1364,6 +1364,90 @@ The `SshHandler` implements `russh::server::Handler`: - **Reader thread**: Reads from PTY master, sends SSH channel data, sends EOF when done, signals the exit thread - **Exit thread**: Waits for child to exit, waits for reader to finish (ensures correct SSH protocol ordering: data -> EOF -> exit-status -> close), sends exit status and closes the channel +## Supervisor Session + +**File:** `crates/openshell-sandbox/src/supervisor_session.rs` + +The sandbox pod has no inbound network surface. Instead, the supervisor opens a single persistent outbound gRPC stream to the gateway and the gateway uses that stream to request on-demand byte relays back into the sandbox. All SSH connect traffic and `ExecSandbox` calls ride this connection -- there is no reverse HTTP CONNECT, no TCP listener on the pod, and no per-session TLS handshake. 
+ +### Connection model + +```mermaid +sequenceDiagram + participant S as Supervisor (sandbox) + participant GW as Gateway + participant SSHD as Local sshd (Unix socket) + participant Client as Operator / CLI + + S->>GW: ConnectSupervisor stream (mTLS, HTTP/2) + S->>GW: SupervisorMessage::Hello{sandbox_id, instance_id} + GW-->>S: GatewayMessage::SessionAccepted{session_id, heartbeat_interval_secs} + loop Heartbeats (max(accepted.heartbeat_interval_secs, 5)) + S-->>GW: SupervisorHeartbeat + GW-->>S: GatewayHeartbeat + end + + Client->>GW: sandbox connect / ExecSandbox + GW-->>S: GatewayMessage::RelayOpen{channel_id} + S->>GW: RelayStream RPC (new HTTP/2 stream on same Channel) + S->>GW: RelayFrame::Init{channel_id} + S->>SSHD: UnixStream::connect(ssh_socket_path) + loop Relay active + GW-->>S: RelayFrame::Data (raw SSH bytes from operator) + S->>SSHD: write_all + SSHD-->>S: read chunk (up to 16 KiB) + S-->>GW: RelayFrame::Data + end +``` + +One TCP+TLS+HTTP/2 connection carries both the long-lived control stream and every concurrent relay. The sandbox-side `Endpoint` uses `adaptive_window(true)` so HTTP/2 flow control does not throttle bulk transfers (SFTP, `sandbox rsync`) to the 64 KiB default window. + +### Session lifecycle + +`spawn(endpoint, sandbox_id, ssh_socket_path)` launches `run_session_loop()`, which runs for the lifetime of the supervisor: + +1. **Connect**: `grpc_client::connect_channel_pub(endpoint)` builds an mTLS `tonic::transport::Channel`. The same `Channel` is cloned into every subsequent `RelayStream` call so no additional TLS handshakes occur. +2. **Hello**: The supervisor sends `SupervisorMessage::Hello { sandbox_id, instance_id }` as the first envelope, where `instance_id` is a fresh UUID per session. The gateway uses the sandbox ID and instance ID to supersede a stale prior session (see [Supersede](#session-supersede)). +3. **Wait for `SessionAccepted` / `SessionRejected`**: If rejected, the loop returns an error and backs off. 
On accept, the supervisor clamps `heartbeat_interval_secs` to a minimum of 5 seconds. +4. **Main select loop**: Concurrently reads inbound `GatewayMessage`s and fires heartbeat ticks. Inbound `Heartbeat` messages are acknowledged by the supervisor's outbound heartbeat cadence; `RelayOpen` and `RelayClose` are dispatched to `handle_gateway_message()`. +5. **Reconnect**: Any error in the session (stream error, connect failure, rejected hello) is reported as an OCSF event and the loop sleeps with exponential backoff (`INITIAL_BACKOFF = 1s`, doubled up to `MAX_BACKOFF = 30s`) before redialing. + +### Relay bridge loop + +`handle_gateway_message()` is a synchronous dispatcher. When a `RelayOpen { channel_id }` arrives, it spawns a dedicated task running `handle_relay_open()`. That task: + +1. Creates an outbound `mpsc::channel::<RelayFrame>(16)` wrapped in a `ReceiverStream`. +2. Sends `RelayFrame { payload: RelayInit { channel_id } }` as the first frame -- this claims the matching pending-relay slot on the gateway. +3. Calls `OpenShellClient::relay_stream(outbound)` on the shared `Channel`. This opens a new HTTP/2 stream on the existing connection -- no new TCP or TLS handshake. +4. `UnixStream::connect(ssh_socket_path)` dials the local sshd. The split read/write halves become the local endpoints of the bridge. +5. Spawns a task that reads from the Unix socket in 16 KiB chunks (`RELAY_CHUNK_SIZE`, matching the default HTTP/2 frame size) and forwards each chunk as `RelayFrame::Data` on the outbound stream. +6. The main loop drains inbound `RelayFrame::Data` messages and writes them to the socket. Non-data inbound frames (e.g. a second `Init`) are treated as protocol errors. +7. On any side closing, the bridge calls `ssh_w.shutdown()` to propagate EOF, drops the outbound sender to close the gRPC stream, and joins the reader task. + +The supervisor has no SSH or HTTP awareness -- it is purely a byte bridge.
The protocol on top of the relay is whatever the gateway's caller (interactive `sandbox connect`, `ExecSandbox`, `rsync`-over-ssh) speaks to the sshd. + +### Session supersede + +If the gateway restarts or the sandbox restarts and reconnects with a new `instance_id` for the same `sandbox_id`, the gateway atomically replaces any prior session it has recorded. The new supervisor continues normally; the old stream (if still live on the gateway side) is torn down by the gateway's `remove_if_current` logic. Supervisors never need to coordinate between themselves -- each just keeps trying to connect, and the most recent `Hello` wins. + +If the gateway closes the stream cleanly (`inbound.message()` returns `Ok(None)`), `run_single_session` returns `Ok(())` and a `session_closed` event is emitted. Otherwise the loop reconnects. + +### OCSF telemetry + +Every session and relay transition emits an OCSF `NetworkActivity` event via `ocsf_emit!()` so operators can audit the control-plane connection from the sandbox's own logs. All events are built in `supervisor_session.rs` and covered by unit tests in the `ocsf_event_tests` module. + +| Helper | `activity_id` | `severity` | `status` | Fires when | +|--------|---------------|------------|----------|------------| +| `session_established_event` | `Open` | `Informational` | `Success` | After `SessionAccepted`, includes `session_id` and `heartbeat_secs` in the message | +| `session_closed_event` | `Close` | `Informational` | `Success` | Gateway closed the stream cleanly (`Ok(None)`) | +| `session_failed_event` | `Fail` | `Low` | `Failure` | Connect failed, hello rejected, or stream errored. 
Includes reconnect attempt counter | +| `relay_open_event` | `Open` | `Informational` | `Success` | `RelayOpen` received from the gateway | +| `relay_closed_event` | `Close` | `Informational` | `Success` | Relay bridge task exited without error | +| `relay_failed_event` | `Fail` | `Low` | `Failure` | Bridge task returned an error (e.g., socket write failure, inbound non-data frame) | +| `relay_close_from_gateway_event` | `Close` | `Informational` | -- | Gateway sent an explicit `RelayClose` on the control stream, with its `reason` | + +The `dst_endpoint` on session events is parsed from the gateway URI by `ocsf_gateway_endpoint()`. Relay events omit a destination (the bridge is sandbox-internal). + ## Zombie Reaping (PID 1 Init Duties) `openshell-sandbox` runs as PID 1 inside the container. In Linux, when a process exits, its parent must call `waitpid()` to collect the exit status; otherwise the process remains as a zombie. Orphaned processes (whose parent exits first) are reparented to PID 1, which becomes responsible for reaping them. @@ -1394,9 +1478,7 @@ This two-phase approach (peek with `WNOWAIT`, then selectively reap) avoids `ECH | `OPENSHELL_LOG_LEVEL` | `--log-level` | `warn` | Log level (trace/debug/info/warn/error) | | `OPENSHELL_POLICY_POLL_INTERVAL_SECS` | | `30` | Poll interval for gRPC policy updates (seconds). Only active in gRPC mode. | | `OPENSHELL_LOG_PUSH_LEVEL` | | `info` | Maximum tracing level for log push to gateway. Events above this level are not streamed. Only active in gRPC mode. | -| `OPENSHELL_SSH_LISTEN_ADDR` | `--ssh-listen-addr` | | SSH server bind address | -| `OPENSHELL_SSH_HANDSHAKE_SECRET` | `--ssh-handshake-secret` | | HMAC secret for SSH handshake | -| `OPENSHELL_SSH_HANDSHAKE_SKEW_SECS` | `--ssh-handshake-skew-secs` | `300` | Allowed clock skew for handshake | +| `OPENSHELL_SSH_SOCKET_PATH` | `--ssh-socket-path` | | Filesystem path to the Unix socket the embedded sshd binds (e.g. `/run/openshell/ssh.sock`). 
| | `OPENSHELL_INFERENCE_ROUTES` | `--inference-routes` | | Path to YAML inference routes file for standalone routing | ### Injected into child process @@ -1473,7 +1555,15 @@ The sandbox uses `miette` for error reporting and `thiserror` for typed errors. | Credential injection: path credential contains traversal/separator | HTTP 500, connection closed (fail-closed) | | Credential injection: percent-encoded placeholder bypass attempt | HTTP 500, connection closed (fail-closed) | | L7 parse error | Close the connection | -| SSH server failure | Async task error logged, main process unaffected | +| SSH socket bind failure | Fatal -- reported through the readiness channel and aborts startup | +| SSH server accept failure | Async task error logged, main process unaffected | +| Supervisor session: connect failure | Emit `session_failed` OCSF event, sleep with exponential backoff (1s -> 30s) and reconnect | +| Supervisor session: `SessionRejected` | Emit `session_failed` event with rejection reason; backoff and reconnect | +| Supervisor session: stream error mid-session | Emit `session_failed` event; backoff and reconnect | +| Supervisor session: gateway closes stream cleanly | Emit `session_closed` event and exit the task (no reconnect) | +| Relay bridge: `RelayStream` RPC failure | Emit `relay_failed` event; the individual relay is abandoned, the session stays up | +| Relay bridge: Unix socket connect failure | Emit `relay_failed` event; gateway observes EOF on the RelayStream | +| Relay bridge: non-data inbound frame after Init | Emit `relay_failed` event with protocol error | | Process timeout | Kill process, return exit code 124 | ## Logging @@ -1486,6 +1576,7 @@ Key structured log events: - `CONNECT`: One per proxy CONNECT request (for non-`inference.local` targets) with full identity context. Inference interception failures produce a separate `info!()` log with `action=deny` and the denial reason. 
- `BYPASS_DETECT`: One per detected direct connection attempt that bypassed the HTTP CONNECT proxy. Includes destination, protocol, process identity (best-effort), and remediation hint. Emitted at `warn` level. - `L7_REQUEST`: One per L7-inspected request with method, path, and decision +- Supervisor session / relay OCSF events: `session_established`, `session_closed`, `session_failed`, `relay_open`, `relay_closed`, `relay_failed`, `relay_close_from_gateway` (see [Supervisor Session](#supervisor-session)). - Sandbox lifecycle events: process start, exit, namespace creation/cleanup, bypass rule installation - Policy reload events: new version detected, reload success/failure, status report outcomes diff --git a/architecture/system-architecture.md b/architecture/system-architecture.md index 5ea92064e..5c7fcdcf7 100644 --- a/architecture/system-architecture.md +++ b/architecture/system-architecture.md @@ -28,8 +28,7 @@ graph TB subgraph GatewayPod["Gateway StatefulSet"] Gateway["openshell-server
:8080
(gRPC + HTTP, mTLS)"] SQLite[("SQLite DB
/var/openshell/
openshell.db")] - SandboxWatcher["Sandbox Watcher"] - KubeEventTailer["Kube Event Tailer"] + SupRegistry["SupervisorSessionRegistry
(live sessions + pending relays)"] WatchBus["SandboxWatchBus
(in-memory broadcast)"] LogBus["TracingLogBus
(in-memory broadcast)"] end @@ -37,7 +36,8 @@ graph TB subgraph SandboxPod["Sandbox Pod (1 per sandbox)"] subgraph Supervisor["Sandbox Supervisor
(privileged user)"] - SSHServer["Embedded SSH
Server (russh)
:2222"] + SSHServer["Embedded SSH
Server (russh)
Unix socket
/run/openshell/ssh.sock"] + RelayBridge["Relay Bridge
(ConnectSupervisor +
RelayStream client)"] Proxy["HTTP CONNECT
Proxy
10.200.0.1:3128"] OPA["OPA Policy Engine
(regorus, in-process)"] InferenceRouter["Inference Router
(openshell-router)"] @@ -101,10 +101,16 @@ graph TB %% CONNECTIONS: Gateway internals %% ============================================================ Gateway --> SQLite + Gateway --> SupRegistry Gateway -- "Watch + CRUD
Sandbox CRDs" --> KubeAPI - SandboxWatcher -- "status changes" --> WatchBus - KubeEventTailer -- "K8s events" --> Gateway - Gateway -- "NSSH1 handshake
(HMAC-SHA256) + SSH
:2222" --> SSHServer + KubeAPI -- "compute-driver events
(status, platform events)" --> Gateway + + %% ============================================================ + %% CONNECTIONS: Supervisor session (inbound from sandbox) + %% ============================================================ + RelayBridge -- "ConnectSupervisor
(persistent bidi stream)" --> SupRegistry + RelayBridge -- "RelayStream
(per-invocation byte bridge,
same HTTP/2 connection)" --> SupRegistry + RelayBridge -- "Unix socket
SSH bytes" --> SSHServer %% ============================================================ %% CONNECTIONS: CRD Controller @@ -123,7 +129,7 @@ graph TB %% ============================================================ %% CONNECTIONS: Sandbox --> Gateway (control plane) %% ============================================================ - Supervisor -- "gRPC (mTLS):
GetSandboxSettings
(policy + settings),
GetProviderEnvironment,
GetInferenceBundle,
PushSandboxLogs" --> Gateway + Supervisor -- "gRPC (mTLS):
GetSandboxConfig
(policy + settings),
GetProviderEnvironment,
GetInferenceBundle,
PushSandboxLogs" --> Gateway %% ============================================================ %% CONNECTIONS: Sandbox --> External (via proxy) @@ -145,9 +151,9 @@ graph TB K3s -- "pulls images
at runtime" --> GHCR %% ============================================================ - %% FILE SYNC + %% CLIENT SSH / EXEC (bytes tunneled via supervisor relay) %% ============================================================ - CLI -- "tar-over-SSH
(file sync)" --> SSHServer + CLI -- "HTTP CONNECT /connect/ssh
+ tar-over-SSH file sync
(bytes bridged through
SupervisorSessionRegistry)" --> Gateway %% ============================================================ %% STYLES @@ -164,8 +170,8 @@ graph TB classDef config fill:#90A4AE,stroke:#607D8B,color:#fff class CLI,TUI,SDK userComponent - class Gateway,SandboxWatcher,KubeEventTailer,WatchBus,LogBus gateway - class SSHServer,Proxy,OPA,InferenceRouter,CertCache sandbox + class Gateway,SupRegistry,WatchBus,LogBus gateway + class SSHServer,RelayBridge,Proxy,OPA,InferenceRouter,CertCache sandbox class Agent,Landlock,Seccomp,NetNS agent class SQLite datastore class Anthropic,OpenAI,NVIDIA_API,GitHub,GitLab,PyPI,NPM,LMStudio,VLLM,GHCR external @@ -189,12 +195,14 @@ graph TB 1. **CLI/SDK to Gateway**: All control-plane traffic uses gRPC over HTTPS with mutual TLS (mTLS). Single multiplexed port (8080 inside cluster, 30051 NodePort). -2. **SSH Access**: CLI connects via HTTP CONNECT upgrade at `/connect/ssh`. Gateway authenticates with session token, then bridges to sandbox SSH (port 2222) using NSSH1 HMAC-SHA256 handshake. +2. **Supervisor Session (inbound from sandbox)**: Each sandbox supervisor opens a persistent `ConnectSupervisor` bidi gRPC stream to the gateway over mTLS. The gateway tracks these in `SupervisorSessionRegistry`. When SSH or exec access is needed, the gateway sends `RelayOpen { channel_id }` on that stream; the supervisor responds by initiating a `RelayStream` RPC on the same HTTP/2 connection whose first frame is a `RelayInit { channel_id }`. Subsequent frames carry raw bytes in both directions. The gateway never dials the sandbox pod. + +3. **SSH / Exec Access**: CLI connects via HTTP CONNECT upgrade at `/connect/ssh` (or calls `ExecSandbox` gRPC). The gateway authenticates, calls `open_relay`, and bridges the client bytes through the supervisor's `RelayStream` to the supervisor's in-sandbox SSH daemon, which binds to a Unix socket (`/run/openshell/ssh.sock`) rather than a TCP port. -3. 
**File Sync**: tar archives streamed over the SSH tunnel (no rsync dependency). +4. **File Sync**: tar archives streamed over the relay-tunneled SSH session (no rsync dependency). -4. **Sandbox to External**: All agent outbound traffic is forced through the HTTP CONNECT proxy (10.200.0.1:3128) via a network namespace veth pair. OPA/Rego policies evaluate every connection. TLS is automatically detected and terminated for credential injection; endpoints with `protocol` configured also get L7 request-level inspection. +5. **Sandbox to External**: All agent outbound traffic is forced through the HTTP CONNECT proxy (10.200.0.1:3128) via a network namespace veth pair. OPA/Rego policies evaluate every connection. TLS is automatically detected and terminated for credential injection; endpoints with `protocol` configured also get L7 request-level inspection. -5. **Inference Routing**: Inference requests are handled inside the sandbox by the openshell-router (not through the gateway). The gateway provides route configuration and credentials via gRPC; the sandbox executes HTTP requests directly to inference backends. +6. **Inference Routing**: Inference requests are handled inside the sandbox by the openshell-router (not through the gateway). The gateway provides route configuration and credentials via gRPC; the sandbox executes HTTP requests directly to inference backends. -6. **Sandbox to Gateway**: The sandbox supervisor uses gRPC (mTLS) to fetch policies and runtime settings (via `GetSandboxSettings`), provider credentials, inference bundles, and to push logs back to the gateway. The settings channel delivers typed key-value pairs alongside policy through a unified poll loop. +7. **Sandbox to Gateway (control plane)**: The sandbox supervisor uses gRPC (mTLS) to fetch policies and runtime settings (via `GetSandboxConfig`), provider credentials, inference bundles, and to push logs back to the gateway. 
The settings channel delivers typed key-value pairs alongside policy through a unified poll loop. This reuses the same mTLS connection that carries `ConnectSupervisor`. From 264ebb15e04691b96d7bafa1ded6ef2b214cff97 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 16:10:39 -0700 Subject: [PATCH 18/20] style: cargo fmt --- crates/openshell-sandbox/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 226cccb57..ef5220a7a 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -604,8 +604,7 @@ pub async fn run_sandbox( } }); - let ssh_socket_path: Option<std::path::PathBuf> = - ssh_socket_path.map(std::path::PathBuf::from); + let ssh_socket_path: Option<std::path::PathBuf> = ssh_socket_path.map(std::path::PathBuf::from); if let Some(listen_path) = ssh_socket_path.clone() { let policy_clone = policy.clone(); let workdir_clone = workdir.clone(); From 482980bd4bd3355235a449d6bda9fce14c8eccb0 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 19:38:28 -0700 Subject: [PATCH 19/20] docs: refresh SSH transport description for supervisor-initiated relay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates user-facing docs to match the connect/exec transport change: - `docs/security/best-practices.mdx` — SSH tunnel section now describes traffic riding the sandbox's mTLS session (transport auth) plus a short-lived session token scoped to the sandbox (authorization), with the sandbox's sshd bound to a local Unix socket rather than a TCP port. Removes the stale mention of the NSSH1 HMAC handshake. - `docs/observability/logging.mdx` — example OCSF shorthand lines for SSH:LISTEN / SSH:OPEN updated to reflect the current emit shape (no peer endpoint on the Unix-socket listener, no NSSH1 auth tag).
--- docs/observability/logging.mdx | 6 +++--- docs/security/best-practices.mdx | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/observability/logging.mdx b/docs/observability/logging.mdx index 2053ba4b2..5f5aadf03 100644 --- a/docs/observability/logging.mdx +++ b/docs/observability/logging.mdx @@ -134,13 +134,13 @@ Proxy and SSH servers ready: ```text OCSF NET:LISTEN [INFO] 10.200.0.1:3128 -OCSF SSH:LISTEN [INFO] 0.0.0.0:2222 +OCSF SSH:LISTEN [INFO] ``` -An SSH handshake accepted, one event per connection: +An SSH connection accepted (one event per invocation, arriving over the supervisor's Unix socket, so there is no network peer address to log): ```text -OCSF SSH:OPEN [INFO] ALLOWED 10.42.0.52:42706 [auth:NSSH1] +OCSF SSH:OPEN [INFO] ALLOWED ``` A process launched inside the sandbox: diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 52fc82131..1647d92c3 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -251,7 +251,7 @@ OpenShell generates a cluster CA at bootstrap and distributes it through Kuberne ### SSH Tunnel Authentication -SSH connections to sandboxes pass through the gateway's HTTP CONNECT tunnel with token-based authentication and HMAC-SHA256 handshake verification (NSSH1 protocol). +SSH connections to sandboxes travel through the gateway over the sandbox's existing mTLS session. On top of the gateway's transport authentication (mTLS by default), each SSH connect call also carries a short-lived session token scoped to a specific sandbox. The sandbox never exposes an SSH port on the network — its SSH daemon listens on a local Unix socket that only the sandbox's own supervisor process can reach. 
| Aspect | Detail | |---|---| From 7a850aed86f17e3aef8e0357818f91660b82f358 Mon Sep 17 00:00:00 2001 From: Piotr Mlocek Date: Fri, 17 Apr 2026 20:07:35 -0700 Subject: [PATCH 20/20] feat(server): cap in-flight relay channels per sandbox and globally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two `ResourceExhausted`-returning guards on `open_relay` to bound the `pending_relays` map against runaway or abusive callers: - `MAX_PENDING_RELAYS = 256` — upper bound across all sandboxes. Caps the memory a caller can pin by calling `open_relay` in a loop while no supervisor ever claims (or the supervisor is hung). - `MAX_PENDING_RELAYS_PER_SANDBOX = 32` — per-sandbox ceiling so one noisy tenant can't consume the entire global budget. Sits above the existing SSH-tunnel per-sandbox cap (20) so tunnel-specific limits still fire first for that caller. Both checks and the `pending_relays` insert happen under a single lock hold so concurrent callers can't each observe "under the cap" and both insert past it. Adds a `sandbox_id` field on `PendingRelay` so the per-sandbox count is a single filter over the map without extra indexes. Tests: - Two unit tests in `supervisor_session.rs` — assert the global cap and the per-sandbox cap both return `ResourceExhausted` with the right message, and a cap-hit on one sandbox doesn't leak onto others. - One integration test in `supervisor_relay_integration.rs` — bursts 64 concurrent `open_relay` calls at a single sandbox and asserts exactly 32 succeed, exactly 32 are rejected with the per-sandbox message, and a different sandbox still accepts new relays. Reaper behaviour is unchanged; the cap makes the map bounded, so the existing `HashMap::retain` pass stays cheap under any load. 
--- .../src/supervisor_session.rs | 105 ++++++++++++++++++ .../tests/supervisor_relay_integration.rs | 61 +++++++++- 2 files changed, 165 insertions(+), 1 deletion(-) diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index dcf272879..5345d78f1 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -25,6 +25,16 @@ const RELAY_PENDING_TIMEOUT: Duration = Duration::from_secs(10); const SESSION_WAIT_INITIAL_BACKOFF: Duration = Duration::from_millis(100); /// Maximum backoff between session-availability polls in `wait_for_session`. const SESSION_WAIT_MAX_BACKOFF: Duration = Duration::from_secs(2); +/// Upper bound on unclaimed relay channels across all sandboxes. Caps the +/// memory a misbehaving caller can pin by calling `open_relay` repeatedly +/// while the supervisor never claims (or isn't responding). Sized generously +/// so normal bursts pass through; exceeding it returns `ResourceExhausted`. +const MAX_PENDING_RELAYS: usize = 256; +/// Upper bound on concurrent unclaimed relay channels for a single sandbox. +/// Enforces the same shape per sandbox so one misbehaving sandbox can't +/// consume the entire global budget. Sits above the SSH-tunnel per-sandbox +/// cap (20) so tunnel-specific limits still fire first for that caller. +const MAX_PENDING_RELAYS_PER_SANDBOX: usize = 32; // --------------------------------------------------------------------------- // Session registry @@ -56,6 +66,7 @@ pub struct SupervisorSessionRegistry { struct PendingRelay { sender: RelayStreamSender, + sandbox_id: String, created_at: Instant, } @@ -186,13 +197,31 @@ impl SupervisorSessionRegistry { let channel_id = Uuid::new_v4().to_string(); // Register the pending relay before sending RelayOpen to avoid a race. 
+ // Both caps are checked and the insert happens under a single lock hold + // so two concurrent calls can't both observe "under the cap" and then + // both insert past it. let (relay_tx, relay_rx) = oneshot::channel(); { let mut pending = self.pending_relays.lock().unwrap(); + if pending.len() >= MAX_PENDING_RELAYS { + return Err(Status::resource_exhausted(format!( + "gateway relay capacity reached ({MAX_PENDING_RELAYS} in flight)" + ))); + } + let per_sandbox = pending + .values() + .filter(|p| p.sandbox_id == sandbox_id) + .count(); + if per_sandbox >= MAX_PENDING_RELAYS_PER_SANDBOX { + return Err(Status::resource_exhausted(format!( + "per-sandbox relay limit reached ({MAX_PENDING_RELAYS_PER_SANDBOX} in flight for {sandbox_id})" + ))); + } pending.insert( channel_id.clone(), PendingRelay { sender: relay_tx, + sandbox_id: sandbox_id.to_string(), created_at: Instant::now(), }, ); @@ -731,6 +760,76 @@ mod tests { assert!(registry.pending_relays.lock().unwrap().is_empty()); } + #[tokio::test] + async fn open_relay_rejects_when_global_cap_reached() { + let registry = SupervisorSessionRegistry::new(); + let (tx, _rx) = mpsc::channel::(8); + registry.register("sbx-a".to_string(), "s-a".to_string(), tx.clone()); + registry.register("sbx-b".to_string(), "s-b".to_string(), tx); + + // Pre-seed pending_relays to exactly the global cap, split across two + // sandboxes so neither hits the per-sandbox cap first. 
+ { + let mut pending = registry.pending_relays.lock().unwrap(); + for i in 0..MAX_PENDING_RELAYS { + let (oneshot_tx, _) = oneshot::channel(); + let sandbox_id = if i % 2 == 0 { "sbx-a" } else { "sbx-b" }; + pending.insert( + format!("channel-{i}"), + PendingRelay { + sender: oneshot_tx, + sandbox_id: sandbox_id.to_string(), + created_at: Instant::now(), + }, + ); + } + } + + let err = registry + .open_relay("sbx-a", Duration::from_millis(50)) + .await + .expect_err("open_relay should reject once global cap is reached"); + assert_eq!(err.code(), tonic::Code::ResourceExhausted); + assert!(err.message().contains("gateway relay capacity")); + } + + #[tokio::test] + async fn open_relay_rejects_when_per_sandbox_cap_reached() { + let registry = SupervisorSessionRegistry::new(); + let (tx, _rx) = mpsc::channel::(8); + registry.register("sbx".to_string(), "s".to_string(), tx); + + { + let mut pending = registry.pending_relays.lock().unwrap(); + for i in 0..MAX_PENDING_RELAYS_PER_SANDBOX { + let (oneshot_tx, _) = oneshot::channel(); + pending.insert( + format!("channel-{i}"), + PendingRelay { + sender: oneshot_tx, + sandbox_id: "sbx".to_string(), + created_at: Instant::now(), + }, + ); + } + } + + let err = registry + .open_relay("sbx", Duration::from_millis(50)) + .await + .expect_err("open_relay should reject when per-sandbox cap is reached"); + assert_eq!(err.code(), tonic::Code::ResourceExhausted); + assert!(err.message().contains("per-sandbox relay limit")); + + // A different sandbox still has headroom. 
+ let (tx2, _rx2) = mpsc::channel::(8); + registry.register("sbx-other".to_string(), "s-other".to_string(), tx2); + registry + .open_relay("sbx-other", Duration::from_millis(50)) + .await + .expect("different sandbox should still accept new relays"); + } + #[tokio::test] async fn open_relay_uses_newest_session_after_supersede() { let registry = SupervisorSessionRegistry::new(); @@ -785,6 +884,7 @@ mod tests { "ch-1".to_string(), PendingRelay { sender: relay_tx, + sandbox_id: "sbx-test".to_string(), created_at: Instant::now(), }, ); @@ -802,6 +902,7 @@ mod tests { "ch-old".to_string(), PendingRelay { sender: relay_tx, + sandbox_id: "sbx-test".to_string(), created_at: Instant::now() - Duration::from_secs(60), }, ); @@ -829,6 +930,7 @@ mod tests { "ch-1".to_string(), PendingRelay { sender: relay_tx, + sandbox_id: "sbx-test".to_string(), created_at: Instant::now(), }, ); @@ -847,6 +949,7 @@ mod tests { "ch-io".to_string(), PendingRelay { sender: relay_tx, + sandbox_id: "sbx-test".to_string(), created_at: Instant::now(), }, ); @@ -877,6 +980,7 @@ mod tests { "ch-old".to_string(), PendingRelay { sender: relay_tx, + sandbox_id: "sbx-test".to_string(), created_at: Instant::now() - Duration::from_secs(60), }, ); @@ -899,6 +1003,7 @@ mod tests { "ch-fresh".to_string(), PendingRelay { sender: relay_tx, + sandbox_id: "sbx-test".to_string(), created_at: Instant::now(), }, ); diff --git a/crates/openshell-server/tests/supervisor_relay_integration.rs b/crates/openshell-server/tests/supervisor_relay_integration.rs index 7f976c80e..7ad77a98a 100644 --- a/crates/openshell-server/tests/supervisor_relay_integration.rs +++ b/crates/openshell-server/tests/supervisor_relay_integration.rs @@ -306,7 +306,15 @@ fn register_session( registry: &SupervisorSessionRegistry, sandbox_id: &str, ) -> mpsc::Receiver { - let (tx, rx) = mpsc::channel(8); + register_session_with_capacity(registry, sandbox_id, 8) +} + +fn register_session_with_capacity( + registry: &SupervisorSessionRegistry, + 
sandbox_id: &str, + capacity: usize, +) -> mpsc::Receiver { + let (tx, rx) = mpsc::channel(capacity); registry.register(sandbox_id.to_string(), "sess-1".to_string(), tx); rx } @@ -505,3 +513,54 @@ async fn concurrent_relays_multiplex_independently() { assert_eq!(&buf_a, b"stream-A"); assert_eq!(&buf_b, b"stream-B"); } + +/// Bursts more `open_relay` calls than the per-sandbox cap allows in parallel +/// and asserts the registry enforces the ceiling cleanly. A well-behaved +/// caller inside the cap still succeeds; overflow calls return `ResourceExhausted` +/// rather than racing the pending map into an inconsistent state. +#[tokio::test] +async fn open_relay_enforces_per_sandbox_cap_under_concurrent_burst() { + let registry = Arc::new(SupervisorSessionRegistry::new()); + let _channel = spawn_gateway(Arc::clone(&registry)).await; + // Oversized mpsc so the session doesn't backpressure the burst — the cap, + // not the channel, is what we're testing. + let _session_rx = register_session_with_capacity(&registry, "sbx", 256); + + // Fire 64 concurrent opens. Per-sandbox cap is 32, global cap is 256, + // so exactly 32 should succeed and 32 should be rejected with + // `ResourceExhausted` carrying the per-sandbox message.
+ let mut handles = Vec::with_capacity(64); + for _ in 0..64 { + let r = Arc::clone(&registry); + handles.push(tokio::spawn(async move { + r.open_relay("sbx", Duration::from_secs(1)).await + })); + } + + let mut ok = 0usize; + let mut exhausted = 0usize; + for h in handles { + match h.await.expect("task joined") { + Ok(_pair) => ok += 1, + Err(status) if status.code() == tonic::Code::ResourceExhausted => { + assert!( + status.message().contains("per-sandbox relay limit"), + "expected per-sandbox error message, got: {}", + status.message() + ); + exhausted += 1; + } + Err(other) => panic!("unexpected open_relay error: {other:?}"), + } + } + assert_eq!(ok, 32, "exactly per-sandbox cap should succeed"); + assert_eq!(exhausted, 32, "overflow should be rejected, not dropped"); + + // A different sandbox still has headroom — the per-sandbox cap doesn't + // leak onto unrelated tenants. + let _other_rx = register_session_with_capacity(&registry, "sbx-other", 8); + registry + .open_relay("sbx-other", Duration::from_secs(1)) + .await + .expect("other sandbox should not be affected by sbx cap"); +}