Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
7ad09dd
feat: add CLI shell and exec commands for deployment pod terminal access
V2arK Mar 12, 2026
4946858
fix: use urlparse for scheme replacement to satisfy CodeQL
V2arK Mar 12, 2026
e3bbd27
fix: apply black formatting and fix CodeQL url.startswith alert
V2arK Mar 12, 2026
0a440d5
style: condense multiline expressions for readability
V2arK Mar 12, 2026
f332513
fix: resolve pylint warnings in shell.py and test_shell.py
V2arK Mar 12, 2026
bbf751a
fix: skip PyTorch-dependent tests in sanity mode
V2arK Mar 12, 2026
ac47157
fix: break out of exec loop after end marker to prevent hanging
V2arK Mar 12, 2026
6db6e87
fix: re-enable OPOST after setraw to fix terminal rendering
V2arK Mar 12, 2026
7bac675
fix: replace pytest-asyncio with asyncio.run in tests for CI compat
V2arK Mar 12, 2026
01d757f
fix: match Web UI protocol - remove rows/cols from stdin messages, re…
V2arK Mar 12, 2026
e1da8f6
fix: send delayed resize to fix prompt rendering after shell startup
V2arK Mar 12, 2026
0656214
fix: await cancelled tasks for cleanup, reduce WS close_timeout to 2s
V2arK Mar 12, 2026
a006cdd
fix: toggle PTY width to force SIGWINCH and prompt redraw on connect
V2arK Mar 12, 2026
f3ba18e
fix: include rows/cols in stdin messages and send Ctrl+L after resize…
V2arK Mar 12, 2026
60b610d
fix: use stty to set PTY dimensions from inside shell instead of resi…
V2arK Mar 12, 2026
7103b1f
fix: re-enable OPOST after setraw to convert bare \n to \r\n like xte…
V2arK Mar 12, 2026
61cd4ad
fix: convert \n to \r\n in output and use stty to fix PTY dimensions …
V2arK Mar 12, 2026
188f317
feat: use pyte terminal emulator for interactive shell rendering
V2arK Mar 12, 2026
d624062
fix: swap rows/cols unpacking from shutil.get_terminal_size
V2arK Mar 12, 2026
38a2aab
fix: use alternate screen buffer to prevent scrollback in Warp terminal
V2arK Mar 12, 2026
edd8b09
fix: handle WebSocket ConnectionClosed to prevent hang on shell exit
V2arK Mar 12, 2026
4dd5b56
refactor: use pyte for exec ANSI stripping and add ConnectionClosed h…
V2arK Mar 12, 2026
7da5136
fix: treat ArgoCD Code message as reconnect signal, not shell exit code
V2arK Mar 12, 2026
50a6668
fix: stop reconnecting when shell has genuinely exited
V2arK Mar 12, 2026
66f3dc0
chore: add debug file logging to shell and exec for exit hang diagnosis
V2arK Mar 13, 2026
16251dd
fix: detect shell exit via idle timeout instead of Code message
V2arK Mar 13, 2026
95fb482
fix: exit immediately on exit echo, ignore echo exit with trailing pr…
V2arK Mar 13, 2026
39716ce
fix: skip websocket close handshake wait after session ends
V2arK Mar 13, 2026
ce152cd
refactor: extract shell logic from CLI to SDK layer
V2arK Mar 13, 2026
752fdc9
refactor: extract shell logic to SDK layer, rely on server close frame
V2arK Mar 13, 2026
ee8399a
ruff format
V2arK Mar 13, 2026
b39ca9e
refactor: remove debug logging, fix unused imports and SDK/CLI bounda…
V2arK Mar 13, 2026
4fd10c3
update redundancy
michaelshin Mar 17, 2026
6e052bc
clean up pyte
michaelshin Mar 17, 2026
34ca324
clean up implementation
michaelshin Mar 17, 2026
6866390
address comments
michaelshin Mar 17, 2026
2e4c525
fix lint
michaelshin Mar 17, 2026
346a814
address comments
michaelshin Mar 31, 2026
a11c114
revert
michaelshin Mar 31, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions centml/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from centml.cli.login import login, logout
from centml.cli.cluster import ls, get, delete, pause, resume, capacity
from centml.cli.shell import shell, exec_cmd


@click.group()
Expand Down Expand Up @@ -48,6 +49,8 @@ def ccluster():
# Register deployment lifecycle subcommands on the `ccluster` group.
ccluster.add_command(pause)
ccluster.add_command(resume)
ccluster.add_command(capacity)
# Pod terminal access. `exec_cmd` is registered under the name "exec".
ccluster.add_command(shell)
ccluster.add_command(exec_cmd, name="exec")


cli.add_command(ccluster, name="cluster")
84 changes: 84 additions & 0 deletions centml/cli/shell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""CLI commands for interactive shell and command execution in deployment pods."""

import asyncio
import shlex
import sys

import click

from centml.cli.cluster import handle_exception
from centml.sdk import auth
from centml.sdk.api import get_centml_client
from centml.sdk.config import settings
from centml.sdk.shell import build_ws_url, exec_session, get_running_pods, interactive_session


def _select_pod(running_pods, deployment_id):
    """Interactively ask the user which running pod to connect to.

    Prints a 1-based numbered menu of ``running_pods`` and prompts for a
    number in that range.

    Args:
        running_pods: Sequence of pod names to choose from.
        deployment_id: Deployment identifier, used only in the menu header.

    Returns:
        The pod name at the position the user selected.
    """
    click.echo(f"Multiple running pods found for deployment {deployment_id}:")
    for index, pod_name in enumerate(running_pods, start=1):
        click.echo(f" [{index}] {pod_name}")

    pod_count = len(running_pods)
    selection = click.prompt(
        "Select a pod",
        type=click.IntRange(1, pod_count),
        prompt_suffix=f" [1-{pod_count}]: ",
    )
    # The menu is 1-based; convert back to a 0-based index.
    return running_pods[selection - 1]


def _connect_args(deployment_id, pod, shell_type, first_pod=False):
    """Resolve the target pod, then build the WebSocket URL and fetch a token.

    Pod resolution:
      * an explicit ``pod`` must be one of the deployment's running pods;
      * otherwise the only running pod is used, or the first one when
        ``first_pod`` is set;
      * otherwise the user is prompted, which requires stdin to be a TTY.

    Args:
        deployment_id: Deployment whose running pods are looked up.
        pod: Explicit pod name, or ``None`` to resolve one automatically.
        shell_type: Passed through to ``build_ws_url``.
        first_pod: When true, pick the first running pod without prompting.

    Returns:
        A ``(ws_url, token)`` tuple.

    Raises:
        click.ClickException: If there are no running pods, the requested pod
            is not running, or a choice is needed but stdin is not a TTY.
    """
    with get_centml_client() as cclient:
        candidates = get_running_pods(cclient, deployment_id)
        if not candidates:
            raise click.ClickException(f"No running pods found for deployment {deployment_id}")

        if pod is None:
            if first_pod or len(candidates) == 1:
                target_pod = candidates[0]
            elif sys.stdin.isatty():
                target_pod = _select_pod(candidates, deployment_id)
            else:
                # Cannot prompt without a terminal; ask for an explicit choice.
                raise click.ClickException(
                    "Multiple running pods found and stdin is not a TTY. "
                    "Please specify a pod with --pod or use --first-pod."
                )
        elif pod in candidates:
            target_pod = pod
        else:
            available = ", ".join(candidates)
            raise click.ClickException(f"Pod '{pod}' not found. Available running pods: {available}")

    url = build_ws_url(settings.CENTML_PLATFORM_API_URL, deployment_id, target_pod, shell_type)
    bearer = auth.get_centml_token()
    return url, bearer


@click.command(help="Open an interactive shell to a deployment pod")
@click.argument("deployment_id", type=int)
@click.option("--pod", default=None, help="Specify a pod name")
@click.option(
    "--shell",
    "shell_type",
    default=None,
    type=click.Choice(["bash", "sh", "zsh"]),
    help="Shell type",
)
@click.option(
    "--first-pod",
    is_flag=True,
    default=False,
    help="Auto-select the first running pod (skip interactive selection)",
)
@handle_exception
def shell(deployment_id, pod, shell_type, first_pod):
    """Run an interactive shell session against a deployment pod.

    Refuses to start unless stdin is a TTY, resolves the connection
    parameters, runs the session to completion, and exits the process with
    the value the session returned.
    """
    stdin_is_terminal = sys.stdin.isatty()
    if not stdin_is_terminal:
        raise click.ClickException("Interactive shell requires a terminal (TTY)")

    url, bearer = _connect_args(deployment_id, pod, shell_type, first_pod)
    session_result = asyncio.run(interactive_session(url, bearer))
    sys.exit(session_result)


@click.command(
    help="Execute a command in a deployment pod",
    context_settings={"ignore_unknown_options": True},
)
@click.argument("deployment_id", type=int)
@click.argument("command", nargs=-1, required=True, type=click.UNPROCESSED)
@click.option("--pod", default=None, help="Specific pod name")
@click.option(
    "--shell",
    "shell_type",
    default=None,
    type=click.Choice(["bash", "sh", "zsh"]),
    help="Shell type",
)
@click.option(
    "--first-pod",
    is_flag=True,
    default=False,
    help="Auto-select the first running pod (skip interactive selection)",
)
@handle_exception
def exec_cmd(deployment_id, command, pod, shell_type, first_pod):
    """Run a single command in a deployment pod and exit with its result.

    ``command`` arrives as a tuple of raw argument tokens. They are joined
    into one shell-quoted string before being handed to ``exec_session``,
    and the process exits with the value the session returned.
    """
    url, bearer = _connect_args(deployment_id, pod, shell_type, first_pod)
    # Quote each token so arguments containing spaces survive the remote shell.
    command_line = shlex.join(command)
    session_result = asyncio.run(exec_session(url, bearer, command_line))
    sys.exit(session_result)
3 changes: 3 additions & 0 deletions centml/sdk/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def get(self, depl_type):
def get_status(self, id):
return self._api.get_deployment_status_deployments_status_deployment_id_get(id)

def get_status_v3(self, deployment_id):
    """Return the V3 deployment status for ``deployment_id``.

    Thin wrapper: forwards ``deployment_id`` to the underlying API client's
    V3 deployment-status call and returns its response unchanged.
    """
    return self._api.get_deployment_status_v3_deployments_status_v3_deployment_id_get(deployment_id)

def get_inference(self, id):
"""Get Inference deployment details - automatically handles both V2 and V3 deployments"""
# Try V3 first (recommended), fallback to V2 if deployment is V2
Expand Down
12 changes: 12 additions & 0 deletions centml/sdk/shell/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from centml.sdk.shell.exceptions import NoPodAvailableError, PodNotFoundError, ShellError
from centml.sdk.shell.session import build_ws_url, exec_session, get_running_pods, interactive_session

# Public API of the package: the exception types and session helpers
# re-exported from the `exceptions` and `session` submodules.
__all__ = [
    "NoPodAvailableError",
    "PodNotFoundError",
    "ShellError",
    "build_ws_url",
    "exec_session",
    "get_running_pods",
    "interactive_session",
]
10 changes: 10 additions & 0 deletions centml/sdk/shell/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class ShellError(Exception):
    """Base exception for shell operations.

    Common parent of the shell-specific exceptions in this module, so callers
    can catch every shell failure with a single ``except ShellError``.
    """


class NoPodAvailableError(ShellError):
    """No running pods found for the deployment.

    NOTE(review): the CLI's ``_connect_args`` reports this condition with
    ``click.ClickException``; confirm where this exception is actually raised.
    """


class PodNotFoundError(ShellError):
    """Specified pod not found among running pods.

    NOTE(review): the CLI's ``_connect_args`` reports this condition with
    ``click.ClickException``; confirm where this exception is actually raised.
    """
Comment on lines +1 to +10
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where exactly are we using these errors?

Loading
Loading