From a2d81e1e642fb9598ca92a5b43d4af618f9b2534 Mon Sep 17 00:00:00 2001 From: Eddie Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:43:11 -0700 Subject: [PATCH 1/2] fix(context): strip .md suffix using correct byte length The .md extension is three bytes, but the normalizer took the last four characters as the suffix, so USER.md compared R.md to .md and never stripped. Use MD_SUFFIX.len() for slicing and document optional .md on show/pull/push in help and the hotdata skill. --- skills/hotdata/SKILL.md | 2 +- src/command.rs | 6 ++--- src/context.rs | 60 +++++++++++++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/skills/hotdata/SKILL.md b/skills/hotdata/SKILL.md index b1dd973..05926f0 100644 --- a/skills/hotdata/SKILL.md +++ b/skills/hotdata/SKILL.md @@ -43,7 +43,7 @@ If **`HOTDATA_WORKSPACE`** is set in the environment, the workspace is **locked* The workspace stores those documents only through the **context API**. The **authoritative** copy always lives on the server under the stem; common stems are **`context:DATAMODEL`** (semantic map) and **`context:GLOSSARY`** (glossary / runbooks). -The CLI command **`hotdata context push`** reads **`./.md`** and **`pull`** writes that file in the **current working directory**—those files exist only as a **transport surface** for the API, not as a second source of truth. **`hotdata context show `** prints Markdown to stdout so agents can read **`context:`** without any local file. Stems follow SQL table–identifier rules (ASCII letters, digits, underscore; no dot in the API name; max 128 characters; SQL reserved words are not allowed). +The CLI command **`hotdata context push`** reads **`./.md`** and **`pull`** writes that file in the **current working directory**—those files exist only as a **transport surface** for the API, not as a second source of truth. **`hotdata context show `** prints Markdown to stdout so agents can read **`context:`** without any local file. Stems follow SQL table–identifier rules (ASCII letters, digits, underscore; no dot in the API name; max 128 characters; SQL reserved words are not allowed). For **`show`**, **`pull`**, and **`push`**, the CLI accepts a trailing **`.md`** on the argument (e.g. **`USER.md`**) and treats it as stem **`USER`**—the workspace still stores **`USER`**, not `USER.md`. > **Agents: do not blindly run `hotdata context show DATAMODEL` on session start.** Run **`hotdata context list`** first (optional `--prefix DATAMODEL`). Call **`hotdata context show DATAMODEL` only if** the list includes the `DATAMODEL` stem. If **`show` exits 1** with *no context named …*, that is **normal** when nothing has been pushed yet—**not a hard failure**; do not retry in a loop, and **avoid speculative `show` in parallel** with other shell tools where one failure cancels sibling calls. Proceed without **context:DATAMODEL** until the user asks to create or load one. diff --git a/src/command.rs b/src/command.rs index 1cdd10f..1498a52 100644 --- a/src/command.rs +++ b/src/command.rs @@ -582,13 +582,13 @@ pub enum ContextCommands { /// Print context content to stdout Show { - /// Context name (same rules as a SQL table identifier; local file is .md) + /// Context name (same rules as a SQL table identifier; local file is .md). A trailing `.md` is ignored (e.g. `USER.md` → `USER`). name: String, }, /// Download context from the workspace to ./.md Pull { - /// Context name + /// Context name (trailing `.md` ignored, e.g. `USER.md` → `USER`) name: String, /// Overwrite ./.md if it already exists @@ -602,7 +602,7 @@ pub enum ContextCommands { /// Upload ./.md to the workspace as named context Push { - /// Context name + /// Context name (trailing `.md` ignored, e.g. `USER.md` → `USER`; reads `./USER.md`) name: String, /// Print what would be sent; do not POST diff --git a/src/context.rs b/src/context.rs index 79b52c7..78d56c3 100644 --- a/src/context.rs +++ b/src/context.rs @@ -49,6 +49,25 @@ struct UpsertResponse { context: WorkspaceContextEntry, } +/// Normalizes a context name from the CLI: trims, takes the final path segment, and strips a +/// trailing `.md` (any ASCII case) so `USER.md` or `./USER.md` refer to context stem `USER`. +pub fn normalize_context_cli_name(name: &str) -> String { + let trimmed = name.trim(); + let basename = std::path::Path::new(trimmed) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or(trimmed); + const MD_SUFFIX: &str = ".md"; + if basename.len() >= MD_SUFFIX.len() { + let start = basename.len() - MD_SUFFIX.len(); + let suffix = &basename[start..]; + if suffix.eq_ignore_ascii_case(MD_SUFFIX) { + return basename[..start].to_string(); + } + } + basename.to_string() +} + /// Validates a context stem (API `name` and basename before `.md`). /// Same rules as runtimedb `validate_table_name`. pub fn validate_context_stem(name: &str) -> Result<(), String> { @@ -148,13 +167,14 @@ pub fn list(workspace_id: &str, prefix: Option<&str>, format: &str) { } pub fn show(workspace_id: &str, name: &str) { - if let Err(e) = validate_context_stem(name) { + let name = normalize_context_cli_name(name); + if let Err(e) = validate_context_stem(&name) { eprintln!("error: {e}"); std::process::exit(1); } let api = ApiClient::new(Some(workspace_id)); - match fetch_context(&api, name) { + match fetch_context(&api, &name) { Ok(ctx) => { print!("{}", ctx.content); if !ctx.content.ends_with('\n') { @@ -178,12 +198,13 @@ pub fn show(workspace_id: &str, name: &str) { } pub fn pull(workspace_id: &str, name: &str, force: bool, dry_run: bool) { - if let Err(e) = validate_context_stem(name) { + let name = normalize_context_cli_name(name); + if let Err(e) = validate_context_stem(&name) { eprintln!("error: {e}"); std::process::exit(1); } - let path = local_md_path(name); + let path = local_md_path(&name); if !dry_run && !force && path.exists() { eprintln!( @@ -194,7 +215,7 @@ pub fn pull(workspace_id: &str, name: &str, force: bool, dry_run: bool) { } let api = ApiClient::new(Some(workspace_id)); - let ctx = match fetch_context(&api, name) { + let ctx = match fetch_context(&api, &name) { Ok(c) => c, Err(reqwest::StatusCode::NOT_FOUND) => { eprintln!( @@ -232,12 +253,13 @@ pub fn pull(workspace_id: &str, name: &str, force: bool, dry_run: bool) { } pub fn push(workspace_id: &str, name: &str, dry_run: bool) { - if let Err(e) = validate_context_stem(name) { + let name = normalize_context_cli_name(name); + if let Err(e) = validate_context_stem(&name) { eprintln!("error: {e}"); std::process::exit(1); } - let path = local_md_path(name); + let path = local_md_path(&name); if !path.is_file() { eprintln!( "{}", @@ -269,7 +291,7 @@ pub fn push(workspace_id: &str, name: &str, dry_run: bool) { } let api = ApiClient::new(Some(workspace_id)); - let body = json!({ "name": name, "content": content }); + let body = json!({ "name": &name, "content": content }); let resp: UpsertResponse = api.post("/context", &body); println!( @@ -330,4 +352,26 @@ mod tests { fn validate_rejects_reserved_uppercase() { assert!(validate_context_stem("SELECT").is_err()); } + + #[test] + fn normalize_strips_trailing_md() { + assert_eq!(normalize_context_cli_name("USER.md"), "USER"); + assert_eq!(normalize_context_cli_name("USER.MD"), "USER"); + assert_eq!(normalize_context_cli_name(" USER.md "), "USER"); + } + + #[test] + fn normalize_accepts_path_with_md() { + assert_eq!(normalize_context_cli_name("./DATAMODEL.md"), "DATAMODEL"); + } + + #[test] + fn normalize_preserves_stem_without_md() { + assert_eq!(normalize_context_cli_name("DATAMODEL"), "DATAMODEL"); + } + + #[test] + fn normalize_strips_md_one_char_stem() { + assert_eq!(normalize_context_cli_name("a.md"), "a"); + } } From ce70686b3d6e7b4cd0a2760b10a134a94770b99a Mon Sep 17 00:00:00 2001 From: Eddie Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:36:25 -0700 Subject: [PATCH 2/2] fix(context): avoid UTF-8 panic when probing .md suffix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check the last three bytes on the UTF-8 byte slice before slicing the str, so stems like x𝕌 (without .md) do not use a misaligned str index. Adds regression tests for multibyte stems with and without .md. --- src/context.rs | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/context.rs b/src/context.rs index 78d56c3..92cc68a 100644 --- a/src/context.rs +++ b/src/context.rs @@ -58,11 +58,17 @@ pub fn normalize_context_cli_name(name: &str) -> String { .and_then(|n| n.to_str()) .unwrap_or(trimmed); const MD_SUFFIX: &str = ".md"; - if basename.len() >= MD_SUFFIX.len() { - let start = basename.len() - MD_SUFFIX.len(); - let suffix = &basename[start..]; - if suffix.eq_ignore_ascii_case(MD_SUFFIX) { - return basename[..start].to_string(); + let md_len = MD_SUFFIX.len(); + let bytes = basename.as_bytes(); + if bytes.len() >= md_len { + let i = bytes.len() - md_len; + // Inspect bytes only: avoid slicing `str` at `i` until we know the last `md_len` bytes are + // ASCII `.md` (so `i` is a UTF-8 char boundary — e.g. `x𝕌` must not index `basename[2..]`). + if bytes[i] == b'.' + && bytes[i + 1].eq_ignore_ascii_case(&b'm') + && bytes[i + 2].eq_ignore_ascii_case(&b'd') + { + return basename[..i].to_string(); } } basename.to_string() @@ -374,4 +380,15 @@ mod tests { fn normalize_strips_md_one_char_stem() { assert_eq!(normalize_context_cli_name("a.md"), "a"); } + + #[test] + fn normalize_does_not_panic_multibyte_stem_without_md() { + // 1 ASCII byte + 4-byte UTF-8; byte index 2 is inside the codepoint — must not slice there. + assert_eq!(normalize_context_cli_name("x𝕌"), "x𝕌"); + } + + #[test] + fn normalize_strips_md_after_multibyte_char() { + assert_eq!(normalize_context_cli_name("x𝕌.md"), "x𝕌"); + } }