Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions skills/codealive-context-engine/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,20 +218,35 @@ or your local file-read tool before drawing conclusions about behavior.
Retrieves the full source code content for artifacts found via search. Use this for external repositories you cannot access locally.

```bash
python scripts/fetch.py <identifier1> [identifier2...]
python scripts/fetch.py <identifier1> [identifier2...] [--data-source NAME_OR_ID]
```

| Constraint | Value |
|-----------|-------|
| Max identifiers per request | 20 |
| Identifiers source | `identifier` field from search results |
| Identifier format | `{owner/repo}::{path}::{symbol}` (symbols), `{owner/repo}::{path}` (files) |
| `--data-source NAME_OR_ID` | Optional. Data source Name or Id (from a result's `Source:` line) to disambiguate an identifier indexed in more than one data source |

For function-like artifacts the response includes a small **relationships
preview** (up to 3 outgoing/incoming calls per direction). To see the full
call graph, inheritance, or references, run `relationships.py` with the
artifact's identifier.

**Disambiguating an identifier that lives in more than one data source.** Artifact
identifiers are unique only per data source, so the same identifier can belong to
more than one data source. If you fetch such an identifier without `--data-source`,
the backend returns a **409** listing the candidate data sources instead of picking
one for you. Every listed candidate **will** resolve, so the workflow is: call without
`--data-source` → read the 409 candidates → try one → if that data source isn't the one
you want, try the next. To resolve the ambiguity without trial and error, take the
`Source:` name or id shown next to the search result you want and pass it back:
`python scripts/fetch.py <identifier> --data-source "backend"` (or the id).
The same `--data-source` flag works on `relationships.py`. If a `--data-source`-scoped
call finds nothing (the script prints a "nothing was found in data source …" hint),
the identifier belongs to a different data source or the selector is wrong: retry with
a different `Source:` value, or drop `--data-source` to get the 409 candidate list.

### `relationships.py` — Drill into an Artifact's Relationship Graph

Returns the full call graph (incoming/outgoing calls), inheritance hierarchy
Expand All @@ -241,7 +256,7 @@ identifier and want to understand how the artifact relates to the rest of the
codebase.

```bash
python scripts/relationships.py <identifier> [--profile PROFILE] [--max-count N]
python scripts/relationships.py <identifier> [--profile PROFILE] [--max-count N] [--data-source NAME_OR_ID]
```

| Option | Description |
Expand All @@ -251,6 +266,7 @@ python scripts/relationships.py <identifier> [--profile PROFILE] [--max-count N]
| `--profile allRelevant` | Calls + inheritance (4 groups) |
| `--profile referencesOnly` | Symbol references |
| `--max-count N` | Max related artifacts per relationship type (1–1000, default 50) |
| `--data-source NAME_OR_ID` | Optional. Data source Name or Id to disambiguate an identifier indexed in more than one data source (same 409 contract as `fetch.py`) |
| `--json` | Emit the raw JSON response instead of the formatted view |

**When this adds value vs the fetch preview:**
Expand Down
56 changes: 49 additions & 7 deletions skills/codealive-context-engine/scripts/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
CodeAlive Fetch - Retrieve full content for code artifacts

Usage:
python fetch.py <identifier1> [identifier2...]
python fetch.py <identifier1> [identifier2...] [--data-source NAME_OR_ID]

Examples:
# Fetch a single artifact (symbol)
Expand All @@ -15,10 +15,18 @@
# Fetch multiple artifacts
python fetch.py "my-org/backend::src/auth.py::login" "my-org/backend::src/utils.py::helper"

# Disambiguate an identifier that exists in more than one data source
# (use the dataSource name or id from a search result)
python fetch.py "my-org/backend::src/auth.py::login" --data-source "backend"

Identifiers come from semantic/grep search results (the `identifier` field).
The format is: {owner/repo}::{path}::{symbol} (for symbols/chunks)
{owner/repo}::{path} (for files)

Pass --data-source (a data source Name or Id from a search result's `dataSource`)
to disambiguate an identifier that exists in more than one data source. Without it,
an ambiguous identifier returns a 409 listing the candidate data sources.

Maximum 20 identifiers per request.
"""

Expand Down Expand Up @@ -83,11 +91,23 @@ def _format_relationships_preview(relationships: dict) -> list:
return lines


def format_artifacts(data: dict) -> str:
def _data_source_miss_hint(data_source: str) -> str:
    """Build the recovery hint shown when a data-source-scoped fetch comes back empty.

    Args:
        data_source: The ``--data-source`` selector value that was used for the call.

    Returns:
        The hint text. It starts with a newline so it can be appended directly
        to a preceding "no artifacts" message.
    """
    # Assemble the hint from sentence-sized pieces; the joined text is one line.
    hint_parts = [
        f'\n💡 Hint: nothing was found in data source "{data_source}". ',
        "The identifier may belong to a different data source, or the --data-source value may be wrong. ",
        "Try: re-run with --data-source set to a different candidate ",
        "(use the Source name or id from your search results, or run datasources.py), ",
        "or drop --data-source entirely — an ambiguous identifier then returns a 409 ",
        "listing the candidate data sources to choose from.",
    ]
    return "".join(hint_parts)


def format_artifacts(data: dict, data_source: str = None) -> str:
"""Format fetched artifacts for display."""
artifacts = data.get("artifacts", [])
if not artifacts:
return "No artifacts returned."
msg = "No artifacts returned."
return msg + _data_source_miss_hint(data_source) if data_source else msg

output = []
count = 0
Expand Down Expand Up @@ -119,7 +139,8 @@ def format_artifacts(data: dict) -> str:
has_any_relationships = True

if not output:
return "No artifacts found."
msg = "No artifacts found."
return msg + _data_source_miss_hint(data_source) if data_source else msg

output.append(f"\n({count} artifact(s))")

Expand All @@ -144,7 +165,26 @@ def main():
sys.exit(1)
sys.exit(0)

identifiers = sys.argv[1:]
identifiers = []
data_source = None
i = 1
while i < len(sys.argv):
arg = sys.argv[i]
if arg == "--data-source":
# Match the flag first, then require a value — otherwise a trailing "--data-source"
# with no value would be silently appended as an identifier.
if i + 1 >= len(sys.argv):
print("Error: --data-source requires a value.", file=sys.stderr)
sys.exit(1)
data_source = sys.argv[i + 1]
i += 2
else:
identifiers.append(arg)
i += 1
Comment thread
sciapanCA marked this conversation as resolved.

if not identifiers:
print("Error: At least one identifier is required.", file=sys.stderr)
sys.exit(1)

if len(identifiers) > 20:
print("Error: Maximum 20 identifiers per request.", file=sys.stderr)
Expand All @@ -154,11 +194,13 @@ def main():
client = CodeAliveClient()

print(f"📥 Fetching {len(identifiers)} artifact(s)", file=sys.stderr)
if data_source:
print(f" data source: {data_source}", file=sys.stderr)
print(file=sys.stderr)

result = client.fetch_artifacts(identifiers=identifiers)
result = client.fetch_artifacts(identifiers=identifiers, data_source=data_source)

print(format_artifacts(result))
print(format_artifacts(result, data_source=data_source))

except Exception as e:
print(f"❌ Error: {e}", file=sys.stderr)
Expand Down
18 changes: 18 additions & 0 deletions skills/codealive-context-engine/scripts/grep.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,24 @@ def format_grep_results(results: dict) -> str:
output.append(f" File: {file_path}")
if result.get("identifier"):
output.append(f" Identifier: {result['identifier']}")

# Surface the data-source name/id so they can be passed back as --data-source to
# fetch.py / relationships.py when an identifier is branch-ambiguous.
# dataSource may be a {name, id} object or a bare string, depending on the API response
# shape — handle both, mirroring search.py.
ds = result.get("dataSource")
if isinstance(ds, dict):
ds_name = ds.get("name")
ds_id = ds.get("id")
else:
ds_name = ds
ds_id = None
if ds_name and ds_id:
output.append(f" Source: {ds_name} (id: {ds_id})")
elif ds_name:
output.append(f" Source: {ds_name}")
elif ds_id:
output.append(f" Source: (id: {ds_id})")
if result.get("matchCount") is not None:
output.append(f" Match count: {result['matchCount']}")

Expand Down
68 changes: 59 additions & 9 deletions skills/codealive-context-engine/scripts/lib/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ def grep_search(
def fetch_artifacts(
self,
identifiers: List[str],
data_source: Optional[str] = None,
) -> Dict[str, Any]:
"""
Retrieve full content for code artifacts by their identifiers.
Expand All @@ -536,6 +537,10 @@ def fetch_artifacts(

Args:
identifiers: List of artifact identifiers from search results (max 20)
data_source: Optional data-source Name or Id to disambiguate an identifier that
exists in more than one data source. Copy the `dataSource.name`/`dataSource.id`
from a search result. Omit for normal lookups; an ambiguous identifier without
it returns a 409 listing the candidate data sources.

Returns:
Dict with 'artifacts' list. Each artifact has identifier, content,
Expand All @@ -545,13 +550,16 @@ def fetch_artifacts(
the full list and other relationship profiles.
"""
body: Dict[str, Any] = {"identifiers": identifiers}
if data_source:
body["dataSource"] = data_source
return self._make_request("POST", "/api/search/artifacts", body=body)

def get_artifact_relationships(
self,
identifier: str,
profile: str = "callsOnly",
max_count_per_type: int = 50,
data_source: Optional[str] = None,
) -> Dict[str, Any]:
"""
Retrieve relationship groups for a single artifact by profile.
Expand All @@ -569,6 +577,9 @@ def get_artifact_relationships(
- "referencesOnly": symbol references
max_count_per_type: Max related artifacts per relationship type
(1–1000, default 50).
data_source: Optional data-source Name or Id to disambiguate a source identifier
that exists in more than one data source. Omit for normal lookups; an ambiguous
identifier without it returns a 409 listing the candidate data sources.

Returns:
Dict with sourceIdentifier, profile, found, and a list of
Expand All @@ -594,6 +605,8 @@ def get_artifact_relationships(
"profile": api_profile,
"maxCountPerType": max_count_per_type,
}
if data_source:
body["dataSource"] = data_source
return self._make_request(
"POST", "/api/search/artifact-relationships", body=body
)
Expand Down Expand Up @@ -665,8 +678,8 @@ def main():
print(" search <query> <data_source1> [data_source2...] [--mode auto|fast|deep] [--description-detail short|full]")
print(" semantic-search <query> <data_source1> [data_source2...] [--path PATH] [--ext EXT] [--max-results N]")
print(" grep-search <query> <data_source1> [data_source2...] [--regex] [--path PATH] [--ext EXT] [--max-results N]")
print(" fetch <identifier1> [identifier2...]")
print(" relationships <identifier> [--profile callsOnly|inheritanceOnly|allRelevant|referencesOnly] [--max-count N]")
print(" fetch <identifier1> [identifier2...] [--data-source NAME_OR_ID]")
print(" relationships <identifier> [--profile callsOnly|inheritanceOnly|allRelevant|referencesOnly] [--max-count N] [--data-source NAME_OR_ID]")
print(" chat <question> <data_source1> [data_source2...] [--conversation-id ID]")
sys.exit(1)

Expand Down Expand Up @@ -791,12 +804,27 @@ def main():

elif command == "fetch":
if len(sys.argv) < 3:
print("Usage: fetch <identifier1> [identifier2...]")
print("Usage: fetch <identifier1> [identifier2...] [--data-source NAME_OR_ID]")
sys.exit(1)

identifiers = sys.argv[2:]
identifiers = []
data_source = None
i = 2
while i < len(sys.argv):
arg = sys.argv[i]
if arg == "--data-source":
# Match the flag first, then require a value — otherwise a trailing
# "--data-source" with no value would be silently appended as an identifier.
if i + 1 >= len(sys.argv):
print("Error: --data-source requires a value.", file=sys.stderr)
sys.exit(1)
data_source = sys.argv[i + 1]
i += 2
else:
identifiers.append(arg)
i += 1
Comment thread
sciapanCA marked this conversation as resolved.

result = client.fetch_artifacts(identifiers)
result = client.fetch_artifacts(identifiers, data_source=data_source)
print(json.dumps(result, indent=2))

elif command == "relationships":
Expand All @@ -807,20 +835,37 @@ def main():
identifier = sys.argv[2]
profile = "callsOnly"
max_count = 50
data_source = None

i = 3
while i < len(sys.argv):
arg = sys.argv[i]
if arg == "--profile" and i + 1 < len(sys.argv):
# Value-bearing flags match on the name first, then require a value, so a trailing
# flag with no value reports "requires a value" instead of being silently skipped.
if arg == "--profile":
if i + 1 >= len(sys.argv):
print("Error: --profile requires a value.", file=sys.stderr)
sys.exit(1)
profile = sys.argv[i + 1]
i += 2
elif arg == "--max-count" and i + 1 < len(sys.argv):
elif arg == "--max-count":
if i + 1 >= len(sys.argv):
print("Error: --max-count requires a value.", file=sys.stderr)
sys.exit(1)
max_count = int(sys.argv[i + 1])
i += 2
elif arg == "--data-source":
if i + 1 >= len(sys.argv):
print("Error: --data-source requires a value.", file=sys.stderr)
sys.exit(1)
data_source = sys.argv[i + 1]
i += 2
else:
i += 1

result = client.get_artifact_relationships(identifier, profile, max_count)
result = client.get_artifact_relationships(
identifier, profile, max_count, data_source=data_source
)
print(json.dumps(result, indent=2))

elif command == "chat":
Expand All @@ -835,7 +880,12 @@ def main():
i = 3
while i < len(sys.argv):
arg = sys.argv[i]
if arg == "--conversation-id" and i + 1 < len(sys.argv):
if arg == "--conversation-id":
# Match the flag first, then require a value — otherwise a trailing
# "--conversation-id" with no value would be silently appended as a data source.
if i + 1 >= len(sys.argv):
print("Error: --conversation-id requires a value.", file=sys.stderr)
sys.exit(1)
conversation_id = sys.argv[i + 1]
i += 2
else:
Expand Down
Loading