From 1077a4d853f371757d1998b4c0f554aa64a92b67 Mon Sep 17 00:00:00 2001 From: dustinvannoy-db Date: Thu, 11 Jun 2026 19:55:49 +0200 Subject: [PATCH 01/14] Install most skills from databricks-agent-skills --- README.md | 33 +- install.ps1 | 986 +++++++++++++++++++++++++++++++++++++++++----------- install.sh | 852 ++++++++++++++++++++++++++++++++++----------- 3 files changed, 1466 insertions(+), 405 deletions(-) diff --git a/README.md b/README.md index 55d6c8ec..78423749 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ Curated by Databricks field experts. Brings the patterns, skills, and 75+ execut ### Prerequisites - [uv](https://github.com/astral-sh/uv) - Python package manager -- [Databricks CLI](https://docs.databricks.com/aws/en/dev-tools/cli/) - Command line interface for Databricks +- [Databricks CLI](https://docs.databricks.com/aws/en/dev-tools/cli/) **v1.0.0+** - Command line interface for Databricks (v1.0.0+ ships `databricks aitools`, which installs most skills) - AI coding environment (one or more): - [Claude Code](https://claude.ai/code) - [Cursor](https://cursor.com) @@ -197,6 +197,37 @@ irm https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/main/insta **Next steps:** Respond to interactive prompts and follow the on-screen instructions. - Note: Cursor and Copilot require updating settings manually after install. 
+### Where Skills Come From + +The installer assembles skills from four sources: + +| Source | Skills | Mechanism | +|--------|--------|-----------| +| [databricks/databricks-agent-skills](https://github.com/databricks/databricks-agent-skills) | Most Databricks skills (jobs, pipelines, DABs, SQL, Unity Catalog, apps, …) | Delegated to `databricks aitools install` — requires **Databricks CLI v1.0.0+** | +| This repo | `databricks-genie` | Bundled copy | +| [mlflow/skills](https://github.com/mlflow/skills) | 8 MLflow skills | Fetched from `main` (override with `MLFLOW_REF`) | +| [databricks-solutions/apx](https://github.com/databricks-solutions/apx) | `databricks-app-apx` | Fetched from the latest stable tag (override with `APX_REF`) | + +Skills installed via `databricks aitools` are managed by the CLI afterwards — update them with `databricks aitools update` and remove them with `databricks aitools uninstall`. For tools the CLI can't target yet (Gemini CLI, Windsurf, Kiro), the installer links the same skills into each tool's skills directory. + +Use `--list-skills` to see every skill and profile, and `--dry-run` to preview exactly what an install would do (resolved refs and the `aitools` command) without changing anything. + +> **Breaking change:** skills now use the `databricks-agent-skills` names. `databricks-bundles` → `databricks-dabs`, `databricks-spark-declarative-pipelines` → `databricks-pipelines`; `databricks-config` is replaced by `databricks-core`, and `databricks-lakebase-autoscale`/`databricks-lakebase-provisioned` by `databricks-lakebase`. Explicit `--skills` requests for old names are migrated with a warning. + +<details>
+<summary>Installer environment variables (click to expand)</summary> + +| Variable | Default | Purpose | +|----------|---------|---------| +| `APX_REF` | `latest` | Ref for the APX skill fetch: `latest` (highest stable tag), a tag/SHA, or `main` | +| `MLFLOW_REF` | `main` | Ref for the MLflow skills fetch (the repo is tagless) | +| `SKILLS_CHANNEL` | `stable` | Set to `dev` to make unset raw-fetch refs follow `main` | +| `INCLUDE_PRERELEASES` | `0` | Set to `1` to allow `-rc`/`-beta` tags when resolving `latest` | +| `DRY_RUN` | `false` | Set to `1` to print the install plan and exit | + +The installer also records what it installed (resolved refs, commit SHAs, `aitools` release) in `skills.lock` inside the scope-local `.ai-dev-kit/` state directory. + +</details>
### Visual Builder App diff --git a/install.ps1 b/install.ps1 index 8635c42a..9e8f0ecb 100644 --- a/install.ps1 +++ b/install.ps1 @@ -59,6 +59,8 @@ $McpEntry = Join-Path $RepoDir "databricks-mcp-server\run_server.py" # Minimum required versions $MinCliVersion = "0.278.0" $MinSdkVersion = "0.85.0" +# Agent skills are delegated to `databricks aitools`, which ships with CLI v1.0.0+ +$MinAitoolsCliVersion = "1.0.0" # ─── Defaults ───────────────────────────────────────────────── $script:Profile_ = "DEFAULT" @@ -77,41 +79,76 @@ $script:SkillsProfile = "" $script:UserSkills = "" $script:ListSkills = $false $script:Channel = if ($env:DEVKIT_CHANNEL) { $env:DEVKIT_CHANNEL } else { "stable" } # stable or experimental +$script:DryRun = ($env:DRY_RUN -in @("true", "1")) -# Databricks skills (bundled in repo) -$script:Skills = @( - "databricks-agent-bricks", "databricks-aibi-dashboards", "databricks-apps-python", - "databricks-bundles", "databricks-config", "databricks-dbsql", "databricks-docs", "databricks-genie", - "databricks-iceberg", "databricks-jobs", "databricks-lakebase-autoscale", "databricks-lakebase-provisioned", - "databricks-metric-views", "databricks-mlflow-evaluation", "databricks-model-serving", "databricks-ai-functions", - "databricks-python-sdk", "databricks-spark-declarative-pipelines", "databricks-spark-structured-streaming", - "databricks-synthetic-data-gen", "databricks-unity-catalog", "databricks-unstructured-pdf-generation", - "databricks-vector-search", "databricks-zerobus-ingest", "spark-python-data-source" -) +# Raw-fetch ref overrides (see Resolve-Ref). SKILLS_CHANNEL=dev flips unset +# refs to `main` for living-at-head testing. 
+$script:SkillsChannel = if ($env:SKILLS_CHANNEL) { $env:SKILLS_CHANNEL } else { "stable" } +$script:ApxRef = if ($env:APX_REF) { $env:APX_REF } elseif ($script:SkillsChannel -eq "dev") { "main" } else { "latest" } +$script:MlflowRef = if ($env:MLFLOW_REF) { $env:MLFLOW_REF } else { "main" } # mlflow/skills is tagless -- main is intentional +$script:IncludePrereleases = ($env:INCLUDE_PRERELEASES -in @("true", "1")) + +# Databricks skills bundled in this repo (everything else moved to databricks/databricks-agent-skills) +$script:LocalSkills = @("databricks-genie") -# MLflow skills (fetched from mlflow/skills repo) +# MLflow skills (fetched from mlflow/skills repo; MLFLOW_REF defaults to main -- the repo is tagless) $script:MlflowSkills = @( "agent-evaluation", "analyze-mlflow-chat-session", "analyze-mlflow-trace", "instrumenting-with-mlflow-tracing", "mlflow-onboarding", "querying-mlflow-metrics", "retrieving-mlflow-traces", "searching-mlflow-docs" ) -$MlflowRawUrl = "https://raw.githubusercontent.com/mlflow/skills/main" +$MlflowBaseUrl = "https://raw.githubusercontent.com/mlflow/skills" -# APX skills (fetched from databricks-solutions/apx repo) +# APX skills (fetched from databricks-solutions/apx repo @ latest stable tag, see Resolve-Ref / APX_REF) $script:ApxSkills = @("databricks-app-apx") -$ApxRawUrl = "https://raw.githubusercontent.com/databricks-solutions/apx/main/skills/apx" - -# Agent skills (fetched from databricks/databricks-agent-skills repo) -$script:AgentSkills = @("databricks-core:databricks", "databricks-apps", "databricks-lakebase") -$AgentSkillsRawUrl = "https://raw.githubusercontent.com/databricks/databricks-agent-skills/main/skills" -$AgentSkillsApiUrl = "https://api.github.com/repos/databricks/databricks-agent-skills/git/trees/main?recursive=1" +$ApxBaseUrl = "https://raw.githubusercontent.com/databricks-solutions/apx" + +# Agent skills (from databricks/databricks-agent-skills, installed and managed by +# `databricks aitools`, which ships with 
the Databricks CLI v1.0.0+). +# The live inventory is discovered at runtime via `databricks aitools list -o json` +# (see Get-AgentBInventory); these lists are the fallback snapshot (v0.2.3). +$script:AgentBStableFallback = @( + "databricks-apps", "databricks-core", "databricks-dabs", "databricks-jobs", + "databricks-lakebase", "databricks-model-serving", "databricks-pipelines", + "databricks-serverless-migration", "databricks-vector-search" +) +$script:AgentBExperimentalFallback = @( + "databricks-agent-bricks", "databricks-ai-functions", "databricks-aibi-dashboards", + "databricks-apps-python", "databricks-dbsql", "databricks-docs", + "databricks-execution-compute", "databricks-iceberg", "databricks-lakeflow-connect", + "databricks-metric-views", "databricks-mlflow-evaluation", "databricks-python-sdk", + "databricks-spark-structured-streaming", "databricks-synthetic-data-gen", + "databricks-unity-catalog", "databricks-unstructured-pdf-generation", + "databricks-zerobus-ingest", "spark-python-data-source" +) +# Skills never installed by default (excluded from "all" and profile selections; +# still installable via an explicit --skills request) +$script:AgentBExcluded = @("databricks-execution-compute") +# Populated by Get-AgentBInventory (live or fallback) +$script:AgentBStable = @() +$script:AgentBExperimental = @() +$script:AgentBRelease = "" + +# Old skill names -> new names (breaking rename when sourcing moved to +# databricks-agent-skills). Explicit requests for old names are migrated with a warning. 
+$script:RenamedSkills = @{ + "databricks-bundles" = "databricks-dabs" + "databricks-spark-declarative-pipelines" = "databricks-pipelines" + "databricks-config" = "databricks-core" + "databricks" = "databricks-core" + "databricks-lakebase-autoscale" = "databricks-lakebase" + "databricks-lakebase-provisioned" = "databricks-lakebase" +} # ─── Skill profiles ────────────────────────────────────────── -$script:CoreSkills = @("databricks-config", "databricks-docs", "databricks-python-sdk", "databricks-unity-catalog") +# Core skills always installed regardless of profile selection (all from databricks-agent-skills) +$script:CoreSkills = @("databricks-core", "databricks-docs", "databricks-python-sdk", "databricks-unity-catalog") +# Profile definitions (non-core skills only -- core skills are always added). +# Names may come from any source; Resolve-Skills buckets them. $script:ProfileDataEngineer = @( - "databricks-spark-declarative-pipelines", "databricks-spark-structured-streaming", - "databricks-jobs", "databricks-bundles", "databricks-dbsql", "databricks-iceberg", + "databricks-pipelines", "databricks-spark-structured-streaming", "databricks-jobs", + "databricks-dabs", "databricks-dbsql", "databricks-iceberg", "databricks-lakeflow-connect", "databricks-zerobus-ingest", "spark-python-data-source", "databricks-metric-views", "databricks-synthetic-data-gen" ) @@ -119,8 +156,8 @@ $script:ProfileAnalyst = @( "databricks-aibi-dashboards", "databricks-dbsql", "databricks-genie", "databricks-metric-views" ) $script:ProfileAiMlEngineer = @( - "databricks-agent-bricks", "databricks-vector-search", "databricks-model-serving", - "databricks-genie", "databricks-ai-functions", "databricks-unstructured-pdf-generation", + "databricks-agent-bricks", "databricks-ai-functions", "databricks-vector-search", + "databricks-model-serving", "databricks-genie", "databricks-unstructured-pdf-generation", "databricks-mlflow-evaluation", "databricks-synthetic-data-gen", "databricks-jobs" ) 
$script:ProfileAiMlMlflow = @( @@ -129,29 +166,55 @@ $script:ProfileAiMlMlflow = @( "retrieving-mlflow-traces", "searching-mlflow-docs" ) $script:ProfileAppDeveloper = @( - "databricks-apps-python", "databricks-app-apx", "databricks-lakebase-autoscale", - "databricks-lakebase-provisioned", "databricks-model-serving", "databricks-dbsql", - "databricks-jobs", "databricks-bundles" + "databricks-apps", "databricks-apps-python", "databricks-app-apx", "databricks-lakebase", + "databricks-model-serving", "databricks-dbsql", "databricks-jobs", "databricks-dabs" ) -$script:ProfileAppDeveloperAgent = @("databricks-core:databricks", "databricks-apps", "databricks-lakebase") # Selected skills (populated during profile selection) -$script:SelectedSkills = @() +$script:SelectedLocalSkills = @() $script:SelectedMlflowSkills = @() $script:SelectedApxSkills = @() -$script:SelectedAgentSkills = @() +$script:SelectedAgentBSkills = @() + +# Resolved raw-fetch refs (populated by Resolve-FetchRefs) +$script:MlflowResolvedRef = "" +$script:ApxResolvedRef = "" + +# aitools agent mapping (populated by Resolve-AitoolsAgents) +$script:AitoolsAgents = "" +$script:UnsupportedAgentTools = @() # ─── --list-skills handler ──────────────────────────────────── -if ($script:ListSkills) { +# (function -- needs Get-AgentBInventory; invoked from Invoke-Main) + +# Number of skills the "all" profile installs (excluded agent skills omitted) +function Get-AllSkillsCount { + $n = $script:LocalSkills.Count + $script:MlflowSkills.Count + $script:ApxSkills.Count + + $script:AgentBStable.Count + $script:AgentBExperimental.Count + foreach ($skill in $script:AgentBExcluded) { + if (($script:AgentBStable -contains $skill) -or ($script:AgentBExperimental -contains $skill)) { $n-- } + } + return $n +} + +function Show-SkillsList { + Get-AgentBInventory + + $allCount = Get-AllSkillsCount + $deCount = $script:CoreSkills.Count + $script:ProfileDataEngineer.Count + $anCount = $script:CoreSkills.Count + 
$script:ProfileAnalyst.Count + $aiCount = $script:CoreSkills.Count + $script:ProfileAiMlEngineer.Count + $script:ProfileAiMlMlflow.Count + $apCount = $script:CoreSkills.Count + $script:ProfileAppDeveloper.Count + Write-Host "" Write-Host "Available Skill Profiles" -ForegroundColor White Write-Host "--------------------------------" Write-Host "" - Write-Host " all " -ForegroundColor White -NoNewline; Write-Host "All 34 skills (default)" - Write-Host " data-engineer " -ForegroundColor White -NoNewline; Write-Host "Pipelines, Spark, Jobs, Streaming (14 skills)" - Write-Host " analyst " -ForegroundColor White -NoNewline; Write-Host "Dashboards, SQL, Genie, Metrics (8 skills)" - Write-Host " ai-ml-engineer " -ForegroundColor White -NoNewline; Write-Host "Agents, RAG, Vector Search, MLflow (17 skills)" - Write-Host " app-developer " -ForegroundColor White -NoNewline; Write-Host "Apps, Lakebase, Deployment (10 skills)" + Write-Host " all " -ForegroundColor White -NoNewline; Write-Host "All $allCount skills (default)" + Write-Host " data-engineer " -ForegroundColor White -NoNewline; Write-Host "Pipelines, Spark, Jobs, Streaming ($deCount skills)" + Write-Host " analyst " -ForegroundColor White -NoNewline; Write-Host "Dashboards, SQL, Genie, Metrics ($anCount skills)" + Write-Host " ai-ml-engineer " -ForegroundColor White -NoNewline; Write-Host "Agents, RAG, Vector Search, MLflow ($aiCount skills)" + Write-Host " app-developer " -ForegroundColor White -NoNewline; Write-Host "Apps, Lakebase, Deployment ($apCount skills)" Write-Host "" Write-Host "Core Skills (always installed)" -ForegroundColor White Write-Host "--------------------------------" @@ -175,22 +238,34 @@ if ($script:ListSkills) { Write-Host "--------------------------------" foreach ($s in $script:ProfileAppDeveloper) { Write-Host " $s" } Write-Host "" - Write-Host "MLflow Skills (from mlflow/skills repo)" -ForegroundColor White + Write-Host "Bundled Skills (from this repo)" -ForegroundColor White + Write-Host 
"--------------------------------" + foreach ($s in $script:LocalSkills) { Write-Host " $s" } + Write-Host "" + Write-Host "MLflow Skills (from mlflow/skills repo @ $($script:MlflowRef))" -ForegroundColor White Write-Host "--------------------------------" foreach ($s in $script:MlflowSkills) { Write-Host " $s" } Write-Host "" - Write-Host "APX Skills (from databricks-solutions/apx repo)" -ForegroundColor White + Write-Host "APX Skills (from databricks-solutions/apx repo @ $($script:ApxRef))" -ForegroundColor White Write-Host "--------------------------------" foreach ($s in $script:ApxSkills) { Write-Host " $s" } Write-Host "" - Write-Host "Agent Skills (from databricks/databricks-agent-skills repo)" -ForegroundColor White + $releaseSuffix = if ($script:AgentBRelease) { " @ $($script:AgentBRelease)" } else { "" } + Write-Host "Agent Skills (from databricks/databricks-agent-skills$releaseSuffix -- managed by databricks aitools)" -ForegroundColor White Write-Host "--------------------------------" - foreach ($s in $script:AgentSkills) { Write-Host " $($s -replace '^.*:', '')" } + foreach ($s in $script:AgentBStable) { Write-Host " $s" } + Write-Host " experimental:" -ForegroundColor DarkGray + foreach ($s in $script:AgentBExperimental) { + if ($script:AgentBExcluded -contains $s) { + Write-Host " $s (excluded by default -- request explicitly via --skills)" -ForegroundColor DarkGray + } else { + Write-Host " $s" + } + } Write-Host "" Write-Host "Usage: .\install.ps1 --skills-profile data-engineer,ai-ml-engineer" -ForegroundColor DarkGray Write-Host " .\install.ps1 --skills databricks-jobs,databricks-dbsql" -ForegroundColor DarkGray Write-Host "" - return } # ─── Ensure tools are in PATH ──────────────────────────────── @@ -232,6 +307,7 @@ while ($i -lt $args.Count) { { $_ -in "--skills", "-Skills" } { $script:UserSkills = $args[$i + 1]; $i += 2 } { $_ -in "--list-skills", "-ListSkills" } { $script:ListSkills = $true; $i++ } { $_ -in "--experimental", "-Experimental" 
} { $script:Channel = "experimental"; $i++ } + { $_ -in "--dry-run", "-DryRun" } { $script:DryRun = $true; $i++ } { $_ -in "-f", "--force", "-Force" } { $script:Force = $true; $i++ } { $_ -in "-h", "--help", "-Help" } { Write-Host "Databricks AI Dev Kit Installer (Windows)" @@ -251,6 +327,7 @@ while ($i -lt $args.Count) { Write-Host " --skills LIST Comma-separated skill names to install (overrides profile)" Write-Host " --list-skills List available skills and profiles, then exit" Write-Host " --experimental Install from experimental branch (early access features)" + Write-Host " --dry-run Print what would be installed (resolved refs, aitools command) and exit" Write-Host " -f, --force Force reinstall" Write-Host " -h, --help Show this help" Write-Host "" @@ -258,6 +335,19 @@ while ($i -lt $args.Count) { Write-Host " AIDEVKIT_BRANCH Branch or tag to install (default: latest release)" Write-Host " AIDEVKIT_HOME Installation directory (default: ~/.ai-dev-kit)" Write-Host " DEVKIT_CHANNEL 'stable' (default) or 'experimental'" + Write-Host " APX_REF Ref for APX skill fetch: 'latest' (default), a tag/SHA, or 'main'" + Write-Host " MLFLOW_REF Ref for MLflow skills fetch (default: main)" + Write-Host " SKILLS_CHANNEL 'stable' (default) or 'dev' (unset raw-fetch refs follow main)" + Write-Host " INCLUDE_PRERELEASES Set to '1' to allow -rc/-beta tags when resolving 'latest'" + Write-Host " DRY_RUN Set to '1' to print the install plan and exit" + Write-Host "" + Write-Host "Notes:" + Write-Host " Most Databricks skills are installed via 'databricks aitools' (Databricks CLI v1.0.0+)" + Write-Host " and are updated/uninstalled with 'databricks aitools update|uninstall', not this script." + Write-Host " Renamed skills: databricks-bundles -> databricks-dabs," + Write-Host " databricks-spark-declarative-pipelines -> databricks-pipelines." 
+ Write-Host " Replaced skills: databricks-config -> databricks-core," + Write-Host " databricks-lakebase-autoscale/provisioned -> databricks-lakebase." Write-Host "" Write-Host "Examples:" Write-Host " # Basic installation" @@ -868,77 +958,97 @@ function Install-McpServer { } # ─── Skill profile selection ────────────────────────────────── + +# Bucket one skill name into its source (returns "local"/"mlflow"/"apx"/"agentb", or "" for unknown) +function Get-SkillBucket { + param([string]$Name) + if ($script:LocalSkills -contains $Name) { return "local" } + if ($script:MlflowSkills -contains $Name) { return "mlflow" } + if ($script:ApxSkills -contains $Name) { return "apx" } + if (($script:AgentBStable -contains $Name) -or ($script:AgentBExperimental -contains $Name)) { return "agentb" } + return "" +} + +# Resolve selected skills from profile names or explicit skill list, +# bucketing each name into its source (local repo / mlflow / apx / agent-skills). function Resolve-Skills { - # Priority 1: Explicit --skills flag + Get-AgentBInventory + + $localSkills = @() + $mlflowSkills = @() + $apxSkills = @() + $agentBSkills = @() + + # Agent skills selected by default: everything except the excluded list + $defaultAgentB = @() + foreach ($skill in ($script:AgentBStable + $script:AgentBExperimental)) { + if ($script:AgentBExcluded -contains $skill) { continue } + $defaultAgentB += $skill + } + + # Priority 1: Explicit --skills flag (comma-separated skill names) if (-not [string]::IsNullOrWhiteSpace($script:UserSkills)) { - $userList = $script:UserSkills -split ',' - $dbSkills = @() + $script:CoreSkills - $mlflowSkills = @() - $apxSkills = @() - $agentSkills = @() - foreach ($skill in $userList) { + foreach ($skill in ($script:UserSkills -split ',')) { $skill = $skill.Trim() - if ($script:MlflowSkills -contains $skill) { - $mlflowSkills += $skill - } elseif ($script:ApxSkills -contains $skill) { - $apxSkills += $skill - } elseif ($script:AgentSkills | ForEach-Object { $_ 
-replace '^.*:', '' } | Where-Object { $_ -eq $skill }) { - $agentSkills += ($script:AgentSkills | Where-Object { ($_ -replace '^.*:', '') -eq $skill }) - } else { - $dbSkills += $skill + if ([string]::IsNullOrWhiteSpace($skill)) { continue } + $bucket = Get-SkillBucket -Name $skill + if (-not $bucket -and $script:RenamedSkills.ContainsKey($skill)) { + $newName = $script:RenamedSkills[$skill] + Write-Warn "Skill '$skill' was renamed/replaced by '$newName' -- installing '$newName'" + $skill = $newName + $bucket = Get-SkillBucket -Name $skill + } + switch ($bucket) { + "local" { $localSkills += $skill } + "mlflow" { $mlflowSkills += $skill } + "apx" { $apxSkills += $skill } + "agentb" { $agentBSkills += $skill } + default { Write-Err "Unknown skill: '$skill' (run with --list-skills to see available skills)" } } } - $script:SelectedSkills = $dbSkills | Select-Object -Unique - $script:SelectedMlflowSkills = $mlflowSkills | Select-Object -Unique - $script:SelectedApxSkills = $apxSkills | Select-Object -Unique - $script:SelectedAgentSkills = $agentSkills | Select-Object -Unique + $script:SelectedLocalSkills = @($localSkills | Select-Object -Unique) + $script:SelectedMlflowSkills = @($mlflowSkills | Select-Object -Unique) + $script:SelectedApxSkills = @($apxSkills | Select-Object -Unique) + $script:SelectedAgentBSkills = @($agentBSkills | Select-Object -Unique) return } # Priority 2: --skills-profile flag or interactive selection - if ([string]::IsNullOrWhiteSpace($script:SkillsProfile) -or $script:SkillsProfile -eq "all") { - $script:SelectedSkills = $script:Skills - $script:SelectedMlflowSkills = $script:MlflowSkills - $script:SelectedApxSkills = $script:ApxSkills - $script:SelectedAgentSkills = $script:AgentSkills + if ([string]::IsNullOrWhiteSpace($script:SkillsProfile) -or $script:SkillsProfile -eq "all" -or ($script:SkillsProfile -split ',' | ForEach-Object { $_.Trim() }) -contains "all") { + $script:SelectedLocalSkills = @($script:LocalSkills) + 
$script:SelectedMlflowSkills = @($script:MlflowSkills) + $script:SelectedApxSkills = @($script:ApxSkills) + $script:SelectedAgentBSkills = @($defaultAgentB) return } - # Build union of selected profiles - $dbSkills = @() + $script:CoreSkills - $mlflowSkills = @() - $apxSkills = @() - $agentSkills = @() - + # Build union of selected profiles (comma-separated, flat name lists bucketed per name) + $names = @() + $script:CoreSkills foreach ($profile in ($script:SkillsProfile -split ',')) { $profile = $profile.Trim() switch ($profile) { - "all" { - $script:SelectedSkills = $script:Skills - $script:SelectedMlflowSkills = $script:MlflowSkills - $script:SelectedApxSkills = $script:ApxSkills - $script:SelectedAgentSkills = $script:AgentSkills - return - } - "data-engineer" { $dbSkills += $script:ProfileDataEngineer } - "analyst" { $dbSkills += $script:ProfileAnalyst } - "ai-ml-engineer" { - $dbSkills += $script:ProfileAiMlEngineer - $mlflowSkills += $script:ProfileAiMlMlflow - } - "app-developer" { - $dbSkills += $script:ProfileAppDeveloper - $apxSkills += $script:ApxSkills - $agentSkills += $script:ProfileAppDeveloperAgent - } - default { Write-Warn "Unknown skill profile: $profile (ignored)" } + "data-engineer" { $names += $script:ProfileDataEngineer } + "analyst" { $names += $script:ProfileAnalyst } + "ai-ml-engineer" { $names += $script:ProfileAiMlEngineer + $script:ProfileAiMlMlflow } + "app-developer" { $names += $script:ProfileAppDeveloper } + default { Write-Warn "Unknown skill profile: $profile (ignored)" } + } + } + + foreach ($skill in $names) { + switch (Get-SkillBucket -Name $skill) { + "local" { $localSkills += $skill } + "mlflow" { $mlflowSkills += $skill } + "apx" { $apxSkills += $skill } + "agentb" { $agentBSkills += $skill } + default { Write-Warn "Skill '$skill' not found in any source (skipped)" } } } - $script:SelectedSkills = $dbSkills | Select-Object -Unique - $script:SelectedMlflowSkills = $mlflowSkills | Select-Object -Unique - 
$script:SelectedApxSkills = $apxSkills | Select-Object -Unique - $script:SelectedAgentSkills = $agentSkills | Select-Object -Unique + $script:SelectedLocalSkills = @($localSkills | Select-Object -Unique) + $script:SelectedMlflowSkills = @($mlflowSkills | Select-Object -Unique) + $script:SelectedApxSkills = @($apxSkills | Select-Object -Unique) + $script:SelectedAgentBSkills = @($agentBSkills | Select-Object -Unique) } function Invoke-PromptSkillsProfile { @@ -975,9 +1085,14 @@ function Invoke-PromptSkillsProfile { Write-Host " Select skill profile(s)" -ForegroundColor White # Custom checkbox with mutual exclusion: "All" deselects others, others deselect "All" + $allCount = Get-AllSkillsCount + $deCount = $script:CoreSkills.Count + $script:ProfileDataEngineer.Count + $anCount = $script:CoreSkills.Count + $script:ProfileAnalyst.Count + $aiCount = $script:CoreSkills.Count + $script:ProfileAiMlEngineer.Count + $script:ProfileAiMlMlflow.Count + $apCount = $script:CoreSkills.Count + $script:ProfileAppDeveloper.Count $pLabels = @("All Skills", "Data Engineer", "Business Analyst", "AI/ML Engineer", "App Developer", "Custom") $pValues = @("all", "data-engineer", "analyst", "ai-ml-engineer", "app-developer", "custom") - $pHints = @("Install everything (34 skills)", "Pipelines, Spark, Jobs, Streaming (14 skills)", "Dashboards, SQL, Genie, Metrics (8 skills)", "Agents, RAG, Vector Search, MLflow (17 skills)", "Apps, Lakebase, Deployment (10 skills)", "Pick individual skills") + $pHints = @("Install everything ($allCount skills)", "Pipelines, Spark, Jobs, Streaming ($deCount skills)", "Dashboards, SQL, Genie, Metrics ($anCount skills)", "Agents, RAG, Vector Search, MLflow ($aiCount skills)", "Apps, Lakebase, Deployment ($apCount skills)", "Pick individual skills") $pStates = @($true, $false, $false, $false, $false, $false) $pCount = 6 $pCursor = 0 @@ -1123,32 +1238,35 @@ function Invoke-PromptCustomSkills { param([string]$PreselectedProfiles) # Build pre-selection set from any 
profiles that were also checked - $preselected = @() + # (core skills start pre-selected -- they are recommended for every profile) + $preselected = @() + $script:CoreSkills foreach ($profile in ($PreselectedProfiles -split ' ')) { switch ($profile) { "data-engineer" { $preselected += $script:ProfileDataEngineer } "analyst" { $preselected += $script:ProfileAnalyst } "ai-ml-engineer" { $preselected += $script:ProfileAiMlEngineer + $script:ProfileAiMlMlflow } - "app-developer" { $preselected += $script:ProfileAppDeveloper + $script:ApxSkills + $script:ProfileAppDeveloperAgent } + "app-developer" { $preselected += $script:ProfileAppDeveloper } } } - # Normalize "source:install-name" entries (e.g. "databricks-core:databricks") to install-name only, - # so `-contains` exact-equality checks below match against the same names used in the menu. - $preselected = @($preselected | ForEach-Object { $_ -replace '^[^:]+:', '' }) Write-Host "" Write-Host " Select individual skills" -ForegroundColor White - Write-Host " Core skills (config, docs, python-sdk, unity-catalog) are always installed" -ForegroundColor DarkGray + Write-Host " Core skills (core, docs, python-sdk, unity-catalog) are recommended for all profiles" -ForegroundColor DarkGray $items = @( - @{ Label = "Spark Pipelines"; Value = "databricks-spark-declarative-pipelines"; State = ($preselected -contains "databricks-spark-declarative-pipelines"); Hint = "SDP/LDP, CDC, SCD Type 2" } - @{ Label = "Streaming"; Value = "databricks-spark-structured-streaming"; State = ($preselected -contains "databricks-spark-structured-streaming"); Hint = "Real-time streaming" } + @{ Label = "Core"; Value = "databricks-core"; State = ($preselected -contains "databricks-core"); Hint = "CLI auth, data exploration" } + @{ Label = "Docs"; Value = "databricks-docs"; State = ($preselected -contains "databricks-docs"); Hint = "Databricks documentation" } + @{ Label = "Python SDK"; Value = "databricks-python-sdk"; State = ($preselected -contains 
"databricks-python-sdk"); Hint = "SDK, Connect, REST API" } + @{ Label = "Unity Catalog"; Value = "databricks-unity-catalog"; State = ($preselected -contains "databricks-unity-catalog"); Hint = "System tables, volumes" } + @{ Label = "Spark Pipelines"; Value = "databricks-pipelines"; State = ($preselected -contains "databricks-pipelines"); Hint = "SDP/LDP, CDC, SCD Type 2" } + @{ Label = "Structured Streaming"; Value = "databricks-spark-structured-streaming"; State = ($preselected -contains "databricks-spark-structured-streaming"); Hint = "Real-time streaming" } @{ Label = "Jobs & Workflows"; Value = "databricks-jobs"; State = ($preselected -contains "databricks-jobs"); Hint = "Multi-task orchestration" } - @{ Label = "Asset Bundles"; Value = "databricks-bundles"; State = ($preselected -contains "databricks-bundles"); Hint = "DABs deployment" } + @{ Label = "Asset Bundles"; Value = "databricks-dabs"; State = ($preselected -contains "databricks-dabs"); Hint = "DABs deployment" } @{ Label = "Databricks SQL"; Value = "databricks-dbsql"; State = ($preselected -contains "databricks-dbsql"); Hint = "SQL warehouse queries" } @{ Label = "Iceberg"; Value = "databricks-iceberg"; State = ($preselected -contains "databricks-iceberg"); Hint = "Apache Iceberg tables" } + @{ Label = "Lakeflow Connect"; Value = "databricks-lakeflow-connect"; State = ($preselected -contains "databricks-lakeflow-connect"); Hint = "Managed ingestion connectors" } @{ Label = "Zerobus Ingest"; Value = "databricks-zerobus-ingest"; State = ($preselected -contains "databricks-zerobus-ingest"); Hint = "Streaming ingestion" } - @{ Label = "Python Data Src"; Value = "spark-python-data-source"; State = ($preselected -contains "spark-python-data-source"); Hint = "Custom Spark data sources" } + @{ Label = "Python Data Source"; Value = "spark-python-data-source"; State = ($preselected -contains "spark-python-data-source"); Hint = "Custom Spark data sources" } @{ Label = "Metric Views"; Value = 
"databricks-metric-views"; State = ($preselected -contains "databricks-metric-views"); Hint = "Metric definitions" } @{ Label = "AI/BI Dashboards"; Value = "databricks-aibi-dashboards"; State = ($preselected -contains "databricks-aibi-dashboards"); Hint = "Dashboard creation" } @{ Label = "Genie"; Value = "databricks-genie"; State = ($preselected -contains "databricks-genie"); Hint = "Natural language SQL" } @@ -1156,22 +1274,20 @@ function Invoke-PromptCustomSkills { @{ Label = "Vector Search"; Value = "databricks-vector-search"; State = ($preselected -contains "databricks-vector-search"); Hint = "Similarity search" } @{ Label = "Model Serving"; Value = "databricks-model-serving"; State = ($preselected -contains "databricks-model-serving"); Hint = "Deploy models/agents" } @{ Label = "MLflow Evaluation"; Value = "databricks-mlflow-evaluation"; State = ($preselected -contains "databricks-mlflow-evaluation"); Hint = "Model evaluation" } - @{ Label = "AI Functions"; Value = "databricks-ai-functions"; State = ($preselected -contains "databricks-ai-functions"); Hint = "AI Functions, document parsing & RAG" } + @{ Label = "AI Functions"; Value = "databricks-ai-functions"; State = ($preselected -contains "databricks-ai-functions"); Hint = "AI Functions, document parsing & RAG" } @{ Label = "Unstructured PDF"; Value = "databricks-unstructured-pdf-generation"; State = ($preselected -contains "databricks-unstructured-pdf-generation"); Hint = "Synthetic PDFs for RAG" } @{ Label = "Synthetic Data"; Value = "databricks-synthetic-data-gen"; State = ($preselected -contains "databricks-synthetic-data-gen"); Hint = "Generate test data" } - @{ Label = "Lakebase Autoscale"; Value = "databricks-lakebase-autoscale"; State = ($preselected -contains "databricks-lakebase-autoscale"); Hint = "Managed PostgreSQL" } - @{ Label = "Lakebase Provisioned"; Value = "databricks-lakebase-provisioned"; State = ($preselected -contains "databricks-lakebase-provisioned"); Hint = "Provisioned 
PostgreSQL" } - @{ Label = "App (AppKit + Python)"; Value = "databricks-apps-python"; State = ($preselected -contains "databricks-apps-python"); Hint = "AppKit, Dash, Streamlit, Flask" } + @{ Label = "Lakebase"; Value = "databricks-lakebase"; State = ($preselected -contains "databricks-lakebase"); Hint = "Managed PostgreSQL (OLTP)" } + @{ Label = "Serverless Migration"; Value = "databricks-serverless-migration"; State = ($preselected -contains "databricks-serverless-migration"); Hint = "Migrate to serverless compute" } + @{ Label = "Apps"; Value = "databricks-apps"; State = ($preselected -contains "databricks-apps"); Hint = "AppKit + all frameworks" } + @{ Label = "App (AppKit + Python)"; Value = "databricks-apps-python"; State = ($preselected -contains "databricks-apps-python"); Hint = "AppKit, Dash, Streamlit, Flask" } @{ Label = "App APX"; Value = "databricks-app-apx"; State = ($preselected -contains "databricks-app-apx"); Hint = "FastAPI + React" } - @{ Label = "Agent: Databricks"; Value = "databricks"; State = ($preselected -contains "databricks"); Hint = "CLI auth, data exploration" } - @{ Label = "Agent: Apps"; Value = "databricks-apps"; State = ($preselected -contains "databricks-apps"); Hint = "AppKit + all frameworks" } - @{ Label = "Agent: Lakebase"; Value = "databricks-lakebase"; State = ($preselected -contains "databricks-lakebase"); Hint = "Lakebase OLTP" } @{ Label = "MLflow Onboarding"; Value = "mlflow-onboarding"; State = ($preselected -contains "mlflow-onboarding"); Hint = "Getting started" } @{ Label = "Agent Evaluation"; Value = "agent-evaluation"; State = ($preselected -contains "agent-evaluation"); Hint = "Evaluate AI agents" } @{ Label = "MLflow Tracing"; Value = "instrumenting-with-mlflow-tracing"; State = ($preselected -contains "instrumenting-with-mlflow-tracing"); Hint = "Instrument with tracing" } @{ Label = "Analyze Traces"; Value = "analyze-mlflow-trace"; State = ($preselected -contains "analyze-mlflow-trace"); Hint = "Analyze trace 
data" } @{ Label = "Retrieve Traces"; Value = "retrieving-mlflow-traces"; State = ($preselected -contains "retrieving-mlflow-traces"); Hint = "Search & retrieve traces" } - @{ Label = "Analyze Chat"; Value = "analyze-mlflow-chat-session"; State = ($preselected -contains "analyze-mlflow-chat-session"); Hint = "Chat session analysis" } + @{ Label = "Analyze Chat Session"; Value = "analyze-mlflow-chat-session"; State = ($preselected -contains "analyze-mlflow-chat-session"); Hint = "Chat session analysis" } @{ Label = "Query Metrics"; Value = "querying-mlflow-metrics"; State = ($preselected -contains "querying-mlflow-metrics"); Hint = "MLflow metrics queries" } @{ Label = "Search MLflow Docs"; Value = "searching-mlflow-docs"; State = ($preselected -contains "searching-mlflow-docs"); Hint = "MLflow documentation" } ) @@ -1180,6 +1296,491 @@ function Invoke-PromptCustomSkills { $script:UserSkills = ($selected -split ' ') -join ',' } +# ─── Agent skills (databricks/databricks-agent-skills via `databricks aitools`) ─── + +# Discover the live skill inventory from `databricks aitools list -o json`. +# Falls back to the hardcoded snapshot when the CLI is missing/old/offline. +# Idempotent -- only fetches once. 
+function Get-AgentBInventory {
+    # Fills $script:AgentBStable / $script:AgentBExperimental / $script:AgentBRelease
+    # from `databricks aitools list -o json`. When the CLI is absent, the call
+    # fails, or no stable skill names come back, the *Fallback script variables
+    # are used instead and the release is left empty.
+    # Takes no parameters and returns nothing; results live in script scope.
+
+    # Already populated -> nothing to do (this is what makes the call idempotent).
+    if ($script:AgentBStable.Count -gt 0) { return }
+
+    $inventory = $null
+    if (Get-Command databricks -ErrorAction SilentlyContinue) {
+        $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue"
+        try {
+            $raw = & databricks aitools list -o json 2>$null
+            if ($LASTEXITCODE -eq 0 -and $raw) {
+                # Native output arrives one line per element; re-join it into a
+                # single JSON document before parsing.
+                $inventory = (@($raw) -join "`n") | ConvertFrom-Json
+            }
+        } catch {
+            # Unparseable output is treated the same as "no inventory".
+            $inventory = $null
+        }
+        $ErrorActionPreference = $prevEAP
+    }
+
+    if ($inventory -and $inventory.skills) {
+        $script:AgentBRelease = if ($inventory.release) { [string]$inventory.release } else { "" }
+        $script:AgentBStable = @($inventory.skills | Where-Object { -not $_.experimental } | ForEach-Object { $_.name })
+        $script:AgentBExperimental = @($inventory.skills | Where-Object { $_.experimental } | ForEach-Object { $_.name })
+    }
+
+    if ($script:AgentBStable.Count -eq 0) {
+        $script:AgentBStable = @($script:AgentBStableFallback)
+        $script:AgentBExperimental = @($script:AgentBExperimentalFallback)
+        $script:AgentBRelease = ""
+    }
+}
+
+# Gate for `databricks aitools` (ships with the Databricks CLI v1.0.0+).
+# Interactive: offers to run the upgrade and re-checks in a loop.
+# Silent/non-interactive: errors out with instructions.
+# Returns $false if the user chose to skip agent skills.
+function Confirm-AitoolsCli {
+    # Returns $true once the detected CLI version is >= $MinAitoolsCliVersion.
+    # Returns $false when the user picks "s" or after more than 5 prompt rounds.
+    $attempts = 0
+    while ($true) {
+        $cliVersion = ""
+        if (Get-Command databricks -ErrorAction SilentlyContinue) {
+            try {
+                $cliOutput = & databricks --version 2>&1
+                # Flatten to one string first: with 2>&1 the output can be several
+                # lines, and -match on an array filters it WITHOUT updating
+                # $Matches, which would leave a stale version from an earlier pass.
+                if ("$cliOutput" -match '(\d+\.\d+\.\d+)') { $cliVersion = $Matches[1] }
+            } catch {}
+        }
+        if ($cliVersion -and ([version]$cliVersion -ge [version]$MinAitoolsCliVersion)) {
+            return $true
+        }
+
+        $foundMsg = if ($cliVersion) { "Databricks CLI v$cliVersion is too old." } else { "Databricks CLI not found." }
+
+        if ($script:Silent -or -not (Test-Interactive)) {
+            Write-Err "$foundMsg Agent skills are installed via 'databricks aitools', which requires Databricks CLI v$MinAitoolsCliVersion+. Upgrade: winget upgrade Databricks.DatabricksCLI (or winget install Databricks.DatabricksCLI). Then re-run this installer. (Or pass --skills with only non-agent skills to skip this requirement.)"
+        }
+
+        $attempts++
+        if ($attempts -gt 5) {
+            Write-Warn "Databricks CLI still not at v$MinAitoolsCliVersion+ after several attempts -- skipping agent skills"
+            return $false
+        }
+
+        Write-Warn "$foundMsg Agent skills are installed via 'databricks aitools', which requires Databricks CLI v$MinAitoolsCliVersion+."
+        Write-Msg "Upgrade command: winget upgrade Databricks.DatabricksCLI (or winget install Databricks.DatabricksCLI if not yet installed)"
+        Write-Host ""
+        $choice = Read-Prompt -PromptText "Upgrade the Databricks CLI now? (y = run upgrade, r = re-check, s = skip agent skills, a = abort)" -Default "y"
+        switch -Regex ($choice) {
+            '^(y|yes)$' {
+                if (Get-Command winget -ErrorAction SilentlyContinue) {
+                    $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue"
+                    if (Get-Command databricks -ErrorAction SilentlyContinue) {
+                        & winget upgrade Databricks.DatabricksCLI
+                    } else {
+                        & winget install Databricks.DatabricksCLI
+                    }
+                    if ($LASTEXITCODE -ne 0) { Write-Warn "CLI upgrade failed -- you can retry or skip" }
+                    $ErrorActionPreference = $prevEAP
+                } else {
+                    # Without this guard a missing winget leaves $LASTEXITCODE at
+                    # whatever the previous native command set, so the failure
+                    # would go unreported.
+                    Write-Warn "winget not found -- install or upgrade the Databricks CLI manually, then choose r to re-check"
+                }
+                # Refresh PATH so a newly installed CLI is found
+                $machinePath = [System.Environment]::GetEnvironmentVariable("Path", "Machine")
+                $userPath = [System.Environment]::GetEnvironmentVariable("Path", "User")
+                if ($machinePath -or $userPath) {
+                    $env:Path = "$machinePath;$userPath;$env:Path"
+                    $env:Path = (($env:Path -split ';' | Select-Object -Unique | Where-Object { $_ }) -join ';')
+                }
+            }
+            # "r" does nothing here; the enclosing loop re-runs the version check.
+            '^r$' { }
+            '^s$' { return $false }
+            '^a$' { Write-Err "Installation aborted (Databricks CLI v$MinAitoolsCliVersion+ required for agent skills)" }
+        }
+    }
+}
+
+# Map selected tools to `aitools --agents` tokens; tools aitools cannot
+# target (gemini, windsurf, kiro) are collected separately.
+function Resolve-AitoolsAgents {
+    # Reads the space-separated $script:Tools and writes:
+    #   $script:AitoolsAgents         - comma-joined agent tokens ("" when none)
+    #   $script:UnsupportedAgentTools - array of gemini/windsurf/kiro entries
+    # Tool names matching no case below are ignored.
+    $agents = @()
+    $unsupported = @()
+    foreach ($tool in ($script:Tools -split ' ')) {
+        switch ($tool) {
+            "claude" { $agents += "claude-code" }
+            "cursor" { $agents += "cursor" }
+            "copilot" { $agents += "copilot" }
+            "codex" { $agents += "codex" }
+            "opencode" { $agents += "opencode" }
+            "antigravity" { $agents += "antigravity" }
+            { $_ -in "gemini", "windsurf", "kiro" } { $unsupported += $tool }
+        }
+    }
+    $script:AitoolsAgents = ($agents -join ',')
+    $script:UnsupportedAgentTools = @($unsupported)
+}
+
+# Skills dirs for tools aitools can't target (deduped)
+function Get-UnsupportedSkillDirs {
+    # -BaseDir: root used for project-scope paths (and always for gemini).
+    # Returns an array of skills directories, one per tool in
+    # $script:UnsupportedAgentTools; windsurf and kiro switch to a
+    # $env:USERPROFILE location when $script:Scope is "global".
+    param([string]$BaseDir)
+    $dirs = @()
+    foreach ($tool in $script:UnsupportedAgentTools) {
+        switch ($tool) {
+            "gemini" { $dirs += Join-Path $BaseDir ".gemini\skills" }
+            "windsurf" {
+                if ($script:Scope -eq "global") {
+                    $dirs += Join-Path $env:USERPROFILE ".codeium\windsurf\skills"
+                } else {
+                    $dirs += Join-Path $BaseDir ".windsurf\skills"
+                }
+            }
+            "kiro" {
+                if ($script:Scope -eq "global") {
+                    $dirs += Join-Path $env:USERPROFILE ".kiro\skills"
+                } else {
+                    $dirs += Join-Path $BaseDir ".kiro\skills"
+                }
+            }
+        }
+    }
+    return @($dirs | Select-Object -Unique)
+}
+
+# True if any selected agent skill is experimental
+function Test-AgentBNeedsExperimental {
+    # Compares $script:SelectedAgentBSkills against $script:AgentBExperimental.
+    foreach ($skill in $script:SelectedAgentBSkills) {
+        if ($script:AgentBExperimental -contains $skill) { return $true }
+    }
+    return $false
+}
+
+# Install agent skills by delegating to `databricks aitools install`.
+# aitools owns these skills afterwards (list/update/uninstall) -- they are NOT
+# tracked in this installer's manifest, except for the symlinks/copies created
+# for tools aitools can't target.
+function Install-AgentBSkills {
+    # -BaseDir: project/home root, forwarded to Install-AgentBUnsupported.
+    # Flow: uninstall skills recorded by the previous run but no longer selected,
+    # gate on the CLI version, run `databricks aitools install` for the supported
+    # agents, deliver to unsupported tools, then record the selection in
+    # <StateDir>\.agent-b-skills for the next run.
+    param([string]$BaseDir)
+
+    $prevFile = Join-Path $script:StateDir ".agent-b-skills"
+    # Nothing selected now and nothing recorded earlier -> no work at all.
+    if ($script:SelectedAgentBSkills.Count -eq 0 -and -not (Test-Path $prevFile)) { return }
+
+    Write-Step "Installing agent skills (via databricks aitools)"
+
+    # Uninstall agent skills dropped since the previous run
+    if (Test-Path $prevFile) {
+        $dropped = @()
+        foreach ($line in (Get-Content $prevFile)) {
+            $prevSkill = "$line".Trim()
+            if ([string]::IsNullOrWhiteSpace($prevSkill)) { continue }
+            if ($script:SelectedAgentBSkills -notcontains $prevSkill) { $dropped += $prevSkill }
+        }
+        if ($dropped.Count -gt 0) {
+            $droppedCsv = $dropped -join ','
+            # Stays $false when the CLI is missing, so the user still gets the
+            # manual-cleanup hint instead of the skills being silently left behind.
+            $uninstallOk = $false
+            if (Get-Command databricks -ErrorAction SilentlyContinue) {
+                $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue"
+                & databricks aitools uninstall --scope $script:Scope --skills $droppedCsv 2>&1 | Out-Null
+                $uninstallOk = ($LASTEXITCODE -eq 0)
+                $ErrorActionPreference = $prevEAP
+            }
+            if ($uninstallOk) {
+                Write-Msg "Removed deselected agent skills: $droppedCsv"
+            } else {
+                # The hint carries the same --scope as the command attempted above.
+                Write-Warn "Could not remove deselected agent skills -- run: databricks aitools uninstall --scope $($script:Scope) --skills $droppedCsv"
+            }
+        }
+    }
+
+    if ($script:SelectedAgentBSkills.Count -eq 0) {
+        Remove-Item $prevFile -Force -ErrorAction SilentlyContinue
+        return
+    }
+
+    if (-not (Confirm-AitoolsCli)) {
+        Write-Warn "Agent skills skipped -- install later with: databricks aitools install"
+        return
+    }
+
+    Resolve-AitoolsAgents
+    $skillsCsv = $script:SelectedAgentBSkills -join ','
+    $needsExperimental = Test-AgentBNeedsExperimental
+    $count = $script:SelectedAgentBSkills.Count
+
+    # Only call aitools directly when at least one selected tool is one it can target.
+    if ($script:AitoolsAgents) {
+        Write-Msg "Delegating $count agent skills to databricks aitools (agents: $($script:AitoolsAgents))"
+        $aitoolsArgs = @("aitools", "install", "--scope", $script:Scope, "--agents", $script:AitoolsAgents, "--skills", $skillsCsv)
+        if ($needsExperimental) { $aitoolsArgs += "--experimental" }
+        $aitoolsArgs += @("-p", $script:Profile_)
+        $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue"
+        if ($script:Silent) {
+            & databricks @aitoolsArgs 2>&1 | Out-Null
+        } else {
+            & databricks @aitoolsArgs
+        }
+        $installOk = ($LASTEXITCODE -eq 0)
+        $ErrorActionPreference = $prevEAP
+        if (-not $installOk) {
+            if ($script:Silent) { Write-Err "databricks aitools install failed" }
+            Write-Warn "databricks aitools install failed -- agent skills not installed"
+            return
+        }
+        Write-Ok "Agent skills ($count) installed -- manage with databricks aitools list|update|uninstall"
+    }
+
+    # Tools aitools can't target: link/copy the skills from the canonical store
+    if ($script:UnsupportedAgentTools.Count -gt 0) {
+        Install-AgentBUnsupported -BaseDir $BaseDir -SkillsCsv $skillsCsv -NeedsExperimental $needsExperimental
+    }
+
+    # Record the selection so a future profile change can uninstall dropped skills
+    if (-not (Test-Path $script:StateDir)) {
+        New-Item -ItemType Directory -Path $script:StateDir -Force | Out-Null
+    }
+    Set-Content -Path $prevFile -Value ($script:SelectedAgentBSkills -join "`n") -Encoding UTF8
+}
+
+# Deliver agent skills to Gemini CLI / Windsurf / Kiro.
+# If aitools ran for at least one supported agent, symlink each skill from the
+# canonical store (kept fresh by `databricks aitools update`); symlink creation
+# can require elevated privileges on Windows, so fall back to copying. If no
+# supported agent was selected, stage a throwaway project-scope install in a
+# temp dir and copy real files from it.
+function Install-AgentBUnsupported {
+    # -BaseDir:           root for project-scope store and tool directories.
+    # -SkillsCsv:         comma-joined skill names passed to the staging install.
+    # -NeedsExperimental: adds --experimental to the staging install.
+    # Appends one "<dir>|<skill>" line per delivered skill to
+    # <StateDir>\.installed-skills. Returns nothing.
+    param([string]$BaseDir, [string]$SkillsCsv, [bool]$NeedsExperimental)
+
+    $manifest = Join-Path $script:StateDir ".installed-skills"
+    if (-not (Test-Path $script:StateDir)) {
+        New-Item -ItemType Directory -Path $script:StateDir -Force | Out-Null
+    }
+
+    $mode = "link"
+    $tmpDir = $null
+    if ($script:Scope -eq "global") {
+        $store = Join-Path $env:USERPROFILE ".databricks\aitools\skills"
+    } else {
+        $store = Join-Path $BaseDir ".databricks\aitools\skills"
+    }
+
+    # try/finally so the staging directory is removed on every exit path,
+    # including a terminating error raised while copying or linking.
+    try {
+        if (-not $script:AitoolsAgents) {
+            # No supported agent selected, so there is no canonical store to link
+            # to: stage a project-scope install in a temp dir and copy from it.
+            $mode = "copy"
+            $tmpDir = Join-Path ([System.IO.Path]::GetTempPath()) ("ai-dev-kit-aitools-" + [System.IO.Path]::GetRandomFileName())
+            New-Item -ItemType Directory -Path $tmpDir -Force | Out-Null
+            $stageArgs = @("aitools", "install", "--scope", "project", "--agents", "claude-code", "--skills", $SkillsCsv)
+            if ($NeedsExperimental) { $stageArgs += "--experimental" }
+            $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue"
+            Push-Location $tmpDir
+            try {
+                & databricks @stageArgs 2>&1 | Out-Null
+                $stageOk = ($LASTEXITCODE -eq 0)
+            } finally {
+                # Never leave the caller's working directory inside the temp dir.
+                Pop-Location
+                $ErrorActionPreference = $prevEAP
+            }
+            if (-not $stageOk) {
+                Write-Warn "Could not stage agent skills for: $($script:UnsupportedAgentTools -join ',')"
+                return
+            }
+            $store = Join-Path $tmpDir ".databricks\aitools\skills"
+        }
+
+        $count = $script:SelectedAgentBSkills.Count
+        foreach ($dir in (Get-UnsupportedSkillDirs -BaseDir $BaseDir)) {
+            if ([string]::IsNullOrWhiteSpace($dir)) { continue }
+            if (-not (Test-Path $dir)) {
+                New-Item -ItemType Directory -Path $dir -Force | Out-Null
+            }
+            # Reported per directory; flips to "copy" if any symlink attempt fails.
+            $usedMode = $mode
+            foreach ($skill in $script:SelectedAgentBSkills) {
+                $srcPath = Join-Path $store $skill
+                if (-not (Test-Path $srcPath)) {
+                    Write-Warn "Agent skill '$skill' missing from aitools store -- skipped"
+                    continue
+                }
+                # Remove real dirs and symlinks alike before re-creating
+                $destPath = Join-Path $dir $skill
+                $destItem = Get-Item -LiteralPath $destPath -Force -ErrorAction SilentlyContinue
+                if ($destItem) {
+                    if ($destItem.Attributes -band [System.IO.FileAttributes]::ReparsePoint) {
+                        # Delete the link itself rather than recursing through it.
+                        $destItem.Delete()
+                    } else {
+                        Remove-Item -LiteralPath $destPath -Recurse -Force
+                    }
+                }
+                if ($mode -eq "link") {
+                    # Project-scope dirs are all <BaseDir>\.<tool>\skills (2 levels deep),
+                    # so a relative link survives moving the project directory.
+                    $target = $srcPath
+                    if ($script:Scope -eq "project") { $target = "..\..\.databricks\aitools\skills\$skill" }
+                    try {
+                        New-Item -ItemType SymbolicLink -Path $destPath -Target $target -ErrorAction Stop | Out-Null
+                    } catch {
+                        # Symlinks may require Developer Mode / admin on Windows -- copy instead
+                        Copy-Item -Recurse $srcPath $destPath
+                        $usedMode = "copy"
+                    }
+                } else {
+                    Copy-Item -Recurse $srcPath $destPath
+                }
+                Add-Content -Path $manifest -Value "$dir|$skill" -Encoding UTF8
+            }
+            $shortDir = $dir -replace [regex]::Escape($env:USERPROFILE), '~'
+            Write-Ok "Agent skills ($count, $usedMode) -> $shortDir"
+        }
+    } finally {
+        if ($tmpDir) { Remove-Item -Recurse -Force $tmpDir -ErrorAction SilentlyContinue }
+    }
+}
+
+# ─── Raw-fetch ref resolution (apx, mlflow) ───────────────────
+
+# Resolve-Ref -Repo <owner/repo> -Requested <ref>
+# ""/"latest" -> highest stable semver tag (prereleases excluded unless
+# INCLUDE_PRERELEASES=1; falls back to main if no tags).
+# main/master -> passed through.
+# anything else -> verified to exist as a tag/branch/SHA (fails loud).
+# Uses `git ls-remote` (no API rate limits; git is a hard prerequisite).
+function Resolve-Ref {
+    # -Repo:      GitHub "owner/name" slug.
+    # -Requested: "", "latest", "main"/"master", or an explicit tag/branch/SHA.
+    # Returns the ref name to fetch from. Calls Write-Err when an explicit ref
+    # cannot be found as a tag, branch, or commit.
+    param([string]$Repo, [string]$Requested)
+
+    $gitUrl = "https://github.com/$Repo.git"
+    $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue"
+
+    if ([string]::IsNullOrWhiteSpace($Requested) -or $Requested -eq "latest") {
+        # ls-remote prints "<sha><TAB>refs/tags/<name>"; keep only <name>.
+        $tags = @(& git ls-remote --tags --refs $gitUrl 2>$null | ForEach-Object {
+            ("$_" -split "`t")[-1] -replace '^refs/tags/', ''
+        })
+        $ErrorActionPreference = $prevEAP
+        $pattern = '^v?\d+\.\d+\.\d+$'
+        if ($script:IncludePrereleases) { $pattern = '^v?\d+\.\d+\.\d+(-[A-Za-z0-9.]+)?$' }
+        # Sort keys, in order:
+        #   1. numeric version with any prerelease suffix stripped;
+        #   2. stable-over-prerelease, so 1.2.3 beats 1.2.3-rc1 instead of tying
+        #      with it ($false sorts before $true, and -Last 1 takes the end);
+        #   3. the tag text, so the pick is deterministic among prereleases.
+        $best = $tags | Where-Object { $_ -match $pattern } |
+            Sort-Object -Property { [version](($_ -replace '^v', '') -replace '-.*$', '') },
+                                  { $_ -notmatch '-' },
+                                  { $_ } |
+            Select-Object -Last 1
+        if ($best) { return $best }
+        Write-Warn "Could not resolve latest tag for $Repo -- falling back to main"
+        return "main"
+    }
+
+    # Default branch names are passed through without a remote lookup.
+    if ($Requested -in @("main", "master")) {
+        $ErrorActionPreference = $prevEAP
+        return $Requested
+    }
+
+    $found = & git ls-remote $gitUrl "refs/tags/$Requested" "refs/heads/$Requested" 2>$null
+    $ErrorActionPreference = $prevEAP
+    if ($found) { return $Requested }
+    try {
+        # bare commit SHA (not addressable via ls-remote)
+        Invoke-WebRequest -Uri "https://api.github.com/repos/$Repo/commits/$Requested" -UseBasicParsing -ErrorAction Stop | Out-Null
+        return $Requested
+    } catch {
+        Write-Err "Ref '$Requested' not found in $Repo"
+    }
+}
+
+# Resolve refs for all selected raw-fetch sources (records script vars for the
+# fetch URLs, summary, dry run, and lockfile)
+function Resolve-FetchRefs {
+    # Sets $script:MlflowResolvedRef and $script:ApxResolvedRef. Each lookup is
+    # skipped when no skill from that source is selected, so no remote call is
+    # made for it.
+    if ($script:SelectedMlflowSkills.Count -gt 0) {
+        $script:MlflowResolvedRef = Resolve-Ref -Repo "mlflow/skills" -Requested $script:MlflowRef
+    }
+    if ($script:SelectedApxSkills.Count -gt 0) {
+        $script:ApxResolvedRef = Resolve-Ref -Repo "databricks-solutions/apx" -Requested $script:ApxRef
+    }
+}
+
+# Best-effort commit SHA for a ref (empty on failure). Prefers the peeled
+# tag object (^{}) so annotated tags resolve to the commit they point at.
+function Get-GitHubSha { + param([string]$Repo, [string]$Ref) + + $sha = "" + $prevEAP = $ErrorActionPreference; $ErrorActionPreference = "Continue" + $lsOut = @(& git ls-remote "https://github.com/$Repo.git" "refs/tags/$Ref^{}" "refs/tags/$Ref" "refs/heads/$Ref" 2>$null) + $ErrorActionPreference = $prevEAP + if ($lsOut.Count -gt 0) { + $peeled = $lsOut | Where-Object { $_ -match '\^\{\}' } | Select-Object -First 1 + $line = if ($peeled) { $peeled } else { $lsOut[0] } + if ($line) { $sha = ("$line" -split "`t")[0] } + } + if (-not $sha) { + try { + $resp = Invoke-WebRequest -Uri "https://api.github.com/repos/$Repo/commits/$Ref" -UseBasicParsing -ErrorAction Stop + $sha = [string](($resp.Content | ConvertFrom-Json).sha) + } catch {} + } + return $sha +} + +# Record what was installed and from where (skills.lock in the scope-local state dir) +function Write-Lockfile { + if (-not (Test-Path $script:StateDir)) { + New-Item -ItemType Directory -Path $script:StateDir -Force | Out-Null + } + $lock = Join-Path $script:StateDir "skills.lock" + $now = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ") + $sources = [ordered]@{} + + if ($script:SelectedMlflowSkills.Count -gt 0) { + $sha = Get-GitHubSha -Repo "mlflow/skills" -Ref $script:MlflowResolvedRef + $sources["mlflow/skills"] = [ordered]@{ + requested_ref = $script:MlflowRef + resolved_kind = "branch" + resolved_ref = $script:MlflowResolvedRef + resolved_sha = "$sha" + fetched_at = $now + } + } + if ($script:SelectedApxSkills.Count -gt 0) { + $kind = if ($script:ApxResolvedRef -in @("main", "master")) { "branch" } else { "release_tag" } + $sha = Get-GitHubSha -Repo "databricks-solutions/apx" -Ref $script:ApxResolvedRef + $sources["databricks-solutions/apx"] = [ordered]@{ + requested_ref = $script:ApxRef + resolved_kind = $kind + resolved_ref = $script:ApxResolvedRef + resolved_sha = "$sha" + fetched_at = $now + } + } + if ($script:SelectedAgentBSkills.Count -gt 0) { + $cliVersion = "" + if (Get-Command 
databricks -ErrorAction SilentlyContinue) { + try { + $cliOutput = & databricks --version 2>&1 + if ($cliOutput -match '(\d+\.\d+\.\d+)') { $cliVersion = $Matches[1] } + } catch {} + } + $sources["databricks/databricks-agent-skills"] = [ordered]@{ + install_method = "databricks-aitools" + cli_version = $cliVersion + skills_release = "$($script:AgentBRelease)" + fetched_at = $now + } + } + + if ($sources.Count -eq 0) { return } + [ordered]@{ sources = $sources } | ConvertTo-Json -Depth 5 | Set-Content -Path $lock -Encoding UTF8 +} + +# ─── Dry run ────────────────────────────────────────────────── +function Show-DryRunReport { + Resolve-AitoolsAgents + Write-Host "" + Write-Host "Dry run -- nothing was installed" -ForegroundColor White + Write-Host "--------------------------------" + $localList = if ($script:SelectedLocalSkills.Count -gt 0) { $script:SelectedLocalSkills -join ' ' } else { "" } + $mlflowList = if ($script:SelectedMlflowSkills.Count -gt 0) { $script:SelectedMlflowSkills -join ' ' } else { "" } + $apxList = if ($script:SelectedApxSkills.Count -gt 0) { $script:SelectedApxSkills -join ' ' } else { "" } + $mlflowRefDisplay = if ($script:MlflowResolvedRef) { $script:MlflowResolvedRef } else { "n/a" } + $apxRefDisplay = if ($script:ApxResolvedRef) { $script:ApxResolvedRef } else { "n/a" } + Write-Msg "Bundled skills (this repo): $localList" + Write-Msg "MLflow skills @ ${mlflowRefDisplay}: $mlflowList" + Write-Msg "APX skills @ ${apxRefDisplay}: $apxList" + if ($script:SelectedAgentBSkills.Count -gt 0) { + $skillsCsv = $script:SelectedAgentBSkills -join ',' + $expFlag = if (Test-AgentBNeedsExperimental) { " --experimental" } else { "" } + $releaseSuffix = if ($script:AgentBRelease) { " @ $($script:AgentBRelease)" } else { "" } + Write-Msg "Agent skills (databricks-agent-skills$releaseSuffix): $($script:SelectedAgentBSkills -join ' ')" + if ($script:AitoolsAgents) { + Write-Msg "Would run: databricks aitools install --scope $($script:Scope) --agents 
$($script:AitoolsAgents) --skills $skillsCsv$expFlag -p $($script:Profile_)" + } + if ($script:UnsupportedAgentTools.Count -gt 0) { + $mode = if ($script:AitoolsAgents) { "symlink from the aitools canonical store" } else { "copy via a temp-dir aitools install" } + Write-Msg "Would deliver agent skills to $($script:UnsupportedAgentTools -join ',') ($mode):" + $dryBaseDir = if ($script:Scope -eq "global") { $env:USERPROFILE } else { (Get-Location).Path } + foreach ($dir in (Get-UnsupportedSkillDirs -BaseDir $dryBaseDir)) { + Write-Msg " -> $dir" + } + } + } else { + Write-Msg "Agent skills: " + } + Write-Host "" +} + # ─── Install skills ────────────────────────────────────────── function Install-Skills { param([string]$BaseDir) @@ -1231,19 +1832,21 @@ function Install-Skills { $dirs = $dirs | Select-Object -Unique # Count selected skills for display - $dbCount = $script:SelectedSkills.Count + $localCount = $script:SelectedLocalSkills.Count $mlflowCount = $script:SelectedMlflowSkills.Count $apxCount = $script:SelectedApxSkills.Count - $agentCount = $script:SelectedAgentSkills.Count - $totalCount = $dbCount + $mlflowCount + $apxCount + $agentCount - Write-Msg "Installing $totalCount skills" - - # Build set of all skills being installed now + $totalCount = $localCount + $mlflowCount + $apxCount + Write-Msg "Installing $totalCount skills (agent skills are installed separately via databricks aitools)" + + # Skills this installer manages directly. Agent skills are deliberately NOT + # in this set: any same-named entry from an older install is a stale real + # copy that must be removed -- `databricks aitools` will not overwrite an + # existing real directory, so leaving it would shadow the new install. + # (Symlinks for tools aitools can't target are re-created each run.) 
$allNewSkills = @() - $allNewSkills += $script:SelectedSkills + $allNewSkills += $script:SelectedLocalSkills $allNewSkills += $script:SelectedMlflowSkills $allNewSkills += $script:SelectedApxSkills - $allNewSkills += $script:SelectedAgentSkills | ForEach-Object { $_ -replace '^.*:', '' } # Clean up previously installed skills that are no longer selected # Check scope-local manifest first, fall back to global for upgrades from older versions @@ -1260,11 +1863,16 @@ function Install-Skills { $prevSkill = $parts[1] # Skip if this skill is still selected if ($allNewSkills -contains $prevSkill) { continue } - # Only remove if the directory exists + # Remove real dirs and symlinks alike $prevPath = Join-Path $prevDir $prevSkill - if (Test-Path $prevPath) { - Remove-Item -Recurse -Force $prevPath - Write-Msg "Removed deselected skill: $prevSkill" + $prevItem = Get-Item -LiteralPath $prevPath -Force -ErrorAction SilentlyContinue + if ($prevItem) { + if ($prevItem.Attributes -band [System.IO.FileAttributes]::ReparsePoint) { + $prevItem.Delete() + } else { + Remove-Item -LiteralPath $prevPath -Recurse -Force + } + Write-Msg "Removed previously installed skill: $prevSkill" } } } @@ -1272,12 +1880,18 @@ function Install-Skills { # Start fresh manifest $manifestEntries = @() + # Raw-fetch URLs pinned to the resolved refs + $mlflowRef = if ($script:MlflowResolvedRef) { $script:MlflowResolvedRef } else { "main" } + $apxRef = if ($script:ApxResolvedRef) { $script:ApxResolvedRef } else { "main" } + $mlflowRawUrl = "$MlflowBaseUrl/$mlflowRef" + $apxRawUrl = "$ApxBaseUrl/$apxRef/skills/apx" + foreach ($dir in $dirs) { if (-not (Test-Path $dir)) { New-Item -ItemType Directory -Path $dir -Force | Out-Null } - # Install Databricks skills from repo - foreach ($skill in $script:SelectedSkills) { + # Install bundled Databricks skills from this repo + foreach ($skill in $script:SelectedLocalSkills) { $src = Join-Path $script:RepoDir "databricks-skills\$skill" if (-not (Test-Path $src)) { 
continue } $dest = Join-Path $dir $skill @@ -1286,7 +1900,7 @@ function Install-Skills { $manifestEntries += "$dir|$skill" } $shortDir = $dir -replace [regex]::Escape($env:USERPROFILE), '~' - Write-Ok "Databricks skills ($dbCount) -> $shortDir" + Write-Ok "Databricks skills ($localCount) -> $shortDir" # Install MLflow skills from mlflow/skills repo if ($script:SelectedMlflowSkills.Count -gt 0) { @@ -1296,12 +1910,12 @@ function Install-Skills { if (-not (Test-Path $destDir)) { New-Item -ItemType Directory -Path $destDir -Force | Out-Null } - $url = "$MlflowRawUrl/$skill/SKILL.md" + $url = "$mlflowRawUrl/$skill/SKILL.md" try { Invoke-WebRequest -Uri $url -OutFile (Join-Path $destDir "SKILL.md") -UseBasicParsing -ErrorAction Stop foreach ($ref in @("reference.md", "examples.md", "api.md")) { try { - Invoke-WebRequest -Uri "$MlflowRawUrl/$skill/$ref" -OutFile (Join-Path $destDir $ref) -UseBasicParsing -ErrorAction Stop + Invoke-WebRequest -Uri "$mlflowRawUrl/$skill/$ref" -OutFile (Join-Path $destDir $ref) -UseBasicParsing -ErrorAction Stop } catch {} } $manifestEntries += "$dir|$skill" @@ -1310,7 +1924,7 @@ function Install-Skills { } } $ErrorActionPreference = $prevEAP - Write-Ok "MLflow skills ($mlflowCount) -> $shortDir" + Write-Ok "MLflow skills ($mlflowCount, @ $mlflowRef) -> $shortDir" } # Install APX skills from databricks-solutions/apx repo @@ -1321,12 +1935,12 @@ function Install-Skills { if (-not (Test-Path $destDir)) { New-Item -ItemType Directory -Path $destDir -Force | Out-Null } - $url = "$ApxRawUrl/SKILL.md" + $url = "$apxRawUrl/SKILL.md" try { Invoke-WebRequest -Uri $url -OutFile (Join-Path $destDir "SKILL.md") -UseBasicParsing -ErrorAction Stop foreach ($ref in @("backend-patterns.md", "frontend-patterns.md")) { try { - Invoke-WebRequest -Uri "$ApxRawUrl/$ref" -OutFile (Join-Path $destDir $ref) -UseBasicParsing -ErrorAction Stop + Invoke-WebRequest -Uri "$apxRawUrl/$ref" -OutFile (Join-Path $destDir $ref) -UseBasicParsing -ErrorAction Stop } catch {} 
} $manifestEntries += "$dir|$skill" @@ -1336,75 +1950,7 @@ function Install-Skills { } } $ErrorActionPreference = $prevEAP2 - Write-Ok "APX skills ($apxCount) -> $shortDir" - } - - # Install Agent skills from databricks/databricks-agent-skills repo - if ($script:SelectedAgentSkills.Count -gt 0) { - # Fetch the full repo tree once (single API call) for all skills. - # Collapse pretty-printed JSON whitespace so the path/mode/type fields - # land adjacent for the per-entry regex below. - $agentTree = $null - $agentSuccess = 0 - try { - $rawTree = Invoke-WebRequest -Uri $AgentSkillsApiUrl -UseBasicParsing -ErrorAction Stop | Select-Object -ExpandProperty Content - $agentTree = ($rawTree -replace '\s+', ' ') - } catch { - Write-Warn "Could not fetch agent skills tree from GitHub API" - } - if ($agentTree) { - $prevEAP3 = $ErrorActionPreference; $ErrorActionPreference = "Continue" - foreach ($entry in $script:SelectedAgentSkills) { - $srcName = ($entry -split ':')[0] - $installName = ($entry -replace '^.*:', '') - $destDir = Join-Path $dir $installName - # Wipe any prior install so upstream-deleted files don't persist - if (Test-Path $destDir) { - Remove-Item -Recurse -Force $destDir -ErrorAction SilentlyContinue - } - New-Item -ItemType Directory -Path $destDir -Force | Out-Null - # Extract file paths under skills// — match only entries whose - # next JSON fields are `"mode": "...", "type": "blob"`, so directory - # entries (type=tree) are skipped. agentTree has been whitespace-collapsed - # above; the GitHub tree API returns fields in order path → mode → type. 
- $filePaths = [regex]::Matches($agentTree, '"path": *"(skills/' + [regex]::Escape($srcName) + '/[^"]+)", *"mode": *"[^"]+", *"type": *"blob"') | - ForEach-Object { $_.Groups[1].Value } - if (-not $filePaths) { - Remove-Item $destDir -ErrorAction SilentlyContinue - Write-Warn "Could not fetch agent skill '$srcName'" - continue - } - $okFlag = $true - foreach ($filePath in $filePaths) { - $rel = $filePath.Substring("skills/$srcName/".Length) - $dest = Join-Path $destDir ($rel -replace '/', '\') - $destParent = Split-Path $dest -Parent - if (-not (Test-Path $destParent)) { - New-Item -ItemType Directory -Path $destParent -Force | Out-Null - } - try { - Invoke-WebRequest -Uri "$AgentSkillsRawUrl/$srcName/$rel" -OutFile $dest -UseBasicParsing -ErrorAction Stop - } catch { - $okFlag = $false - } - } - if ($okFlag) { - $manifestEntries += "$dir|$installName" - $agentSuccess++ - } else { - Remove-Item -Recurse -Force $destDir -ErrorAction SilentlyContinue - Write-Warn "Could not install agent skill '$srcName'" - } - } - $ErrorActionPreference = $prevEAP3 - } - if ($agentSuccess -eq $agentCount) { - Write-Ok "Agent skills ($agentCount) -> $shortDir" - } elseif ($agentSuccess -gt 0) { - Write-Warn "Agent skills (only $agentSuccess of $agentCount installed) -> $shortDir" - } else { - Write-Warn "Agent skills (0 of $agentCount installed) -> $shortDir" - } + Write-Ok "APX skills ($apxCount, @ $apxRef) -> $shortDir" } } @@ -1843,6 +2389,9 @@ function Show-Summary { Write-Msg "Location: $($script:InstallDir)" Write-Msg "Scope: $($script:Scope)" Write-Msg "Tools: $(($script:Tools -split ' ') -join ', ')" + if ($script:SelectedAgentBSkills.Count -gt 0) { + Write-Msg "Agent skills are managed by databricks aitools -- update with databricks aitools update" + } Write-Host "" Write-Msg "Next steps:" $step = 1 @@ -2097,6 +2646,9 @@ function Invoke-PromptAuth { # ─── Main ───────────────────────────────────────────────────── function Invoke-Main { + # --list-skills exits early (uses the 
live aitools inventory when available) + if ($script:ListSkills) { Show-SkillsList; return } + if (-not $script:Silent) { Write-Host "" Write-Host "Databricks AI Dev Kit Installer" -ForegroundColor White @@ -2110,6 +2662,9 @@ function Invoke-Main { Write-Step "Checking prerequisites" Test-Dependencies + # Discover the agent-skills inventory (live via `databricks aitools list`, or fallback) + Get-AgentBInventory + # Tool selection Write-Step "Selecting tools" Invoke-DetectTools @@ -2138,7 +2693,8 @@ function Invoke-Main { Write-Step "Skill profiles" Invoke-PromptSkillsProfile Resolve-Skills - $skCount = $script:SelectedSkills.Count + $script:SelectedMlflowSkills.Count + $script:SelectedApxSkills.Count + Resolve-FetchRefs + $skCount = $script:SelectedLocalSkills.Count + $script:SelectedMlflowSkills.Count + $script:SelectedApxSkills.Count + $script:SelectedAgentBSkills.Count if (-not [string]::IsNullOrWhiteSpace($script:UserSkills)) { Write-Ok "Custom selection ($skCount skills)" } else { @@ -2168,7 +2724,7 @@ function Invoke-Main { Write-Host " MCP server: " -NoNewline; Write-Host $script:InstallDir -ForegroundColor Green } if ($script:InstallSkills) { - $skTotal = $script:SelectedSkills.Count + $script:SelectedMlflowSkills.Count + $script:SelectedApxSkills.Count + $script:SelectedAgentSkills.Count + $skTotal = $script:SelectedLocalSkills.Count + $script:SelectedMlflowSkills.Count + $script:SelectedApxSkills.Count + $script:SelectedAgentBSkills.Count if (-not [string]::IsNullOrWhiteSpace($script:UserSkills)) { Write-Host " Skills: " -NoNewline Write-Host "custom selection ($skTotal skills)" -ForegroundColor Green -NoNewline @@ -2179,6 +2735,14 @@ function Invoke-Main { Write-Host "$profileDisplay ($skTotal skills)" -ForegroundColor Green -NoNewline Write-Host " (will be overwritten, backup your changes first)" -ForegroundColor Yellow } + if ($script:SelectedAgentBSkills.Count -gt 0) { + Write-Host " Agent skills: " -NoNewline + Write-Host "via databricks aitools" 
-ForegroundColor Green -NoNewline + Write-Host " (requires Databricks CLI v$MinAitoolsCliVersion+)" -ForegroundColor DarkGray + } + if ($script:SelectedApxSkills.Count -gt 0 -and $script:ApxResolvedRef) { + Write-Host " APX ref: " -NoNewline; Write-Host $script:ApxResolvedRef -ForegroundColor Green + } } if ($script:InstallMcp) { Write-Host " MCP config: " -NoNewline; Write-Host "yes" -ForegroundColor Green @@ -2186,6 +2750,12 @@ function Invoke-Main { Write-Host "" } + # ── Dry run: report the plan and exit before any changes ── + if ($script:DryRun) { + Show-DryRunReport + exit 0 + } + if (-not $script:Silent) { $confirm = Read-Prompt -PromptText "Proceed with installation? (y/n)" -Default "y" if ($confirm -notin @("y", "Y", "yes")) { @@ -2219,9 +2789,15 @@ function Invoke-Main { Write-Ok "Repository cloned ($Branch)" } - # Install skills + # Install skills managed by this installer (bundled + mlflow + apx) if ($script:InstallSkills) { Install-Skills -BaseDir $baseDir + + # Install agent skills (delegated to `databricks aitools`) + Install-AgentBSkills -BaseDir $baseDir + + # Record resolved sources + Write-Lockfile } # Write GEMINI.md if gemini is selected diff --git a/install.sh b/install.sh index 239452b9..74e9446b 100644 --- a/install.sh +++ b/install.sh @@ -54,10 +54,23 @@ USER_MCP_PATH="${DEVKIT_MCP_PATH:-}" SKILLS_PROFILE="${DEVKIT_SKILLS_PROFILE:-}" USER_SKILLS="${DEVKIT_SKILLS:-}" CHANNEL="${DEVKIT_CHANNEL:-stable}" # stable or experimental +DRY_RUN="${DRY_RUN:-false}" + +# Raw-fetch ref overrides (see resolve_ref). SKILLS_CHANNEL=dev flips unset +# refs to `main` for living-at-head testing. 
+SKILLS_CHANNEL="${SKILLS_CHANNEL:-stable}" +if [ "$SKILLS_CHANNEL" = "dev" ]; then + APX_REF="${APX_REF:-main}" +else + APX_REF="${APX_REF:-latest}" +fi +MLFLOW_REF="${MLFLOW_REF:-main}" # mlflow/skills is tagless — main is intentional +INCLUDE_PRERELEASES="${INCLUDE_PRERELEASES:-0}" # Convert string booleans from env vars to actual booleans [ "$FORCE" = "true" ] || [ "$FORCE" = "1" ] && FORCE=true || FORCE=false [ "$SILENT" = "true" ] || [ "$SILENT" = "1" ] && SILENT=true || SILENT=false +[ "$DRY_RUN" = "true" ] || [ "$DRY_RUN" = "1" ] && DRY_RUN=true || DRY_RUN=false # Check if scope was explicitly set via env var [ -n "${DEVKIT_SCOPE:-}" ] && SCOPE_EXPLICIT=true @@ -84,43 +97,58 @@ INSTALL_SKILLS=true # Minimum required versions MIN_CLI_VERSION="0.278.0" MIN_SDK_VERSION="0.85.0" +# Agent skills are delegated to `databricks aitools`, which ships with CLI v1.0.0+ +MIN_AITOOLS_CLI_VERSION="1.0.0" # Colors G='\033[0;32m' Y='\033[1;33m' R='\033[0;31m' BL='\033[0;34m' B='\033[1m' D='\033[2m' N='\033[0m' -# Databricks skills (bundled in repo) -SKILLS="databricks-agent-bricks databricks-ai-functions databricks-aibi-dashboards databricks-apps-python databricks-bundles databricks-config databricks-dbsql databricks-docs databricks-genie databricks-iceberg databricks-jobs databricks-lakebase-autoscale databricks-lakebase-provisioned databricks-metric-views databricks-mlflow-evaluation databricks-model-serving databricks-python-sdk databricks-spark-declarative-pipelines databricks-spark-structured-streaming databricks-synthetic-data-gen databricks-unity-catalog databricks-unstructured-pdf-generation databricks-vector-search databricks-zerobus-ingest spark-python-data-source" +# Databricks skills bundled in this repo (everything else moved to databricks/databricks-agent-skills) +LOCAL_SKILLS="databricks-genie" -# MLflow skills (fetched from mlflow/skills repo) +# MLflow skills (fetched from mlflow/skills repo; MLFLOW_REF defaults to main — the repo is tagless) 
MLFLOW_SKILLS="agent-evaluation analyze-mlflow-chat-session analyze-mlflow-trace instrumenting-with-mlflow-tracing mlflow-onboarding querying-mlflow-metrics retrieving-mlflow-traces searching-mlflow-docs" -MLFLOW_RAW_URL="https://raw.githubusercontent.com/mlflow/skills/main" +MLFLOW_BASE_URL="https://raw.githubusercontent.com/mlflow/skills" -# APX skills (fetched from databricks-solutions/apx repo) +# APX skills (fetched from databricks-solutions/apx repo @ latest stable tag, see resolve_ref / APX_REF) APX_SKILLS="databricks-app-apx" -APX_RAW_URL="https://raw.githubusercontent.com/databricks-solutions/apx/main/skills/apx" - -# Agent skills (fetched from databricks/databricks-agent-skills repo) -AGENT_SKILLS="databricks-core:databricks databricks-apps databricks-lakebase" -AGENT_SKILLS_RAW_URL="https://raw.githubusercontent.com/databricks/databricks-agent-skills/main/skills" -AGENT_SKILLS_API_URL="https://api.github.com/repos/databricks/databricks-agent-skills/git/trees/main?recursive=1" +APX_BASE_URL="https://raw.githubusercontent.com/databricks-solutions/apx" + +# Agent skills (from databricks/databricks-agent-skills, installed and managed by +# `databricks aitools`, which ships with the Databricks CLI v1.0.0+). +# The live inventory is discovered at runtime via `databricks aitools list -o json` +# (see fetch_agent_b_inventory); these lists are the fallback snapshot (v0.2.3). 
+AGENT_B_STABLE_FALLBACK="databricks-apps databricks-core databricks-dabs databricks-jobs databricks-lakebase databricks-model-serving databricks-pipelines databricks-serverless-migration databricks-vector-search" +AGENT_B_EXPERIMENTAL_FALLBACK="databricks-agent-bricks databricks-ai-functions databricks-aibi-dashboards databricks-apps-python databricks-dbsql databricks-docs databricks-execution-compute databricks-iceberg databricks-lakeflow-connect databricks-metric-views databricks-mlflow-evaluation databricks-python-sdk databricks-spark-structured-streaming databricks-synthetic-data-gen databricks-unity-catalog databricks-unstructured-pdf-generation databricks-zerobus-ingest spark-python-data-source" +# Skills never installed by default (excluded from "all" and profile selections; +# still installable via an explicit --skills request) +AGENT_B_EXCLUDED="databricks-execution-compute" +# Populated by fetch_agent_b_inventory (live or fallback) +AGENT_B_STABLE="" +AGENT_B_EXPERIMENTAL="" +AGENT_B_RELEASE="" + +# Old skill names → new names (breaking rename when sourcing moved to +# databricks-agent-skills). Explicit requests for old names are migrated with a warning. 
+RENAMED_SKILLS="databricks-bundles:databricks-dabs databricks-spark-declarative-pipelines:databricks-pipelines databricks-config:databricks-core databricks:databricks-core databricks-lakebase-autoscale:databricks-lakebase databricks-lakebase-provisioned:databricks-lakebase" # ─── Skill profiles ────────────────────────────────────────── -# Core skills always installed regardless of profile selection -CORE_SKILLS="databricks-config databricks-docs databricks-python-sdk databricks-unity-catalog" +# Core skills always installed regardless of profile selection (all from databricks-agent-skills) +CORE_SKILLS="databricks-core databricks-docs databricks-python-sdk databricks-unity-catalog" -# Profile definitions (non-core skills only — core skills are always added) -PROFILE_DATA_ENGINEER="databricks-spark-declarative-pipelines databricks-spark-structured-streaming databricks-jobs databricks-bundles databricks-dbsql databricks-iceberg databricks-zerobus-ingest spark-python-data-source databricks-metric-views databricks-synthetic-data-gen" +# Profile definitions (non-core skills only — core skills are always added). +# Names may come from any source; resolve_skills buckets them. 
+PROFILE_DATA_ENGINEER="databricks-pipelines databricks-spark-structured-streaming databricks-jobs databricks-dabs databricks-dbsql databricks-iceberg databricks-lakeflow-connect databricks-zerobus-ingest spark-python-data-source databricks-metric-views databricks-synthetic-data-gen" PROFILE_ANALYST="databricks-aibi-dashboards databricks-dbsql databricks-genie databricks-metric-views" PROFILE_AIML_ENGINEER="databricks-agent-bricks databricks-ai-functions databricks-vector-search databricks-model-serving databricks-genie databricks-unstructured-pdf-generation databricks-mlflow-evaluation databricks-synthetic-data-gen databricks-jobs" PROFILE_AIML_MLFLOW="agent-evaluation analyze-mlflow-chat-session analyze-mlflow-trace instrumenting-with-mlflow-tracing mlflow-onboarding querying-mlflow-metrics retrieving-mlflow-traces searching-mlflow-docs" -PROFILE_APP_DEVELOPER="databricks-apps-python databricks-app-apx databricks-lakebase-autoscale databricks-lakebase-provisioned databricks-model-serving databricks-dbsql databricks-jobs databricks-bundles" -PROFILE_APP_DEVELOPER_AGENT="databricks-core:databricks databricks-apps databricks-lakebase" +PROFILE_APP_DEVELOPER="databricks-apps databricks-apps-python databricks-app-apx databricks-lakebase databricks-model-serving databricks-dbsql databricks-jobs databricks-dabs" # Selected skills (populated during profile selection) -SELECTED_SKILLS="" +SELECTED_LOCAL_SKILLS="" SELECTED_MLFLOW_SKILLS="" SELECTED_APX_SKILLS="" -SELECTED_AGENT_SKILLS="" +SELECTED_AGENT_B_SKILLS="" # Output helpers msg() { [ "$SILENT" = true ] || echo -e " $*"; } @@ -144,6 +172,7 @@ while [ $# -gt 0 ]; do --silent) SILENT=true; shift ;; --tools) USER_TOOLS="$2"; shift 2 ;; --experimental) CHANNEL="experimental"; shift ;; + --dry-run) DRY_RUN=true; shift ;; -f|--force) FORCE=true; shift ;; -h|--help) echo "Databricks AI Dev Kit Installer" @@ -163,6 +192,7 @@ while [ $# -gt 0 ]; do echo " --skills LIST Comma-separated skill names to install (overrides 
profile)" echo " --list-skills List available skills and profiles, then exit" echo " --experimental Install from experimental branch (early access features)" + echo " --dry-run Print what would be installed (resolved refs, aitools command) and exit" echo " -f, --force Force reinstall" echo " -h, --help Show this help" echo "" @@ -178,6 +208,19 @@ while [ $# -gt 0 ]; do echo " DEVKIT_SILENT Set to 'true' for silent mode" echo " DEVKIT_CHANNEL 'stable' (default) or 'experimental'" echo " AIDEVKIT_HOME Installation directory (default: ~/.ai-dev-kit)" + echo " APX_REF Ref for APX skill fetch: 'latest' (default), a tag/SHA, or 'main'" + echo " MLFLOW_REF Ref for MLflow skills fetch (default: main)" + echo " SKILLS_CHANNEL 'stable' (default) or 'dev' (unset raw-fetch refs follow main)" + echo " INCLUDE_PRERELEASES Set to '1' to allow -rc/-beta tags when resolving 'latest'" + echo " DRY_RUN Set to '1' to print the install plan and exit" + echo "" + echo "Notes:" + echo " Most Databricks skills are installed via 'databricks aitools' (Databricks CLI v1.0.0+)" + echo " and are updated/uninstalled with 'databricks aitools update|uninstall', not this script." + echo " Renamed skills: databricks-bundles -> databricks-dabs," + echo " databricks-spark-declarative-pipelines -> databricks-pipelines." + echo " Replaced skills: databricks-config -> databricks-core," + echo " databricks-lakebase-autoscale/provisioned -> databricks-lakebase." 
echo "" echo "Examples:" echo " # Using environment variables" @@ -189,16 +232,38 @@ while [ $# -gt 0 ]; do done # ─── --list-skills handler ───────────────────────────────────── -if [ "${LIST_SKILLS:-false}" = true ]; then +# (function — needs fetch_agent_b_inventory; invoked after function definitions below) +_count() { echo $#; } + +# Number of skills the "all" profile installs (excluded agent skills omitted) +_count_all_skills() { + local n skill + n=$(_count $LOCAL_SKILLS $MLFLOW_SKILLS $APX_SKILLS $AGENT_B_STABLE $AGENT_B_EXPERIMENTAL) + for skill in $AGENT_B_EXCLUDED; do + _in_list "$skill" "$AGENT_B_STABLE $AGENT_B_EXPERIMENTAL" && n=$((n - 1)) + done + echo "$n" +} + +list_skills_and_exit() { + fetch_agent_b_inventory + + local all_count de_count an_count ai_count ap_count + all_count=$(_count_all_skills) + de_count=$(_count $CORE_SKILLS $PROFILE_DATA_ENGINEER) + an_count=$(_count $CORE_SKILLS $PROFILE_ANALYST) + ai_count=$(_count $CORE_SKILLS $PROFILE_AIML_ENGINEER $PROFILE_AIML_MLFLOW) + ap_count=$(_count $CORE_SKILLS $PROFILE_APP_DEVELOPER) + echo "" echo -e "${B}Available Skill Profiles${N}" echo "────────────────────────────────" echo "" - echo -e " ${B}all${N} All 37 skills (default)" - echo -e " ${B}data-engineer${N} Pipelines, Spark, Jobs, Streaming (14 skills)" - echo -e " ${B}analyst${N} Dashboards, SQL, Genie, Metrics (8 skills)" - echo -e " ${B}ai-ml-engineer${N} Agents, RAG, Vector Search, MLflow (17 skills)" - echo -e " ${B}app-developer${N} Apps, Lakebase, Deployment (10 skills)" + echo -e " ${B}all${N} All ${all_count} skills (default)" + echo -e " ${B}data-engineer${N} Pipelines, Spark, Jobs, Streaming (${de_count} skills)" + echo -e " ${B}analyst${N} Dashboards, SQL, Genie, Metrics (${an_count} skills)" + echo -e " ${B}ai-ml-engineer${N} Agents, RAG, Vector Search, MLflow (${ai_count} skills)" + echo -e " ${B}app-developer${N} Apps, Lakebase, Deployment (${ap_count} skills)" echo "" echo -e "${B}Core Skills${N} (always installed)" echo 
"────────────────────────────────" @@ -234,29 +299,43 @@ if [ "${LIST_SKILLS:-false}" = true ]; then echo -e " $skill" done echo "" - echo -e "${B}MLflow Skills${N} (from mlflow/skills repo)" + echo -e "${B}Bundled Skills${N} (from this repo)" + echo "────────────────────────────────" + for skill in $LOCAL_SKILLS; do + echo -e " $skill" + done + echo "" + echo -e "${B}MLflow Skills${N} (from mlflow/skills repo @ ${MLFLOW_REF})" echo "────────────────────────────────" for skill in $MLFLOW_SKILLS; do echo -e " $skill" done echo "" - echo -e "${B}APX Skills${N} (from databricks-solutions/apx repo)" + echo -e "${B}APX Skills${N} (from databricks-solutions/apx repo @ ${APX_REF})" echo "────────────────────────────────" for skill in $APX_SKILLS; do echo -e " $skill" done echo "" - echo -e "${B}Agent Skills${N} (from databricks/databricks-agent-skills repo)" + echo -e "${B}Agent Skills${N} (from databricks/databricks-agent-skills${AGENT_B_RELEASE:+ @ $AGENT_B_RELEASE} — managed by ${B}databricks aitools${N})" echo "────────────────────────────────" - for entry in $AGENT_SKILLS; do - echo -e " ${entry#*:}" + for skill in $AGENT_B_STABLE; do + echo -e " $skill" + done + echo -e " ${D}experimental:${N}" + for skill in $AGENT_B_EXPERIMENTAL; do + if echo "$AGENT_B_EXCLUDED" | tr ' ' '\n' | grep -Fxq "$skill"; then + echo -e " ${D}$skill (excluded by default — request explicitly via --skills)${N}" + else + echo -e " $skill" + fi done echo "" echo -e "${D}Usage: bash install.sh --skills-profile data-engineer,ai-ml-engineer${N}" echo -e "${D} bash install.sh --skills databricks-jobs,databricks-dbsql${N}" echo "" exit 0 -fi +} # Set configuration URLs after parsing branch argument REPO_URL="https://github.com/databricks-solutions/ai-dev-kit.git" @@ -700,91 +779,118 @@ prompt_mcp_path() { } # ─── Skill profile selection ────────────────────────────────── -# Resolve selected skills from profile names or explicit skill list +# Exact-match membership test: _in_list +# (`grep -w` is 
unsafe here — `-` is a word boundary, so `grep -w databricks` +# would match `databricks-jobs` etc.) +_in_list() { echo "$2" | tr ' ' '\n' | grep -Fxq "$1"; } + +# Map an old skill name to its replacement (prints the new name, or fails) +migrate_renamed_skill() { + local entry + for entry in $RENAMED_SKILLS; do + if [ "${entry%%:*}" = "$1" ]; then + echo "${entry#*:}" + return 0 + fi + done + return 1 +} + +# Resolve selected skills from profile names or explicit skill list, +# bucketing each name into its source (local repo / mlflow / apx / agent-skills). resolve_skills() { - local db_skills="" mlflow_skills="" apx_skills="" agent_skills="" + fetch_agent_b_inventory + + local local_skills="" mlflow_skills="" apx_skills="" agent_b_skills="" + + # Bucket one skill name into its source list (fails for unknown names) + _bucket() { + if _in_list "$1" "$LOCAL_SKILLS"; then + local_skills="${local_skills:+$local_skills }$1" + elif _in_list "$1" "$MLFLOW_SKILLS"; then + mlflow_skills="${mlflow_skills:+$mlflow_skills }$1" + elif _in_list "$1" "$APX_SKILLS"; then + apx_skills="${apx_skills:+$apx_skills }$1" + elif _in_list "$1" "$AGENT_B_STABLE $AGENT_B_EXPERIMENTAL"; then + agent_b_skills="${agent_b_skills:+$agent_b_skills }$1" + else + return 1 + fi + } + + # Dedupe + normalize whitespace (empty input stays truly empty so `[ -n ]` works) + _dedupe() { echo "$*" | tr ' ' '\n' | sed '/^$/d' | sort -u | tr '\n' ' ' | sed 's/[[:space:]]*$//'; } + + _store_selection() { + SELECTED_LOCAL_SKILLS=$(_dedupe "$local_skills") + SELECTED_MLFLOW_SKILLS=$(_dedupe "$mlflow_skills") + SELECTED_APX_SKILLS=$(_dedupe "$apx_skills") + SELECTED_AGENT_B_SKILLS=$(_dedupe "$agent_b_skills") + } + + # Agent skills selected by default: everything except the excluded list + _default_agent_b() { + local skill + for skill in $AGENT_B_STABLE $AGENT_B_EXPERIMENTAL; do + _in_list "$skill" "$AGENT_B_EXCLUDED" && continue + agent_b_skills="${agent_b_skills:+$agent_b_skills }$skill" + done + } # Priority 
1: Explicit --skills flag (comma-separated skill names) if [ -n "$USER_SKILLS" ]; then - local user_list - user_list=$(echo "$USER_SKILLS" | tr ',' ' ') - # Separate into DB, MLflow, APX, and Agent buckets - db_skills="" - for skill in $user_list; do - # Exact-match bucketing — `grep -w` treats `-` as a word boundary, so e.g. - # `grep -w databricks` would match `databricks-app-apx` and misclassify - # an agent install-name (`databricks`) as APX. - if echo "$MLFLOW_SKILLS" | tr ' ' '\n' | grep -Fxq "$skill"; then - mlflow_skills="${mlflow_skills:+$mlflow_skills }$skill" - elif echo "$APX_SKILLS" | tr ' ' '\n' | grep -Fxq "$skill"; then - apx_skills="${apx_skills:+$apx_skills }$skill" - elif echo "$AGENT_SKILLS" | tr ' ' '\n' | sed 's/.*://' | grep -Fxq "$skill"; then - # Look up the full source:install-name entry (or bare entry if no colon) - agent_skills="${agent_skills:+$agent_skills }$(echo "$AGENT_SKILLS" | tr ' ' '\n' | grep -E "^.*:${skill}$|^${skill}$")" - else - db_skills="${db_skills:+$db_skills }$skill" + local skill new_name + for skill in $(echo "$USER_SKILLS" | tr ',' ' '); do + if _bucket "$skill"; then + continue + fi + if new_name=$(migrate_renamed_skill "$skill"); then + warn "Skill '$skill' was renamed/replaced by '$new_name' — installing '$new_name'" + _bucket "$new_name" && continue fi + die "Unknown skill: '$skill' (run with --list-skills to see available skills)" done - # Deduplicate - SELECTED_SKILLS=$(echo "$db_skills" | tr ' ' '\n' | sort -u | tr '\n' ' ') - SELECTED_MLFLOW_SKILLS=$(echo "$mlflow_skills" | tr ' ' '\n' | sort -u | tr '\n' ' ') - SELECTED_APX_SKILLS=$(echo "$apx_skills" | tr ' ' '\n' | sort -u | tr '\n' ' ') - SELECTED_AGENT_SKILLS=$(echo "$agent_skills" | tr ' ' '\n' | sort -u | tr '\n' ' ') + _store_selection return fi # Priority 2: --skills-profile flag or interactive selection if [ -z "$SKILLS_PROFILE" ] || [ "$SKILLS_PROFILE" = "all" ]; then - SELECTED_SKILLS="$SKILLS" - SELECTED_MLFLOW_SKILLS="$MLFLOW_SKILLS" - 
SELECTED_APX_SKILLS="$APX_SKILLS" - SELECTED_AGENT_SKILLS="$AGENT_SKILLS" + local_skills="$LOCAL_SKILLS" + mlflow_skills="$MLFLOW_SKILLS" + apx_skills="$APX_SKILLS" + _default_agent_b + _store_selection return fi # Build union of selected profiles (comma-separated) - db_skills="$CORE_SKILLS" - mlflow_skills="" - apx_skills="" - agent_skills="" - - local profiles - profiles=$(echo "$SKILLS_PROFILE" | tr ',' ' ') - for profile in $profiles; do + local names="$CORE_SKILLS" + local profile + for profile in $(echo "$SKILLS_PROFILE" | tr ',' ' '); do case $profile in all) - SELECTED_SKILLS="$SKILLS" - SELECTED_MLFLOW_SKILLS="$MLFLOW_SKILLS" - SELECTED_APX_SKILLS="$APX_SKILLS" - SELECTED_AGENT_SKILLS="$AGENT_SKILLS" + local_skills="$LOCAL_SKILLS" + mlflow_skills="$MLFLOW_SKILLS" + apx_skills="$APX_SKILLS" + agent_b_skills="" + _default_agent_b + _store_selection return ;; - data-engineer) - db_skills="$db_skills $PROFILE_DATA_ENGINEER" - ;; - analyst) - db_skills="$db_skills $PROFILE_ANALYST" - ;; - ai-ml-engineer) - db_skills="$db_skills $PROFILE_AIML_ENGINEER" - mlflow_skills="$mlflow_skills $PROFILE_AIML_MLFLOW" - ;; - app-developer) - db_skills="$db_skills $PROFILE_APP_DEVELOPER" - apx_skills="$apx_skills $APX_SKILLS" - agent_skills="$agent_skills $PROFILE_APP_DEVELOPER_AGENT" - ;; - *) - warn "Unknown skill profile: $profile (ignored)" - ;; + data-engineer) names="$names $PROFILE_DATA_ENGINEER" ;; + analyst) names="$names $PROFILE_ANALYST" ;; + ai-ml-engineer) names="$names $PROFILE_AIML_ENGINEER $PROFILE_AIML_MLFLOW" ;; + app-developer) names="$names $PROFILE_APP_DEVELOPER" ;; + *) warn "Unknown skill profile: $profile (ignored)" ;; esac done - # Deduplicate - SELECTED_SKILLS=$(echo "$db_skills" | tr ' ' '\n' | sort -u | tr '\n' ' ') - SELECTED_MLFLOW_SKILLS=$(echo "$mlflow_skills" | tr ' ' '\n' | sort -u | tr '\n' ' ') - SELECTED_APX_SKILLS=$(echo "$apx_skills" | tr ' ' '\n' | sort -u | tr '\n' ' ') - SELECTED_AGENT_SKILLS=$(echo "$agent_skills" | tr ' ' '\n' | 
sort -u | tr '\n' ' ') + local skill + for skill in $names; do + _bucket "$skill" || warn "Skill '$skill' not found in any source (skipped)" + done + _store_selection } # Interactive skill profile selection (multi-select) @@ -823,9 +929,15 @@ prompt_skills_profile() { echo -e " ${B}Select skill profile(s)${N}" # Custom checkbox with mutual exclusion: "All" deselects others, others deselect "All" + local all_count de_count an_count ai_count ap_count + all_count=$(_count_all_skills) + de_count=$(_count $CORE_SKILLS $PROFILE_DATA_ENGINEER) + an_count=$(_count $CORE_SKILLS $PROFILE_ANALYST) + ai_count=$(_count $CORE_SKILLS $PROFILE_AIML_ENGINEER $PROFILE_AIML_MLFLOW) + ap_count=$(_count $CORE_SKILLS $PROFILE_APP_DEVELOPER) local -a p_labels=("All Skills" "Data Engineer" "Business Analyst" "AI/ML Engineer" "App Developer" "Custom") local -a p_values=("all" "data-engineer" "analyst" "ai-ml-engineer" "app-developer" "custom") - local -a p_hints=("Install everything (34 skills)" "Pipelines, Spark, Jobs, Streaming (14 skills)" "Dashboards, SQL, Genie, Metrics (8 skills)" "Agents, RAG, Vector Search, MLflow (17 skills)" "Apps, Lakebase, Deployment (10 skills)" "Pick individual skills") + local -a p_hints=("Install everything (${all_count} skills)" "Pipelines, Spark, Jobs, Streaming (${de_count} skills)" "Dashboards, SQL, Genie, Metrics (${an_count} skills)" "Agents, RAG, Vector Search, MLflow (${ai_count} skills)" "Apps, Lakebase, Deployment (${ap_count} skills)" "Pick individual skills") local -a p_states=(1 0 0 0 0 0) # "All" selected by default local p_count=6 local p_cursor=0 @@ -936,35 +1048,38 @@ prompt_custom_skills() { local preselected_profiles="$1" # Build pre-selection set from any profiles that were also checked - local preselected="" + # (core skills start pre-selected — they are recommended for every profile) + local preselected="$CORE_SKILLS" for profile in $preselected_profiles; do case $profile in data-engineer) preselected="$preselected 
$PROFILE_DATA_ENGINEER" ;; analyst) preselected="$preselected $PROFILE_ANALYST" ;; ai-ml-engineer) preselected="$preselected $PROFILE_AIML_ENGINEER $PROFILE_AIML_MLFLOW" ;; - app-developer) preselected="$preselected $PROFILE_APP_DEVELOPER $APX_SKILLS $PROFILE_APP_DEVELOPER_AGENT" ;; + app-developer) preselected="$preselected $PROFILE_APP_DEVELOPER" ;; esac done _is_preselected() { - # Strip "source:" prefix from each entry (e.g. "databricks-core:databricks" → "databricks"), - # then exact-match against $1. Plain `grep -w` is unsafe here because `-` is a non-word - # character — `grep -w databricks` would match `databricks-jobs`, `databricks-apps`, etc. - echo "$preselected" | tr ' ' '\n' | sed 's/.*://' | grep -Fxq "$1" && echo "on" || echo "off" + _in_list "$1" "$preselected" && echo "on" || echo "off" } echo "" echo -e " ${B}Select individual skills${N}" - echo -e " ${D}Core skills (config, docs, python-sdk, unity-catalog) are always installed${N}" + echo -e " ${D}Core skills (core, docs, python-sdk, unity-catalog) are recommended for all profiles${N}" local selected selected=$(checkbox_select \ - "Spark Pipelines|databricks-spark-declarative-pipelines|$(_is_preselected databricks-spark-declarative-pipelines)|SDP/LDP, CDC, SCD Type 2" \ + "Core|databricks-core|$(_is_preselected databricks-core)|CLI auth, data exploration" \ + "Docs|databricks-docs|$(_is_preselected databricks-docs)|Databricks documentation" \ + "Python SDK|databricks-python-sdk|$(_is_preselected databricks-python-sdk)|SDK, Connect, REST API" \ + "Unity Catalog|databricks-unity-catalog|$(_is_preselected databricks-unity-catalog)|System tables, volumes" \ + "Spark Pipelines|databricks-pipelines|$(_is_preselected databricks-pipelines)|SDP/LDP, CDC, SCD Type 2" \ "Structured Streaming|databricks-spark-structured-streaming|$(_is_preselected databricks-spark-structured-streaming)|Real-time streaming" \ "Jobs & Workflows|databricks-jobs|$(_is_preselected databricks-jobs)|Multi-task orchestration" \ - 
"Asset Bundles|databricks-bundles|$(_is_preselected databricks-bundles)|DABs deployment" \ + "Asset Bundles|databricks-dabs|$(_is_preselected databricks-dabs)|DABs deployment" \ "Databricks SQL|databricks-dbsql|$(_is_preselected databricks-dbsql)|SQL warehouse queries" \ "Iceberg|databricks-iceberg|$(_is_preselected databricks-iceberg)|Apache Iceberg tables" \ + "Lakeflow Connect|databricks-lakeflow-connect|$(_is_preselected databricks-lakeflow-connect)|Managed ingestion connectors" \ "Zerobus Ingest|databricks-zerobus-ingest|$(_is_preselected databricks-zerobus-ingest)|Streaming ingestion" \ "Python Data Source|spark-python-data-source|$(_is_preselected spark-python-data-source)|Custom Spark data sources" \ "Metric Views|databricks-metric-views|$(_is_preselected databricks-metric-views)|Metric definitions" \ @@ -977,13 +1092,11 @@ prompt_custom_skills() { "AI Functions|databricks-ai-functions|$(_is_preselected databricks-ai-functions)|AI Functions, document parsing & RAG" \ "Unstructured PDF|databricks-unstructured-pdf-generation|$(_is_preselected databricks-unstructured-pdf-generation)|Synthetic PDFs for RAG" \ "Synthetic Data|databricks-synthetic-data-gen|$(_is_preselected databricks-synthetic-data-gen)|Generate test data" \ - "Lakebase Autoscale|databricks-lakebase-autoscale|$(_is_preselected databricks-lakebase-autoscale)|Managed PostgreSQL" \ - "Lakebase Provisioned|databricks-lakebase-provisioned|$(_is_preselected databricks-lakebase-provisioned)|Provisioned PostgreSQL" \ + "Lakebase|databricks-lakebase|$(_is_preselected databricks-lakebase)|Managed PostgreSQL (OLTP)" \ + "Serverless Migration|databricks-serverless-migration|$(_is_preselected databricks-serverless-migration)|Migrate to serverless compute" \ + "Apps|databricks-apps|$(_is_preselected databricks-apps)|AppKit + all frameworks" \ "App (AppKit + Python)|databricks-apps-python|$(_is_preselected databricks-apps-python)|AppKit, Dash, Streamlit, Flask" \ "App APX|databricks-app-apx|$(_is_preselected 
databricks-app-apx)|FastAPI + React" \ - "Agent: Databricks|databricks|$(_is_preselected databricks)|CLI auth, data exploration" \ - "Agent: Apps|databricks-apps|$(_is_preselected databricks-apps)|AppKit + all frameworks" \ - "Agent: Lakebase|databricks-lakebase|$(_is_preselected databricks-lakebase)|Lakebase OLTP" \ "MLflow Onboarding|mlflow-onboarding|$(_is_preselected mlflow-onboarding)|Getting started" \ "Agent Evaluation|agent-evaluation|$(_is_preselected agent-evaluation)|Evaluate AI agents" \ "MLflow Tracing|instrumenting-with-mlflow-tracing|$(_is_preselected instrumenting-with-mlflow-tracing)|Instrument with tracing" \ @@ -1003,6 +1116,375 @@ version_gte() { printf '%s\n%s' "$2" "$1" | sort -V -C } +# ─── Agent skills (databricks/databricks-agent-skills via `databricks aitools`) ─── + +# Discover the live skill inventory from `databricks aitools list -o json`. +# Falls back to the hardcoded snapshot when the CLI is missing/old/offline. +# Idempotent — only fetches once. +fetch_agent_b_inventory() { + [ -n "$AGENT_B_STABLE" ] && return + + local json="" + if command -v databricks >/dev/null 2>&1; then + json=$(databricks aitools list -o json 2>/dev/null) || json="" + fi + + if [ -n "$json" ]; then + AGENT_B_RELEASE=$(echo "$json" | grep -m1 '"release"' | sed -E 's/.*"release": *"([^"]*)".*/\1/') + # Pair each "name" with the "experimental" flag that follows it + local parsed + parsed=$(echo "$json" | awk ' + /"name":/ { gsub(/[",]/, "", $2); name=$2 } + /"experimental":/ { gsub(/[",]/, "", $2); if (name != "") { print $2, name; name="" } }') + AGENT_B_STABLE=$(echo "$parsed" | awk '$1=="false"{print $2}' | tr '\n' ' ') + AGENT_B_EXPERIMENTAL=$(echo "$parsed" | awk '$1=="true"{print $2}' | tr '\n' ' ') + fi + + if [ -z "$AGENT_B_STABLE" ]; then + AGENT_B_STABLE="$AGENT_B_STABLE_FALLBACK" + AGENT_B_EXPERIMENTAL="$AGENT_B_EXPERIMENTAL_FALLBACK" + AGENT_B_RELEASE="" + fi +} + +# Gate for `databricks aitools` (ships with the Databricks CLI v1.0.0+). 
# Interactive: offers to run the upgrade and re-checks in a loop.
# Silent/non-interactive: dies with instructions.
# Returns 1 if the user chose to skip agent skills.
#
# Globals read: SILENT, MIN_AITOOLS_CLI_VERSION (and the B/D/N colour codes).
# Helpers used: version_gte, is_interactive, prompt, warn, msg, die.
ensure_aitools_cli() {
  local attempts=0
  while true; do
    # Re-detect on every pass: the user may have upgraded since the last one.
    local cli_version=""
    if command -v databricks >/dev/null 2>&1; then
      cli_version=$(databricks --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
    fi
    if [ -n "$cli_version" ] && version_gte "$cli_version" "$MIN_AITOOLS_CLI_VERSION"; then
      return 0
    fi

    local found_msg="Databricks CLI not found."
    if [ -n "$cli_version" ]; then
      found_msg="Databricks CLI v${cli_version} is too old."
    fi

    # Nobody to ask: fail with upgrade instructions instead of prompting.
    if [ "$SILENT" = true ] || ! is_interactive; then
      die "$found_msg Agent skills are installed via 'databricks aitools', which requires Databricks CLI v${MIN_AITOOLS_CLI_VERSION}+. Upgrade: ${B}curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh${N} Then re-run this installer. (Or pass --skills with only non-agent skills to skip this requirement.)"
    fi

    # Bound the loop so a broken upgrade cannot trap the user forever.
    attempts=$((attempts + 1))
    if [ "$attempts" -gt 5 ]; then
      warn "Databricks CLI still not at v${MIN_AITOOLS_CLI_VERSION}+ after several attempts — skipping agent skills"
      return 1
    fi

    warn "$found_msg Agent skills are installed via ${B}databricks aitools${N}, which requires Databricks CLI v${MIN_AITOOLS_CLI_VERSION}+."
    msg "Upgrade command: ${B}curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh${N}"
    echo ""
    local choice
    choice=$(prompt "Upgrade the Databricks CLI now? ${D}(y = run upgrade, r = re-check, s = skip agent skills, a = abort)${N}" "y")
    case "$choice" in
      y|Y|yes|Yes|YES)
        curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh \
          || warn "CLI upgrade failed — you can retry or skip"
        # Forget the cached path so the upgraded binary is picked up.
        hash -r 2>/dev/null || true
        ;;
      r|R)
        hash -r 2>/dev/null || true
        ;;
      s|S)
        return 1
        ;;
      a|A)
        die "Installation aborted (Databricks CLI v${MIN_AITOOLS_CLI_VERSION}+ required for agent skills)"
        ;;
      *)
        # Previously an unknown answer silently used up one of the retries.
        warn "Unrecognized choice '${choice}' — enter y, r, s or a"
        ;;
    esac
  done
}

# Map selected $TOOLS to `aitools --agents` tokens; tools aitools cannot
# target (gemini, windsurf, kiro) are collected separately.
# Writes: AITOOLS_AGENTS (comma-separated), UNSUPPORTED_AGENT_TOOLS
# (space-separated). Tool names matching neither group are ignored.
AITOOLS_AGENTS=""
UNSUPPORTED_AGENT_TOOLS=""
map_aitools_agents() {
  AITOOLS_AGENTS=""
  UNSUPPORTED_AGENT_TOOLS=""
  local tool agent
  for tool in $TOOLS; do
    agent=""
    case "$tool" in
      claude)
        agent="claude-code"
        ;;
      cursor|copilot|codex|opencode|antigravity)
        # aitools uses the same token as this installer for these tools
        agent="$tool"
        ;;
      gemini|windsurf|kiro)
        UNSUPPORTED_AGENT_TOOLS="${UNSUPPORTED_AGENT_TOOLS:+$UNSUPPORTED_AGENT_TOOLS }$tool"
        ;;
    esac
    # `if` rather than `[ … ] && …` so the loop never ends on a false test
    if [ -n "$agent" ]; then
      AITOOLS_AGENTS="${AITOOLS_AGENTS:+$AITOOLS_AGENTS,}$agent"
    fi
  done
}

# Skills dirs for tools aitools can't target (one per line, deduped)
# $1 - base directory; used for gemini in both scopes and for the other
#      tools in project scope (global scope uses fixed paths under $HOME).
unsupported_skill_dirs() {
  local base_dir=$1 tool
  for tool in $UNSUPPORTED_AGENT_TOOLS; do
    case "$tool" in
      gemini)
        echo "$base_dir/.gemini/skills"
        ;;
      windsurf)
        if [ "$SCOPE" = "global" ]; then
          echo "$HOME/.codeium/windsurf/skills"
        else
          echo "$base_dir/.windsurf/skills"
        fi
        ;;
      kiro)
        if [ "$SCOPE" = "global" ]; then
          echo "$HOME/.kiro/skills"
        else
          echo "$base_dir/.kiro/skills"
        fi
        ;;
    esac
  done | sort -u
}

# True if any selected agent skill is experimental
# (i.e. appears in $AGENT_B_EXPERIMENTAL).
agent_b_needs_experimental() {
  local skill
  for skill in $SELECTED_AGENT_B_SKILLS; do
    if _in_list "$skill" "$AGENT_B_EXPERIMENTAL"; then
      return 0
    fi
  done
  return 1
}

# Install agent skills by delegating to `databricks aitools install`.
# aitools owns these skills afterwards (list/update/uninstall) — they are NOT
# tracked in this installer's manifest, except for the symlinks/copies created
# for tools aitools can't target.
# $1 - base directory handed on to install_agent_b_unsupported.
# The previous selection is kept in $STATE_DIR/.agent-b-skills so skills that
# were deselected since the last run can be uninstalled.
install_agent_b_skills() {
  local base_dir=$1
  local prev_file="$STATE_DIR/.agent-b-skills"
  if [ -z "$SELECTED_AGENT_B_SKILLS" ] && [ ! -f "$prev_file" ]; then
    return 0
  fi

  step "Installing agent skills (via databricks aitools)"

  # Uninstall agent skills dropped since the previous run
  if [ -f "$prev_file" ]; then
    local dropped="" skill
    while IFS= read -r skill || [ -n "$skill" ]; do
      if [ -z "$skill" ]; then
        continue
      fi
      if ! _in_list "$skill" "$SELECTED_AGENT_B_SKILLS"; then
        dropped="${dropped:+$dropped,}$skill"
      fi
    done < "$prev_file"
    if [ -n "$dropped" ] && command -v databricks >/dev/null 2>&1; then
      if databricks aitools uninstall --scope "$SCOPE" --skills "$dropped" >/dev/null 2>&1; then
        msg "${D}Removed deselected agent skills: ${dropped}${N}"
      else
        # The hint carries --scope so the manual command targets the same
        # install as the automatic attempt above.
        warn "Could not remove deselected agent skills — run: ${B}databricks aitools uninstall --scope $SCOPE --skills $dropped${N}"
      fi
    fi
  fi

  if [ -z "$SELECTED_AGENT_B_SKILLS" ]; then
    rm -f "$prev_file"
    return 0
  fi

  if ! ensure_aitools_cli; then
    warn "Agent skills skipped — install later with: ${B}databricks aitools install${N}"
    return 0
  fi

  map_aitools_agents
  local skills_csv exp_flag="" count
  skills_csv=$(echo "$SELECTED_AGENT_B_SKILLS" | tr -s ' ' ',' | sed 's/^,//;s/,$//')
  if agent_b_needs_experimental; then
    exp_flag="--experimental"
  fi
  count=$(_count $SELECTED_AGENT_B_SKILLS)

  if [ -n "$AITOOLS_AGENTS" ]; then
    msg "Delegating ${B}${count}${N} agent skills to ${B}databricks aitools${N} (agents: ${AITOOLS_AGENTS})"
    # $exp_flag is deliberately unquoted: when empty it must expand to no
    # argument at all rather than an empty string.
    if [ "$SILENT" = true ]; then
      databricks aitools install --scope "$SCOPE" --agents "$AITOOLS_AGENTS" \
        --skills "$skills_csv" $exp_flag -p "$PROFILE" >/dev/null 2>&1 \
        || die "databricks aitools install failed"
    else
      if ! databricks aitools install --scope "$SCOPE" --agents "$AITOOLS_AGENTS" \
        --skills "$skills_csv" $exp_flag -p "$PROFILE"; then
        warn "databricks aitools install failed — agent skills not installed"
        return 0
      fi
    fi
    ok "Agent skills ($count) installed — manage with ${B}databricks aitools list|update|uninstall${N}"
  fi

  # Tools aitools can't target: link/copy the skills from the canonical store
  if [ -n "$UNSUPPORTED_AGENT_TOOLS" ]; then
    install_agent_b_unsupported "$base_dir" "$skills_csv" "$exp_flag"
  fi

  # Record the selection so a future profile change can uninstall dropped skills
  mkdir -p "$STATE_DIR"
  echo "$SELECTED_AGENT_B_SKILLS" | tr ' ' '\n' | sed '/^$/d' > "$prev_file"
}

# Deliver agent skills to Gemini CLI / Windsurf / Kiro.
# If aitools ran for at least one supported agent, symlink each skill from the
# canonical store (kept fresh by `databricks aitools update`). Otherwise stage a
# throwaway project-scope install in a temp dir and copy real files from it.
# Arguments: $1 base dir, $2 comma-separated skill names, $3 optional
#            extra flag for `aitools install` (may be empty).
# Globals:   SCOPE, HOME, STATE_DIR, AITOOLS_AGENTS, UNSUPPORTED_AGENT_TOOLS,
#            SELECTED_AGENT_B_SKILLS (read); appends "<dir>|<skill>" lines to
#            $STATE_DIR/.installed-skills.
# Returns:   0 always; failures are reported with warn and are non-fatal.
install_agent_b_unsupported() {
  local base_dir=$1 skills_csv=$2 exp_flag=$3
  local manifest="$STATE_DIR/.installed-skills"
  local mode="link" store tmp_dir=""

  if [ "$SCOPE" = "global" ]; then
    store="$HOME/.databricks/aitools/skills"
  else
    store="$base_dir/.databricks/aitools/skills"
  fi

  # aitools did not run for any supported agent, so there is no store to
  # link to: stage one in a throwaway directory and copy from it.
  if [ -z "$AITOOLS_AGENTS" ]; then
    mode="copy"
    local tools_csv
    tools_csv=$(echo "$UNSUPPORTED_AGENT_TOOLS" | tr ' ' ',')
    # An empty tmp_dir would turn `cd ""` into a no-op and run the staging
    # install in the caller's working directory, so bail out instead.
    if ! tmp_dir=$(mktemp -d) || [ -z "$tmp_dir" ]; then
      tmp_dir=""
      warn "Could not stage agent skills for: $tools_csv"
      return 0
    fi
    # $exp_flag is deliberately unquoted so an empty value adds no argument.
    if ! (cd "$tmp_dir" && databricks aitools install --scope project --agents claude-code \
      --skills "$skills_csv" $exp_flag >/dev/null 2>&1); then
      rm -rf "$tmp_dir"
      warn "Could not stage agent skills for: $tools_csv"
      return 0
    fi
    store="$tmp_dir/.databricks/aitools/skills"
  fi

  # The manifest is appended to below; make sure its directory exists.
  mkdir -p "$STATE_DIR"

  local dir skill target delivered shown
  while IFS= read -r dir; do
    if [ -z "$dir" ]; then
      continue
    fi
    mkdir -p "$dir"
    delivered=0
    for skill in $SELECTED_AGENT_B_SKILLS; do
      if [ ! -d "$store/$skill" ]; then
        warn "Agent skill '$skill' missing from aitools store — skipped"
        continue
      fi
      rm -rf "$dir/$skill"
      if [ "$mode" = "link" ]; then
        # Project-scope dirs are all <base>/.<tool>/skills (2 levels deep),
        # so a relative link survives moving the project directory.
        target="$store/$skill"
        if [ "$SCOPE" = "project" ]; then
          target="../../.databricks/aitools/skills/$skill"
        fi
        if ! ln -s "$target" "$dir/$skill"; then
          warn "Could not link agent skill '$skill' into $dir"
          continue
        fi
      else
        if ! cp -R "$store/$skill" "$dir/$skill"; then
          warn "Could not copy agent skill '$skill' into $dir"
          continue
        fi
      fi
      # Only skills that actually landed are recorded and counted.
      echo "$dir|$skill" >> "$manifest"
      delivered=$((delivered + 1))
    done
    shown=${dir#"$HOME"/}
    if [ "$delivered" -gt 0 ]; then
      ok "Agent skills ($delivered, $mode) → $shown"
    else
      warn "No agent skills delivered to $shown"
    fi
  done < <(unsupported_skill_dirs "$base_dir")

  if [ -n "$tmp_dir" ]; then
    rm -rf "$tmp_dir"
  fi
  return 0
}

# ─── Raw-fetch ref resolution (apx, mlflow) ───────────────────

# resolve_ref <repo> <requested>
#   ""/"latest" → highest stable semver tag (prereleases excluded unless
#                 INCLUDE_PRERELEASES=1; falls back to main if no tags).
#   main/master → passed through.
#   anything else → verified to exist as a tag/branch/SHA (fails loud).
# Uses `git ls-remote` (no API rate limits; git is a hard prerequisite).
# Candidate tags are ranked numerically by major/minor/patch with plain
# `sort -k…n` (no GNU `sort -V` needed); a final release outranks its own
# prereleases, and an optional leading "v" does not affect the ranking.
# Prerelease identifiers of the same version are compared as plain strings.
# Prints the resolved ref on stdout; diagnostics go to stderr so callers can
# capture the result with $(…). Exits non-zero when an explicit ref is unknown.
resolve_ref() {
  local repo=$1 requested=$2
  local git_url="https://github.com/${repo}.git"
  case "$requested" in
    ""|latest)
      local tags pattern best
      tags=$(git ls-remote --tags --refs "$git_url" 2>/dev/null | sed 's|.*refs/tags/||')
      pattern='^v?[0-9]+\.[0-9]+\.[0-9]+$'
      if [ "$INCLUDE_PRERELEASES" = "1" ]; then
        pattern='^v?[0-9]+\.[0-9]+\.[0-9]+(-[A-Za-z0-9.]+)?$'
      fi
      # Emit "major minor patch is_release prerelease tag" per candidate, sort
      # on those keys, and keep the tag of the highest entry. is_release is 1
      # for final releases so 1.0.0 ranks above 1.0.0-rc1.
      best=$(printf '%s\n' "$tags" | grep -E "$pattern" \
        | awk '{
            ver = $0
            sub(/^v/, "", ver)
            pre = ""
            dash = index(ver, "-")
            if (dash > 0) {
              pre = substr(ver, dash + 1)
              ver = substr(ver, 1, dash - 1)
            }
            split(ver, part, ".")
            printf "%d %d %d %d %s %s\n", part[1], part[2], part[3], (pre == "" ? 1 : 0), (pre == "" ? "-" : pre), $0
          }' \
        | LC_ALL=C sort -k1,1n -k2,2n -k3,3n -k4,4n -k5,5 \
        | tail -1 | cut -d' ' -f6)
      if [ -n "$best" ]; then
        echo "$best"
      else
        warn "Could not resolve latest tag for ${repo} — falling back to main" >&2
        echo "main"
      fi
      ;;
    main|master)
      echo "$requested"
      ;;
    *)
      if git ls-remote "$git_url" "refs/tags/${requested}" "refs/heads/${requested}" 2>/dev/null | grep -q .; then
        echo "$requested"
      elif curl -fsSL -o /dev/null "https://api.github.com/repos/${repo}/commits/${requested}" 2>/dev/null; then
        echo "$requested"  # bare commit SHA (not addressable via ls-remote)
      else
        # Keep the message off stdout: callers capture stdout as the ref.
        die "Ref '${requested}' not found in ${repo}" >&2
      fi
      ;;
  esac
}

# Resolve refs for all selected raw-fetch sources (records globals for the
# fetch URLs, summary, dry run, and lockfile)
# resolve_ref runs inside $(…), so its `die` only ends that subshell; the
# failure is propagated explicitly here so an unknown ref stops the install.
MLFLOW_RESOLVED_REF=""
APX_RESOLVED_REF=""
resolve_fetch_refs() {
  if [ -n "$SELECTED_MLFLOW_SKILLS" ]; then
    MLFLOW_RESOLVED_REF=$(resolve_ref "mlflow/skills" "$MLFLOW_REF") || exit 1
  fi
  if [ -n "$SELECTED_APX_SKILLS" ]; then
    APX_RESOLVED_REF=$(resolve_ref "databricks-solutions/apx" "$APX_REF") || exit 1
  fi
  return 0
}

# Best-effort commit SHA for a ref (empty on failure). Prefers the peeled
# tag object (^{}) so annotated tags resolve to the commit they point at.
# $1 - "owner/repo" on GitHub, $2 - tag, branch or commit SHA.
# Prints the SHA (possibly empty) on stdout.
github_sha() {
  local out sha
  out=$(git ls-remote "https://github.com/$1.git" "refs/tags/$2^{}" "refs/tags/$2" "refs/heads/$2" 2>/dev/null)
  sha=$(printf '%s\n' "$out" | grep '\^{}' | head -1 | cut -f1)
  if [ -z "$sha" ]; then
    sha=$(printf '%s\n' "$out" | head -1 | cut -f1)
  fi
  if [ -z "$sha" ]; then
    # Not a tag or branch: ask the API (covers bare commit SHAs).
    sha=$(curl -fsSL "https://api.github.com/repos/$1/commits/$2" 2>/dev/null \
      | grep -m1 '"sha":' | sed -E 's/.*"sha": *"([^"]+)".*/\1/')
  fi
  echo "$sha"
}

# Escape backslashes and double quotes so a value can sit inside a JSON string.
_lock_json_escape() {
  printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

# Classify a resolved ref as "branch" or "release_tag".
# $1 - "owner/repo", $2 - ref, $3 - value to report when the remote cannot
# tell (offline, or the ref is a bare commit SHA).
_lock_ref_kind() {
  case "$2" in
    main|master)
      echo "branch"
      return 0
      ;;
  esac
  local refs
  refs=$(git ls-remote "https://github.com/$1.git" "refs/tags/$2" "refs/heads/$2" 2>/dev/null)
  case "$refs" in
    *refs/tags/*) echo "release_tag" ;;
    *refs/heads/*) echo "branch" ;;
    *) echo "$3" ;;
  esac
}

# Record what was installed and from where (skills.lock in the scope-local state dir)
# One entry is written per selected source; nothing is written when no
# source is selected. Both raw-fetch sources derive resolved_kind the same way.
write_lockfile() {
  local lock="$STATE_DIR/skills.lock"
  mkdir -p "$STATE_DIR"
  local now entries="" sha kind requested resolved
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  if [ -n "$SELECTED_MLFLOW_SKILLS" ]; then
    sha=$(github_sha "mlflow/skills" "$MLFLOW_RESOLVED_REF")
    kind=$(_lock_ref_kind "mlflow/skills" "$MLFLOW_RESOLVED_REF" "branch")
    requested=$(_lock_json_escape "$MLFLOW_REF")
    resolved=$(_lock_json_escape "$MLFLOW_RESOLVED_REF")
    entries="    \"mlflow/skills\": {\"requested_ref\": \"${requested}\", \"resolved_kind\": \"${kind}\", \"resolved_ref\": \"${resolved}\", \"resolved_sha\": \"${sha}\", \"fetched_at\": \"${now}\"}"
  fi
  if [ -n "$SELECTED_APX_SKILLS" ]; then
    sha=$(github_sha "databricks-solutions/apx" "$APX_RESOLVED_REF")
    kind=$(_lock_ref_kind "databricks-solutions/apx" "$APX_RESOLVED_REF" "release_tag")
    requested=$(_lock_json_escape "$APX_REF")
    resolved=$(_lock_json_escape "$APX_RESOLVED_REF")
    entries="${entries:+$entries,
}    \"databricks-solutions/apx\": {\"requested_ref\": \"${requested}\", \"resolved_kind\": \"${kind}\", \"resolved_ref\": \"${resolved}\", \"resolved_sha\": \"${sha}\", \"fetched_at\": \"${now}\"}"
  fi
  if [ -n "$SELECTED_AGENT_B_SKILLS" ]; then
    local cli_version release
    cli_version=$(databricks --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
    release=$(_lock_json_escape "$AGENT_B_RELEASE")
    entries="${entries:+$entries,
}    \"databricks/databricks-agent-skills\": {\"install_method\": \"databricks-aitools\", \"cli_version\": \"${cli_version}\", \"skills_release\": \"${release}\", \"fetched_at\": \"${now}\"}"
  fi

  if [ -z "$entries" ]; then
    return 0
  fi
  printf '{\n  "sources": {\n%s\n  }\n}\n' "$entries" > "$lock"
}

# ─── Dry run ──────────────────────────────────────────────────
+dry_run_report() { + map_aitools_agents + echo "" + echo -e "${B}Dry run — nothing was installed${N}" + echo "────────────────────────────────" + msg "Bundled skills (this repo): ${SELECTED_LOCAL_SKILLS:-}" + msg "MLflow skills @ ${MLFLOW_RESOLVED_REF:-n/a}: ${SELECTED_MLFLOW_SKILLS:-}" + msg "APX skills @ ${APX_RESOLVED_REF:-n/a}: ${SELECTED_APX_SKILLS:-}" + if [ -n "$SELECTED_AGENT_B_SKILLS" ]; then + local skills_csv exp_flag="" + skills_csv=$(echo "$SELECTED_AGENT_B_SKILLS" | tr -s ' ' ',' | sed 's/^,//;s/,$//') + agent_b_needs_experimental && exp_flag=" --experimental" + msg "Agent skills (databricks-agent-skills${AGENT_B_RELEASE:+ @ $AGENT_B_RELEASE}): ${SELECTED_AGENT_B_SKILLS}" + if [ -n "$AITOOLS_AGENTS" ]; then + msg "Would run: ${B}databricks aitools install --scope ${SCOPE} --agents ${AITOOLS_AGENTS} --skills ${skills_csv}${exp_flag} -p ${PROFILE}${N}" + fi + if [ -n "$UNSUPPORTED_AGENT_TOOLS" ]; then + local mode="symlink from the aitools canonical store" + [ -z "$AITOOLS_AGENTS" ] && mode="copy via a temp-dir aitools install" + msg "Would deliver agent skills to $(echo "$UNSUPPORTED_AGENT_TOOLS" | tr ' ' ',') ($mode):" + local dir base_dir + [ "$SCOPE" = "global" ] && base_dir="$HOME" || base_dir="$(pwd)" + while IFS= read -r dir; do + [ -n "$dir" ] && msg " → $dir" + done < <(unsupported_skill_dirs "$base_dir") + fi + else + msg "Agent skills: " + fi + echo "" +} + # Check Databricks CLI version meets minimum requirement check_cli_version() { local cli_version @@ -1188,34 +1670,35 @@ install_skills() { dirs=("${unique[@]}") # Count selected skills for display - local db_count=0 mlflow_count=0 apx_count=0 agent_count=0 - for _ in $SELECTED_SKILLS; do db_count=$((db_count + 1)); done - for _ in $SELECTED_MLFLOW_SKILLS; do mlflow_count=$((mlflow_count + 1)); done - for _ in $SELECTED_APX_SKILLS; do apx_count=$((apx_count + 1)); done - for _ in $SELECTED_AGENT_SKILLS; do agent_count=$((agent_count + 1)); done - local total_count=$((db_count + 
mlflow_count + apx_count + agent_count)) - msg "Installing ${B}${total_count}${N} skills" - - # Build set of all skills being installed now - local agent_install_names - agent_install_names=$(echo "$SELECTED_AGENT_SKILLS" | tr ' ' '\n' | sed 's/.*://' | tr '\n' ' ') - local all_new_skills="$SELECTED_SKILLS $SELECTED_MLFLOW_SKILLS $SELECTED_APX_SKILLS $agent_install_names" - - # Clean up previously installed skills that are no longer selected + local local_count mlflow_count apx_count + local_count=$(_count $SELECTED_LOCAL_SKILLS) + mlflow_count=$(_count $SELECTED_MLFLOW_SKILLS) + apx_count=$(_count $SELECTED_APX_SKILLS) + local total_count=$((local_count + mlflow_count + apx_count)) + msg "Installing ${B}${total_count}${N} skills (agent skills are installed separately via databricks aitools)" + + # Skills this installer manages directly. Agent skills are deliberately NOT + # in this set: any same-named entry from an older install is a stale real + # copy that must be removed — `databricks aitools` will not overwrite an + # existing real directory, so leaving it would shadow the new install. + # (Symlinks for tools aitools can't target are re-created each run.) + local all_new_skills="$SELECTED_LOCAL_SKILLS $SELECTED_MLFLOW_SKILLS $SELECTED_APX_SKILLS" + + # Clean up previously installed skills that are no longer managed here # Check scope-local manifest first, fall back to global for upgrades from older versions local manifest="$STATE_DIR/.installed-skills" [ ! 
-f "$manifest" ] && [ "$SCOPE" = "project" ] && [ -f "$INSTALL_DIR/.installed-skills" ] && manifest="$INSTALL_DIR/.installed-skills" if [ -f "$manifest" ]; then while IFS='|' read -r prev_dir prev_skill; do [ -z "$prev_skill" ] && continue - # Skip if this skill is still selected (exact match — see _is_preselected for why) - if echo "$all_new_skills" | tr ' ' '\n' | grep -Fxq "$prev_skill"; then + # Skip if this skill is still selected (exact match — see _in_list for why) + if _in_list "$prev_skill" "$all_new_skills"; then continue fi - # Only remove if the directory exists - if [ -d "$prev_dir/$prev_skill" ]; then + # Remove real dirs and symlinks alike (rm -rf on a symlink removes the link) + if [ -d "$prev_dir/$prev_skill" ] || [ -L "$prev_dir/$prev_skill" ]; then rm -rf "$prev_dir/$prev_skill" - msg "${D}Removed deselected skill: $prev_skill${N}" + msg "${D}Removed previously installed skill: $prev_skill${N}" fi done < "$manifest" fi @@ -1225,35 +1708,38 @@ install_skills() { mkdir -p "$STATE_DIR" : > "$manifest.tmp" + local mlflow_raw_url="$MLFLOW_BASE_URL/${MLFLOW_RESOLVED_REF:-main}" + local apx_raw_url="$APX_BASE_URL/${APX_RESOLVED_REF:-main}/skills/apx" + for dir in "${dirs[@]}"; do mkdir -p "$dir" - # Install Databricks skills from repo - for skill in $SELECTED_SKILLS; do + # Install bundled Databricks skills from this repo + for skill in $SELECTED_LOCAL_SKILLS; do local src="$REPO_DIR/databricks-skills/$skill" [ ! 
-d "$src" ] && continue rm -rf "$dir/$skill" cp -r "$src" "$dir/$skill" echo "$dir|$skill" >> "$manifest.tmp" done - ok "Databricks skills ($db_count) → ${dir#$HOME/}" + ok "Databricks skills ($local_count) → ${dir#$HOME/}" # Install MLflow skills from mlflow/skills repo if [ -n "$SELECTED_MLFLOW_SKILLS" ]; then for skill in $SELECTED_MLFLOW_SKILLS; do local dest_dir="$dir/$skill" mkdir -p "$dest_dir" - local url="$MLFLOW_RAW_URL/$skill/SKILL.md" + local url="$mlflow_raw_url/$skill/SKILL.md" if curl -fsSL "$url" -o "$dest_dir/SKILL.md" 2>/dev/null; then # Try to fetch optional reference files for ref in reference.md examples.md api.md; do - curl -fsSL "$MLFLOW_RAW_URL/$skill/$ref" -o "$dest_dir/$ref" 2>/dev/null || true + curl -fsSL "$mlflow_raw_url/$skill/$ref" -o "$dest_dir/$ref" 2>/dev/null || true done echo "$dir|$skill" >> "$manifest.tmp" else rm -rf "$dest_dir" fi done - ok "MLflow skills ($mlflow_count) → ${dir#$HOME/}" + ok "MLflow skills ($mlflow_count, @ ${MLFLOW_RESOLVED_REF}) → ${dir#$HOME/}" fi # Install APX skills from databricks-solutions/apx repo @@ -1261,74 +1747,18 @@ install_skills() { for skill in $SELECTED_APX_SKILLS; do local dest_dir="$dir/$skill" mkdir -p "$dest_dir" - local url="$APX_RAW_URL/SKILL.md" + local url="$apx_raw_url/SKILL.md" if curl -fsSL "$url" -o "$dest_dir/SKILL.md" 2>/dev/null; then # Try to fetch optional reference files for ref in backend-patterns.md frontend-patterns.md; do - curl -fsSL "$APX_RAW_URL/$ref" -o "$dest_dir/$ref" 2>/dev/null || true + curl -fsSL "$apx_raw_url/$ref" -o "$dest_dir/$ref" 2>/dev/null || true done echo "$dir|$skill" >> "$manifest.tmp" else rmdir "$dest_dir" 2>/dev/null || warn "Could not install APX skill '$skill' — consider removing $dest_dir if it is no longer needed" fi done - ok "APX skills ($apx_count) → ${dir#$HOME/}" - fi - - # Install Agent skills from databricks/databricks-agent-skills repo - if [ -n "$SELECTED_AGENT_SKILLS" ]; then - # Fetch the full repo tree once (single API call) for 
all skills. - # Collapse pretty-printed JSON to a single line + squeeze whitespace so the - # path/mode/type fields land adjacent for the per-entry regex below. - local agent_tree agent_success=0 - agent_tree=$(curl -fsSL "$AGENT_SKILLS_API_URL" 2>/dev/null | tr -d '\n' | tr -s ' ') - for entry in $SELECTED_AGENT_SKILLS; do - local src_name="${entry%%:*}" - local install_name="${entry#*:}" - local dest_dir="$dir/$install_name" - # Wipe any prior install so upstream-deleted files don't persist - rm -rf "$dest_dir" - mkdir -p "$dest_dir" - # Extract file paths under skills// — match only entries whose - # next JSON fields are `"mode": "...", "type": "blob"`, so directory - # entries (type=tree) are skipped. Note the agent_tree has been - # whitespace-collapsed above; the GitHub tree API returns fields in - # the order path → mode → type → sha → size → url, so this pattern - # matches each blob exactly once. - local files - files=$(echo "$agent_tree" \ - | grep -oE '"path": *"skills/'"$src_name"'/[^"]+", *"mode": *"[^"]+", *"type": *"blob"' \ - | sed 's/.*"path": *"\([^"]*\)".*/\1/') - if [ -z "$files" ]; then - rmdir "$dest_dir" 2>/dev/null || true - warn "Could not fetch agent skill '$src_name'" - continue - fi - local ok_flag=1 - while IFS= read -r filepath; do - [ -z "$filepath" ] && continue - local rel="${filepath#skills/$src_name/}" - local dest="$dest_dir/$rel" - mkdir -p "$(dirname "$dest")" - if ! 
curl -fsSL "$AGENT_SKILLS_RAW_URL/$src_name/${rel}" -o "$dest" 2>/dev/null; then - ok_flag=0 - fi - done <<< "$files" - if [ "$ok_flag" -eq 1 ]; then - echo "$dir|$install_name" >> "$manifest.tmp" - agent_success=$((agent_success + 1)) - else - rm -rf "$dest_dir" - warn "Could not install agent skill '$src_name'" - fi - done - if [ "$agent_success" -eq "$agent_count" ]; then - ok "Agent skills ($agent_count) → ${dir#$HOME/}" - elif [ "$agent_success" -gt 0 ]; then - warn "Agent skills (only $agent_success of $agent_count installed) → ${dir#$HOME/}" - else - warn "Agent skills (0 of $agent_count installed) → ${dir#$HOME/}" - fi + ok "APX skills ($apx_count, @ ${APX_RESOLVED_REF}) → ${dir#$HOME/}" fi done @@ -1727,6 +2157,9 @@ summary() { msg "Location: $INSTALL_DIR" msg "Scope: $SCOPE" msg "Tools: $(echo "$TOOLS" | tr ' ' ', ')" + if [ -n "$SELECTED_AGENT_B_SKILLS" ]; then + msg "Agent skills are managed by ${B}databricks aitools${N} — update with ${B}databricks aitools update${N}" + fi echo "" msg "${B}Next steps:${N}" local step=1 @@ -1951,12 +2384,15 @@ prompt_auth() { # Main main() { + # --list-skills exits early (uses the live aitools inventory when available) + [ "${LIST_SKILLS:-false}" = true ] && list_skills_and_exit + if [ "$SILENT" = false ]; then echo "" echo -e "${B}Databricks AI Dev Kit Installer${N}" echo "────────────────────────────────" fi - + # ── Step 1: Release channel selection (may re-exec from experimental branch) ── prompt_channel @@ -1964,6 +2400,9 @@ main() { step "Checking prerequisites" check_deps + # Discover the agent-skills inventory (live via `databricks aitools list`, or fallback) + fetch_agent_b_inventory + # ── Step 2: Interactive tool selection ── step "Selecting tools" detect_tools @@ -1992,9 +2431,10 @@ main() { step "Skill profiles" prompt_skills_profile resolve_skills + resolve_fetch_refs # Count for display - local sk_count=0 - for _ in $SELECTED_SKILLS $SELECTED_MLFLOW_SKILLS $SELECTED_APX_SKILLS; do sk_count=$((sk_count + 
1)); done + local sk_count + sk_count=$(_count $SELECTED_LOCAL_SKILLS $SELECTED_MLFLOW_SKILLS $SELECTED_APX_SKILLS $SELECTED_AGENT_B_SKILLS) if [ -n "$USER_SKILLS" ]; then ok "Custom selection ($sk_count skills)" else @@ -2022,15 +2462,23 @@ main() { if [ -n "$USER_SKILLS" ]; then echo -e " Skills: ${G}custom selection${N} ${Y}(will be overwritten, backup your changes first)${N}" else - local sk_total=0 - for _ in $SELECTED_SKILLS $SELECTED_MLFLOW_SKILLS $SELECTED_APX_SKILLS $SELECTED_AGENT_SKILLS; do sk_total=$((sk_total + 1)); done + local sk_total + sk_total=$(_count $SELECTED_LOCAL_SKILLS $SELECTED_MLFLOW_SKILLS $SELECTED_APX_SKILLS $SELECTED_AGENT_B_SKILLS) echo -e " Skills: ${G}${SKILLS_PROFILE:-all} ($sk_total skills)${N} ${Y}(will be overwritten, backup your changes first)${N}" fi + [ -n "$SELECTED_AGENT_B_SKILLS" ] && echo -e " Agent skills: ${G}via databricks aitools${N} ${D}(requires Databricks CLI v${MIN_AITOOLS_CLI_VERSION}+)${N}" + [ -n "$SELECTED_APX_SKILLS" ] && [ -n "$APX_RESOLVED_REF" ] && echo -e " APX ref: ${G}${APX_RESOLVED_REF}${N}" fi [ "$INSTALL_MCP" = true ] && echo -e " MCP config: ${G}yes${N}" echo "" fi + # ── Dry run: report the plan and exit before any changes ── + if [ "$DRY_RUN" = true ]; then + dry_run_report + exit 0 + fi + if [ "$SILENT" = false ] && is_interactive; then local confirm confirm=$(prompt "Proceed with installation? 
${D}(y/n)${N}" "y") @@ -2058,9 +2506,15 @@ main() { ok "Repository cloned ($BRANCH)" fi - # Install skills + # Install skills managed by this installer (bundled + mlflow + apx) [ "$INSTALL_SKILLS" = true ] && install_skills "$base_dir" + # Install agent skills (delegated to `databricks aitools`) + [ "$INSTALL_SKILLS" = true ] && install_agent_b_skills "$base_dir" + + # Record resolved sources + [ "$INSTALL_SKILLS" = true ] && write_lockfile + # Write GEMINI.md if gemini is selected if echo "$TOOLS" | grep -q gemini; then if [ "$SCOPE" = "global" ]; then From 7f0662eaa87c4358b84091379cc0a55b7a976784 Mon Sep 17 00:00:00 2001 From: dustinvannoy-db Date: Fri, 12 Jun 2026 00:08:30 +0200 Subject: [PATCH 02/14] Make MCP server opt-in/deprecated and clean up both installers Delegate the MCP venv build to databricks-mcp-server/setup.{sh,ps1} (new canonical setup scripts) instead of inlining it in the installers. MCP now defaults off with an experimental-style "Deprecated MCP Server" opt-in prompt and a --mcp flag. 
Also fixes the verified bash/PowerShell divergences from review: - Claude global MCP path -> ~/.claude.json on Windows (was ~/.claude\mcp.json) - $script:Profile_ typo in the Cursor-global message - remove dead check_sdk_version (SDK check moved into setup scripts) - add the Claude update-check SessionStart hook on Windows - defer_loading parity; consolidate the three MCP-JSON writers into one - env-var parity (DEVKIT_* + DEVKIT_BRANCH/AIDEVKIT_BRANCH alias) - prompt_channel uses is_interactive; scope prompt reuses the shared selector - generate the custom-skills picker from the live inventory - PowerShell JSON writers init $existing and merge independent of the venv Co-authored-by: Isaac --- databricks-mcp-server/setup.ps1 | 164 +++++++++ databricks-mcp-server/setup.sh | 158 ++++++--- install.ps1 | 577 +++++++++++++------------------- install.sh | 412 ++++++++++------------- 4 files changed, 670 insertions(+), 641 deletions(-) create mode 100644 databricks-mcp-server/setup.ps1 diff --git a/databricks-mcp-server/setup.ps1 b/databricks-mcp-server/setup.ps1 new file mode 100644 index 00000000..836cb23a --- /dev/null +++ b/databricks-mcp-server/setup.ps1 @@ -0,0 +1,164 @@ +# +# Setup script for databricks-mcp-server (Windows). +# +# Creates the Python virtual environment and installs the MCP server (plus its +# databricks-tools-core dependency) in editable mode, then verifies the install. +# +# This is the single source of truth for building the MCP server runtime on +# Windows. The unified installers only write the editor config files that point +# at the venv — when the user opts into the (deprecated) MCP server, they +# delegate the actual environment build here. 
+# +# Usage: +# .\databricks-mcp-server\setup.ps1 [OPTIONS] +# +# Options: +# -VenvDir DIR Location for the virtual environment (default: \.venv) +# -Python VER Python version to request from uv (default: 3.11) +# -Quiet Suppress progress output (errors still print) +# -Help Show this help +# + +$ErrorActionPreference = "Stop" + +# ─── Parse arguments (accept both -Style and --style for parity with bash) ─── +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$ParentDir = Split-Path -Parent $ScriptDir +$ToolsCoreDir = Join-Path $ParentDir "databricks-tools-core" + +$VenvDir = Join-Path $ScriptDir ".venv" +$PythonVersion = "3.11" +$Quiet = $false + +$i = 0 +while ($i -lt $args.Count) { + switch ($args[$i]) { + { $_ -in "--venv-dir", "-VenvDir" } { $VenvDir = $args[$i + 1]; $i += 2 } + { $_ -in "--python", "-Python" } { $PythonVersion = $args[$i + 1]; $i += 2 } + { $_ -in "--quiet", "-Quiet" } { $Quiet = $true; $i++ } + { $_ -in "-h", "--help", "-Help" } { + Write-Host "Setup the Databricks MCP server runtime (Windows)" + Write-Host "" + Write-Host "Usage: .\databricks-mcp-server\setup.ps1 [OPTIONS]" + Write-Host "" + Write-Host " -VenvDir DIR Virtual environment location (default: