Changes from all commits (201 commits)
127fb84
Plumb packed sequence length through local training backends
FurtherAI Apr 8, 2026
2ef7969
Add Megatron trainability runtime and service flow
FurtherAI Apr 8, 2026
c52bff6
Fix minor regressions
FurtherAI Apr 8, 2026
0199fc1
Merge remote-tracking branch 'origin/main' into austin/deepep_compile…
FurtherAI Apr 8, 2026
3d6e892
Install nvshmem and remove patches
FurtherAI Apr 8, 2026
16fd201
Update CI to sm_90 for DeepEP
FurtherAI Apr 8, 2026
9dfc106
Fix CI uv cache upload hangs
FurtherAI Apr 9, 2026
4481357
Add megatron model support phase 1 scaffolding
FurtherAI Apr 9, 2026
c0d308b
Extract provider hooks into qwen model handler
FurtherAI Apr 9, 2026
78d07e8
Move megatron lora traversal into model handlers
FurtherAI Apr 9, 2026
e356dfb
Add canonical megatron adapter export helpers
FurtherAI Apr 9, 2026
8a9672d
Add megatron param name canonicalization helpers
FurtherAI Apr 9, 2026
906f6ef
Add dedicated megatron merged runtime flow
FurtherAI Apr 9, 2026
654698b
Add split vllm runtime package
FurtherAI Apr 9, 2026
04cebfa
Add megatron model support discovery scaffold
FurtherAI Apr 9, 2026
b2ce459
Add non-zero oracle signal checks
FurtherAI Apr 9, 2026
549f73d
Improve architecture coverage recommendations
FurtherAI Apr 9, 2026
0ae31ce
Add minimal layer coverage workflow API
FurtherAI Apr 9, 2026
1b293e5
Remove duplicate oracle replay suite variant
FurtherAI Apr 9, 2026
9dc5cdc
Add SFT HF parity scaffolding
FurtherAI Apr 9, 2026
c2bec58
Extract megatron weight export helpers
FurtherAI Apr 9, 2026
4da6ab9
Use real HF parity deltas
FurtherAI Apr 9, 2026
60bc3f1
Achieve Qwen3.5 HF parity
FurtherAI Apr 9, 2026
7076db9
Remove flex attention compile disable plumbing
FurtherAI Apr 10, 2026
2727104
Wire HF parity into validation workflow
FurtherAI Apr 13, 2026
e835237
Stabilize megatron HF parity runtime
FurtherAI Apr 13, 2026
84d59e0
Drop HF parity delta checks
FurtherAI Apr 13, 2026
362160a
Wire lora coverage and correctness into workflow
FurtherAI Apr 13, 2026
8e43cdd
Wire merged vllm serving into workflow
FurtherAI Apr 13, 2026
3580730
Isolate workflow stages in subprocesses
FurtherAI Apr 14, 2026
95b07e6
Add model support trainability workflow stages
FurtherAI Apr 14, 2026
592d99e
Add realistic packed-position validation and runtime cleanup
FurtherAI Apr 15, 2026
0cf988b
Use real preprocess in packed position validation
FurtherAI Apr 15, 2026
1db721a
Move megatron preprocess patching into model handlers
FurtherAI Apr 15, 2026
9b4c2ac
Replace chat template rollout with conformance suite
FurtherAI Apr 16, 2026
d0a3198
Wait for dedicated vLLM health before serving
FurtherAI Apr 16, 2026
8dd17f6
Fix Qwen3.5 trainability and packed position handling
FurtherAI Apr 16, 2026
faeca8a
Log correctness runs and narrow DeepEP gating
FurtherAI Apr 16, 2026
5ac1f0c
WIP snapshot current megatron bridge/model support state
FurtherAI Apr 21, 2026
c15075f
Split Megatron runtime trainable modes for HF parity
FurtherAI Apr 21, 2026
0f96868
Restore Qwen3.5 text-only SP embedding scatter
FurtherAI Apr 21, 2026
aa708cc
Restore oracle flex attention eager path
FurtherAI Apr 21, 2026
cad8003
Fix Qwen3.5 GDN LoRA TP shard ordering
FurtherAI Apr 22, 2026
383f0aa
Gate DeepEP to supported runtime dtypes
FurtherAI Apr 22, 2026
1144295
Revert invalid flex attention compile toggle
FurtherAI Apr 22, 2026
1cd848e
Restore oracle-only DeepEP fp32 override
FurtherAI Apr 22, 2026
df39090
Generalize LoRA shard manifests and pin block mask compile backend
FurtherAI Apr 22, 2026
5a9388f
Fix sensitivity harness for Qwen3.5 workflow
FurtherAI Apr 22, 2026
6eb6d91
Validate packed position ids with oracle metric
FurtherAI Apr 24, 2026
c307576
Add vllm separation integration test harness
FurtherAI Apr 27, 2026
cb9fa84
Cut over ART core to external vLLM runtime
FurtherAI Apr 27, 2026
740c79e
Add vLLM separation integration checks
FurtherAI Apr 27, 2026
c29563f
Update lockfile for vLLM separation
FurtherAI Apr 27, 2026
31e430d
Fix vLLM separation test package imports
FurtherAI Apr 27, 2026
ae73761
Resolve vLLM separation test repo root via git
FurtherAI Apr 27, 2026
74f3c44
Fix runtime project root resolution in worktrees
FurtherAI Apr 27, 2026
f0888ec
Add service import smoke for vLLM-free ART env
FurtherAI Apr 27, 2026
c7ac04a
Fix service import smoke command
FurtherAI Apr 27, 2026
686285b
Implement multi-rank Megatron merged sync orchestration
FurtherAI Apr 27, 2026
9785444
Fix concurrent init assertion in merged sync tests
FurtherAI Apr 27, 2026
983a2d0
Add runtime boundary service checks
FurtherAI Apr 27, 2026
84ae38b
Add opt-in live local backend runtime smoke
FurtherAI Apr 27, 2026
db39cec
Add direct runtime live smoke
FurtherAI Apr 27, 2026
5c1f4bb
Fix runtime sleep route pause mode import
FurtherAI Apr 27, 2026
6f9d2d7
Add live Megatron separation smokes
FurtherAI Apr 27, 2026
8262767
Fix merged NCCL bootstrap across split runtimes
FurtherAI Apr 27, 2026
1e8f6a2
Normalize raw NCCL ids in runtime wrapper
FurtherAI Apr 27, 2026
42c9237
Fix runtime normalization regression test
FurtherAI Apr 27, 2026
b2006a8
Load full runtime patches in vLLM worker plugins
FurtherAI Apr 27, 2026
8ebb936
Fail fast when Megatron job worker exits
FurtherAI Apr 27, 2026
a7fa7ac
Keep NCCL bootstrap store alive during sync
FurtherAI Apr 27, 2026
f4747fa
Add workflow-style trainability validation matrix
FurtherAI Apr 27, 2026
3e8c61f
Add EP LoRA localization in runtime
FurtherAI Apr 27, 2026
42cecd5
Fix EP MoE LoRA alignment in runtime
FurtherAI Apr 27, 2026
54a8217
Fix runtime EP alignment test harness
FurtherAI Apr 27, 2026
d27afb8
Fix runtime EP LoRA align expert map handling
FurtherAI Apr 27, 2026
f72fff1
Add Qwen3 MoE DeepEP compile workaround
FurtherAI Apr 27, 2026
03506c8
Fix unsloth yes-no trainability config
FurtherAI Apr 27, 2026
b748494
Import unsloth during art startup
FurtherAI Apr 27, 2026
824943d
Tune unsloth yes-no validation defaults
FurtherAI Apr 27, 2026
579cc27
Stabilize unsloth yes-no validation
FurtherAI Apr 27, 2026
f0f772c
Handle unsloth banner in import tests
FurtherAI Apr 27, 2026
670d120
Use default trainability logprob settings
FurtherAI Apr 27, 2026
09fe7eb
Release GPU state between trainability tests
FurtherAI Apr 27, 2026
e831345
Use 1024 packed sequence validation defaults
FurtherAI Apr 27, 2026
513ff43
Stabilize live yes-no validation defaults
FurtherAI Apr 27, 2026
cda94a5
Retry GPU memory recovery in live validation
FurtherAI Apr 27, 2026
69d540a
Add longer Megatron separation live smokes
FurtherAI Apr 28, 2026
9456acb
Remove Megatron auto-setup fallback
FurtherAI Apr 28, 2026
663c6d8
Launch Megatron worker in active env
FurtherAI Apr 28, 2026
9fb5650
Launch vLLM runtime from dedicated env
FurtherAI Apr 28, 2026
b63af40
Fix runtime launcher regression test
FurtherAI Apr 28, 2026
70bd723
Add GDN shared-prefix packed sequence support
FurtherAI Apr 30, 2026
4d17742
Handle sparse Qwen3 MoE expert parity grads
FurtherAI Apr 30, 2026
1fdda3b
Fix GDN sequence-parallel output shapes
FurtherAI Apr 30, 2026
26ae3b8
Respect rollout mode in yes-no trainability
FurtherAI Apr 30, 2026
a5a0446
Cast GDN bucket outputs before scatter
FurtherAI Apr 30, 2026
2ffcb65
Add GDN layout planning support
FurtherAI May 1, 2026
96cdf53
Package vLLM runtime as managed bundle
FurtherAI May 1, 2026
b4a570e
Add ART service lifecycle cleanup
FurtherAI May 2, 2026
e251187
Fix lifecycle cleanup edge cases
FurtherAI May 2, 2026
8f0fcb3
Run Megatron trainability tests out of process
FurtherAI May 2, 2026
a72638d
Allow slow actor startup imports
FurtherAI May 2, 2026
3824036
Fix merged trainability model list assertion
FurtherAI May 2, 2026
133adba
Avoid managed process signal wait deadlock
FurtherAI May 2, 2026
1161211
Stop managed children when wrapper dies
FurtherAI May 2, 2026
77fecd1
Restore dedicated Unsloth SFT guard
FurtherAI May 2, 2026
068c9ce
Address remaining vLLM separation review findings
FurtherAI May 2, 2026
243ef8c
Add Qwen3.5/3.6 native vLLM LoRA support path
FurtherAI May 2, 2026
20cc5ea
Update vLLM runtime to official 0.19.1
FurtherAI May 2, 2026
3dd13ad
Wire native LoRA support through handlers
FurtherAI May 2, 2026
986cb6e
Adapt runtime routes to vLLM 0.19 app API
FurtherAI May 2, 2026
3fc3120
Fix dense Qwen35 text-only validation path
FurtherAI May 3, 2026
5c6a8d9
Add env gate for workflow sensitivity stage
FurtherAI May 3, 2026
44f88d5
Prepare native vLLM MoE LoRA checkpoints
FurtherAI May 3, 2026
6ee8f27
Relax packed position id MoE tolerance
FurtherAI May 3, 2026
ea8bf50
Mark Qwen3.5 MoE native LoRA as validated
FurtherAI May 3, 2026
423224f
Enable Qwen3.5/3.6 LoRA rollout defaults
FurtherAI May 3, 2026
ec9fcb3
Lazy-load tinker server export
FurtherAI May 3, 2026
4485f45
Stub tinker in renderer unit tests
FurtherAI May 3, 2026
aa4b825
Lazy-load tinker native backend export
FurtherAI May 3, 2026
4f8781b
Gate shared expert parallel by model family
FurtherAI May 3, 2026
9c95945
Split dense and MoE shared config expectations
FurtherAI May 3, 2026
c4f46ce
Revert "Lazy-load tinker server export"
FurtherAI May 3, 2026
9dc95d3
Revert "Lazy-load tinker native backend export"
FurtherAI May 3, 2026
293758e
Remove shared FC1 LoRA shape fallback
FurtherAI May 3, 2026
0825421
Revert runtime LoRA checkpoint rewriting
FurtherAI May 3, 2026
61755c4
Add native vLLM LoRA layout probe
FurtherAI May 3, 2026
58508ca
Expand native vLLM LoRA layout probe
FurtherAI May 3, 2026
84b9861
Make Megatron LoRA disk checkpoints vLLM canonical
FurtherAI May 3, 2026
f445bb3
Keep Megatron LoRA shards native
FurtherAI May 3, 2026
133da5e
Avoid redundant identity LoRA config save
FurtherAI May 3, 2026
4c7ef23
Split Megatron dense and MoE model support
FurtherAI May 3, 2026
ee53c05
Gate Megatron model support registry
FurtherAI May 3, 2026
15f70c3
Filter oracle variants by visible GPUs
FurtherAI May 3, 2026
16ccb57
Add Qwen3 dense probe handler
FurtherAI May 3, 2026
9c77732
Use registry for Megatron model support gating
FurtherAI May 3, 2026
40b1391
Remove qwen bridge fakes from provider tests
FurtherAI May 3, 2026
c1cc9d9
Canonicalize dense TP gate-up traces
FurtherAI May 3, 2026
24ca82c
Allow tiny absolute oracle loss drift
FurtherAI May 3, 2026
2ded12d
Rename unsupported_arch to unvalidated_arch. And remove loss threshol…
FurtherAI May 4, 2026
72ae53f
Fold oracle extended topologies into defaults
FurtherAI May 4, 2026
b03f70d
Use real CP size for shared-prefix GDN
FurtherAI May 4, 2026
64030f9
Allow full GDN specs with sequence parallel shards
FurtherAI May 4, 2026
75d5e86
Trace GDN modules in oracle forward reports
FurtherAI May 4, 2026
d222600
Canonicalize componentwise LoRA trace outputs
FurtherAI May 4, 2026
a968ab6
Slightly bump oracle correctness threshold for loss
FurtherAI May 4, 2026
cb85c5e
Validate Qwen3 native vLLM LoRA mode
FurtherAI May 4, 2026
c178ac5
Remove unsourced Qwen3.6 pricing
FurtherAI May 4, 2026
eda42b1
Remove Megatron optional fallback paths
FurtherAI May 4, 2026
38f4faf
Make selected Megatron paths strict
FurtherAI May 4, 2026
d6c129d
Update provider recompute test fixture
FurtherAI May 4, 2026
4bcf909
Fix provider recompute test model
FurtherAI May 4, 2026
a5e1915
Correct provider support fixture models
FurtherAI May 4, 2026
c56d89d
Fix model support stage worker arch flag
FurtherAI May 4, 2026
8df90dd
Parallelize yes-no eval prompts
FurtherAI May 4, 2026
c785024
Make native vLLM LoRA a quick serving gate
FurtherAI May 4, 2026
1ff559f
Use fresh native LoRA serving artifacts
FurtherAI May 4, 2026
57eddc1
Propagate unvalidated model validation flag
FurtherAI May 4, 2026
8fd8aa4
Delegate GDN projections to Megatron modules
FurtherAI May 4, 2026
7948e6a
Canonicalize GDN forward traces
FurtherAI May 4, 2026
05c6164
Keep GDN trace metadata in test harness
FurtherAI May 4, 2026
1225b08
Use dense topology for dense trainability
FurtherAI May 5, 2026
3c5cd55
Disable Qwen35 DeepEP permute compile
FurtherAI May 5, 2026
7a9917b
Test Qwen35 DeepEP compile workaround
FurtherAI May 5, 2026
674c256
Lower yes-no trainability reward gate
FurtherAI May 5, 2026
5b520e3
Validate native vLLM LoRA for Qwen3 dense
FurtherAI May 5, 2026
d70ab2c
Promote dense Qwen models to validated support
FurtherAI May 5, 2026
3d77ba3
Avoid eager model support workflow imports
FurtherAI May 5, 2026
3663266
Use compact packed GDN kernels for local buckets
FurtherAI May 5, 2026
5d32ac0
Use chunked FLA GDN kernel
FurtherAI May 6, 2026
697f392
Use fused Megatron cross entropy
FurtherAI May 6, 2026
632eefb
Remove legacy GDN executor path
FurtherAI May 6, 2026
4d60c94
Add harness CE fusion override worker
FurtherAI May 6, 2026
d57b48e
Add GDN timing hooks to harness wrapper
FurtherAI May 6, 2026
02f221b
Organize Megatron modules and integration tests
FurtherAI May 7, 2026
06814b0
Fix HF parity invariant handler call
FurtherAI May 7, 2026
df52d07
Port main dependency and lifecycle updates
FurtherAI May 8, 2026
4c1fde1
Update Qwen handler for newer bridge mappings
FurtherAI May 8, 2026
6c66d67
Validate Qwen3.5 vLLM LoRA layout
FurtherAI May 8, 2026
470f966
Remove flex attention compile tuning options
FurtherAI May 8, 2026
6b43ef0
Ignore train inference mismatch artifacts
FurtherAI May 8, 2026
5fe1f1b
Avoid assert bytecode in flex attention forward
FurtherAI May 8, 2026
70e9db4
Report flex attention bias type mismatches
FurtherAI May 8, 2026
f79e63e
Propagate Qwen3.5 MTP shared-prefix attention
FurtherAI May 8, 2026
1506236
Forward Qwen3.5 MTP attention bias to layers
FurtherAI May 8, 2026
dd16e0a
Avoid checkpointing Qwen3.5 MTP attention state
FurtherAI May 8, 2026
5bf2c87
Disable Qwen3.5 MTP in ART Megatron
FurtherAI May 8, 2026
e9b869d
Drop MTP diagnostic flex attention changes
FurtherAI May 8, 2026
d26ecb7
Assert Qwen3.5 ART training has no MTP
FurtherAI May 8, 2026
6b40e71
Clean PR artifacts and fix type checks
FurtherAI May 8, 2026
aafedae
Merge remote-tracking branch 'origin/main' into austin/vllm_separation
FurtherAI May 8, 2026
7edba06
Unify runtime process supervision
FurtherAI May 9, 2026
a31a581
Model asyncio subprocess contract in runtime tests
FurtherAI May 9, 2026
815d577
Defer supervised wait coroutine creation
FurtherAI May 9, 2026
f662370
Prune oracle topology artifacts by default
FurtherAI May 9, 2026
7434fdf
Handle vLLM EP dummy LoRA warmup
FurtherAI May 9, 2026
e84cc4c
Keep vLLM MoE LoRA stacking idempotent
FurtherAI May 9, 2026
ef2c7b9
Add train inference mismatch workflow stage
FurtherAI May 10, 2026
a0c071b
Update workflow test oracle artifact mocks
FurtherAI May 10, 2026
cee9112
Preserve recent Unsloth training fixes
FurtherAI May 11, 2026
2 changes: 1 addition & 1 deletion .github/workflows/package-install.yml
@@ -27,7 +27,7 @@ jobs:
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- name: Build wheel
run: uv build --wheel --out-dir dist
run: python scripts/build_package.py --wheel

- name: Smoke test uv add + sync for backend extra
run: |
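Both this workflow and `release.yml` below now route builds through `scripts/build_package.py`, whose body is not part of this diff. A minimal sketch of what such a wrapper could look like, assuming it simply shells out to the `uv build` invocation it replaces (`--wheel` is the only flag the workflows here exercise; everything else is an assumption):

```python
#!/usr/bin/env python3
"""Hypothetical sketch of scripts/build_package.py (not shown in this PR section)."""
import argparse
import subprocess


def main() -> None:
    parser = argparse.ArgumentParser(description="Build openpipe-art distributions")
    # --wheel is the only flag exercised by the workflows in this diff.
    parser.add_argument("--wheel", action="store_true", help="build only the wheel")
    args = parser.parse_args()

    # Default invocation builds both sdist and wheel, matching `hatch build`.
    cmd = ["uv", "build", "--out-dir", "dist"]
    if args.wheel:
        cmd.insert(2, "--wheel")
    subprocess.run(cmd, check=True)


if __name__ == "__main__":
    main()
```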
107 changes: 89 additions & 18 deletions .github/workflows/release.yml
@@ -10,9 +10,11 @@ permissions:
id-token: write

jobs:
release:
build-package:
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true && startsWith(github.event.pull_request.head.ref, 'release/')
outputs:
version: ${{ steps.get_version.outputs.VERSION }}
steps:
- uses: actions/checkout@v4
with:
@@ -21,52 +23,121 @@
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: "3.11"

- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> $GITHUB_PATH

- name: Install dependencies
run: |
uv venv
uv pip install -e .
uv pip install hatch
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

- name: Build package
run: uv run hatch build
run: python scripts/build_package.py

- name: Get version from pyproject.toml
id: get_version
run: |
VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])")
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"

- name: Upload package artifact
uses: actions/upload-artifact@v4
with:
name: python-distributions
path: dist/*

runtime-smoke:
runs-on: art-large-runner
needs: build-package
steps:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

- name: Download package artifact
uses: actions/download-artifact@v4
with:
name: python-distributions
path: dist

- name: Smoke test managed vLLM runtime install
run: |
export ART_VLLM_RUNTIME_CACHE_DIR="${RUNNER_TEMP}/art-vllm-runtime-cache"
export UV_LINK_MODE=copy
wheel_path="$(python - <<'PY'
from pathlib import Path

print(next(Path("dist").glob("openpipe_art-*.whl")).resolve())
PY
)"

project_dir="$(mktemp -d)"
cd "$project_dir"
uv init --name art-runtime-smoke --python 3.11 --bare
uv add "openpipe-art[backend] @ file://${wheel_path}"
uv sync
uv run python - <<'PY'
from pathlib import Path
import subprocess

from art.vllm_runtime import ensure_vllm_runtime

runtime_bin = ensure_vllm_runtime()
runtime_python = Path(runtime_bin).parent / "python"
subprocess.run([str(runtime_bin), "--help"], check=True)
subprocess.run(
[
str(runtime_python),
"-c",
"import art_vllm_runtime, torch, vllm; print('runtime imports ok')",
],
check=True,
)
print(runtime_bin)
PY
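The smoke step above pins down the contract introduced by the vLLM separation work: an ART environment with no vLLM installed must still be able to provision the packaged runtime and reach a working interpreter inside it. A consumer-side sketch of that contract follows; only `ensure_vllm_runtime()`, the sibling `python` interpreter, and `ART_VLLM_RUNTIME_CACHE_DIR` are confirmed by this workflow, while the provision-on-first-call behavior is an assumption:

```python
import os
import subprocess
from pathlib import Path

from art.vllm_runtime import ensure_vllm_runtime

# Optional: pin where the runtime bundle lands (CI points this at RUNNER_TEMP
# so runs stay hermetic).
os.environ.setdefault("ART_VLLM_RUNTIME_CACHE_DIR", "/tmp/art-vllm-runtime-cache")

# Assumed to download/install the managed bundle on first call and return
# the path to its executable entry point.
runtime_bin = Path(ensure_vllm_runtime())
runtime_python = runtime_bin.parent / "python"

# vLLM is importable only inside the runtime env, never in the ART env itself.
subprocess.run(
    [str(runtime_python), "-c", "import vllm; print(vllm.__version__)"],
    check=True,
)
```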

publish:
runs-on: ubuntu-latest
needs: [build-package, runtime-smoke]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Download package artifact
uses: actions/download-artifact@v4
with:
name: python-distributions
path: dist

- name: Create git tag
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git tag v${{ steps.get_version.outputs.VERSION }}
git push origin v${{ steps.get_version.outputs.VERSION }}
git tag v${{ needs.build-package.outputs.version }}
git push origin v${{ needs.build-package.outputs.version }}

- name: Publish draft release
env:
GH_TOKEN: ${{ github.token }}
run: |
# Check if draft release exists and publish it
if gh release view v${{ steps.get_version.outputs.VERSION }} --json isDraft | jq -r '.isDraft' | grep -q true; then
gh release edit v${{ steps.get_version.outputs.VERSION }} --draft=false
if gh release view v${{ needs.build-package.outputs.version }} --json isDraft | jq -r '.isDraft' | grep -q true; then
gh release edit v${{ needs.build-package.outputs.version }} --draft=false
else
echo "::error::No draft release found for v${{ steps.get_version.outputs.VERSION }}"
echo "::error::No draft release found for v${{ needs.build-package.outputs.version }}"
exit 1
fi

- name: Upload assets to release
env:
GH_TOKEN: ${{ github.token }}
run: |
gh release upload v${{ steps.get_version.outputs.VERSION }} dist/*
gh release upload v${{ needs.build-package.outputs.version }} dist/*

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
3 changes: 2 additions & 1 deletion .gitignore
@@ -20,4 +20,5 @@ trajectories/
.ruff_cache/
!/src/art/wandb/
!/src/art/wandb/**
/src/art/wandb/__pycache__/
/src/art/wandb/__pycache__/
scratch/
2 changes: 1 addition & 1 deletion dev/bench_cute_grouped_lora.py
@@ -11,7 +11,7 @@
from pydantic import BaseModel, ConfigDict, Field, field_validator
import torch

from art.megatron.cute_grouped_lora_quack import quack_grouped_lora
from art.megatron.kernels.cute_grouped_lora_quack import quack_grouped_lora

GroupedLoraFn = Callable[
[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
19 changes: 14 additions & 5 deletions pyproject.toml
@@ -40,11 +40,11 @@ backend = [
"gql<4",
"nvidia-cudnn-frontend<1.21 ; sys_platform == 'linux'",
"nvidia-resiliency-ext<0.5 ; sys_platform == 'linux'",
"vllm @ https://github.com/vivekkalyan/vllm/releases/download/v0.17.0-art1/vllm-0.17.0%2Bart1-cp38-abi3-manylinux_2_31_x86_64.whl ; sys_platform == 'linux'",
]
megatron = [
"numpy<2",
"torch==2.10.0",
"quack-kernels==0.2.5",
"apex @ git+https://github.com/NVIDIA/apex.git@25.09",
"transformer-engine==2.11.0",
"transformer-engine-cu12==2.11.0",
@@ -83,9 +83,6 @@ tinker = [
[project.scripts]
art = "art.cli:app"

[project.entry-points."vllm.general_plugins"]
art = "art.vllm.patches:patch_transformers_v5_compat"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
@@ -103,17 +100,30 @@ packages = ["src/art", "src/mp_actors"]
sources = ["src"]

[tool.hatch.build.targets.sdist]
sources = []
only-include = [
".agents/skills",
"LICENSE",
"README.md",
"THIRD-PARTY-NOTICES",
"pyproject.toml",
"src",
]
exclude = [
"/dev",
"/wandb",
"/.art",
"/.local",
"/.ruff_cache",
"/.venv",
"/dist",
"/scratch",
"/unsloth_compiled_cache",
"/.git",
"/.github",
"/examples/*/data",
"/examples/*/wandb",
"/tests/unsloth_compiled_cache",
"**/__pycache__",
"**/*.pyc",
]
@@ -217,7 +227,6 @@ allowed-unresolved-imports = [
"unsloth.**",
"unsloth_zoo.**",
"uvicorn.**",
"vllm.**",
"wandb.**",
# langgraph deps
"langchain_core.**",
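With the sdist target switching from blanket source inclusion to an explicit `only-include` list plus a broader `exclude` list, a quick post-build check can confirm that scratch directories and bytecode stay out of the published artifact. A sketch, assuming the build produces a normalized `dist/openpipe_art-*.tar.gz` (the `openpipe_art-*.whl` glob in the release smoke test suggests this naming):

```python
import tarfile
from pathlib import Path

# Assumes `python scripts/build_package.py` has already produced an sdist.
sdist = next(Path("dist").glob("openpipe_art-*.tar.gz"))
with tarfile.open(sdist) as tf:
    names = tf.getnames()

# only-include should keep the packaging roots...
assert any(name.endswith("/pyproject.toml") for name in names)
assert any("/src/art/" in name for name in names)
# ...while exclude should drop scratch dirs, caches, and bytecode.
assert not any("scratch/" in name for name in names)
assert not any(name.endswith(".pyc") or "__pycache__" in name for name in names)
print(f"{sdist.name}: {len(names)} files, no excluded artifacts")
```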