Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 6 additions & 15 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,12 @@ option(WITH_ASCEND "Enable Ascend backend" OFF)

option(WITH_TORCH "Enable PyTorch C++ backend" OFF)

# Custom `AscendC` kernels under `src/ascend/custom/`. `ON` by default
# so CI and routine dev builds always exercise `implementation_index=1/2`
# for `RmsNorm` / `AddRmsNorm`. Gated by `WITH_ASCEND` in
# `src/CMakeLists.txt` — non-Ascend builds ignore it. Pass
# `-DBUILD_ASCEND_CUSTOM=OFF` to skip the `ccec` build on Ascend
# machines where the custom kernels aren't needed.
#
# When `ON`, `src/CMakeLists.txt` drives the standalone
# `src/ascend/custom/build.sh` through a build-time `add_custom_command`
# rule (sidesteps a `CANN` `extract_host_stub.py` path bug that breaks
# in-tree `ascendc_library()` under `scikit-build-core` temp-dir builds)
# and links the produced `libno_workspace_kernel.a` into the `ops`
# module with `--whole-archive`. Requires `torch_npu` and the
# `AscendC` toolchain (`ccec`).
option(BUILD_ASCEND_CUSTOM "Build custom AscendC kernels" ON)
# Default OFF until CANN's `extract_host_stub.py` path handling is fixed for
# `scikit-build-core` temp-dir builds (triggers `KeyError` on the preprocessed
# object path). Enable explicitly with `-DBUILD_CUSTOM_KERNEL=ON` when the
# toolchain is compatible or when building via the standalone
# `src/ascend/custom/build.sh` script.
option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requires `torch_npu`)" OFF)

option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF)
option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF)
Expand Down
9 changes: 0 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,6 @@ name = "InfiniOps"
version = "0.1.0"

[project.optional-dependencies]
# TODO: `torch` here is unconstrained. On Ascend hosts, the working
# torch is the Ascend-matched `torch 2.9.0+cpu` paired with
# `torch_npu 2.9.0.post1+…`. A `pip install -e .[dev] --force-reinstall`
# will re-resolve `torch` to the latest PyPI version (currently
# `torch 2.11.0`), which now declares `cuda-toolkit` / `nvidia-cublas` /
# `nvidia-cudnn` / … as hard deps — downloads GBs of CUDA wheels and
# kills the `torch_npu` / `vllm-ascend` pairing. Needs a platform-aware
# split (e.g. `torch; platform_machine != 'aarch64'`, or move `torch`
# out of `dev` and require it pre-installed in the container image).
dev = ["pytest", "pytest-cov", "pytest-xdist", "ruff", "torch", "pyyaml"]

[tool.scikit-build.wheel]
Expand Down
27 changes: 2 additions & 25 deletions scripts/generate_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,29 +112,9 @@ def _find_vector_tensor_params(op_name):
return set(re.findall(r"std::vector<Tensor>\s+(\w+)", source))


def _find_params_with_defaults(op_name):
    """Collect the scalar parameters in an operator's base header that are
    declared with a `= <literal>` default.

    `libclang`'s cursor API does not expose defaults reliably, so the text
    of ``_BASE_DIR / f"{op_name}.h"`` is regex-scanned instead. Only plain
    scalar declarations are recognised (``bool``, ``int`` and its fixed-width
    variants, ``std::size_t``, ``std::uint*_t``, ``float``, ``double``), such
    as ``bool pre_gathered = false``.

    Returns ``{param_name: default_literal}``, where the literal is the text
    written in the header with surrounding whitespace removed. When the same
    name matches more than once, the last match wins.
    """
    header_text = (_BASE_DIR / f"{op_name}.h").read_text()

    # A scalar type keyword, the parameter name, `=`, then the shortest run
    # of text that ends at the next `,` or `)`.
    scalar_default = r"\b(?:bool|int(?:64_t|32_t|8_t|16_t)?|std::size_t|std::uint\w+_t|float|double)\s+(\w+)\s*=\s*([^,\)]+?)\s*(?:,|\))"

    return {
        param: literal.strip()
        for param, literal in re.findall(scalar_default, header_text)
    }


def _generate_pybind11(operator):
optional_tensor_params = _find_optional_tensor_params(operator.name)
vector_tensor_params = _find_vector_tensor_params(operator.name)
params_with_defaults = _find_params_with_defaults(operator.name)

def _is_optional_tensor(arg):
if arg.spelling in optional_tensor_params:
Expand Down Expand Up @@ -206,10 +186,6 @@ def _generate_py_args(node):

if _is_optional(arg):
parts.append(f'py::arg("{arg.spelling}") = py::none()')
elif arg.spelling in params_with_defaults:
parts.append(
f'py::arg("{arg.spelling}") = {params_with_defaults[arg.spelling]}'
)
else:
parts.append(f'py::arg("{arg.spelling}")')

Expand Down Expand Up @@ -281,7 +257,8 @@ def _generate_call(op_name, call, method=True):
}})
.def_static("clear_cache", &Self::clear_cache);

{callers}}}
{callers}
}}

}} // namespace infini::ops

Expand Down
70 changes: 4 additions & 66 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,66 +241,8 @@ if(WITH_ASCEND)
list(APPEND DEVICE_LIST "ascend")

# Custom `AscendC` kernels (PyTorch extension, requires `torch_npu`).
if(BUILD_ASCEND_CUSTOM)
# In-tree `ascendc_library()` trips the `CANN` `extract_host_stub.py`
# path-handling bug under `scikit-build-core`'s temp-dir builds
# (`KeyError` on `/./workspace/...` paths in `$<TARGET_OBJECTS>`).
# Work around it by driving the standalone `src/ascend/custom/build.sh`
# — that script invokes a separate `cmake` with
# `src/ascend/custom/` as its `SOURCE_DIR`, avoiding the buggy
# path shape. The produced `.a` is imported and linked into
# `ops` with `--whole-archive`.
set(_custom_build_dir "${CMAKE_SOURCE_DIR}/build/build_ascend_custom")
set(_custom_lib "${_custom_build_dir}/lib/libno_workspace_kernel.a")

if(NOT DEFINED SOC_VERSION OR "${SOC_VERSION}" STREQUAL "")
include(${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/cmake/detect_soc.cmake)
infiniops_detect_soc(SOC_VERSION)
endif()

# Drive `build.sh` as a build-phase target with explicit source
# dependencies so that editing any `op_host/` or `op_kernel/`
# source re-triggers the build (plain `execute_process` at
# configure time would only gate on file existence and leave
# stale `.a` files in place).
file(GLOB_RECURSE _custom_srcs CONFIGURE_DEPENDS
"${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/*.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/*.h"
"${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/build.sh")

# Scrub env inherited from the outer `scikit-build-core` invocation
# before handing control to `build.sh`:
# * `CMAKE_GENERATOR` / `CMAKE_EXPORT_COMPILE_COMMANDS` leaking
#   into the inner `cmake` change the path format passed to
#   `ninja`'s `_host_cpp` rule and re-trigger the `CANN`
#   `extract_host_stub.py` `KeyError` (`/./workspace/...`) that
#   standalone `build.sh` avoids.
# * `PYTHONPATH` from `pip`'s build-isolation overlay makes the
#   child `python3` skip the system `site-packages` — child
#   `cmake` modules that `import torch` (`config_envs.cmake`)
#   then fail with `ModuleNotFoundError` even though `torch` is
#   installed.
# * `CMAKE_BUILD_PARALLEL_LEVEL` is unset as well. NOTE(review): the
#   reason is not recorded here — presumably so the inner build picks
#   its own job count; confirm.
add_custom_command(
OUTPUT ${_custom_lib}
COMMAND ${CMAKE_COMMAND} -E env
--unset=CMAKE_GENERATOR
--unset=CMAKE_EXPORT_COMPILE_COMMANDS
--unset=CMAKE_BUILD_PARALLEL_LEVEL
--unset=PYTHONPATH
"BUILD_DIR=${_custom_build_dir}"
"CMAKE_EXE=${CMAKE_COMMAND}"
bash ${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/build.sh ${SOC_VERSION}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom
DEPENDS ${_custom_srcs}
COMMENT "Building custom AscendC kernels (SOC_VERSION=${SOC_VERSION})"
VERBATIM)

add_custom_target(no_workspace_kernel_build ALL DEPENDS ${_custom_lib})

add_library(no_workspace_kernel STATIC IMPORTED GLOBAL)
set_target_properties(no_workspace_kernel PROPERTIES
IMPORTED_LOCATION "${_custom_lib}")
add_dependencies(no_workspace_kernel no_workspace_kernel_build)
if(BUILD_CUSTOM_KERNEL)
add_subdirectory(ascend/custom)

# Link the compiled `AscendC` kernel objects into `infiniops` so that
# custom kernel implementations (e.g. `RmsNorm` index 1) can call
Expand Down Expand Up @@ -437,13 +379,9 @@ if(GENERATE_PYTHON_BINDINGS)
# The `Operator<..., 1>` template instantiations that call
# `aclrtlaunch_*` live in `ops.cc`, so link here with
# `--whole-archive` to ensure all launch functions are available.
# `$<TARGET_FILE>` works for both real `ascendc_library()` targets and
# `IMPORTED` targets pointing at a pre-built `.a`.
if(BUILD_ASCEND_CUSTOM)
if(BUILD_CUSTOM_KERNEL)
target_link_libraries(ops PRIVATE
-Wl,--whole-archive $<TARGET_FILE:no_workspace_kernel> -Wl,--no-whole-archive)
# `ops` link step must wait for `build.sh` to produce the `.a`.
add_dependencies(ops no_workspace_kernel_build)
-Wl,--whole-archive no_workspace_kernel -Wl,--no-whole-archive)
endif()

set_target_properties(infiniops PROPERTIES INSTALL_RPATH "$ORIGIN")
Expand Down
144 changes: 0 additions & 144 deletions src/ascend/add_rms_norm/kernel.h

This file was deleted.

Loading
Loading