From 3fb8b6f7b5cdbff7982b30fda53b8bcb52472b21 Mon Sep 17 00:00:00 2001 From: gongchensu Date: Fri, 24 Apr 2026 16:13:00 +0800 Subject: [PATCH 1/3] feat(hygon): add Hygon backend infrastructure --- .gitignore | 2 + CMakeLists.txt | 126 +++++++++++++++++++++++++++++++++++++- README.md | 4 ++ examples/CMakeLists.txt | 8 ++- src/CMakeLists.txt | 51 ++++++++++++++- src/hygon/blas.h | 40 ++++++++++++ src/hygon/blas_utils.h | 30 +++++++++ src/hygon/device_.h | 101 ++++++++++++++++++++++++++++++ src/hygon/runtime_.h | 44 +++++++++++++ src/hygon/runtime_utils.h | 15 +++++ src/pybind11_utils.h | 35 ++++++++++- 11 files changed, 448 insertions(+), 8 deletions(-) create mode 100644 src/hygon/blas.h create mode 100644 src/hygon/blas_utils.h create mode 100644 src/hygon/device_.h create mode 100644 src/hygon/runtime_.h create mode 100644 src/hygon/runtime_utils.h diff --git a/.gitignore b/.gitignore index 2effaff2..4a5d4b7a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # Generated files build/ +build-*/ +cmake-build-*/ generated/ # Prerequisites diff --git a/CMakeLists.txt b/CMakeLists.txt index 91c2b015..677511cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ set(PYBIND11_ENABLE_EXTRAS ON) option(WITH_CPU "Enable CPU backend" OFF) option(WITH_NVIDIA "Enable CUDA backend" OFF) option(WITH_ILUVATAR "Enable Iluvatar GPU backend" OFF) +option(WITH_HYGON "Enable Hygon GPU backend" OFF) option(WITH_METAX "Enable MetaX backend" OFF) option(WITH_CAMBRICON "Enable Cambricon backend" OFF) option(WITH_MOORE "Enable Moore backend" OFF) @@ -29,6 +30,31 @@ option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF) option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF) +set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") + +function(_infiniops_find_hygon_cuda_root out_var dtk_root) + set(_candidates + "${dtk_root}/cuda" + "${dtk_root}/cuda/cuda" + ) + + file(GLOB _versioned_cuda_dirs LIST_DIRECTORIES true "${dtk_root}/cuda/cuda-*") + if(_versioned_cuda_dirs) + list(SORT _versioned_cuda_dirs) + list(REVERSE _versioned_cuda_dirs) + list(APPEND _candidates ${_versioned_cuda_dirs}) + endif() + + foreach(_candidate IN LISTS _candidates) + if(EXISTS "${_candidate}/bin/nvcc") + set(${out_var} "${_candidate}" PARENT_SCOPE) + return() + endif() + endforeach() + + set(${out_var} "" PARENT_SCOPE) +endfunction() + if(AUTO_DETECT_DEVICES) message(STATUS "Auto-detecting available devices...") @@ -48,6 +74,21 @@ if(AUTO_DETECT_DEVICES) message(STATUS "Auto-detected Iluvatar environment.") endif() + set(_hygon_detected FALSE) + if(DEFINED ENV{DTK_ROOT} AND NOT "$ENV{DTK_ROOT}" STREQUAL "") + set(_hygon_detected TRUE) + else() + _infiniops_find_hygon_cuda_root(_HYGON_CUDA_DETECT_ROOT "${_DEFAULT_HYGON_DTK_ROOT}") + if(_HYGON_CUDA_DETECT_ROOT) + set(_hygon_detected TRUE) + endif() + endif() + + if(_hygon_detected) + set(WITH_HYGON ON) + message(STATUS "Auto-detected Hygon environment.") + endif() + if(DEFINED ENV{MACA_PATH}) set(WITH_METAX ON) message(STATUS "Auto-detected MetaX environment from MACA_PATH") @@ -132,6 +173,17 @@ if(WITH_TORCH) OUTPUT_STRIP_TRAILING_WHITESPACE ) + execute_process( + COMMAND ${Python_EXECUTABLE} -c "import pathlib, torch; p = pathlib.Path(torch.__file__).resolve().parent.parent / 'torch.libs'; print(str(p) if p.exists() else '')" + OUTPUT_VARIABLE _torch_private_lib_dir + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + set(TORCH_RUNTIME_DIRS ${_torch_lib_dirs}) + if(_torch_private_lib_dir) + list(APPEND TORCH_RUNTIME_DIRS ${_torch_private_lib_dir}) + endif() + find_library(TORCH_LIB torch HINTS ${_torch_lib_dirs} REQUIRED) find_library(TORCH_CPU_LIB torch_cpu HINTS ${_torch_lib_dirs} REQUIRED) find_library(C10_LIB c10 HINTS ${_torch_lib_dirs} REQUIRED) @@ -181,14 +233,14 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) # Only one CUDA-like GPU backend can be enabled at a time. set(_gpu_backend_count 0) -foreach(_gpu_backend WITH_NVIDIA WITH_ILUVATAR WITH_METAX WITH_MOORE WITH_ASCEND) +foreach(_gpu_backend WITH_NVIDIA WITH_ILUVATAR WITH_HYGON WITH_METAX WITH_MOORE WITH_ASCEND) if(${_gpu_backend}) math(EXPR _gpu_backend_count "${_gpu_backend_count} + 1") endif() endforeach() if(_gpu_backend_count GREATER 1) - message(FATAL_ERROR "`WITH_NVIDIA`, `WITH_ILUVATAR`, `WITH_METAX`, `WITH_MOORE`, and `WITH_ASCEND` are mutually exclusive. Build one GPU backend at a time.") + message(FATAL_ERROR "`WITH_NVIDIA`, `WITH_ILUVATAR`, `WITH_HYGON`, `WITH_METAX`, `WITH_MOORE`, and `WITH_ASCEND` are mutually exclusive. Build one GPU backend at a time.") endif() if(WITH_NVIDIA) @@ -227,6 +279,70 @@ if(WITH_ILUVATAR) add_compile_options($<$:-x$ivcore>) endif() +if(WITH_HYGON) + add_compile_definitions(WITH_HYGON=1) + set(DTK_ROOT $ENV{DTK_ROOT}) + if(NOT DTK_ROOT) + set(DTK_ROOT "${_DEFAULT_HYGON_DTK_ROOT}") + endif() + if(NOT EXISTS "${DTK_ROOT}") + message(FATAL_ERROR "`WITH_HYGON` is `ON` but `DTK_ROOT` (`${DTK_ROOT}`) does not exist.") + endif() + + set(_HYGON_ARCH_DEFAULT "gfx906") + if(DEFINED ENV{HYGON_ARCH} AND NOT "$ENV{HYGON_ARCH}" STREQUAL "") + set(_HYGON_ARCH_DEFAULT "$ENV{HYGON_ARCH}") + else() + find_program(HYGON_ROCMINFO_EXECUTABLE NAMES rocminfo HINTS "${DTK_ROOT}/bin") + if(HYGON_ROCMINFO_EXECUTABLE) + execute_process( + COMMAND ${HYGON_ROCMINFO_EXECUTABLE} + OUTPUT_VARIABLE _HYGON_ROCMINFO_OUTPUT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + string(REGEX MATCH "gfx[0-9]+" _HYGON_ARCH_AUTO "${_HYGON_ROCMINFO_OUTPUT}") + if(_HYGON_ARCH_AUTO) + set(_HYGON_ARCH_DEFAULT "${_HYGON_ARCH_AUTO}") + endif() + endif() + endif() + + set(HYGON_ARCH "${_HYGON_ARCH_DEFAULT}" CACHE STRING "Hygon GPU architecture") + _infiniops_find_hygon_cuda_root(HYGON_CUDA_ROOT "${DTK_ROOT}") + + if(NOT HYGON_CUDA_ROOT) + message(FATAL_ERROR "`WITH_HYGON` is `ON` but no DTK `nvcc` was found under `${DTK_ROOT}`. Checked `${DTK_ROOT}/cuda/bin/nvcc`, `${DTK_ROOT}/cuda/cuda/bin/nvcc`, and `${DTK_ROOT}/cuda/cuda-*/bin/nvcc`.") + endif() + + set(CMAKE_CUDA_COMPILER "${HYGON_CUDA_ROOT}/bin/nvcc" CACHE FILEPATH "Hygon CUDA compiler (DTK nvcc)") + set(CUDAToolkit_ROOT "${HYGON_CUDA_ROOT}" CACHE PATH "Hygon CUDA toolkit root") + set(CMAKE_CUDA_ARCHITECTURES OFF CACHE STRING "Disable default CUDA arch flags for Hygon" FORCE) + set(CMAKE_CUDA_FLAGS "-std=c++17 -fPIC -arch=${HYGON_ARCH} -Wno-return-type -Wno-error=unused-private-field" CACHE STRING "Hygon CUDA flags") + set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF CACHE BOOL "Disable RDC for Hygon") + + # DTK's nvcc wrapper derives its toolkit root from `CUDA_PATH`. + set(ENV{CUDA_PATH} "${HYGON_CUDA_ROOT}") + set(ENV{CUDA_HOME} "${HYGON_CUDA_ROOT}") + + # DTK's nvcc wrapper may invoke `nvcc` by name during compiler checks. + set(ENV{PATH} "${HYGON_CUDA_ROOT}/bin:$ENV{PATH}") + + # The actual Ninja build runs in fresh processes. Keep a launcher command + # for CUDA-backed Python bindings that need the DTK wrapper environment. + set(_HYGON_RULE_LAUNCH_ENV + "${CMAKE_COMMAND} -E env CUDA_PATH=${HYGON_CUDA_ROOT} CUDA_HOME=${HYGON_CUDA_ROOT} PATH=${HYGON_CUDA_ROOT}/bin:$ENV{PATH}") + + include_directories("${DTK_ROOT}/include") + include_directories("${HYGON_CUDA_ROOT}/include") + link_directories("${DTK_ROOT}/lib") + link_directories("${HYGON_CUDA_ROOT}/lib64") + + message(STATUS "Hygon: CUDA compiler ${CMAKE_CUDA_COMPILER}, arch ${HYGON_ARCH}, DTK root ${DTK_ROOT}") + enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) +endif() + if(WITH_METAX) add_compile_definitions(WITH_METAX=1) @@ -310,7 +426,7 @@ if(WITH_ASCEND) endif() # If all other platforms are not enabled, CPU is enabled by default. -if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND) +if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_HYGON AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND) add_compile_definitions(WITH_CPU=1) endif() @@ -318,6 +434,10 @@ if(WITH_METAX OR WITH_MOORE) set(PYBIND11_ENABLE_EXTRAS OFF) endif() +if(WITH_HYGON AND NOT EXISTS "${DTK_ROOT}/llvm/lib/LLVMgold.so") + set(PYBIND11_ENABLE_EXTRAS OFF) +endif() + add_subdirectory(src) add_subdirectory(examples) diff --git a/README.md b/README.md index 6b9fc6f6..01e7f00d 100644 --- a/README.md +++ b/README.md @@ -31,12 +31,16 @@ pip install . -C cmake.define.WITH_CPU=ON -C cmake.define.WITH_NVIDIA=ON | `-DWITH_NVIDIA=[ON\|OFF]` | Compile the Nvidia implementation | OFF | | `-DWITH_METAX=[ON\|OFF]` | Compile the MetaX implementation | OFF | | `-DWITH_ILUVATAR=[ON\|OFF]` | Compile the Iluvatar implementation | OFF | +| `-DWITH_HYGON=[ON\|OFF]` | Compile the Hygon implementation | OFF | | `-DWITH_MOORE=[ON\|OFF]` | Compile the Moore implementation | OFF | | `-DWITH_CAMBRICON=[ON\|OFF]` | Compile the Cambricon implementation | OFF | +| `-DWITH_ASCEND=[ON\|OFF]` | Compile the Ascend implementation | OFF | | `-DAUTO_DETECT_DEVICES=[ON\|OFF]` | Auto-detect available platforms | ON | If no accelerator options are provided and auto-detection finds nothing, `WITH_CPU` is enabled by default. +For Hygon builds, set `DTK_ROOT` to the DTK installation root if it is not installed at `/opt/dtk`. You can override the default DCU arch with `-DHYGON_ARCH=` when configuring CMake. + ## Contributing See [CONTRIBUTING.md](CONTRIBUTING.md) for code style, commit conventions, PR workflow, development guide, and troubleshooting. diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 68ebc1b5..18f70090 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -9,8 +9,14 @@ foreach(source_file ${EXAMPLE_SOURCES}) target_link_libraries(${example_name} PRIVATE infiniops) target_include_directories(${example_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - + get_filename_component(example_dir ${source_file} DIRECTORY) target_include_directories(${example_name} PRIVATE ${example_dir}) + + if(WITH_TORCH) + foreach(_torch_dir ${TORCH_RUNTIME_DIRS}) + target_link_options(${example_name} PRIVATE "LINKER:-rpath-link,${_torch_dir}") + endforeach() + endif() endforeach() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 32c92949..8b24420d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -77,6 +77,34 @@ if(WITH_ILUVATAR) list(APPEND DEVICE_LIST "iluvatar") endif() +if(WITH_HYGON) + set(HYGON_PATTERNS + "cuda/*.cc" + "cuda/*.cpp" + "cuda/*.cu" + "hygon/*.cc" + "hygon/*.cpp" + "hygon/*.cu" + ) + + file(GLOB_RECURSE HYGON_SOURCES CONFIGURE_DEPENDS ${HYGON_PATTERNS}) + + enable_language(CUDA) + + target_compile_definitions(infiniops PUBLIC WITH_HYGON=1) + target_sources(infiniops PRIVATE ${HYGON_SOURCES}) + + find_package(CUDAToolkit REQUIRED) + target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas) + + set_target_properties(infiniops PROPERTIES + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + ) + + list(APPEND DEVICE_LIST "hygon") +endif() + if(WITH_METAX) set(METAX_PATTERNS "cuda/*.cc" @@ -343,7 +371,7 @@ if(GENERATE_PYTHON_BINDINGS) set(PYBIND11_SOURCES "${PROJECT_SOURCE_DIR}/generated/bindings/ops.cc") # TODO: There might be a better solution. - if(WITH_NVIDIA OR WITH_ILUVATAR) + if(WITH_NVIDIA OR WITH_ILUVATAR OR WITH_HYGON) set_source_files_properties(${PYBIND11_SOURCES} PROPERTIES LANGUAGE CUDA) endif() @@ -370,6 +398,13 @@ if(GENERATE_PYTHON_BINDINGS) pybind11_add_module(ops NO_EXTRAS ${PYBIND11_SOURCES}) endif() + if(WITH_HYGON) + set_target_properties(ops PROPERTIES + RULE_LAUNCH_COMPILE "${_HYGON_RULE_LAUNCH_ENV}" + RULE_LAUNCH_LINK "${_HYGON_RULE_LAUNCH_ENV}" + ) + endif() + target_include_directories(ops PRIVATE ${PROJECT_SOURCE_DIR}) target_link_libraries(ops PRIVATE infiniops) @@ -383,9 +418,19 @@ if(GENERATE_PYTHON_BINDINGS) target_link_libraries(ops PRIVATE -Wl,--whole-archive no_workspace_kernel -Wl,--no-whole-archive) endif() + set(_INFINIOPS_INSTALL_RPATH "$ORIGIN") + if(WITH_TORCH) + list(APPEND _INFINIOPS_INSTALL_RPATH ${TORCH_RUNTIME_DIRS}) + endif() + + if(WITH_HYGON) + list(APPEND _INFINIOPS_INSTALL_RPATH + "${HYGON_CUDA_ROOT}/lib64" + ) + endif() - set_target_properties(infiniops PROPERTIES INSTALL_RPATH "$ORIGIN") - set_target_properties(ops PROPERTIES INSTALL_RPATH "$ORIGIN") + set_target_properties(infiniops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") + set_target_properties(ops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") install(TARGETS infiniops ops DESTINATION .) diff --git a/src/hygon/blas.h b/src/hygon/blas.h new file mode 100644 index 00000000..1a2c3d07 --- /dev/null +++ b/src/hygon/blas.h @@ -0,0 +1,40 @@ +#ifndef INFINI_OPS_HYGON_BLAS_H_ +#define INFINI_OPS_HYGON_BLAS_H_ + +#include + +// clang-format off +#include "cublas_v2.h" +// clang-format on + +#include "cuda/blas.h" +#include "data_type.h" +#include "hygon/blas_utils.h" +#include "hygon/runtime_.h" + +namespace infini::ops { + +template <> +struct Blas : public Runtime { + using BlasHandle = cublasHandle_t; + + static constexpr auto BLAS_OP_N = CUBLAS_OP_N; + + static constexpr auto BLAS_OP_T = CUBLAS_OP_T; + + static constexpr auto BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT_TENSOR_OP; + + static constexpr auto BlasCreate = cublasCreate; + + static constexpr auto BlasSetStream = cublasSetStream; + + static constexpr auto BlasDestroy = cublasDestroy; + + static constexpr auto BlasGemmStridedBatchedEx = [](auto&&... args) { + return cublasGemmStridedBatchedEx(std::forward(args)...); + }; +}; + +} // namespace infini::ops + +#endif diff --git a/src/hygon/blas_utils.h b/src/hygon/blas_utils.h new file mode 100644 index 00000000..4123372f --- /dev/null +++ b/src/hygon/blas_utils.h @@ -0,0 +1,30 @@ +#ifndef INFINI_OPS_HYGON_BLAS_UTILS_H_ +#define INFINI_OPS_HYGON_BLAS_UTILS_H_ + +// clang-format off +#include "cublas_v2.h" +// clang-format on + +#include "cuda/blas_utils.h" +#include "data_type.h" + +namespace infini::ops { + +template <> +struct BlasUtils { + static auto GetDataType(DataType dtype) { + if (dtype == DataType::kFloat16) return CUDA_R_16F; + if (dtype == DataType::kBFloat16) return CUDA_R_16BF; + return CUDA_R_32F; + } + + static auto GetComputeType(DataType dtype) { + if (dtype == DataType::kFloat16 || dtype == DataType::kBFloat16) + return CUBLAS_COMPUTE_32F; + return CUBLAS_COMPUTE_32F; + } +}; + +} // namespace infini::ops + +#endif diff --git a/src/hygon/device_.h b/src/hygon/device_.h new file mode 100644 index 00000000..7df5dbd8 --- /dev/null +++ b/src/hygon/device_.h @@ -0,0 +1,101 @@ +#ifndef INFINI_OPS_HYGON_DEVICE__H_ +#define INFINI_OPS_HYGON_DEVICE__H_ + +#include +#include + +// clang-format off +#include +#include +#include +// clang-format on + +#include "cuda/caster.cuh" +#include "data_type.h" +#include "device.h" + +namespace infini::ops { + +template <> +struct DeviceEnabled : std::true_type {}; + +// Some DTK toolchains expose the underlying bf16 structs but gate the +// nv_bfloat16 typedefs behind CUDA_NO_BFLOAT16. +using cuda_bfloat16 = __nv_bfloat16; + +using cuda_bfloat162 = __nv_bfloat162; + +namespace detail { + +template <> +struct ToFloat { + __host__ __device__ float operator()(half x) { return __half2float(x); } +}; + +template <> +struct ToFloat { + __host__ __device__ float operator()(__nv_bfloat16 x) { + return __bfloat162float(x); + } +}; + +template <> +struct FromFloat { + __host__ __device__ half operator()(float f) { return __float2half(f); } +}; + +template <> +struct FromFloat { + __host__ __device__ __nv_bfloat16 operator()(float f) { + return __float2bfloat16(f); + } +}; + +} // namespace detail + +template <> +struct TypeMap { + using type = half; +}; + +template <> +struct TypeMap { + using type = __nv_bfloat16; +}; + +// Caches `cudaDeviceProp` per device, initialized once at first access. +class DevicePropertyCache { + public: + static const cudaDeviceProp& GetCurrentDeviceProps() { + int device_id = 0; + cudaGetDevice(&device_id); + return GetDeviceProps(device_id); + } + + static const cudaDeviceProp& GetDeviceProps(int device_id) { + static std::vector cache = []() { + int count = 0; + cudaGetDeviceCount(&count); + if (count == 0) return std::vector{}; + std::vector props(count); + for (int i = 0; i < count; ++i) { + cudaGetDeviceProperties(&props[i], i); + } + return props; + }(); + + assert(device_id >= 0 && device_id < static_cast(cache.size())); + return cache[device_id]; + } +}; + +inline int QueryMaxThreadsPerBlock() { + return DevicePropertyCache::GetCurrentDeviceProps().maxThreadsPerBlock; +} + +template <> +struct Caster : CudaCasterImpl {}; + +} // namespace infini::ops + +#endif diff --git a/src/hygon/runtime_.h b/src/hygon/runtime_.h new file mode 100644 index 00000000..356e0424 --- /dev/null +++ b/src/hygon/runtime_.h @@ -0,0 +1,44 @@ +#ifndef INFINI_OPS_HYGON_RUNTIME_H_ +#define INFINI_OPS_HYGON_RUNTIME_H_ + +#include + +// clang-format off +#include +// clang-format on + +#include "cuda/runtime.h" +#include "hygon/device_.h" +#include "hygon/runtime_utils.h" + +namespace infini::ops { + +template <> +struct Runtime + : CudaRuntime> { + using Stream = cudaStream_t; + + static constexpr Device::Type kDeviceType = Device::Type::kHygon; + + static constexpr auto Malloc = [](auto&&... args) { + return cudaMalloc(std::forward(args)...); + }; + + static constexpr auto Memcpy = cudaMemcpy; + + static constexpr auto Free = [](auto&&... args) { + return cudaFree(std::forward(args)...); + }; + + static constexpr auto MemcpyHostToDevice = cudaMemcpyHostToDevice; + + static constexpr auto MemcpyDeviceToHost = cudaMemcpyDeviceToHost; + + static constexpr auto Memset = cudaMemset; +}; + +static_assert(Runtime::Validate()); + +} // namespace infini::ops + +#endif diff --git a/src/hygon/runtime_utils.h b/src/hygon/runtime_utils.h new file mode 100644 index 00000000..afa84265 --- /dev/null +++ b/src/hygon/runtime_utils.h @@ -0,0 +1,15 @@ +#ifndef INFINI_OPS_HYGON_RUNTIME_UTILS_H_ +#define INFINI_OPS_HYGON_RUNTIME_UTILS_H_ + +#include "cuda/runtime_utils.h" +#include "hygon/device_.h" + +namespace infini::ops { + +template <> +struct RuntimeUtils + : CudaRuntimeUtils {}; + +} // namespace infini::ops + +#endif diff --git a/src/pybind11_utils.h b/src/pybind11_utils.h index f13d3116..555645ef 100644 --- a/src/pybind11_utils.h +++ b/src/pybind11_utils.h @@ -1,8 +1,10 @@ #ifndef INFINI_OPS_PYBIND11_UTILS_H_ #define INFINI_OPS_PYBIND11_UTILS_H_ +#include #include #include +#include #include "tensor.h" #include "torch/device_.h" @@ -38,7 +40,38 @@ inline Device::Type DeviceTypeFromString(const std::string& name) { return it->second; } - return Device::TypeFromString(name); + std::vector supported_names; + + for (const auto& [torch_name, device_type] : kTorchNameToTypes) { + const auto internal_name = std::string{Device::StringFromType(device_type)}; + + if (name == internal_name) { + return device_type; + } + + supported_names.push_back(torch_name); + supported_names.push_back(internal_name); + } + + std::sort(supported_names.begin(), supported_names.end()); + supported_names.erase( + std::unique(supported_names.begin(), supported_names.end()), + supported_names.end()); + + std::string message = + "Unsupported device type `" + name + + "` for this InfiniOps build. Supported device names: "; + + for (std::size_t i = 0; i < supported_names.size(); ++i) { + if (i != 0) { + message += ", "; + } + message += supported_names[i]; + } + + message += ". Rebuild InfiniOps with the matching backend enabled."; + + throw std::invalid_argument(message); } inline Tensor TensorFromPybind11Handle(py::handle obj) { From a7a1b9a1d85357124d5eceae82c0494eb1db7e7d Mon Sep 17 00:00:00 2001 From: gongchensu Date: Fri, 24 Apr 2026 16:17:43 +0800 Subject: [PATCH 2/3] feat(hygon-add): add Hygon backend support for Add --- src/hygon/add/kernel.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/hygon/add/kernel.h diff --git a/src/hygon/add/kernel.h b/src/hygon/add/kernel.h new file mode 100644 index 00000000..3c17b482 --- /dev/null +++ b/src/hygon/add/kernel.h @@ -0,0 +1,18 @@ +#ifndef INFINI_OPS_HYGON_ADD_KERNEL_H_ +#define INFINI_OPS_HYGON_ADD_KERNEL_H_ + +#include "cuda/add/kernel.h" +#include "hygon/runtime_.h" + +namespace infini::ops { + +template <> +class Operator + : public CudaAdd> { + public: + using CudaAdd>::CudaAdd; +}; + +} // namespace infini::ops + +#endif From c8d8b56ba64b00a03f2cd19812a55f09b10539e9 Mon Sep 17 00:00:00 2001 From: gongchensu Date: Fri, 24 Apr 2026 16:18:12 +0800 Subject: [PATCH 3/3] feat(hygon-gemm): add Hygon backend support for Gemm --- examples/runtime_api.h | 5 +++++ src/hygon/gemm/cublas.h | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 src/hygon/gemm/cublas.h diff --git a/examples/runtime_api.h b/examples/runtime_api.h index c1ef3161..accb2341 100644 --- a/examples/runtime_api.h +++ b/examples/runtime_api.h @@ -10,6 +10,9 @@ #elif WITH_ILUVATAR #include "cuda/iluvatar/gemm/cublas.h" #include "cuda/iluvatar/runtime_.h" +#elif WITH_HYGON +#include "hygon/gemm/cublas.h" +#include "hygon/runtime_.h" #elif WITH_METAX #include "cuda/metax/gemm/mcblas.h" #include "cuda/metax/runtime_.h" @@ -35,6 +38,8 @@ namespace infini::ops { using DefaultRuntimeUtils = Runtime; #elif WITH_ILUVATAR using DefaultRuntimeUtils = Runtime; +#elif WITH_HYGON +using DefaultRuntimeUtils = Runtime; #elif WITH_METAX using DefaultRuntimeUtils = Runtime; #elif WITH_CAMBRICON diff --git a/src/hygon/gemm/cublas.h b/src/hygon/gemm/cublas.h new file mode 100644 index 00000000..fefb2621 --- /dev/null +++ b/src/hygon/gemm/cublas.h @@ -0,0 +1,18 @@ +#ifndef INFINI_OPS_HYGON_GEMM_CUBLAS_H_ +#define INFINI_OPS_HYGON_GEMM_CUBLAS_H_ + +#include "cuda/gemm/blas.h" +#include "hygon/blas.h" + +namespace infini::ops { + +template <> +class Operator + : public BlasGemm> { + public: + using BlasGemm>::BlasGemm; +}; + +} // namespace infini::ops + +#endif