From 6341be8ae99463c9d1890890260f8ead167d81fe Mon Sep 17 00:00:00 2001 From: wooway777 Date: Mon, 15 Jun 2026 19:30:13 +0800 Subject: [PATCH 1/2] issue/1287 - direct gemm to InfiniOps --- src/infinicore/ops/add/add_infiniops.cc | 59 +++++++++++++++++++ src/infinicore/ops/gemm/gemm_infiniops.cc | 68 +++++++++++++++++++++ src/infinicore/ops/infiniops_impl.hpp | 72 +++++++++++++++++++++++ xmake.lua | 64 ++++++++++++++++++++ 4 files changed, 263 insertions(+) create mode 100644 src/infinicore/ops/add/add_infiniops.cc create mode 100644 src/infinicore/ops/gemm/gemm_infiniops.cc create mode 100644 src/infinicore/ops/infiniops_impl.hpp diff --git a/src/infinicore/ops/add/add_infiniops.cc b/src/infinicore/ops/add/add_infiniops.cc new file mode 100644 index 000000000..1ff8cc555 --- /dev/null +++ b/src/infinicore/ops/add/add_infiniops.cc @@ -0,0 +1,59 @@ +#include "infinicore/ops/add.hpp" + +#ifdef ENABLE_INFINIOPS_API +#include "../infiniops_impl.hpp" + +namespace infinicore::op::add_impl::infiniops { +namespace { + +using TensorMeta = ::infinicore::op::infiniops::TensorMeta; + +struct PlannedMeta { + TensorMeta c, a, b; + graph::GraphTensor c_tensor, a_tensor, b_tensor; +}; + +} // namespace + +void *plan(Tensor c, const Tensor &a, const Tensor &b) { + INFINICORE_ASSERT(c->device().getType() == Device::Type::NVIDIA); + INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b); + + return new PlannedMeta{ + TensorMeta(c), + TensorMeta(a), + TensorMeta(b), + graph::GraphTensor(c), + graph::GraphTensor(a), + graph::GraphTensor(b)}; +} + +void run(void *planned_meta) { + auto planned = reinterpret_cast(planned_meta); + + infini::ops::Handle handle; + handle.set_stream(context::getStream()); + infini::ops::Config config; + + infini::ops::Operator::Call( + handle, + config, + planned->a.tensor(planned->a_tensor), + planned->b.tensor(planned->b_tensor), + planned->c.tensor(planned->c_tensor)); +} + +void cleanup(void **planned_meta_ptr) { + delete *reinterpret_cast(planned_meta_ptr); + *planned_meta_ptr = nullptr; +} + +static bool registered = []() { + Add::plan_dispatcher().registerDevice(Device::Type::NVIDIA, &plan); + Add::run_dispatcher().registerDevice(Device::Type::NVIDIA, &run); + Add::cleanup_dispatcher().registerDevice(Device::Type::NVIDIA, &cleanup); + return true; +}(); + +} // namespace infinicore::op::add_impl::infiniops +#endif diff --git a/src/infinicore/ops/gemm/gemm_infiniops.cc b/src/infinicore/ops/gemm/gemm_infiniops.cc new file mode 100644 index 000000000..e28544ed1 --- /dev/null +++ b/src/infinicore/ops/gemm/gemm_infiniops.cc @@ -0,0 +1,68 @@ +#include "infinicore/ops/gemm.hpp" + +#ifdef ENABLE_INFINIOPS_API +#include "../infiniops_impl.hpp" + +#include + +namespace infinicore::op::gemm_impl::infiniops { +namespace { + +using TensorMeta = ::infinicore::op::infiniops::TensorMeta; + +struct PlannedMeta { + TensorMeta c, a, b; + graph::GraphTensor c_tensor, a_tensor, b_tensor; + float alpha, beta; +}; + +} // namespace + +void *plan(Tensor c, const Tensor &a, const Tensor &b, float alpha, float beta) { + INFINICORE_ASSERT(c->device().getType() == Device::Type::NVIDIA); + INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b); + + return new PlannedMeta{ + TensorMeta(c), + TensorMeta(a), + TensorMeta(b), + graph::GraphTensor(c), + graph::GraphTensor(a), + graph::GraphTensor(b), + alpha, + beta}; +} + +void run(void *planned_meta) { + auto planned = reinterpret_cast(planned_meta); + + infini::ops::Handle handle; + handle.set_stream(context::getStream()); + infini::ops::Config config; + + infini::ops::Operator::Call( + handle, + config, + planned->a.tensor(planned->a_tensor), + planned->b.tensor(planned->b_tensor), + std::optional{planned->alpha}, + std::optional{planned->beta}, + std::optional{}, + std::optional{}, + planned->c.tensor(planned->c_tensor)); +} + +void cleanup(void **planned_meta_ptr) { + delete *reinterpret_cast(planned_meta_ptr); + *planned_meta_ptr = nullptr; +} + +static bool registered = []() { + Gemm::plan_dispatcher().registerDevice(Device::Type::NVIDIA, &plan); + Gemm::run_dispatcher().registerDevice(Device::Type::NVIDIA, &run); + Gemm::cleanup_dispatcher().registerDevice(Device::Type::NVIDIA, &cleanup); + return true; +}(); + +} // namespace infinicore::op::gemm_impl::infiniops +#endif diff --git a/src/infinicore/ops/infiniops_impl.hpp b/src/infinicore/ops/infiniops_impl.hpp new file mode 100644 index 000000000..d125bb443 --- /dev/null +++ b/src/infinicore/ops/infiniops_impl.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include "../utils.hpp" +#include "infinicore/tensor.hpp" + +#include + +#include "infini/operator_call_instantiations.h" +#include "tensor.h" + +namespace infinicore::op::infiniops { + +inline infini::ops::DataType toInfiniOpsDtype(DataType dtype) { + switch (dtype) { + case DataType::I8: + return infini::ops::DataType::kInt8; + case DataType::I16: + return infini::ops::DataType::kInt16; + case DataType::I32: + return infini::ops::DataType::kInt32; + case DataType::I64: + return infini::ops::DataType::kInt64; + case DataType::U8: + case DataType::BYTE: + return infini::ops::DataType::kUInt8; + case DataType::U16: + return infini::ops::DataType::kUInt16; + case DataType::U32: + return infini::ops::DataType::kUInt32; + case DataType::U64: + return infini::ops::DataType::kUInt64; + case DataType::F16: + return infini::ops::DataType::kFloat16; + case DataType::BF16: + return infini::ops::DataType::kBFloat16; + case DataType::F32: + return infini::ops::DataType::kFloat32; + case DataType::F64: + return infini::ops::DataType::kFloat64; + default: + throw std::runtime_error("InfiniOps backend does not support this tensor dtype."); + } +} + +inline infini::ops::Device toInfiniOpsDevice(const Device &device) { + INFINICORE_ASSERT(device.getType() == Device::Type::NVIDIA); + return infini::ops::Device{infini::ops::Device::Type::kNvidia, static_cast(device.getIndex())}; +} + +struct TensorMeta { + Shape shape; + Strides strides; + infini::ops::DataType dtype; + infini::ops::Device device; + + explicit TensorMeta(const Tensor &tensor) + : shape(tensor->shape()), + strides(tensor->strides()), + dtype(toInfiniOpsDtype(tensor->dtype())), + device(toInfiniOpsDevice(tensor->device())) {} + + infini::ops::Tensor tensor(const void *data) const { + return infini::ops::Tensor( + const_cast(data), shape, dtype, device, strides); + } + + infini::ops::Tensor tensor(const Tensor &tensor) const { + return this->tensor(tensor->data()); + } +}; + +} // namespace infinicore::op::infiniops diff --git a/xmake.lua b/xmake.lua index 89930d758..a4260e509 100644 --- a/xmake.lua +++ b/xmake.lua @@ -266,6 +266,19 @@ if has_config("ccl") then add_defines("ENABLE_CCL") end +-- InfiniOps +option("infiniops") + set_default(false) + set_showmenu(true) + set_description("Whether to use InfiniOps kernels where adapters are available") +option_end() + +option("infiniops-root") + set_default("submodules/InfiniOps") + set_showmenu(true) + set_description("Path to the InfiniOps repository used by --infiniops") +option_end() + -- Mutual Awareness Analyzer option("mutual-awareness") set_default(false) @@ -353,6 +366,11 @@ target("infiniop") set_kind("shared") add_deps("infinirt") + if has_config("nv-gpu") then + local cuda_root = os.getenv("CUDA_HOME") or os.getenv("CUDA_PATH") or get_config("cuda") or "/usr/local/cuda" + add_includedirs(cuda_root .. "/include") + end + if has_config("cpu") then add_deps("infiniop-cpu") end @@ -467,6 +485,40 @@ target("infinicore_cpp_api") add_includedirs("include") add_includedirs(INFINI_ROOT.."/include", { public = true }) + if has_config("nv-gpu") then + local cuda_root = os.getenv("CUDA_HOME") or os.getenv("CUDA_PATH") or get_config("cuda") or "/usr/local/cuda" + add_includedirs(cuda_root .. "/include") + end + if has_config("infiniops") then + local infiniops_root = path.absolute(get_config("infiniops-root") or "submodules/InfiniOps", os.projectdir()) + local infiniops_builddir = path.join(infiniops_root, "build") + if not os.isdir(infiniops_root) then + raise("InfiniOps root not found: " .. infiniops_root) + end + if not has_config("nv-gpu") then + raise("InfiniOps integration currently has adapters only for NVIDIA") + end + add_defines("ENABLE_INFINIOPS_API") + add_includedirs(infiniops_root .. "/src", infiniops_root .. "/include", infiniops_root .. "/generated/include") + add_linkdirs(infiniops_builddir .. "/src") + add_links("infiniops") + add_rpathdirs(infiniops_builddir .. "/src") + add_installfiles(infiniops_builddir .. "/src/libinfiniops.so", {prefixdir = "lib"}) + before_build(function (target) + import("core.base.option") + local infiniops_root = path.absolute(get_config("infiniops-root") or "submodules/InfiniOps", os.projectdir()) + local infiniops_builddir = path.join(infiniops_root, "build") + os.execv("cmake", { + "-S", infiniops_root, + "-B", infiniops_builddir, + "-DWITH_NVIDIA=ON", + "-DGENERATE_OPERATOR_CALL_INSTANTIATIONS=ON", + "-DGENERATE_PYTHON_BINDINGS=OFF", + "-DCMAKE_BUILD_TYPE=Release" + }) + os.execv("cmake", {"--build", infiniops_builddir, "--target", "infiniops"}) + end) + end add_linkdirs(INFINI_ROOT.."/lib") add_links("infiniop", "infinirt", "infiniccl") @@ -675,6 +727,18 @@ target("_infinicore") add_files("src/infinicore/pybind11/**.cc") + if has_config("infiniops") then + after_install(function (target) + local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini") + local infiniops_root = path.absolute(get_config("infiniops-root") or "submodules/InfiniOps", os.projectdir()) + local infiniops_lib = path.join(infiniops_root, "build", "src", "libinfiniops.so") + os.mkdir(path.join(INFINI_ROOT, "lib")) + os.cp(infiniops_lib, path.join(INFINI_ROOT, "lib")) + os.mkdir(path.join(os.projectdir(), "python", "infinicore", "lib")) + os.cp(infiniops_lib, path.join(os.projectdir(), "python", "infinicore", "lib")) + end) + end + set_installdir("python/infinicore") target_end() From b33dddb0b36ec5fc47534a51ee006d1885bdad0e Mon Sep 17 00:00:00 2001 From: wooway777 Date: Tue, 16 Jun 2026 12:15:25 +0800 Subject: [PATCH 2/2] issue/1287 - option b: 2 in 1 --- src/infinicore/ops/add/add_infiniop.cc | 46 +++++++++++++++ src/infinicore/ops/add/add_infiniops.cc | 59 -------------------- src/infinicore/ops/gemm/gemm_infiniop.cc | 52 +++++++++++++++++ src/infinicore/ops/gemm/gemm_infiniops.cc | 68 ----------------------- 4 files changed, 98 insertions(+), 127 deletions(-) delete mode 100644 src/infinicore/ops/add/add_infiniops.cc delete mode 100644 src/infinicore/ops/gemm/gemm_infiniops.cc diff --git a/src/infinicore/ops/add/add_infiniop.cc b/src/infinicore/ops/add/add_infiniop.cc index bb377d667..5d8cf5407 100644 --- a/src/infinicore/ops/add/add_infiniop.cc +++ b/src/infinicore/ops/add/add_infiniop.cc @@ -2,16 +2,46 @@ #include "../infiniop_impl.hpp" +#ifdef ENABLE_INFINIOPS_API +#include "../infiniops_impl.hpp" +#endif + +#include + namespace infinicore::op::add_impl::infiniop { INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, Add, 100); +#ifdef ENABLE_INFINIOPS_API +using TensorMeta = ::infinicore::op::infiniops::TensorMeta; +#endif + struct PlannedMeta { std::shared_ptr descriptor; graph::GraphTensor workspace, c, a, b; +#ifdef ENABLE_INFINIOPS_API + bool use_infiniops = false; + std::optional c_meta, a_meta, b_meta; +#endif }; void *plan(Tensor c, const Tensor &a, const Tensor &b) { +#ifdef ENABLE_INFINIOPS_API + if (c->device().getType() == Device::Type::NVIDIA) { + INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b); + return new PlannedMeta{ + nullptr, + graph::GraphTensor(c), + graph::GraphTensor(c), + graph::GraphTensor(a), + graph::GraphTensor(b), + true, + TensorMeta(c), + TensorMeta(a), + TensorMeta(b)}; + } +#endif + size_t seed = hash_combine(c, b, a); INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE( @@ -32,6 +62,22 @@ void *plan(Tensor c, const Tensor &a, const Tensor &b) { void run(void *planned_meta) { auto planned = reinterpret_cast(planned_meta); +#ifdef ENABLE_INFINIOPS_API + if (planned->use_infiniops) { + infini::ops::Handle handle; + handle.set_stream(context::getStream()); + infini::ops::Config config; + + infini::ops::Operator::Call( + handle, + config, + planned->a_meta->tensor(planned->a), + planned->b_meta->tensor(planned->b), + planned->c_meta->tensor(planned->c)); + return; + } +#endif + INFINICORE_CHECK_ERROR(infiniopAdd( planned->descriptor->desc, planned->workspace->data(), diff --git a/src/infinicore/ops/add/add_infiniops.cc b/src/infinicore/ops/add/add_infiniops.cc deleted file mode 100644 index 1ff8cc555..000000000 --- a/src/infinicore/ops/add/add_infiniops.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include "infinicore/ops/add.hpp" - -#ifdef ENABLE_INFINIOPS_API -#include "../infiniops_impl.hpp" - -namespace infinicore::op::add_impl::infiniops { -namespace { - -using TensorMeta = ::infinicore::op::infiniops::TensorMeta; - -struct PlannedMeta { - TensorMeta c, a, b; - graph::GraphTensor c_tensor, a_tensor, b_tensor; -}; - -} // namespace - -void *plan(Tensor c, const Tensor &a, const Tensor &b) { - INFINICORE_ASSERT(c->device().getType() == Device::Type::NVIDIA); - INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b); - - return new PlannedMeta{ - TensorMeta(c), - TensorMeta(a), - TensorMeta(b), - graph::GraphTensor(c), - graph::GraphTensor(a), - graph::GraphTensor(b)}; -} - -void run(void *planned_meta) { - auto planned = reinterpret_cast(planned_meta); - - infini::ops::Handle handle; - handle.set_stream(context::getStream()); - infini::ops::Config config; - - infini::ops::Operator::Call( - handle, - config, - planned->a.tensor(planned->a_tensor), - planned->b.tensor(planned->b_tensor), - planned->c.tensor(planned->c_tensor)); -} - -void cleanup(void **planned_meta_ptr) { - delete *reinterpret_cast(planned_meta_ptr); - *planned_meta_ptr = nullptr; -} - -static bool registered = []() { - Add::plan_dispatcher().registerDevice(Device::Type::NVIDIA, &plan); - Add::run_dispatcher().registerDevice(Device::Type::NVIDIA, &run); - Add::cleanup_dispatcher().registerDevice(Device::Type::NVIDIA, &cleanup); - return true; -}(); - -} // namespace infinicore::op::add_impl::infiniops -#endif diff --git a/src/infinicore/ops/gemm/gemm_infiniop.cc b/src/infinicore/ops/gemm/gemm_infiniop.cc index 33a7271c0..283e5bb70 100644 --- a/src/infinicore/ops/gemm/gemm_infiniop.cc +++ b/src/infinicore/ops/gemm/gemm_infiniop.cc @@ -1,17 +1,49 @@ #include "../infiniop_impl.hpp" #include "infinicore/ops/gemm.hpp" +#ifdef ENABLE_INFINIOPS_API +#include "../infiniops_impl.hpp" +#endif + +#include + namespace infinicore::op::gemm_impl::infiniop { INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, Gemm, 100); +#ifdef ENABLE_INFINIOPS_API +using TensorMeta = ::infinicore::op::infiniops::TensorMeta; +#endif + struct PlannedMeta { std::shared_ptr descriptor; graph::GraphTensor workspace, c, a, b; float alpha, beta; +#ifdef ENABLE_INFINIOPS_API + bool use_infiniops = false; + std::optional c_meta, a_meta, b_meta; +#endif }; void *plan(Tensor c, const Tensor &a, const Tensor &b, float alpha, float beta) { +#ifdef ENABLE_INFINIOPS_API + if (c->device().getType() == Device::Type::NVIDIA) { + INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b); + return new PlannedMeta{ + nullptr, + graph::GraphTensor(c), + graph::GraphTensor(c), + graph::GraphTensor(a), + graph::GraphTensor(b), + alpha, + beta, + true, + TensorMeta(c), + TensorMeta(a), + TensorMeta(b)}; + } +#endif + size_t seed = hash_combine(c, a, b); INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE( @@ -34,6 +66,26 @@ void *plan(Tensor c, const Tensor &a, const Tensor &b, float alpha, float beta) void run(void *planned_meta) { auto planned = reinterpret_cast(planned_meta); +#ifdef ENABLE_INFINIOPS_API + if (planned->use_infiniops) { + infini::ops::Handle handle; + handle.set_stream(context::getStream()); + infini::ops::Config config; + + infini::ops::Operator::Call( + handle, + config, + planned->a_meta->tensor(planned->a), + planned->b_meta->tensor(planned->b), + std::optional{planned->alpha}, + std::optional{planned->beta}, + std::optional{}, + std::optional{}, + planned->c_meta->tensor(planned->c)); + return; + } +#endif + INFINICORE_CHECK_ERROR(infiniopGemm( planned->descriptor->desc, planned->workspace->data(), planned->workspace->numel(), planned->c->data(), planned->a->data(), planned->b->data(), planned->alpha, planned->beta, context::getStream())); diff --git a/src/infinicore/ops/gemm/gemm_infiniops.cc b/src/infinicore/ops/gemm/gemm_infiniops.cc deleted file mode 100644 index e28544ed1..000000000 --- a/src/infinicore/ops/gemm/gemm_infiniops.cc +++ /dev/null @@ -1,68 +0,0 @@ -#include "infinicore/ops/gemm.hpp" - -#ifdef ENABLE_INFINIOPS_API -#include "../infiniops_impl.hpp" - -#include - -namespace infinicore::op::gemm_impl::infiniops { -namespace { - -using TensorMeta = ::infinicore::op::infiniops::TensorMeta; - -struct PlannedMeta { - TensorMeta c, a, b; - graph::GraphTensor c_tensor, a_tensor, b_tensor; - float alpha, beta; -}; - -} // namespace - -void *plan(Tensor c, const Tensor &a, const Tensor &b, float alpha, float beta) { - INFINICORE_ASSERT(c->device().getType() == Device::Type::NVIDIA); - INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b); - - return new PlannedMeta{ - TensorMeta(c), - TensorMeta(a), - TensorMeta(b), - graph::GraphTensor(c), - graph::GraphTensor(a), - graph::GraphTensor(b), - alpha, - beta}; -} - -void run(void *planned_meta) { - auto planned = reinterpret_cast(planned_meta); - - infini::ops::Handle handle; - handle.set_stream(context::getStream()); - infini::ops::Config config; - - infini::ops::Operator::Call( - handle, - config, - planned->a.tensor(planned->a_tensor), - planned->b.tensor(planned->b_tensor), - std::optional{planned->alpha}, - std::optional{planned->beta}, - std::optional{}, - std::optional{}, - planned->c.tensor(planned->c_tensor)); -} - -void cleanup(void **planned_meta_ptr) { - delete *reinterpret_cast(planned_meta_ptr); - *planned_meta_ptr = nullptr; -} - -static bool registered = []() { - Gemm::plan_dispatcher().registerDevice(Device::Type::NVIDIA, &plan); - Gemm::run_dispatcher().registerDevice(Device::Type::NVIDIA, &run); - Gemm::cleanup_dispatcher().registerDevice(Device::Type::NVIDIA, &cleanup); - return true; -}(); - -} // namespace infinicore::op::gemm_impl::infiniops -#endif