Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/infinicore/ops/add/add_infiniop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,46 @@

#include "../infiniop_impl.hpp"

#ifdef ENABLE_INFINIOPS_API
#include "../infiniops_impl.hpp"
#endif

#include <optional>

namespace infinicore::op::add_impl::infiniop {

INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, Add, 100);

#ifdef ENABLE_INFINIOPS_API
// Local shorthand for the InfiniOps tensor-metadata adapter pulled in via
// "../infiniops_impl.hpp"; only available when the InfiniOps API is enabled.
using TensorMeta = ::infinicore::op::infiniops::TensorMeta;
#endif

/// State recorded by `plan` and later consumed by `run` for a single add.
///
/// Member order matters: `plan` fills this struct positionally through
/// aggregate initialization, so members must not be reordered.
struct PlannedMeta {
    /// infiniop descriptor; the InfiniOps branch of `plan` stores nullptr here.
    std::shared_ptr<Descriptor> descriptor;
    /// Workspace tensor; the InfiniOps branch of `plan` fills this slot with `c`.
    graph::GraphTensor workspace;
    /// Operand tensors captured at plan time.
    graph::GraphTensor c;
    graph::GraphTensor a;
    graph::GraphTensor b;
#ifdef ENABLE_INFINIOPS_API
    /// When true, `run` dispatches to the InfiniOps operator and returns early.
    bool use_infiniops = false;
    /// Per-tensor metadata used to rebuild InfiniOps tensor views in `run`;
    /// left empty unless the InfiniOps branch of `plan` populates them.
    std::optional<TensorMeta> c_meta;
    std::optional<TensorMeta> a_meta;
    std::optional<TensorMeta> b_meta;
#endif
};

void *plan(Tensor c, const Tensor &a, const Tensor &b) {
#ifdef ENABLE_INFINIOPS_API
if (c->device().getType() == Device::Type::NVIDIA) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b);
return new PlannedMeta{
nullptr,
graph::GraphTensor(c),
graph::GraphTensor(c),
graph::GraphTensor(a),
graph::GraphTensor(b),
true,
TensorMeta(c),
TensorMeta(a),
TensorMeta(b)};
}
#endif

size_t seed = hash_combine(c, b, a);

INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE(
Expand All @@ -32,6 +62,22 @@ void *plan(Tensor c, const Tensor &a, const Tensor &b) {
void run(void *planned_meta) {
auto planned = reinterpret_cast<PlannedMeta *>(planned_meta);

#ifdef ENABLE_INFINIOPS_API
if (planned->use_infiniops) {
infini::ops::Handle handle;
handle.set_stream(context::getStream());
infini::ops::Config config;

infini::ops::Operator<infini::ops::Add>::Call(
handle,
config,
planned->a_meta->tensor(planned->a),
planned->b_meta->tensor(planned->b),
planned->c_meta->tensor(planned->c));
return;
}
#endif

INFINICORE_CHECK_ERROR(infiniopAdd(
planned->descriptor->desc,
planned->workspace->data(),
Expand Down
52 changes: 52 additions & 0 deletions src/infinicore/ops/gemm/gemm_infiniop.cc
Original file line number Diff line number Diff line change
@@ -1,17 +1,49 @@
#include "../infiniop_impl.hpp"
#include "infinicore/ops/gemm.hpp"

#ifdef ENABLE_INFINIOPS_API
#include "../infiniops_impl.hpp"
#endif

#include <optional>

namespace infinicore::op::gemm_impl::infiniop {

INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, Gemm, 100);

#ifdef ENABLE_INFINIOPS_API
// Local shorthand for the InfiniOps tensor-metadata adapter pulled in via
// "../infiniops_impl.hpp"; only available when the InfiniOps API is enabled.
using TensorMeta = ::infinicore::op::infiniops::TensorMeta;
#endif

/// State recorded by `plan` and later consumed by `run` for a single GEMM.
///
/// Member order matters: `plan` fills this struct positionally through
/// aggregate initialization, so members must not be reordered.
struct PlannedMeta {
    /// infiniop descriptor; the InfiniOps branch of `plan` stores nullptr here.
    std::shared_ptr<Descriptor> descriptor;
    /// Workspace tensor; the InfiniOps branch of `plan` fills this slot with `c`.
    graph::GraphTensor workspace;
    /// Operand tensors captured at plan time.
    graph::GraphTensor c;
    graph::GraphTensor a;
    graph::GraphTensor b;
    /// Scalar factors forwarded unchanged to the GEMM call in `run`.
    float alpha;
    float beta;
#ifdef ENABLE_INFINIOPS_API
    /// When true, `run` dispatches to the InfiniOps operator and returns early.
    bool use_infiniops = false;
    /// Per-tensor metadata used to rebuild InfiniOps tensor views in `run`;
    /// left empty unless the InfiniOps branch of `plan` populates them.
    std::optional<TensorMeta> c_meta;
    std::optional<TensorMeta> a_meta;
    std::optional<TensorMeta> b_meta;
#endif
};

void *plan(Tensor c, const Tensor &a, const Tensor &b, float alpha, float beta) {
#ifdef ENABLE_INFINIOPS_API
if (c->device().getType() == Device::Type::NVIDIA) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b);
return new PlannedMeta{
nullptr,
graph::GraphTensor(c),
graph::GraphTensor(c),
graph::GraphTensor(a),
graph::GraphTensor(b),
alpha,
beta,
true,
TensorMeta(c),
TensorMeta(a),
TensorMeta(b)};
}
#endif

size_t seed = hash_combine(c, a, b);

INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE(
Expand All @@ -34,6 +66,26 @@ void *plan(Tensor c, const Tensor &a, const Tensor &b, float alpha, float beta)
void run(void *planned_meta) {
auto planned = reinterpret_cast<PlannedMeta *>(planned_meta);

#ifdef ENABLE_INFINIOPS_API
if (planned->use_infiniops) {
infini::ops::Handle handle;
handle.set_stream(context::getStream());
infini::ops::Config config;

infini::ops::Operator<infini::ops::Gemm>::Call(
handle,
config,
planned->a_meta->tensor(planned->a),
planned->b_meta->tensor(planned->b),
std::optional<float>{planned->alpha},
std::optional<float>{planned->beta},
std::optional<int>{},
std::optional<int>{},
planned->c_meta->tensor(planned->c));
return;
}
#endif

INFINICORE_CHECK_ERROR(infiniopGemm(
planned->descriptor->desc, planned->workspace->data(), planned->workspace->numel(),
planned->c->data(), planned->a->data(), planned->b->data(), planned->alpha, planned->beta, context::getStream()));
Expand Down
72 changes: 72 additions & 0 deletions src/infinicore/ops/infiniops_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#pragma once

#include "../utils.hpp"
#include "infinicore/tensor.hpp"

#include <stdexcept>

#include "infini/operator_call_instantiations.h"
#include "tensor.h"

namespace infinicore::op::infiniops {

/// Translates an infinicore `DataType` into the matching
/// `infini::ops::DataType` enumerator.
///
/// `U8` and `BYTE` both translate to `kUInt8`.
///
/// @param dtype infinicore element type to translate.
/// @return The corresponding InfiniOps element type.
/// @throws std::runtime_error if `dtype` has no mapping listed below.
inline infini::ops::DataType toInfiniOpsDtype(DataType dtype) {
    using Target = infini::ops::DataType;

    switch (dtype) {
    // Signed integers.
    case DataType::I8:   return Target::kInt8;
    case DataType::I16:  return Target::kInt16;
    case DataType::I32:  return Target::kInt32;
    case DataType::I64:  return Target::kInt64;
    // Unsigned integers (raw bytes are treated as 8-bit unsigned).
    case DataType::U8:
    case DataType::BYTE: return Target::kUInt8;
    case DataType::U16:  return Target::kUInt16;
    case DataType::U32:  return Target::kUInt32;
    case DataType::U64:  return Target::kUInt64;
    // Floating point.
    case DataType::F16:  return Target::kFloat16;
    case DataType::BF16: return Target::kBFloat16;
    case DataType::F32:  return Target::kFloat32;
    case DataType::F64:  return Target::kFloat64;
    default:
        break;
    }

    // Reached only for dtypes without a mapping above.
    throw std::runtime_error("InfiniOps backend does not support this tensor dtype.");
}

/// Builds the `infini::ops::Device` equivalent of an infinicore `Device`.
///
/// Only NVIDIA devices are accepted; any other device type trips
/// `INFINICORE_ASSERT`.
///
/// @param device infinicore device to translate.
/// @return An InfiniOps NVIDIA device carrying the same index (cast to int).
inline infini::ops::Device toInfiniOpsDevice(const Device &device) {
    INFINICORE_ASSERT(device.getType() == Device::Type::NVIDIA);

    const auto device_index = static_cast<int>(device.getIndex());
    return infini::ops::Device{infini::ops::Device::Type::kNvidia, device_index};
}

/// Snapshot of a tensor's shape, strides, dtype and device, taken at
/// construction, from which an `infini::ops::Tensor` can be rebuilt later
/// around a data pointer.
struct TensorMeta {
    Shape shape;
    Strides strides;
    infini::ops::DataType dtype;
    infini::ops::Device device;

    /// Captures the metadata of `tensor`, converting dtype and device to
    /// their InfiniOps equivalents (either conversion may reject the tensor).
    explicit TensorMeta(const Tensor &tensor)
        : shape(tensor->shape()),
          strides(tensor->strides()),
          dtype(toInfiniOpsDtype(tensor->dtype())),
          device(toInfiniOpsDevice(tensor->device())) {}

    /// Wraps `data` in an `infini::ops::Tensor` described by the stored
    /// metadata.
    infini::ops::Tensor tensor(const void *data) const {
        // Constness is stripped before handing the pointer over; presumably the
        // InfiniOps tensor constructor only accepts a mutable pointer.
        void *const raw = const_cast<void *>(data);
        return infini::ops::Tensor(raw, shape, dtype, device, strides);
    }

    /// Convenience overload: wraps the data pointer of `tensor` using the
    /// stored metadata (not the metadata of `tensor` itself).
    infini::ops::Tensor tensor(const Tensor &tensor) const {
        return this->tensor(tensor->data());
    }
};

} // namespace infinicore::op::infiniops
64 changes: 64 additions & 0 deletions xmake.lua
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,19 @@ if has_config("ccl") then
add_defines("ENABLE_CCL")
end

-- InfiniOps: opt-in switch for the InfiniOps kernel backend (off by default).
option("infiniops")
    set_description("Whether to use InfiniOps kernels where adapters are available")
    set_showmenu(true)
    set_default(false)
option_end()

-- Location of the InfiniOps checkout consumed when --infiniops is enabled;
-- defaults to the in-tree submodule path.
option("infiniops-root")
    set_description("Path to the InfiniOps repository used by --infiniops")
    set_showmenu(true)
    set_default("submodules/InfiniOps")
option_end()

-- Mutual Awareness Analyzer
option("mutual-awareness")
set_default(false)
Expand Down Expand Up @@ -353,6 +366,11 @@ target("infiniop")
set_kind("shared")
add_deps("infinirt")

if has_config("nv-gpu") then
    -- Locate the CUDA toolkit: the CUDA_HOME / CUDA_PATH environment variables
    -- take precedence over the xmake `cuda` config value, and /usr/local/cuda
    -- is the last resort.
    local cuda_root = os.getenv("CUDA_HOME") or os.getenv("CUDA_PATH")
    if not cuda_root then
        cuda_root = get_config("cuda") or "/usr/local/cuda"
    end
    add_includedirs(cuda_root .. "/include")
end

if has_config("cpu") then
add_deps("infiniop-cpu")
end
Expand Down Expand Up @@ -467,6 +485,40 @@ target("infinicore_cpp_api")

add_includedirs("include")
add_includedirs(INFINI_ROOT.."/include", { public = true })
if has_config("nv-gpu") then
    -- Locate the CUDA toolkit: the CUDA_HOME / CUDA_PATH environment variables
    -- take precedence over the xmake `cuda` config value, and /usr/local/cuda
    -- is the last resort.
    local cuda_root = os.getenv("CUDA_HOME") or os.getenv("CUDA_PATH")
    if not cuda_root then
        cuda_root = get_config("cuda") or "/usr/local/cuda"
    end
    add_includedirs(cuda_root .. "/include")
end
-- Optional InfiniOps integration: validates the configuration, wires up the
-- compile/link settings, and builds libinfiniops with CMake before this target.
if has_config("infiniops") then
    local infiniops_root = path.absolute(get_config("infiniops-root") or "submodules/InfiniOps", os.projectdir())
    local infiniops_builddir = path.join(infiniops_root, "build")

    -- Fail early on an unusable configuration.
    if not os.isdir(infiniops_root) then
        raise("InfiniOps root not found: " .. infiniops_root)
    end
    if not has_config("nv-gpu") then
        raise("InfiniOps integration currently has adapters only for NVIDIA")
    end

    -- Enables the ENABLE_INFINIOPS_API code paths in the C++ sources.
    add_defines("ENABLE_INFINIOPS_API")
    add_includedirs(infiniops_root .. "/src", infiniops_root .. "/include", infiniops_root .. "/generated/include")

    -- Link against (and install) the library produced by the CMake build below.
    add_linkdirs(infiniops_builddir .. "/src")
    add_links("infiniops")
    add_rpathdirs(infiniops_builddir .. "/src")
    add_installfiles(infiniops_builddir .. "/src/libinfiniops.so", {prefixdir = "lib"})

    before_build(function (target)
        -- The paths are recomputed inside the script rather than reusing the
        -- description-scope locals above.
        local infiniops_root = path.absolute(get_config("infiniops-root") or "submodules/InfiniOps", os.projectdir())
        local infiniops_builddir = path.join(infiniops_root, "build")

        -- Configure step: NVIDIA backend with generated operator call
        -- instantiations, no Python bindings, Release build.
        os.execv("cmake", {
            "-S", infiniops_root,
            "-B", infiniops_builddir,
            "-DWITH_NVIDIA=ON",
            "-DGENERATE_OPERATOR_CALL_INSTANTIATIONS=ON",
            "-DGENERATE_PYTHON_BINDINGS=OFF",
            "-DCMAKE_BUILD_TYPE=Release"
        })

        -- Build step: only the `infiniops` library target is needed.
        os.execv("cmake", {"--build", infiniops_builddir, "--target", "infiniops"})
    end)
end

add_linkdirs(INFINI_ROOT.."/lib")
add_links("infiniop", "infinirt", "infiniccl")
Expand Down Expand Up @@ -675,6 +727,18 @@ target("_infinicore")

add_files("src/infinicore/pybind11/**.cc")

if has_config("infiniops") then
    -- After installing the Python extension, copy libinfiniops.so into both
    -- the Infini install tree and the Python package's lib directory.
    after_install(function (target)
        -- Install root: $INFINI_ROOT when set, otherwise <home>/.infini.
        local install_root = os.getenv("INFINI_ROOT")
        if not install_root then
            local home_var = is_host("windows") and "HOMEPATH" or "HOME"
            install_root = os.getenv(home_var) .. "/.infini"
        end

        local ops_root = path.absolute(get_config("infiniops-root") or "submodules/InfiniOps", os.projectdir())
        local ops_lib = path.join(ops_root, "build", "src", "libinfiniops.so")

        local destinations = {
            path.join(install_root, "lib"),
            path.join(os.projectdir(), "python", "infinicore", "lib")
        }
        for _, dest_dir in ipairs(destinations) do
            os.mkdir(dest_dir)
            os.cp(ops_lib, dest_dir)
        end
    end)
end

set_installdir("python/infinicore")
target_end()

Expand Down
Loading