Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Add GpuSharedMemory Tests for Intel GPU #537

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions omniscidb/QueryEngine/CompilationOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <ostream>
#endif

#include <iostream>
#include "Compiler/CodegenTraitsDescriptor.h"
#include "Shared/Config.h"
#include "Shared/DeviceType.h"
Expand Down Expand Up @@ -68,6 +69,7 @@ struct CompilationOptions {
case ExecutorDeviceType::CPU:
return compiler::cpu_cgen_traits_desc;
case ExecutorDeviceType::GPU:
std::cout << "Check is_l0?" << is_l0 << std::endl;
return (is_l0 ? compiler::l0_cgen_traits_desc : compiler::cuda_cgen_traits_desc);
}
return {};
Expand Down
3 changes: 2 additions & 1 deletion omniscidb/QueryEngine/Compiler/Backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ CodegenTraits CodegenTraits::get(CodegenTraitsDescriptor codegen_traits_desc) {
CHECK(descCallingConvToLLVM.find(codegen_traits_desc.conv_) !=
descCallingConvToLLVM.end());
return CodegenTraits(codegen_traits_desc.local_addr_space_,
codegen_traits_desc.smem_addr_space_,
codegen_traits_desc.shared_addr_space_,
codegen_traits_desc.global_addr_space_,
descCallingConvToLLVM.at(codegen_traits_desc.conv_),
codegen_traits_desc.triple_);
Expand Down Expand Up @@ -1014,6 +1014,7 @@ std::shared_ptr<L0CompilationContext> L0Backend::generateNativeGPUCode(
spirv_src->addOperand(llvm::MDNode::get(ctx, spirv_src_ops));

compiler::optimize_ir(func, module, live_funcs, false /*smem_used*/, co);
std::cout << "GenerateNativeGPUCode" << std::endl;

// Remove the remaining freeze instruction after the optimization
std::vector<llvm::Instruction*> to_erase;
Expand Down
20 changes: 13 additions & 7 deletions omniscidb/QueryEngine/Compiler/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,18 @@ namespace compiler {

class CodegenTraits {
explicit CodegenTraits(unsigned local_addr_space,
unsigned smem_addr_space,
unsigned shared_addr_space,
unsigned global_addr_space,
llvm::CallingConv::ID calling_conv,
llvm::StringRef triple = "")
: local_addr_space_(local_addr_space)
, smem_addr_space_(smem_addr_space)
, shared_addr_space_(shared_addr_space)
, global_addr_space_(global_addr_space)
, conv_(calling_conv)
, triple_(triple) {}

const unsigned local_addr_space_;
const unsigned smem_addr_space_;
const unsigned shared_addr_space_;
const unsigned global_addr_space_;
const llvm::CallingConv::ID conv_;
const llvm::StringRef triple_;
Expand All @@ -60,12 +60,12 @@ class CodegenTraits {
CodegenTraits& operator=(const CodegenTraits&) = delete;

static CodegenTraits get(unsigned local_addr_space,
unsigned smem_addr_space,
unsigned shared_addr_space,
unsigned global_addr_space,
llvm::CallingConv::ID calling_conv,
llvm::StringRef triple = "") {
return CodegenTraits(
local_addr_space, smem_addr_space, global_addr_space, calling_conv, triple);
local_addr_space, shared_addr_space, global_addr_space, calling_conv, triple);
}

static CodegenTraits get(CodegenTraitsDescriptor codegen_traits_desc);
Expand All @@ -76,8 +76,10 @@ class CodegenTraits {
const std::string triple = "");

CodegenTraitsDescriptor getDescriptor() {
std::cout << "Backend.h getDescriptor() shared_addr_space = " << shared_addr_space_
<< std::endl;
return CodegenTraitsDescriptor(local_addr_space_,
smem_addr_space_,
shared_addr_space_,
global_addr_space_,
llvmCallingConvToDesc.at(conv_),
triple_.str());
Expand All @@ -87,7 +89,9 @@ class CodegenTraits {
return llvm::PointerType::get(ElementType, local_addr_space_);
}
llvm::PointerType* smemPointerType(llvm::Type* ElementType) const {
return llvm::PointerType::get(ElementType, smem_addr_space_);
std::cout << "Backend.h shared_addr_space = " << shared_addr_space_ << std::endl;
assert(shared_addr_space_ == 3); // Shared mem addr space on Intel GPU is 3
return llvm::PointerType::get(ElementType, shared_addr_space_);
}
llvm::PointerType* globalPointerType(llvm::Type* ElementType) const {
return llvm::PointerType::get(ElementType, global_addr_space_);
Expand Down Expand Up @@ -234,4 +238,6 @@ void setSharedMemory(ExecutorDeviceType dt,
GPUTarget& gpu_target,
const std::shared_ptr<compiler::Backend>& backend);

void replace_function(llvm::Module* from, llvm::Module* to, const std::string& fname);

} // namespace compiler
11 changes: 7 additions & 4 deletions omniscidb/QueryEngine/Compiler/CodegenTraitsDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,22 @@ namespace compiler {
enum class CallingConvDesc { C, SPIR_FUNC };
struct CodegenTraitsDescriptor {
CodegenTraitsDescriptor(unsigned local_addr_space,
unsigned smem_addr_space,
unsigned shared_addr_space,
unsigned global_addr_space,
CallingConvDesc calling_conv,
std::string_view triple)
: local_addr_space_(local_addr_space)
, smem_addr_space_(smem_addr_space)
, shared_addr_space_(shared_addr_space)
, global_addr_space_(global_addr_space)
, conv_(calling_conv)
, triple_(triple) {}
CodegenTraitsDescriptor(){};
CodegenTraitsDescriptor() {
std::cout << "CodegenTraitsDescriptor shared_addr_space=" << shared_addr_space_
<< std::endl;
};

unsigned local_addr_space_{0};
unsigned smem_addr_space_{0};
unsigned shared_addr_space_{0};
unsigned global_addr_space_{0};
CallingConvDesc conv_;
std::string_view triple_{"DUMMY"};
Expand Down
10 changes: 8 additions & 2 deletions omniscidb/QueryEngine/GpuSharedMemoryUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void GpuSharedMemCodeBuilder::codegen(const CompilationOptions& co) {
CHECK(init_func_);
codegenInitialization();
compiler::verify_function_ir(init_func_);
DUMP_MODULE(init_func_->getParent(), "after.codegenInitialization.ll");

// codegen the reduction function:
reduction_func_ = createReductionFunction(co);
Expand Down Expand Up @@ -242,10 +243,12 @@ llvm::Value* codegen_smem_dest_slot_ptr(llvm::LLVMContext& context,
auto ptr_type = [&context, &traits](const size_t slot_bytes,
const hdk::ir::Type* type) {
if (slot_bytes == sizeof(int32_t)) {
return traits.smemPointerType(llvm::Type::getInt32Ty(context));
// return traits.smemPointerType(llvm::Type::getInt32Ty(context)); // quickfix
return llvm::Type::getInt32PtrTy(context, /*address_space=*/3);
} else {
CHECK(slot_bytes == sizeof(int64_t));
return traits.smemPointerType(llvm::Type::getInt64Ty(context));
// return traits.smemPointerType(llvm::Type::getInt64Ty(context)); // quickfix
return llvm::Type::getInt64PtrTy(context, /*address_space=*/3);
}
UNREACHABLE() << "Invalid slot size encountered: " << std::to_string(slot_bytes);
return traits.smemPointerType(llvm::Type::getInt32Ty(context));
Expand Down Expand Up @@ -327,6 +330,9 @@ void GpuSharedMemCodeBuilder::codegenInitialization() {
dest_byte_stream,
byte_offset_ll);

// DUMP(traits_, "output_traits_")
// std::cout << "traits_" << traits_;

llvm::Value* init_value_ll = nullptr;
if (slot_size == sizeof(int32_t)) {
init_value_ll =
Expand Down
13 changes: 13 additions & 0 deletions omniscidb/Tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ endif()
add_executable(StringDictionaryBenchmark StringDictionaryBenchmark.cpp)

if(ENABLE_L0)
add_executable(GpuSharedMemoryTestIntel GpuSharedMemoryTestIntel.cpp ResultSetTestUtils.cpp)
add_executable(L0MgrExecuteTest L0MgrExecuteTest.cpp)
add_executable(SpirvBuildTest SpirvBuildTest.cpp)
add_executable(DataMgrWithL0Test DataMgrWithL0Test.cpp)
Expand All @@ -69,11 +70,13 @@ if(ENABLE_L0)
target_link_libraries(SpirvBuildTest gtest ${llvm_libs})
target_link_libraries(DataMgrWithL0Test DataMgr gtest)
target_link_libraries(IntelGPUEnablingTest gtest QueryEngine ArrowQueryRunner)
target_link_libraries(GpuSharedMemoryTestIntel gtest QueryEngine ArrowQueryRunner)

add_test(L0MgrExecuteTest L0MgrExecuteTest ${TEST_ARGS})
add_test(SpirvBuildTest SpirvBuildTest ${TEST_ARGS})
add_test(DataMgrWithL0Test DataMgrWithL0Test ${TEST_ARGS})
add_test(IntelGPUEnablingTest IntelGPUEnablingTest ${TEST_ARGS})
add_test(GpuSharedMemoryTestIntel GpuSharedMemoryTestIntel ${TEST_ARGS})
endif()

add_executable(CostModelTest CostModel/CostModelTest.cpp)
Expand Down Expand Up @@ -134,6 +137,10 @@ else()
target_link_libraries(StringDictionaryBenchmark benchmark gtest StringDictionary Logger Utils $<$<AND:$<CXX_COMPILER_ID:GNU>,$<VERSION_LESS:$<CXX_COMPILER_VERSION>,9.0>>:stdc++fs> ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} ${ZLIB_LIBRARIES})
endif()

if(ENABLE_L0)
target_link_libraries(GpuSharedMemoryTestIntel gtest Logger QueryEngine)
endif()

if(ENABLE_CUDA)
target_link_libraries(GpuSharedMemoryTest gtest Logger QueryEngine)
endif()
Expand Down Expand Up @@ -180,6 +187,7 @@ if(ENABLE_CUDA)
add_test(GpuSharedMemoryTest GpuSharedMemoryTest ${TEST_ARGS})
endif()
if(ENABLE_L0)
add_test(GpuSharedMemoryTestIntel GpuSharedMemoryTestIntel ${TEST_ARGS})
add_test(NAME PuntToCpu COMMAND ArrowBasedExecuteTest "--gtest_filter=Select.Punt*" ${TEST_ARGS})
set_tests_properties(PuntToCpu PROPERTIES LABELS "enabling")
add_test(NAME StreamingTopNFallback COMMAND ArrowBasedExecuteTest "--gtest_filter=Select.TopKHeap:Select.TimeInterval:Select.OverflowAndUnderFlow" ${TEST_ARGS})
Expand Down Expand Up @@ -236,6 +244,10 @@ if(ENABLE_CUDA)
list(APPEND TEST_PROGRAMS GpuSharedMemoryTest)
endif()

if(ENABLE_L0)
list(APPEND TEST_PROGRAMS GpuSharedMemoryTestIntel)
endif()

#if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
# list(APPEND TEST_PROGRAMS UdfTest)
#endif()
Expand Down Expand Up @@ -323,6 +335,7 @@ add_custom_target(topk_tests

if(ENABLE_L0)
set(ENABLING_TESTS
GpuSharedMemoryTestIntel
ArrowStorageSqlTest # taxi queries
SpirvBuildTest
L0MgrExecuteTest
Expand Down
Loading