Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e7f623c
add MPI to CMake configuration
TimThuering May 28, 2026
673917c
add global yaml dump with data from all MPI ranks to system_hardware_…
TimThuering May 28, 2026
1f113ef
update documentation
TimThuering May 28, 2026
772d226
add global yaml output for individual hardware sampler on all MPI ranks
TimThuering May 28, 2026
9ae4c80
add mpi4py compatibility for python bindings
TimThuering May 29, 2026
b3912c2
add dump_yaml_global to system_hardware_sampler python bindings
TimThuering May 29, 2026
59caf70
add dump_yaml_global to hardware_sampler python bindings
TimThuering May 29, 2026
0cfe527
added system hardware sampler creation which avoids duplicates for MP…
TimThuering Jun 8, 2026
bea8838
added synchronous start and stop sampling for MPI
TimThuering Jun 8, 2026
04210fb
python bindings for MPI-aware constructor and functions
TimThuering Jun 8, 2026
89116b4
fixes for mpi backend with intel GPUs
TimThuering Jun 9, 2026
a7c6543
fix for python bindings
TimThuering Jun 12, 2026
45d5099
fix for non-mpi mode
TimThuering Jun 12, 2026
5532369
restructure MPI related utility functions
TimThuering Jun 17, 2026
99d7f10
consistent formatting
TimThuering Jun 17, 2026
1d3accf
add additional cmake check for mpi4py include path
TimThuering Jun 17, 2026
9fab189
add mpi4py version check to cmake
TimThuering Jun 17, 2026
f43800f
cmake variable renamings
TimThuering Jun 22, 2026
41d2029
update std::runtime_error constructor calls
TimThuering Jun 22, 2026
666130e
update include guard name in mpi4py_communicator.hpp
TimThuering Jun 22, 2026
5be4fed
split variable declaration into separate lines
TimThuering Jun 22, 2026
e4716ff
change std::runtime_error constructor calls
TimThuering Jun 22, 2026
c66b510
add [[nodiscard]] and const to gpu_*/utility.cpp files
TimThuering Jun 22, 2026
5ec4072
added missing const
TimThuering Jun 22, 2026
45b7587
change std::runtime_error constructor call
TimThuering Jun 22, 2026
7459d19
changed prefix from std::string to std::string_view
TimThuering Jun 22, 2026
f3bbb01
change auto to acutal types
TimThuering Jun 22, 2026
c75de31
add constructor/destructor to hostname_comm_info
TimThuering Jun 22, 2026
8d7011e
refactor MPI related functions
TimThuering Jun 23, 2026
9701c70
add documentation in visible_gpu_device header
TimThuering Jun 25, 2026
62c31df
update CMake error message if mpi4py causes problems
TimThuering Jun 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES})
add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME})

# set install target
set(HWS_TARGETS_TO_INSTALL )
set(HWS_TARGETS_TO_INSTALL)

# use C++17
target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17)
Expand Down Expand Up @@ -147,6 +147,42 @@ else ()
endif ()


####################################################################################################################
## enable MPI support ##
####################################################################################################################
# User-facing switch:
#   AUTO - use MPI if it can be found, silently continue without it otherwise
#   ON   - use MPI and raise a configure error if it cannot be found
#   OFF  - do not look for MPI at all
set(HWS_ENABLE_MPI_SUPPORT AUTO CACHE STRING "Enable MPI support.")
set_property(CACHE HWS_ENABLE_MPI_SUPPORT PROPERTY STRINGS AUTO ON OFF)

# Derived state that tells the Python bindings (and potentially other submodules) whether the core library was
# really built with MPI. It must be reset on EVERY configure run: a plain `set(... CACHE BOOL ...)` does not
# overwrite an existing cache entry, so a TRUE left over from an earlier configure would survive switching MPI off.
# CACHE INTERNAL implies FORCE and keeps this non-user-editable value out of the CMake GUIs.
set(HWS_MPI_SUPPORT_ACTIVE FALSE CACHE INTERNAL "MPI support enabled in core library")

# AUTO is not a CMake boolean constant, therefore it is compared explicitly (exact match instead of a regex search)
if ("${HWS_ENABLE_MPI_SUPPORT}" STREQUAL "AUTO" OR HWS_ENABLE_MPI_SUPPORT)
    # try finding MPI
    find_package(MPI QUIET)

    # check if MPI could be found
    if (NOT MPI_FOUND)
        # every enabling value other than AUTO (ON, TRUE, 1, on, ...) is an explicit request and must not be
        # downgraded silently
        if (NOT "${HWS_ENABLE_MPI_SUPPORT}" STREQUAL "AUTO")
            message(SEND_ERROR "Cannot find MPI but MPI support was explicitly requested!")
        else ()
            message(STATUS "Cannot find MPI. MPI support disabled.")
        endif ()
    else ()
        message(STATUS "Enable MPI support (${MPI_CXX_VERSION}).")

        # link against necessary libraries
        target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC MPI::MPI_CXX)

        # add compile definition
        target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_MPI_SUPPORT_ENABLED)

        # Expose that MPI is really enabled for the Python bindings (and potentially other submodules).
        set(HWS_MPI_SUPPORT_ACTIVE TRUE CACHE INTERNAL "MPI support enabled in core library")

        # the MPI utility functions are only compiled if MPI is available
        target_sources(${HWS_LIBRARY_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/mpi_utility.cpp)
    endif ()
endif ()


####################################################################################################################
## enable Python bindings ##
####################################################################################################################
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ The `[optional_options]` can be one or multiple of:
- `HWS_SAMPLING_INTERVAL=100ms` (default: `100ms`): set the sampling interval in milliseconds
- `HWS_ENABLE_PYTHON_BINDINGS=ON|OFF` (default: `ON`): enable Python bindings

- `HWS_ENABLE_MPI_SUPPORT=ON|OFF|AUTO` (default: `AUTO`): enable MPI support
  - `ON`: check whether MPI is available and fail if this is not the case
  - `AUTO`: check whether MPI is available but **do not** fail if this is not the case
  - `OFF`: do not check whether MPI is available

### Installing via CMake

The library supports the `install` target:
Expand Down
49 changes: 49 additions & 0 deletions bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,55 @@ target_include_directories(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE ${CMAKE_C
target_link_libraries(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE ${HWS_LIBRARY_NAME})
target_compile_definitions(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE PYBIND11_DETAILED_ERROR_MESSAGES)

if(HWS_MPI_SUPPORT_ACTIVE)
message(STATUS "MPI support enabled. Adding mpi4py include directory and linking against MPI.")
# Get mpi4py's C header location, simultaneously checking if mpi4py is importable in the current Python environment
execute_process(
COMMAND "${Python_EXECUTABLE}" -c
"import mpi4py, sys; sys.stdout.write(mpi4py.get_include())"

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible, that mpi4py is installed, but broken without include path? Should we additionally check for that?

RESULT_VARIABLE HWS_MPI4PY_IMPORT_RESULT
OUTPUT_VARIABLE HWS_MPI4PY_INCLUDE_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE
)

# Remediation steps shared by all mpi4py related diagnostics below.
string(CONCAT HWS_MPI4PY_FIX_HINT
    "To fix this, either:\n"
    " 1. Reinstall mpi4py in this environment \n"
    " 2. Disable Python bindings: -DHWS_ENABLE_PYTHON_BINDINGS=OFF\n"
    " 3. Disable MPI support: -DHWS_ENABLE_MPI_SUPPORT=OFF")

# A non-zero exit code (or an error string) means that the interpreter could not import mpi4py.
if(HWS_MPI4PY_IMPORT_RESULT)
    message(FATAL_ERROR
        "MPI support is enabled in hws (HWS_ENABLE_MPI_SUPPORT=AUTO/ON and MPI_FOUND) "
        "but mpi4py is not importable in Python_EXECUTABLE='${Python_EXECUTABLE}'. "
        "${HWS_MPI4PY_FIX_HINT}")
endif()

# mpi4py may be importable although its C headers are missing; the bindings cannot be compiled without them.
if(NOT EXISTS "${HWS_MPI4PY_INCLUDE_DIR}/mpi4py/mpi4py.h")
    message(FATAL_ERROR
        "mpi4py include path '${HWS_MPI4PY_INCLUDE_DIR}' does not contain mpi4py/mpi4py.h. "
        "The mpi4py installation appears to be broken. "
        "${HWS_MPI4PY_FIX_HINT}")
endif()

# Query the mpi4py version; the exit status is checked so that a failing query is not mistaken for an old version.
execute_process(
    COMMAND "${Python_EXECUTABLE}" -c
            "import mpi4py, sys; sys.stdout.write(mpi4py.__version__)"
    RESULT_VARIABLE HWS_MPI4PY_VERSION_RESULT
    OUTPUT_VARIABLE HWS_MPI4PY_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(HWS_MPI4PY_VERSION_RESULT OR "${HWS_MPI4PY_VERSION}" STREQUAL "")
    message(FATAL_ERROR
        "Could not determine the mpi4py version using Python_EXECUTABLE='${Python_EXECUTABLE}'. "
        "The mpi4py installation appears to be broken. "
        "${HWS_MPI4PY_FIX_HINT}")
endif()
if(HWS_MPI4PY_VERSION VERSION_LESS "4.0")
    message(FATAL_ERROR
        "mpi4py>=4.0 is required but found ${HWS_MPI4PY_VERSION} in Python_EXECUTABLE='${Python_EXECUTABLE}'. "
        "Upgrade mpi4py or disable python bindings.")
endif()

# quoted so that the path is always passed as exactly one argument
target_include_directories(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE "${HWS_MPI4PY_INCLUDE_DIR}")

# Propagate the same macro used on the C++ side into the Python module
target_compile_definitions(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE HWS_MPI_SUPPORT_ENABLED)
endif()

include(GNUInstallDirs)
# install Python bindings
install(TARGETS ${HWS_PYTHON_BINDINGS_LIBRARY_NAME}
Expand Down
10 changes: 10 additions & 0 deletions bindings/hardware_sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
#include "relative_event.hpp" // hws::detail::relative_event
#include <string> // std::string

#if defined(HWS_MPI_SUPPORT_ENABLED)
#include "mpi4py_communicator.hpp"
#include <mpi.h>
#endif

namespace py = pybind11;

void init_hardware_sampler(py::module_ &m) {
Expand Down Expand Up @@ -62,6 +67,11 @@ void init_hardware_sampler(py::module_ &m) {
.def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
.def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
.def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file")
#if defined(HWS_MPI_SUPPORT_ENABLED)
.def("dump_yaml_global", [](const hws::hardware_sampler &self, const std::string &filename, py::object py_comm) {
const MPI_Comm comm = mpi_comm_from_python(py_comm);
self.dump_yaml_global(filename, comm); }, py::arg("filename"), py::arg("comm"), "Let MPI rank 0 dump the hardware samples of this hardware sampler of all MPI ranks to the given YAML file using the provided mpi4py communicator.")
#endif
.def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples including additional information like events as YAML string")
.def("samples_only_as_yaml_string", &hws::hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
.def("__repr__", [](const hws::hardware_sampler &self) {
Expand Down
39 changes: 38 additions & 1 deletion bindings/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,18 @@
* See the LICENSE.md file in the project root for full license information.
*/

#include "hws/version.hpp" // hws::version::version
#include "hws/version.hpp" // hws::version::version

#include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_

#include <string_view> // std::string_view

#if defined(HWS_MPI_SUPPORT_ENABLED)
#include "mpi4py_communicator.hpp"
#include <mpi.h>
#include <mpi4py/mpi4py.h>
#endif

#define HWS_IS_DEFINED_HELPER(x) #x
#define HWS_IS_DEFINED(x) (std::string_view{ #x } != std::string_view{ HWS_IS_DEFINED_HELPER(x) })

Expand All @@ -32,6 +38,15 @@ PYBIND11_MODULE(HardwareSampling, m) {
m.doc() = "Hardware Sampling for CPUs and GPUs";
m.attr("__version__") = hws::version::version;

// MPI support
#if defined(HWS_MPI_SUPPORT_ENABLED)
// Initialize mpi4py C-API so PyMPIComm_* are usable
if (import_mpi4py() < 0) {
throw py::error_already_set();
}
#endif
m.def("has_mpi_support", []() { return HWS_IS_DEFINED(HWS_MPI_SUPPORT_ENABLED); });

init_event(m);
init_sample_category(m);
init_relative_event(m);
Expand Down Expand Up @@ -64,3 +79,25 @@ PYBIND11_MODULE(HardwareSampling, m) {

init_version(m);
}

#if defined(HWS_MPI_SUPPORT_ENABLED)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you have a look at PyBind's "type caster"? Maybe they could be used for an automatic conversion between the MPI handles instead of this helper function? 🤔

/**
 * Converts a Python-side mpi4py communicator into the MPI_Comm handle it wraps.
 * Must live in the translation unit that calls import_mpi4py(), since only there the mpi4py C-API is initialized
 * and PyMPIComm_Type is available.
 *
 * @param py_comm the Python object that should be an mpi4py.MPI.Comm instance
 * @throws std::runtime_error if py_comm is not an mpi4py.MPI.Comm or if no MPI_Comm could be obtained from it
 * @return the MPI_Comm wrapped by py_comm
 */
MPI_Comm mpi_comm_from_python(py::object py_comm) {
    PyObject *const raw_handle = py_comm.ptr();

    // only mpi4py.MPI.Comm objects (including subclasses) are accepted
    const bool is_mpi4py_comm = PyObject_TypeCheck(raw_handle, &PyMPIComm_Type) != 0;
    if (!is_mpi4py_comm) {
        throw std::runtime_error{"expected mpi4py.MPI.Comm as communicator argument"};
    }

    // mpi4py hands out a pointer to the wrapped communicator handle
    MPI_Comm *const wrapped_comm = PyMPIComm_Get(raw_handle);
    if (wrapped_comm != nullptr) {
        return *wrapped_comm;
    }

    throw std::runtime_error{"could not extract MPI_Comm from mpi4py communicator"};
}
#endif
27 changes: 27 additions & 0 deletions bindings/mpi4py_communicator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* @file
* @author Tim Thüring
* @copyright 2024-today All Rights Reserved
* @license This file is released under the MIT license.
* See the LICENSE.md file in the project root for full license information.
*
* @brief Utility functions for transforming mpi4py communicators into C++ MPI communicators
*/

#ifndef HWS_BINDINGS_MPI4PY_COMMUNICATOR_HPP
#define HWS_BINDINGS_MPI4PY_COMMUNICATOR_HPP
#pragma once

#include "pybind11/pybind11.h"

#if defined(HWS_MPI_SUPPORT_ENABLED)
#include <mpi.h>
#endif

namespace py = pybind11;

#if defined(HWS_MPI_SUPPORT_ENABLED)
/**
 * @brief Extract the MPI communicator from a Python mpi4py communicator object.
 * @details Only declared if `HWS_MPI_SUPPORT_ENABLED` is defined. The definition lives in the bindings' main.cpp,
 *          i.e., in the same translation unit as the `import_mpi4py()` call that initializes the mpi4py C-API.
 * @param[in] py_comm a Python object that is expected to be an `mpi4py.MPI.Comm` instance
 * @throws std::runtime_error if @p py_comm is not an `mpi4py.MPI.Comm` or if no `MPI_Comm` could be extracted from it
 * @return the extracted `MPI_Comm`
 */
MPI_Comm mpi_comm_from_python(py::object py_comm);
#endif

#endif // HWS_BINDINGS_MPI4PY_COMMUNICATOR_HPP
59 changes: 59 additions & 0 deletions bindings/system_hardware_sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,71 @@
#include "relative_event.hpp" // hws::detail::relative_event
#include <string> // std::string

#if defined(HWS_MPI_SUPPORT_ENABLED)
#include "mpi4py_communicator.hpp"
#include <mpi.h>
#endif

namespace py = pybind11;

void init_system_hardware_sampler(py::module_ &m) {
#if defined(HWS_MPI_SUPPORT_ENABLED)
// bind mpi sampling mode enum
py::enum_<hws::detail::mpi_sampling_mode>(m, "MPISamplingMode")
.value("PER_RANK", hws::detail::mpi_sampling_mode::per_rank)
.value("WHOLE_NODE", hws::detail::mpi_sampling_mode::whole_node)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"PER_NODE". Maybe also add a "WORLD" mode to output it from everywhere? Or is this the case for "PER_NODE" (if yes, then this name is a bit misleading).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sampling modes define the rules the system hardware sampler follows when creating the hardware samplers and are not related to the output. "PER_RANK" explicitly requests the current behavior where every rank creates a system_hardware_sampler that tracks all devices it can see. This might lead to duplicates if multiple ranks are on one node.
"WHOLE_NODE" ensures that every device visible to at least one rank is sampled by exactly one hardware sampler, even if multiple ranks see the same device.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok this way it makes more sense. What is the default behavior? In my head, WHOLE_NODE would make the most sense 🤔
Also, how are events handled in case of WHOLE_NODE?

.export_values();
#endif
// bind the system hardware sampler class
py::class_<hws::system_hardware_sampler>(m, "SystemHardwareSampler")
.def(py::init<>(), "construct a new system hardware sampler with the default sampling interval")
.def(py::init<hws::sample_category>(), "construct a new system hardware sampler with the default sampling interval sampling only the provided sample_category samples")
.def(py::init<std::chrono::milliseconds>(), "construct a new system hardware sampler for with the specified sampling interval")
.def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new system hardware sampler for with the specified sampling interval sampling only the provided sample_category samples")
#if defined(HWS_MPI_SUPPORT_ENABLED)
// MPI-aware constructors

// (MPI_Comm, mode, category=all)
.def(py::init([](py::object py_comm,
hws::detail::mpi_sampling_mode mode,
hws::sample_category category) {
MPI_Comm comm = mpi_comm_from_python(py_comm);
return std::make_unique<hws::system_hardware_sampler>(comm, mode, category);
}),
py::arg("comm"),
py::arg("mode"),
py::arg("category") = hws::sample_category::all,
"construct a new system hardware sampler with the default sampling interval and MPI support using the given mpi4py communicator and sampling mode")

// (MPI_Comm, mode, sampling_interval, category=all)
.def(py::init([](py::object py_comm,
hws::detail::mpi_sampling_mode mode,
std::chrono::milliseconds sampling_interval,
hws::sample_category category) {
MPI_Comm comm = mpi_comm_from_python(py_comm);
return std::make_unique<hws::system_hardware_sampler>(comm, mode, sampling_interval, category);
}),
py::arg("comm"),
py::arg("mode"),
py::arg("sampling_interval"),
py::arg("category") = hws::sample_category::all,
"construct a new system hardware sampler with the specified sampling interval and MPI support using the given mpi4py communicator and sampling mode")

// Non-MPI overloads
.def("start", py::overload_cast<>(&hws::system_hardware_sampler::start_sampling), "start hardware sampling for all available hardware samplers")
.def("stop", py::overload_cast<>(&hws::system_hardware_sampler::stop_sampling), "stop hardware sampling for all available hardware samplers")
// MPI-aware overloads
.def("start", [](hws::system_hardware_sampler &self, py::object py_comm) {
MPI_Comm comm = mpi_comm_from_python(py_comm);
self.start_sampling(comm); }, py::arg("comm"), "start hardware sampling for all available hardware samplers; executes an MPI barrier on the given communicator before starting")
.def("stop", [](hws::system_hardware_sampler &self, py::object py_comm) {
MPI_Comm comm = mpi_comm_from_python(py_comm);
self.stop_sampling(comm); }, py::arg("comm"), "stop hardware sampling for all available hardware samplers; executes an MPI barrier on the given communicator after stopping")
#else
// No MPI support: only the simple overloads exist, no ambiguity
.def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers")
.def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers")
#endif
.def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers")
.def("resume", &hws::system_hardware_sampler::resume_sampling, "resume hardware sampling for all available hardware samplers")
.def("has_started", &hws::system_hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started for all hardware samplers")
Expand Down Expand Up @@ -67,5 +121,10 @@ void init_system_hardware_sampler(py::module_ &m) {
.def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, "get the i-th hardware sampler available for the whole system")
.def("dump_yaml", py::overload_cast<const std::string &>(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file")
.def("as_yaml_string", &hws::system_hardware_sampler::as_yaml_string, "return all hardware samples for all hardware samplers as YAML string")
#if defined(HWS_MPI_SUPPORT_ENABLED)
.def("dump_yaml_global", [](const hws::system_hardware_sampler &self, const std::string &filename, py::object py_comm) {
const MPI_Comm comm = mpi_comm_from_python(py_comm);
self.dump_yaml_global(filename, comm); }, py::arg("filename"), py::arg("comm"), "Let MPI rank 0 dump the hardware samples of all hardware samplers of all MPI ranks to the given YAML file using the provided mpi4py communicator.")
#endif
.def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("<hws.SystemHardwareSampler with {} samples>", self.num_samplers()); });
}
18 changes: 18 additions & 0 deletions include/hws/gpu_amd/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
#include <stdexcept> // std::runtime_error
#include <string> // std::string

#if defined(HWS_MPI_SUPPORT_ENABLED)
#include "hws/visible_gpu_device.hpp" // hws::detail::visible_gpu_device

#include <vector> // std::vector
#endif

namespace hws::detail {

/**
Expand Down Expand Up @@ -68,6 +74,18 @@ namespace hws::detail {
*/
[[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level);


#if defined(HWS_MPI_SUPPORT_ENABLED)

/**
* @brief creates a list of all visible AMD GPU devices
*
* @return a vector of all visible AMD GPU devices on the local node, each with its local index and physical ID
*/
[[nodiscard]] std::vector<visible_gpu_device> enumerate_local_amd_devices();

#endif

} // namespace hws::detail

#endif // HWS_GPU_AMD_UTILITY_HPP_
15 changes: 15 additions & 0 deletions include/hws/gpu_intel/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#include <string_view> // std::string_view
#include <vector> // std::vector

#if defined(HWS_MPI_SUPPORT_ENABLED)
#include "hws/visible_gpu_device.hpp" // hws::detail::visible_gpu_device
#endif

namespace hws::detail {

/**
Expand Down Expand Up @@ -75,6 +79,17 @@ namespace hws::detail {
*/
[[nodiscard]] std::string memory_location_to_name(zes_mem_loc_t mem_loc);

#if defined(HWS_MPI_SUPPORT_ENABLED)

/**
* @brief creates a list of all visible Intel GPU devices
*
* @return a vector of all visible Intel GPU devices on the local node, each with its local index and physical ID
*/
[[nodiscard]] std::vector<visible_gpu_device> enumerate_local_intel_devices();

#endif

} // namespace hws::detail

#endif // HWS_GPU_INTEL_UTILITY_HPP_
Loading