diff --git a/doc/env.md b/doc/env.md index 5921830d16..b39a4932ab 100644 --- a/doc/env.md +++ b/doc/env.md @@ -89,6 +89,14 @@ These environment variables also apply to third-party programs using the C++ int List of customized OP plugin libraries to load, such as `/path/to/plugin1.so:/path/to/plugin2.so` on Linux and `/path/to/plugin1.dll;/path/to/plugin2.dll` on Windows. ::: +:::{envvar} DP_BACKEND_PLUGIN_PATH + +**Type**: List of directories, split by `:` on Unix and `;` on Windows + +List of directories used to search for C/C++ backend plugin libraries before the directory that contains `libdeepmd_cc`. +This controls backend implementation plugins, such as `libdeepmd_backend_tf.so` and `libdeepmd_backend_pt.so`, and is separate from {envvar}`DP_PLUGIN_PATH`, which loads customized OP plugins. +::: + :::{envvar} DP_PROFILER {{ pytorch_icon }} Enable the built-in PyTorch Kineto profiler for the PyTorch C++ (inference) backend. diff --git a/doc/inference/cxx.md b/doc/inference/cxx.md index ec8a3248a1..e16f359440 100644 --- a/doc/inference/cxx.md +++ b/doc/inference/cxx.md @@ -28,7 +28,7 @@ See {cpp:class}`deepmd::DeepPot` for details. You can compile `infer_water.cpp` using `gcc`: ```sh -gcc infer_water.cpp -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_cc -lstdc++ -ltensorflow_cc -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water +gcc infer_water.cpp -L $deepmd_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_cc -lstdc++ -Wl,-rpath=$deepmd_root/lib -o infer_water ``` and then run the program: @@ -37,6 +37,37 @@ and then run the program: ./infer_water ``` +## Backend plugins + +The C and C++ libraries load backend implementations as runtime plugins. +An application links to `libdeepmd_cc` or `libdeepmd_c`; it does not need to link directly to TensorFlow, PyTorch, JAX, or Paddle. 
+When a model is opened, DeePMD-kit detects the backend from the model format and loads only the corresponding backend plugin. + +The plugin library names are: + +- TensorFlow: `libdeepmd_backend_tf.so` +- PyTorch: `libdeepmd_backend_pt.so` +- PyTorch exportable: `libdeepmd_backend_ptexpt.so` +- JAX: `libdeepmd_backend_jax.so` +- Paddle: `libdeepmd_backend_pd.so` + +On macOS the suffix is `.dylib`; on Windows the libraries use `.dll` without the `lib` prefix. +Native installs and the pre-compiled C library package place these plugins in the same `lib` directory as `libdeepmd_cc` and `libdeepmd_c`. +Python wheels place them in `deepmd/lib`. + +The backend plugin search order is: + +1. directories listed in {envvar}`DP_BACKEND_PLUGIN_PATH`, split by `:` on Unix and `;` on Windows; +1. the directory that contains `libdeepmd_cc`; +1. the platform dynamic loader search path for the bare plugin library name. + +If the requested plugin or its backend runtime cannot be loaded, only that backend fails with an `Unable to load ... backend plugin` error. +Other backends can still run as long as their own plugins and runtime libraries are available. +This also allows a no-backend `libdeepmd_cc` or `libdeepmd_c` build; install or copy the backend plugin next to the library, or set {envvar}`DP_BACKEND_PLUGIN_PATH`, before loading a model that uses that backend. +Build instructions for this layout are in [Install DeePMD-kit's C++ interface](../install/install-from-source.md#install-deepmd-kits-c-interface). + +{envvar}`DP_PLUGIN_PATH` is different: it is used for customized OP plugin libraries after the backend has been selected. + ## C interface Although C is harder to write, the C library will not be affected by different versions of C++ compilers. @@ -85,7 +116,7 @@ See {cpp:func}`DP_DeepPotCompute` for details. 
You can compile `infer_water.c` using `gcc`: ```sh -gcc infer_water.c -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water +gcc infer_water.c -L $deepmd_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -o infer_water ``` and then run the program: @@ -120,7 +151,7 @@ See {cpp:class}`deepmd::hpp::DeepPot` for details. You can compile `infer_water_hpp.cpp` using `gcc`: ```sh -gcc infer_water_hpp.cpp -L $deepmd_root/lib -L $tensorflow_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -Wl,-rpath=$tensorflow_root/lib -o infer_water_hpp +gcc infer_water_hpp.cpp -L $deepmd_root/lib -I $deepmd_root/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=$deepmd_root/lib -o infer_water_hpp ``` and then run the program: diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index ac7dc5a2c0..4d82ef4af7 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -370,12 +370,29 @@ The installation requires CMake 3.25.2 or later for all platforms (CPU, CUDA, an pip install -U cmake ``` -You must enable at least one backend. +For a backend-enabled install, enable at least one backend. +If you only want backend-neutral C/C++ libraries, use the backend-neutral tab below and provide backend plugins at runtime. If you enable two or more backends, these backend libraries must be built in a compatible way, e.g. using the same `_GLIBCXX_USE_CXX11_ABI` flag. We recommend using [conda packages](https://docs.deepmodeling.com/faq/conda.html) from [conda-forge](https://conda-forge.org), which are usually compatible to each other. 
::::{tab-set} +:::{tab-item} Backend-neutral C/C++ libraries + +To build only `libdeepmd_cc` and `libdeepmd_c` without backend plugins, leave all backend options disabled, or set them explicitly: + +```bash +cmake -DBUILD_CPP_IF=ON -DBUILD_PY_IF=OFF \ + -DENABLE_TENSORFLOW=OFF -DENABLE_PYTORCH=OFF \ + -DENABLE_JAX=OFF -DENABLE_PADDLE=OFF \ + -DCMAKE_INSTALL_PREFIX=$deepmd_root .. +``` + +This install does not include backend plugins. +Use backend plugin libraries from a backend-enabled build or package at runtime, either by placing them next to the installed C/C++ libraries or by setting {envvar}`DP_BACKEND_PLUGIN_PATH`. +See [C/C++ backend plugins](../inference/cxx.md#backend-plugins) for runtime plugin discovery. +::: + :::{tab-item} TensorFlow {{ tensorflow_icon }} / JAX {{ jax_icon }} I assume you have activated the TensorFlow Python environment and want to install DeePMD-kit into path `$deepmd_root`, then execute CMake diff --git a/source/api_c/CMakeLists.txt b/source/api_c/CMakeLists.txt index aca0c96476..03ca851c73 100644 --- a/source/api_c/CMakeLists.txt +++ b/source/api_c/CMakeLists.txt @@ -31,11 +31,21 @@ endif(BUILD_PY_IF) if(PACKAGE_C) message(STATUS "Packaging C API library") + get_property(DEEPMD_BACKEND_PLUGIN_TARGETS GLOBAL + PROPERTY DEEPMD_BACKEND_PLUGIN_TARGETS) + set(PACKAGE_C_RUNTIME_LIBRARIES "$" + "$") + foreach(_target ${DEEPMD_BACKEND_PLUGIN_TARGETS}) + list(APPEND PACKAGE_C_RUNTIME_LIBRARIES "$") + endforeach() + string(REPLACE ";" " " PACKAGE_C_RUNTIME_LIBRARIES_CODE + "${PACKAGE_C_RUNTIME_LIBRARIES}") # follow pypa/auditwheel convention + install(CODE "set(_dp_runtime_libraries ${PACKAGE_C_RUNTIME_LIBRARIES_CODE})") install( CODE [[ file(GET_RUNTIME_DEPENDENCIES - LIBRARIES $ $ + LIBRARIES ${_dp_runtime_libraries} RESOLVED_DEPENDENCIES_VAR _r_deps PRE_EXCLUDE_REGEXES "libgcc_s\\.so.*" "libstdc\\+\\+\\.so.*" @@ -59,6 +69,9 @@ if(PACKAGE_C) install(TARGETS ${libname} DESTINATION ${CMAKE_BINARY_DIR}/libdeepmd_c/lib) install(TARGETS 
${LIB_DEEPMD_OP} DESTINATION ${CMAKE_BINARY_DIR}/libdeepmd_c/lib) + foreach(_target ${DEEPMD_BACKEND_PLUGIN_TARGETS}) + install(TARGETS ${_target} DESTINATION ${CMAKE_BINARY_DIR}/libdeepmd_c/lib) + endforeach() set(cmake_files_install_dir "${CMAKE_BINARY_DIR}/libdeepmd_c/lib/cmake/${CMAKE_PROJECT_NAME}") diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt index 90b7c08449..501eefe842 100644 --- a/source/api_cc/CMakeLists.txt +++ b/source/api_cc/CMakeLists.txt @@ -5,32 +5,32 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/version.h.in version.h @ONLY) file(GLOB LIB_SRC src/*.cc src/*.cpp) file(GLOB INC_SRC include/*.h ${CMAKE_CURRENT_BINARY_DIR}/version.h) +set(DEEPMD_BACKEND_IMPL_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/src/DataModifierTF.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotJAX.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotPD.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotPT.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotPTExpt.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotTF.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepSpinPT.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepSpinPTExpt.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepSpinTF.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepTensorPT.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepTensorTF.cc) +set(DEEPMD_BACKEND_PLUGIN_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotJAXPlugin.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotPDPlugin.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotPTExptPlugin.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotPTPlugin.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/DeepPotTFPlugin.cc) +list(REMOVE_ITEM LIB_SRC ${DEEPMD_BACKEND_IMPL_SRC} + ${DEEPMD_BACKEND_PLUGIN_SRC}) + set(libname "${LIB_DEEPMD_CC}") add_library(${libname} SHARED ${LIB_SRC}) -# link: libdeepmd libdeepmd_op libtensorflow_cc libtensorflow_framework +# link: libdeepmd. Backend runtimes are loaded through backend plugins. 
target_link_libraries(${libname} PUBLIC ${LIB_DEEPMD}) -if(ENABLE_TENSORFLOW) - target_link_libraries(${libname} PRIVATE TensorFlow::tensorflow_cc - TensorFlow::tensorflow_framework) - target_compile_definitions(${libname} PRIVATE BUILD_TENSORFLOW) -endif() -if(ENABLE_PYTORCH AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}") - target_link_libraries(${libname} PRIVATE "${TORCH_LIBRARIES}") - target_compile_definitions(${libname} PRIVATE BUILD_PYTORCH) -endif() -if(ENABLE_JAX) - target_link_libraries(${libname} PRIVATE TensorFlow::tensorflow_c) - target_compile_definitions(${libname} PRIVATE BUILD_JAX) -endif() -if(ENABLE_PADDLE AND NOT BUILD_PY_IF) - target_link_libraries(${libname} PUBLIC "${PADDLE_LIBRARIES}") - target_compile_definitions(${libname} PUBLIC BUILD_PADDLE) - if(DP_VARIANT STREQUAL "rocm") - target_link_libraries(${libname} - PUBLIC "${hip_LIB_INSTALL_DIR}/libgalaxyhip.so") - endif() -endif() target_include_directories( ${libname} @@ -54,6 +54,92 @@ if(CMAKE_TESTING_ENABLED) endif() target_compile_features(${libname} PUBLIC cxx_std_11) +if(BUILD_PY_IF) + set(DEEPMD_API_CC_INSTALL_DESTINATION deepmd/lib/) +else() + set(DEEPMD_API_CC_INSTALL_DESTINATION lib/) +endif() + +function(deepmd_configure_backend_plugin target_name) + target_link_libraries(${target_name} PRIVATE ${LIB_DEEPMD_CC}) + target_include_directories( + ${target_name} + PRIVATE $ + $) + target_compile_features(${target_name} PUBLIC cxx_std_11) + set_target_properties( + ${target_name} + PROPERTIES INSTALL_RPATH "$ORIGIN;${BACKEND_LIBRARY_PATH}" + INSTALL_RPATH_USE_LINK_PATH TRUE + BUILD_RPATH "$ORIGIN/../op/tf;$ORIGIN/../op/pt;$ORIGIN/../op/pd") + if(CMAKE_TESTING_ENABLED) + target_link_libraries(${target_name} PRIVATE coverage_config) + endif() + install(TARGETS ${target_name} + DESTINATION ${DEEPMD_API_CC_INSTALL_DESTINATION}) + set_property(GLOBAL APPEND PROPERTY DEEPMD_BACKEND_PLUGIN_TARGETS + ${target_name}) +endfunction() + +if(ENABLE_TENSORFLOW) + # TensorFlow helper functions in 
common.cc are compiled with BUILD_TENSORFLOW + # into this plugin so the main libdeepmd_cc can stay backend-neutral. + add_library( + deepmd_backend_tf SHARED + src/DataModifierTF.cc src/DeepPotTF.cc src/DeepPotTFPlugin.cc + src/DeepSpinTF.cc src/DeepTensorTF.cc src/common.cc) + deepmd_configure_backend_plugin(deepmd_backend_tf) + if(UNIX AND NOT APPLE) + target_link_libraries( + deepmd_backend_tf + PRIVATE -Wl,--no-as-needed TensorFlow::tensorflow_cc -Wl,--as-needed + TensorFlow::tensorflow_framework) + else() + target_link_libraries( + deepmd_backend_tf PRIVATE TensorFlow::tensorflow_cc + TensorFlow::tensorflow_framework) + endif() + target_compile_definitions(deepmd_backend_tf PRIVATE BUILD_TENSORFLOW + TF_PRIVATE) + if(Protobuf_LIBRARY) + target_link_libraries(deepmd_backend_tf PRIVATE ${Protobuf_LIBRARY}) + endif() +endif() + +if(ENABLE_PYTORCH AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}") + add_library(deepmd_backend_pt SHARED src/DeepPotPT.cc src/DeepPotPTPlugin.cc + src/DeepSpinPT.cc src/DeepTensorPT.cc) + deepmd_configure_backend_plugin(deepmd_backend_pt) + target_link_libraries(deepmd_backend_pt PRIVATE "${TORCH_LIBRARIES}") + target_compile_definitions(deepmd_backend_pt PRIVATE BUILD_PYTORCH) + + add_library( + deepmd_backend_ptexpt SHARED + src/DeepPotPTExpt.cc src/DeepPotPTExptPlugin.cc src/DeepSpinPTExpt.cc) + deepmd_configure_backend_plugin(deepmd_backend_ptexpt) + target_link_libraries(deepmd_backend_ptexpt PRIVATE "${TORCH_LIBRARIES}") + target_compile_definitions(deepmd_backend_ptexpt PRIVATE BUILD_PYTORCH) +endif() + +if(ENABLE_JAX) + add_library(deepmd_backend_jax SHARED src/DeepPotJAX.cc + src/DeepPotJAXPlugin.cc) + deepmd_configure_backend_plugin(deepmd_backend_jax) + target_link_libraries(deepmd_backend_jax PRIVATE TensorFlow::tensorflow_c) + target_compile_definitions(deepmd_backend_jax PRIVATE BUILD_JAX) +endif() + +if(ENABLE_PADDLE AND NOT BUILD_PY_IF) + add_library(deepmd_backend_pd SHARED src/DeepPotPD.cc src/DeepPotPDPlugin.cc) + 
deepmd_configure_backend_plugin(deepmd_backend_pd) + target_link_libraries(deepmd_backend_pd PRIVATE "${PADDLE_LIBRARIES}") + target_compile_definitions(deepmd_backend_pd PRIVATE BUILD_PADDLE) + if(DP_VARIANT STREQUAL "rocm") + target_link_libraries(deepmd_backend_pd + PRIVATE "${hip_LIB_INSTALL_DIR}/libgalaxyhip.so") + endif() +endif() + if(BUILD_PY_IF) install(TARGETS ${libname} DESTINATION deepmd/lib/) else(BUILD_PY_IF) diff --git a/source/api_cc/include/BackendPlugin.h b/source/api_cc/include/BackendPlugin.h new file mode 100644 index 0000000000..4149917414 --- /dev/null +++ b/source/api_cc/include/BackendPlugin.h @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#pragma once + +#include +#include +#include + +#include "common.h" + +namespace deepmd { +class DeepPotBackend; +class DeepSpinBackend; +class DeepTensorBase; +class DipoleChargeModifierBase; + +/** + * @brief C ABI symbol names exported by backend plugin libraries. + **/ +constexpr const char* DEEPMD_DEEPPOT_PLUGIN_CREATE_SYMBOL = + "deepmd_create_deeppot_backend_v1"; +constexpr const char* DEEPMD_DEEPPOT_PLUGIN_DELETE_SYMBOL = + "deepmd_delete_deeppot_backend_v1"; +constexpr const char* DEEPMD_DEEPSPIN_PLUGIN_CREATE_SYMBOL = + "deepmd_create_deepspin_backend_v1"; +constexpr const char* DEEPMD_DEEPSPIN_PLUGIN_DELETE_SYMBOL = + "deepmd_delete_deepspin_backend_v1"; +constexpr const char* DEEPMD_DEEPTENSOR_PLUGIN_CREATE_SYMBOL = + "deepmd_create_deeptensor_backend_v1"; +constexpr const char* DEEPMD_DEEPTENSOR_PLUGIN_DELETE_SYMBOL = + "deepmd_delete_deeptensor_backend_v1"; +constexpr const char* DEEPMD_DIPOLE_CHARGE_MODIFIER_PLUGIN_CREATE_SYMBOL = + "deepmd_create_dipole_charge_modifier_backend_v1"; +constexpr const char* DEEPMD_DIPOLE_CHARGE_MODIFIER_PLUGIN_DELETE_SYMBOL = + "deepmd_delete_dipole_charge_modifier_backend_v1"; +constexpr const char* DEEPMD_CONVERT_PBTXT_TO_PB_PLUGIN_SYMBOL = + "deepmd_convert_pbtxt_to_pb_v1"; +constexpr const char* DEEPMD_BACKEND_PLUGIN_FREE_ERROR_SYMBOL = 
+ "deepmd_free_backend_error_v1"; + +/** + * @brief C ABI function pointer types exported by backend plugin libraries. + **/ +extern "C" { +typedef void* (*deepmd_create_deeppot_backend_fn)(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message); +typedef void (*deepmd_delete_deeppot_backend_fn)(void* backend); +typedef void* (*deepmd_create_deepspin_backend_fn)( + const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message); +typedef void (*deepmd_delete_deepspin_backend_fn)(void* backend); +typedef void* (*deepmd_create_deeptensor_backend_fn)(const char* model, + int gpu_rank, + const char* name_scope, + char** error_message); +typedef void (*deepmd_delete_deeptensor_backend_fn)(void* backend); +typedef void* (*deepmd_create_dipole_charge_modifier_backend_fn)( + const char* model, + int gpu_rank, + const char* name_scope, + char** error_message); +typedef void (*deepmd_delete_dipole_charge_modifier_backend_fn)(void* backend); +typedef int (*deepmd_convert_pbtxt_to_pb_fn)(const char* pbtxt, + const char* pb, + char** error_message); +typedef void (*deepmd_free_backend_error_fn)(char* error_message); +} + +/** + * @brief Create a DeepPot backend through a runtime backend plugin. + * @param[in] backend The backend plugin to load. + * @param[in] model The name of the frozen model file. + * @param[in] gpu_rank The GPU rank. + * @param[in] file_content The content of the model file. If it is not empty, + * the backend will read from the string instead of the file. + * @return The DeepPot backend instance. + **/ +std::shared_ptr create_deeppot_backend_from_plugin( + DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& file_content); + +/** + * @brief Create a DeepSpin backend through a runtime backend plugin. + * @param[in] backend The backend plugin to load. 
+ * @param[in] model The name of the frozen model file. + * @param[in] gpu_rank The GPU rank. + * @param[in] file_content The content of the model file. If it is not empty, + * the backend will read from the string instead of the file. + * @return The DeepSpin backend instance. + **/ +std::shared_ptr create_deepspin_backend_from_plugin( + DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& file_content); + +/** + * @brief Create a DeepTensor backend through a runtime backend plugin. + * @param[in] backend The backend plugin to load. + * @param[in] model The name of the frozen model file. + * @param[in] gpu_rank The GPU rank. + * @param[in] name_scope The TensorFlow name scope. + * @return The DeepTensor backend instance. + **/ +std::shared_ptr create_deeptensor_backend_from_plugin( + DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& name_scope); + +/** + * @brief Create a DipoleChargeModifier backend through a runtime backend + * plugin. + * @param[in] backend The backend plugin to load. + * @param[in] model The name of the frozen model file. + * @param[in] gpu_rank The GPU rank. + * @param[in] name_scope The TensorFlow name scope. + * @return The DipoleChargeModifier backend instance. + **/ +std::shared_ptr +create_dipole_charge_modifier_backend_from_plugin( + DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& name_scope); + +/** + * @brief Convert a TensorFlow pbtxt model to pb through the TensorFlow backend + * plugin. + * @param[in] fn_pb_txt The input pbtxt file. + * @param[in] fn_pb The output pb file. + **/ +void convert_pbtxt_to_pb_from_plugin(const std::string& fn_pb_txt, + const std::string& fn_pb); + +/** + * @brief Get the display name of a backend. + * @param[in] backend The backend enum value. + * @return The backend display name. 
+ **/ +std::string backend_name(DPBackend backend); + +} // namespace deepmd diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h index 2d6db36dbb..42bfb8a670 100644 --- a/source/api_cc/include/common.h +++ b/source/api_cc/include/common.h @@ -190,6 +190,11 @@ void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads); */ void load_op_library(); +/** + * @brief Dynamically load the OP library required by a backend. + */ +void load_op_library(DPBackend backend); + /** @struct deepmd::deepmd_exception **/ diff --git a/source/api_cc/include/commonTF.h b/source/api_cc/include/commonTF.h index 003b330308..12bf4361fc 100644 --- a/source/api_cc/include/commonTF.h +++ b/source/api_cc/include/commonTF.h @@ -1,4 +1,6 @@ // SPDX-License-Identifier: LGPL-3.0-or-later +#pragma once + #include #include diff --git a/source/api_cc/src/BackendPlugin.cc b/source/api_cc/src/BackendPlugin.cc new file mode 100644 index 0000000000..c5968238cf --- /dev/null +++ b/source/api_cc/src/BackendPlugin.cc @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#include "BackendPlugin.h" + +#include +#include +#include +#include +#include +#include + +#include "DataModifier.h" +#include "DeepPot.h" +#include "DeepSpin.h" +#include "DeepTensor.h" + +#if defined(_WIN32) +#include +#else +#include +#endif + +namespace { + +struct PluginHandle { + void* handle = nullptr; + deepmd::deepmd_create_deeppot_backend_fn create_deeppot = nullptr; + deepmd::deepmd_delete_deeppot_backend_fn delete_deeppot = nullptr; + deepmd::deepmd_free_backend_error_fn free_error = nullptr; + std::string path; +}; + +std::string path_separator() { +#if defined(_WIN32) + return ";"; +#else + return ":"; +#endif +} + +std::vector split_paths(const std::string& paths) { + std::vector result; + const std::string sep = path_separator(); + std::string::size_type begin = 0; + while (begin <= paths.size()) { + const std::string::size_type end = paths.find(sep, begin); + std::string item 
= paths.substr( + begin, end == std::string::npos ? std::string::npos : end - begin); + if (!item.empty()) { + result.push_back(item); + } + if (end == std::string::npos) { + break; + } + begin = end + sep.size(); + } + return result; +} + +std::string dirname_of(const std::string& path) { + const std::string::size_type pos = path.find_last_of("/\\"); + if (pos == std::string::npos) { + return ""; + } + return path.substr(0, pos); +} + +std::string join_path(const std::string& dir, const std::string& name) { + if (dir.empty()) { + return name; + } + const char last = dir[dir.size() - 1]; + if (last == '/' || last == '\\') { + return dir + name; + } +#if defined(_WIN32) + return dir + "\\" + name; +#else + return dir + "/" + name; +#endif +} + +std::string current_library_dir() { // directory of the module containing this function; "" on failure +#if defined(_WIN32) + HMODULE module = nullptr; + if (!GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + reinterpret_cast(&current_library_dir), + &module)) { + return ""; + } + char path[MAX_PATH]; + const DWORD size = GetModuleFileNameA(module, path, MAX_PATH); + if (size == 0 || size == MAX_PATH) { + return ""; + } + return dirname_of(std::string(path, size)); +#else + Dl_info info; + if (dladdr(reinterpret_cast(&current_library_dir), &info) == 0 || + info.dli_fname == nullptr) { + return ""; + } + return dirname_of(info.dli_fname); +#endif +} + +std::string library_file_name(const std::string& library_name) { +#if defined(_WIN32) + return library_name + ".dll"; +#elif defined(__APPLE__) + return "lib" + library_name + ".dylib"; +#else + return "lib" + library_name + ".so"; +#endif +} + +std::string backend_library_name(deepmd::DPBackend backend) { + switch (backend) { + case deepmd::DPBackend::TensorFlow: + return "deepmd_backend_tf"; + case deepmd::DPBackend::PyTorch: + return "deepmd_backend_pt"; + case deepmd::DPBackend::PyTorchExportable: + return "deepmd_backend_ptexpt"; + case deepmd::DPBackend::Paddle: + return "deepmd_backend_pd"; 
+ case deepmd::DPBackend::JAX: + return "deepmd_backend_jax"; + default: + throw deepmd::deepmd_exception("Unknown backend plugin request"); + } +} + +void* open_library(const std::string& path, std::string& error) { +#if defined(_WIN32) + HMODULE handle = LoadLibraryA(path.c_str()); + if (handle == nullptr) { + error = "LoadLibrary failed (error code " + std::to_string(GetLastError()) + ")"; // keep the Win32 failure reason + } + return reinterpret_cast(handle); +#else + dlerror(); + void* handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL); + if (handle == nullptr) { + const char* dl_error = dlerror(); + error = dl_error == nullptr ? "dlopen failed" : dl_error; + } + return handle; +#endif +} + +void* load_symbol(void* handle, + const std::string& path, + const char* symbol_name) { +#if defined(_WIN32) + void* symbol = reinterpret_cast( + GetProcAddress(reinterpret_cast(handle), symbol_name)); + if (symbol == nullptr) { + throw deepmd::deepmd_exception("Backend plugin " + path + + " does not export " + symbol_name); + } + return symbol; +#else + dlerror(); + void* symbol = dlsym(handle, symbol_name); + const char* dl_error = dlerror(); + if (symbol == nullptr || dl_error != nullptr) { // a null address is unusable: callers cast the result to a function pointer + throw deepmd::deepmd_exception("Backend plugin " + path + + " does not export " + symbol_name + ": " + + std::string(dl_error == nullptr ? "symbol resolves to a null address" : dl_error)); + } + return symbol; +#endif +} + +template +FunctionType load_typed_symbol(const std::shared_ptr& plugin, + const char* symbol_name) { + return reinterpret_cast( + load_symbol(plugin->handle, plugin->path, symbol_name)); +} + +std::shared_ptr load_plugin(deepmd::DPBackend backend) { + static std::mutex mutex; + static std::map> plugins; + + std::lock_guard lock(mutex); + const auto iter = plugins.find(backend); + if (iter != plugins.end()) { + return iter->second; + } + + const std::string backend_name = deepmd::backend_name(backend); + const std::string library_name = + library_file_name(backend_library_name(backend)); + std::vector candidates; + + const char* env_plugin_path = std::getenv("DP_BACKEND_PLUGIN_PATH"); + if (env_plugin_path != nullptr) { + 
for (const auto& dir : split_paths(env_plugin_path)) { + candidates.push_back(join_path(dir, library_name)); + } + } + + const std::string own_dir = current_library_dir(); + if (!own_dir.empty()) { + candidates.push_back(join_path(own_dir, library_name)); + } + candidates.push_back(library_name); + + std::ostringstream errors; + for (const auto& candidate : candidates) { + std::string error; + void* handle = open_library(candidate, error); + if (handle == nullptr) { + errors << "\n " << candidate << ": " << error; + continue; + } + + std::shared_ptr plugin(new PluginHandle); + plugin->handle = handle; + plugin->path = candidate; + plugin->create_deeppot = + reinterpret_cast(load_symbol( + handle, candidate, deepmd::DEEPMD_DEEPPOT_PLUGIN_CREATE_SYMBOL)); + plugin->delete_deeppot = + reinterpret_cast(load_symbol( + handle, candidate, deepmd::DEEPMD_DEEPPOT_PLUGIN_DELETE_SYMBOL)); + plugin->free_error = reinterpret_cast( + load_symbol(handle, candidate, + deepmd::DEEPMD_BACKEND_PLUGIN_FREE_ERROR_SYMBOL)); + plugins[backend] = plugin; + return plugin; + } + + throw deepmd::deepmd_exception( + "Unable to load " + backend_name + " backend plugin (" + library_name + + "). Set DP_BACKEND_PLUGIN_PATH or install the plugin next to " + "libdeepmd_cc. 
Tried:" + + errors.str()); +} + +} // namespace + +std::string deepmd::backend_name(DPBackend backend) { + switch (backend) { + case deepmd::DPBackend::TensorFlow: + return "TensorFlow"; + case deepmd::DPBackend::PyTorch: + return "PyTorch"; + case deepmd::DPBackend::PyTorchExportable: + return "PyTorch Exportable"; + case deepmd::DPBackend::Paddle: + return "PaddlePaddle"; + case deepmd::DPBackend::JAX: + return "JAX"; + case deepmd::DPBackend::Unknown: + return "Unknown"; + } + return "Unknown"; +} + +static std::string take_plugin_error( + const std::shared_ptr& plugin, char* error_message) { + std::string message; + if (error_message != nullptr) { + message = error_message; + plugin->free_error(error_message); + } + return message; +} + +template +void* call_backend_create(const std::shared_ptr& plugin, + char*& error_message, + const std::string& action, + Function create) { + try { + return create(); + } catch (const std::exception& e) { + std::string message = take_plugin_error(plugin, error_message); + throw deepmd::deepmd_exception(action + " from " + plugin->path + + ": plugin threw an exception: " + e.what() + + (message.empty() ? "" : ": " + message)); + } catch (...) { + std::string message = take_plugin_error(plugin, error_message); + throw deepmd::deepmd_exception(action + " from " + plugin->path + + ": plugin threw an unknown exception" + + (message.empty() ? 
"" : ": " + message)); + } +} + +std::shared_ptr +deepmd::create_deeppot_backend_from_plugin(DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& file_content) { + std::shared_ptr plugin = load_plugin(backend); + + char* error_message = nullptr; + const std::string action = + "Failed to create " + backend_name(backend) + " DeepPot backend"; + void* backend_handle = + call_backend_create(plugin, error_message, action, [&]() { + return plugin->create_deeppot(model.c_str(), gpu_rank, + file_content.data(), file_content.size(), + &error_message); + }); + + if (backend_handle == nullptr) { + std::string message = take_plugin_error(plugin, error_message); + throw deepmd::deepmd_exception(action + " from " + plugin->path + + (message.empty() ? "" : ": " + message)); + } + + if (error_message != nullptr) { + plugin->free_error(error_message); + } + + return std::shared_ptr( + static_cast(backend_handle), + [plugin](DeepPotBackend* ptr) { plugin->delete_deeppot(ptr); }); +} + +std::shared_ptr +deepmd::create_deepspin_backend_from_plugin(DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& file_content) { + std::shared_ptr plugin = load_plugin(backend); + deepmd_create_deepspin_backend_fn create_deepspin = + load_typed_symbol( + plugin, DEEPMD_DEEPSPIN_PLUGIN_CREATE_SYMBOL); + deepmd_delete_deepspin_backend_fn delete_deepspin = + load_typed_symbol( + plugin, DEEPMD_DEEPSPIN_PLUGIN_DELETE_SYMBOL); + + char* error_message = nullptr; + const std::string action = + "Failed to create " + backend_name(backend) + " DeepSpin backend"; + void* backend_handle = + call_backend_create(plugin, error_message, action, [&]() { + return create_deepspin(model.c_str(), gpu_rank, file_content.data(), + file_content.size(), &error_message); + }); + if (backend_handle == nullptr) { + std::string message = take_plugin_error(plugin, error_message); + throw deepmd::deepmd_exception(action + " from " + plugin->path + + 
(message.empty() ? "" : ": " + message)); + } + if (error_message != nullptr) { + plugin->free_error(error_message); + } + return std::shared_ptr( + static_cast(backend_handle), + [plugin, delete_deepspin](DeepSpinBackend* ptr) { + delete_deepspin(ptr); + }); +} + +std::shared_ptr +deepmd::create_deeptensor_backend_from_plugin(DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& name_scope) { + std::shared_ptr plugin = load_plugin(backend); + deepmd_create_deeptensor_backend_fn create_deeptensor = + load_typed_symbol( + plugin, DEEPMD_DEEPTENSOR_PLUGIN_CREATE_SYMBOL); + deepmd_delete_deeptensor_backend_fn delete_deeptensor = + load_typed_symbol( + plugin, DEEPMD_DEEPTENSOR_PLUGIN_DELETE_SYMBOL); + + char* error_message = nullptr; + const std::string action = + "Failed to create " + backend_name(backend) + " DeepTensor backend"; + void* backend_handle = + call_backend_create(plugin, error_message, action, [&]() { + return create_deeptensor(model.c_str(), gpu_rank, name_scope.c_str(), + &error_message); + }); + if (backend_handle == nullptr) { + std::string message = take_plugin_error(plugin, error_message); + throw deepmd::deepmd_exception(action + " from " + plugin->path + + (message.empty() ? 
"" : ": " + message)); + } + if (error_message != nullptr) { + plugin->free_error(error_message); + } + return std::shared_ptr( + static_cast(backend_handle), + [plugin, delete_deeptensor](DeepTensorBase* ptr) { + delete_deeptensor(ptr); + }); +} + +std::shared_ptr +deepmd::create_dipole_charge_modifier_backend_from_plugin( + DPBackend backend, + const std::string& model, + const int& gpu_rank, + const std::string& name_scope) { + std::shared_ptr plugin = load_plugin(backend); + deepmd_create_dipole_charge_modifier_backend_fn create_modifier = + load_typed_symbol( + plugin, DEEPMD_DIPOLE_CHARGE_MODIFIER_PLUGIN_CREATE_SYMBOL); + deepmd_delete_dipole_charge_modifier_backend_fn delete_modifier = + load_typed_symbol( + plugin, DEEPMD_DIPOLE_CHARGE_MODIFIER_PLUGIN_DELETE_SYMBOL); + + char* error_message = nullptr; + const std::string action = "Failed to create " + backend_name(backend) + + " DipoleChargeModifier backend"; + void* backend_handle = + call_backend_create(plugin, error_message, action, [&]() { + return create_modifier(model.c_str(), gpu_rank, name_scope.c_str(), + &error_message); + }); + if (backend_handle == nullptr) { + std::string message = take_plugin_error(plugin, error_message); + throw deepmd::deepmd_exception(action + " from " + plugin->path + + (message.empty() ? 
"" : ": " + message)); + } + if (error_message != nullptr) { + plugin->free_error(error_message); + } + return std::shared_ptr( + static_cast(backend_handle), + [plugin, delete_modifier](DipoleChargeModifierBase* ptr) { + delete_modifier(ptr); + }); +} + +void deepmd::convert_pbtxt_to_pb_from_plugin(const std::string& fn_pb_txt, + const std::string& fn_pb) { + std::shared_ptr plugin = load_plugin(DPBackend::TensorFlow); + deepmd_convert_pbtxt_to_pb_fn convert_pbtxt_to_pb = + load_typed_symbol( + plugin, DEEPMD_CONVERT_PBTXT_TO_PB_PLUGIN_SYMBOL); + + char* error_message = nullptr; + int status = 1; + try { + status = + convert_pbtxt_to_pb(fn_pb_txt.c_str(), fn_pb.c_str(), &error_message); + } catch (const deepmd::deepmd_exception&) { + throw; + } catch (const std::exception& e) { + throw deepmd::deepmd_exception("Backend plugin " + plugin->path + + " threw an exception: " + e.what()); + } catch (...) { + throw deepmd::deepmd_exception("Backend plugin " + plugin->path + + " threw an unknown exception"); + } + + if (status != 0) { + std::string message = take_plugin_error(plugin, error_message); + throw deepmd::deepmd_exception( + "Failed to convert pbtxt with TensorFlow backend plugin from " + + plugin->path + (message.empty() ? 
"" : ": " + message)); + } + + if (error_message != nullptr) { + plugin->free_error(error_message); + } +} diff --git a/source/api_cc/src/BackendPluginFactory.h b/source/api_cc/src/BackendPluginFactory.h new file mode 100644 index 0000000000..0d0330b5ca --- /dev/null +++ b/source/api_cc/src/BackendPluginFactory.h @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#pragma once + +#include +#include +#include +#include + +#include "BackendPlugin.h" +#include "DataModifier.h" +#include "DeepPot.h" +#include "DeepSpin.h" +#include "DeepTensor.h" +#include "errors.h" + +namespace deepmd { +namespace plugin { + +inline void set_error_message(char** error_message, + const std::string& message) { + if (error_message == nullptr) { + return; + } + char* buffer = static_cast(std::malloc(message.size() + 1)); + if (buffer == nullptr) { + *error_message = nullptr; + return; + } + std::memcpy(buffer, message.c_str(), message.size() + 1); + *error_message = buffer; +} + +template +void* create_deeppot_backend(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + try { + const std::string content(file_content == nullptr ? "" : file_content, + file_content_size); + return static_cast( + new Backend(model == nullptr ? "" : model, gpu_rank, content)); + } catch (const deepmd::deepmd_exception& e) { + set_error_message(error_message, e.what()); + } catch (const std::exception& e) { + set_error_message(error_message, e.what()); + } catch (...) { + set_error_message(error_message, "unknown backend plugin error"); + } + return nullptr; +} + +template +void* create_deepspin_backend(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + try { + const std::string content(file_content == nullptr ? "" : file_content, + file_content_size); + return static_cast( + new Backend(model == nullptr ? 
"" : model, gpu_rank, content)); + } catch (const deepmd::deepmd_exception& e) { + set_error_message(error_message, e.what()); + } catch (const std::exception& e) { + set_error_message(error_message, e.what()); + } catch (...) { + set_error_message(error_message, "unknown backend plugin error"); + } + return nullptr; +} + +template +void* create_deeptensor_backend(const char* model, + int gpu_rank, + const char* name_scope, + char** error_message) { + try { + return static_cast( + new Backend(model == nullptr ? "" : model, gpu_rank, + name_scope == nullptr ? "" : name_scope)); + } catch (const deepmd::deepmd_exception& e) { + set_error_message(error_message, e.what()); + } catch (const std::exception& e) { + set_error_message(error_message, e.what()); + } catch (...) { + set_error_message(error_message, "unknown backend plugin error"); + } + return nullptr; +} + +template +void* create_dipole_charge_modifier_backend(const char* model, + int gpu_rank, + const char* name_scope, + char** error_message) { + try { + return static_cast( + new Backend(model == nullptr ? "" : model, gpu_rank, + name_scope == nullptr ? "" : name_scope)); + } catch (const deepmd::deepmd_exception& e) { + set_error_message(error_message, e.what()); + } catch (const std::exception& e) { + set_error_message(error_message, e.what()); + } catch (...) 
{ + set_error_message(error_message, "unknown backend plugin error"); + } + return nullptr; +} + +inline void delete_deeppot_backend(void* backend) { + delete static_cast(backend); +} + +inline void delete_deepspin_backend(void* backend) { + delete static_cast(backend); +} + +inline void delete_deeptensor_backend(void* backend) { + delete static_cast(backend); +} + +inline void delete_dipole_charge_modifier_backend(void* backend) { + delete static_cast(backend); +} + +inline void free_error_message(char* error_message) { + std::free(error_message); +} + +} // namespace plugin +} // namespace deepmd diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc index 4f319b4f66..ac363b4c47 100644 --- a/source/api_cc/src/DataModifier.cc +++ b/source/api_cc/src/DataModifier.cc @@ -1,9 +1,7 @@ // SPDX-License-Identifier: LGPL-3.0-or-later #include "DataModifier.h" -#ifdef BUILD_TENSORFLOW -#include "DataModifierTF.h" -#endif +#include "BackendPlugin.h" #include "common.h" using namespace deepmd; @@ -30,16 +28,17 @@ void DipoleChargeModifier::init(const std::string& model, } const DPBackend backend = get_backend(model); if (deepmd::DPBackend::TensorFlow == backend) { -#ifdef BUILD_TENSORFLOW - dcm = std::make_shared(model, gpu_rank, - name_scope_); -#else - throw deepmd::deepmd_exception("TensorFlow backend is not built"); -#endif + dcm = create_dipole_charge_modifier_backend_from_plugin( + backend, model, gpu_rank, name_scope_); } else if (deepmd::DPBackend::PyTorch == backend) { throw deepmd::deepmd_exception("PyTorch backend is not supported yet"); + } else if (deepmd::DPBackend::PyTorchExportable == backend) { + throw deepmd::deepmd_exception( + "PyTorch Exportable backend is not supported yet"); } else if (deepmd::DPBackend::Paddle == backend) { throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet"); + } else if (deepmd::DPBackend::JAX == backend) { + throw deepmd::deepmd_exception("JAX backend is not supported yet"); } else 
{ throw deepmd::deepmd_exception("Unknown file type"); } diff --git a/source/api_cc/src/DataModifierTF.cc b/source/api_cc/src/DataModifierTF.cc index 80cf6120a3..69da0cc57f 100644 --- a/source/api_cc/src/DataModifierTF.cc +++ b/source/api_cc/src/DataModifierTF.cc @@ -39,7 +39,7 @@ void DipoleChargeModifierTF::init(const std::string& model, get_env_nthreads(num_intra_nthreads, num_inter_nthreads); options.config.set_inter_op_parallelism_threads(num_inter_nthreads); options.config.set_intra_op_parallelism_threads(num_intra_nthreads); - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::TensorFlow); int gpu_num = -1; #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM DPGetDeviceCount(gpu_num); // check current device environment diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc index ae543e1e31..22b0e95f8e 100644 --- a/source/api_cc/src/DeepPot.cc +++ b/source/api_cc/src/DeepPot.cc @@ -5,20 +5,8 @@ #include #include "AtomMap.h" +#include "BackendPlugin.h" #include "common.h" -#ifdef BUILD_TENSORFLOW -#include "DeepPotTF.h" -#endif -#ifdef BUILD_PYTORCH -#include "DeepPotPT.h" -#include "DeepPotPTExpt.h" -#endif -#if defined(BUILD_TENSORFLOW) || defined(BUILD_JAX) -#include "DeepPotJAX.h" -#endif -#ifdef BUILD_PADDLE -#include "DeepPotPD.h" -#endif #include "device.h" using namespace deepmd; @@ -44,43 +32,11 @@ void DeepPot::init(const std::string& model, return; } const DPBackend backend = get_backend(model); - if (deepmd::DPBackend::TensorFlow == backend) { -#ifdef BUILD_TENSORFLOW - dp = std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception("TensorFlow backend is not built"); -#endif - } else if (deepmd::DPBackend::PyTorch == backend) { -#ifdef BUILD_PYTORCH - dp = std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception("PyTorch backend is not built"); -#endif - } else if (deepmd::DPBackend::PyTorchExportable == backend) { -#if defined(BUILD_PYTORCH) && BUILD_PT_EXPT 
- dp = std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception( - "PyTorch Exportable backend is not available (missing AOTInductor " - "headers at build time)"); -#endif - } else if (deepmd::DPBackend::Paddle == backend) { -#ifdef BUILD_PADDLE - dp = std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet"); -#endif - } else if (deepmd::DPBackend::JAX == backend) { -#if defined(BUILD_TENSORFLOW) || defined(BUILD_JAX) - dp = std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception( - "TensorFlow backend is not built, which is used to load JAX2TF " - "SavedModels"); -#endif - } else { + if (deepmd::DPBackend::Unknown == backend) { throw deepmd::deepmd_exception("Unknown file type"); } + dp = create_deeppot_backend_from_plugin(backend, model, gpu_rank, + file_content); inited = true; dpbase = dp; // make sure the base funtions work } diff --git a/source/api_cc/src/DeepPotJAXPlugin.cc b/source/api_cc/src/DeepPotJAXPlugin.cc new file mode 100644 index 0000000000..ac2d5485a1 --- /dev/null +++ b/source/api_cc/src/DeepPotJAXPlugin.cc @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#if defined(BUILD_TENSORFLOW) || defined(BUILD_JAX) + +#include "BackendPluginFactory.h" +#include "DeepPotJAX.h" + +extern "C" void* deepmd_create_deeppot_backend_v1(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + return deepmd::plugin::create_deeppot_backend( + model, gpu_rank, file_content, file_content_size, error_message); +} + +extern "C" void deepmd_delete_deeppot_backend_v1(void* backend) { + deepmd::plugin::delete_deeppot_backend(backend); +} + +extern "C" void deepmd_free_backend_error_v1(char* error_message) { + deepmd::plugin::free_error_message(error_message); +} + +#endif // defined(BUILD_TENSORFLOW) || defined(BUILD_JAX) diff --git 
a/source/api_cc/src/DeepPotPD.cc b/source/api_cc/src/DeepPotPD.cc index 51d45a9182..9859b29da0 100644 --- a/source/api_cc/src/DeepPotPD.cc +++ b/source/api_cc/src/DeepPotPD.cc @@ -202,7 +202,7 @@ void DeepPotPD::init(const std::string& model, << std::endl; return; } - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::Paddle); // NOTE: Only support 1 GPU now. int gpu_num = 1; if (gpu_num > 0) { diff --git a/source/api_cc/src/DeepPotPDPlugin.cc b/source/api_cc/src/DeepPotPDPlugin.cc new file mode 100644 index 0000000000..fbc920cfb5 --- /dev/null +++ b/source/api_cc/src/DeepPotPDPlugin.cc @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#ifdef BUILD_PADDLE + +#include "BackendPluginFactory.h" +#include "DeepPotPD.h" + +extern "C" void* deepmd_create_deeppot_backend_v1(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + return deepmd::plugin::create_deeppot_backend( + model, gpu_rank, file_content, file_content_size, error_message); +} + +extern "C" void deepmd_delete_deeppot_backend_v1(void* backend) { + deepmd::plugin::delete_deeppot_backend(backend); +} + +extern "C" void deepmd_free_backend_error_v1(char* error_message) { + deepmd::plugin::free_error_message(error_message); +} + +#endif // BUILD_PADDLE diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index d69dbb8f82..e434989c31 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -53,7 +53,7 @@ void DeepPotPT::init(const std::string& model, << std::endl; return; } - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::PyTorch); int gpu_num = torch::cuda::device_count(); gpu_id = (gpu_num > 0) ? 
(gpu_rank % gpu_num) : 0; gpu_enabled = torch::cuda::is_available(); diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc index c8c1bfcfad..33f036757f 100644 --- a/source/api_cc/src/DeepPotPTExpt.cc +++ b/source/api_cc/src/DeepPotPTExpt.cc @@ -69,7 +69,7 @@ void DeepPotPTExpt::init(const std::string& model, // before the AOTI module loads. Without this, multi-rank GNN .pt2 // archives fail at pair_style time with // ``Could not find schema for deepmd_export::border_op``. - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::PyTorchExportable); if (!file_content.empty()) { throw deepmd::deepmd_exception( diff --git a/source/api_cc/src/DeepPotPTExptPlugin.cc b/source/api_cc/src/DeepPotPTExptPlugin.cc new file mode 100644 index 0000000000..cfda4ff6f9 --- /dev/null +++ b/source/api_cc/src/DeepPotPTExptPlugin.cc @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#include "DeepPotPTExpt.h" +#include "DeepSpinPTExpt.h" + +#ifdef BUILD_PYTORCH + +#include "BackendPluginFactory.h" + +extern "C" void* deepmd_create_deeppot_backend_v1(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { +#if BUILD_PT_EXPT + return deepmd::plugin::create_deeppot_backend( + model, gpu_rank, file_content, file_content_size, error_message); +#else + deepmd::plugin::set_error_message( + error_message, + "PyTorch Exportable backend is not available (missing AOTInductor " + "headers at build time)"); + return nullptr; +#endif +} + +extern "C" void deepmd_delete_deeppot_backend_v1(void* backend) { + deepmd::plugin::delete_deeppot_backend(backend); +} + +extern "C" void* deepmd_create_deepspin_backend_v1( + const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { +#if BUILD_PT_EXPT_SPIN + return deepmd::plugin::create_deepspin_backend( + model, gpu_rank, file_content, file_content_size, error_message); 
+#else + deepmd::plugin::set_error_message( + error_message, + "PyTorch Exportable spin backend is not available (missing AOTInductor " + "headers at build time)"); + return nullptr; +#endif +} + +extern "C" void deepmd_delete_deepspin_backend_v1(void* backend) { + deepmd::plugin::delete_deepspin_backend(backend); +} + +extern "C" void deepmd_free_backend_error_v1(char* error_message) { + deepmd::plugin::free_error_message(error_message); +} + +#endif // BUILD_PYTORCH diff --git a/source/api_cc/src/DeepPotPTPlugin.cc b/source/api_cc/src/DeepPotPTPlugin.cc new file mode 100644 index 0000000000..ae584b12b6 --- /dev/null +++ b/source/api_cc/src/DeepPotPTPlugin.cc @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#ifdef BUILD_PYTORCH + +#include "BackendPluginFactory.h" +#include "DeepPotPT.h" +#include "DeepSpinPT.h" +#include "DeepTensorPT.h" + +extern "C" void* deepmd_create_deeppot_backend_v1(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + return deepmd::plugin::create_deeppot_backend( + model, gpu_rank, file_content, file_content_size, error_message); +} + +extern "C" void deepmd_delete_deeppot_backend_v1(void* backend) { + deepmd::plugin::delete_deeppot_backend(backend); +} + +extern "C" void* deepmd_create_deepspin_backend_v1( + const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + return deepmd::plugin::create_deepspin_backend( + model, gpu_rank, file_content, file_content_size, error_message); +} + +extern "C" void deepmd_delete_deepspin_backend_v1(void* backend) { + deepmd::plugin::delete_deepspin_backend(backend); +} + +extern "C" void* deepmd_create_deeptensor_backend_v1(const char* model, + int gpu_rank, + const char* name_scope, + char** error_message) { + return deepmd::plugin::create_deeptensor_backend( + model, gpu_rank, name_scope, error_message); +} + +extern "C" void 
deepmd_delete_deeptensor_backend_v1(void* backend) { + deepmd::plugin::delete_deeptensor_backend(backend); +} + +extern "C" void deepmd_free_backend_error_v1(char* error_message) { + deepmd::plugin::free_error_message(error_message); +} + +#endif // BUILD_PYTORCH diff --git a/source/api_cc/src/DeepPotTF.cc b/source/api_cc/src/DeepPotTF.cc index 7656590ea6..18071fa866 100644 --- a/source/api_cc/src/DeepPotTF.cc +++ b/source/api_cc/src/DeepPotTF.cc @@ -431,7 +431,7 @@ void DeepPotTF::init(const std::string& model, get_env_nthreads(num_intra_nthreads, num_inter_nthreads); options.config.set_inter_op_parallelism_threads(num_inter_nthreads); options.config.set_intra_op_parallelism_threads(num_intra_nthreads); - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::TensorFlow); if (file_content.size() == 0) { check_status(ReadBinaryProto(Env::Default(), model, graph_def)); diff --git a/source/api_cc/src/DeepPotTFPlugin.cc b/source/api_cc/src/DeepPotTFPlugin.cc new file mode 100644 index 0000000000..5e20b6fcf7 --- /dev/null +++ b/source/api_cc/src/DeepPotTFPlugin.cc @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +#ifdef BUILD_TENSORFLOW + +#include +#include + +#include "BackendPluginFactory.h" +#include "DataModifierTF.h" +#include "DeepPotTF.h" +#include "DeepSpinTF.h" +#include "DeepTensorTF.h" +#include "commonTF.h" +#include "google/protobuf/text_format.h" + +namespace { + +void convert_pbtxt_to_pb_impl(const char* pbtxt, const char* pb) { + if (pbtxt == nullptr || pb == nullptr) { + throw deepmd::deepmd_exception("pbtxt and pb paths must not be null"); + } + + std::ifstream input(pbtxt); + if (!input.is_open()) { + throw deepmd::deepmd_exception(std::string("Failed to open file: ") + + pbtxt); + } + + std::stringstream buffer; + buffer << input.rdbuf(); + + tensorflow::GraphDef graph_def; + if (!tensorflow::protobuf::TextFormat::ParseFromString(buffer.str(), + &graph_def)) { + throw deepmd::deepmd_exception(std::string("Failed to 
parse pbtxt: ") + + pbtxt); + } + + std::ofstream output(pb, std::ios::out | std::ios::trunc | std::ios::binary); + if (!output.is_open()) { + throw deepmd::deepmd_exception(std::string("Failed to open file: ") + pb); + } + if (!graph_def.SerializeToOstream(&output)) { + throw deepmd::deepmd_exception(std::string("Failed to write pb: ") + pb); + } +} + +} // namespace + +extern "C" void* deepmd_create_deeppot_backend_v1(const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + return deepmd::plugin::create_deeppot_backend( + model, gpu_rank, file_content, file_content_size, error_message); +} + +extern "C" void deepmd_delete_deeppot_backend_v1(void* backend) { + deepmd::plugin::delete_deeppot_backend(backend); +} + +extern "C" void* deepmd_create_deepspin_backend_v1( + const char* model, + int gpu_rank, + const char* file_content, + std::size_t file_content_size, + char** error_message) { + return deepmd::plugin::create_deepspin_backend( + model, gpu_rank, file_content, file_content_size, error_message); +} + +extern "C" void deepmd_delete_deepspin_backend_v1(void* backend) { + deepmd::plugin::delete_deepspin_backend(backend); +} + +extern "C" void* deepmd_create_deeptensor_backend_v1(const char* model, + int gpu_rank, + const char* name_scope, + char** error_message) { + return deepmd::plugin::create_deeptensor_backend( + model, gpu_rank, name_scope, error_message); +} + +extern "C" void deepmd_delete_deeptensor_backend_v1(void* backend) { + deepmd::plugin::delete_deeptensor_backend(backend); +} + +extern "C" void* deepmd_create_dipole_charge_modifier_backend_v1( + const char* model, + int gpu_rank, + const char* name_scope, + char** error_message) { + return deepmd::plugin::create_dipole_charge_modifier_backend< + deepmd::DipoleChargeModifierTF>(model, gpu_rank, name_scope, + error_message); +} + +extern "C" void deepmd_delete_dipole_charge_modifier_backend_v1(void* backend) { + 
deepmd::plugin::delete_dipole_charge_modifier_backend(backend); +} + +extern "C" int deepmd_convert_pbtxt_to_pb_v1(const char* pbtxt, + const char* pb, + char** error_message) { + try { + convert_pbtxt_to_pb_impl(pbtxt, pb); + return 0; + } catch (const deepmd::deepmd_exception& e) { + deepmd::plugin::set_error_message(error_message, e.what()); + } catch (const std::exception& e) { + deepmd::plugin::set_error_message(error_message, e.what()); + } catch (...) { + deepmd::plugin::set_error_message(error_message, + "unknown backend plugin error"); + } + return 1; +} + +extern "C" void deepmd_free_backend_error_v1(char* error_message) { + deepmd::plugin::free_error_message(error_message); +} + +#endif // BUILD_TENSORFLOW diff --git a/source/api_cc/src/DeepSpin.cc b/source/api_cc/src/DeepSpin.cc index 0ed694f280..047b8d85a1 100644 --- a/source/api_cc/src/DeepSpin.cc +++ b/source/api_cc/src/DeepSpin.cc @@ -5,14 +5,8 @@ #include #include "AtomMap.h" +#include "BackendPlugin.h" #include "common.h" -#ifdef BUILD_TENSORFLOW -#include "DeepSpinTF.h" -#endif -#ifdef BUILD_PYTORCH -#include "DeepSpinPT.h" -#include "DeepSpinPTExpt.h" -#endif #include "device.h" using namespace deepmd; @@ -38,28 +32,15 @@ void DeepSpin::init(const std::string& model, return; } const DPBackend backend = get_backend(model); - if (deepmd::DPBackend::TensorFlow == backend) { -#ifdef BUILD_TENSORFLOW - dp = std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception("TensorFlow backend is not built"); -#endif - } else if (deepmd::DPBackend::PyTorch == backend) { -#ifdef BUILD_PYTORCH - dp = std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception("PyTorch backend is not built"); -#endif - } else if (deepmd::DPBackend::PyTorchExportable == backend) { -#if defined(BUILD_PYTORCH) && BUILD_PT_EXPT_SPIN - dp = - std::make_shared(model, gpu_rank, file_content); -#else - throw deepmd::deepmd_exception( - "PyTorch Exportable backend is not 
available"); -#endif + if (deepmd::DPBackend::TensorFlow == backend || + deepmd::DPBackend::PyTorch == backend || + deepmd::DPBackend::PyTorchExportable == backend) { + dp = create_deepspin_backend_from_plugin(backend, model, gpu_rank, + file_content); } else if (deepmd::DPBackend::Paddle == backend) { throw deepmd::deepmd_exception("PaddlePaddle backend is not supported yet"); + } else if (deepmd::DPBackend::JAX == backend) { + throw deepmd::deepmd_exception("JAX backend is not supported yet"); } else { throw deepmd::deepmd_exception("Unknown file type"); } diff --git a/source/api_cc/src/DeepSpinPT.cc b/source/api_cc/src/DeepSpinPT.cc index 5add377045..e934706a1b 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -52,7 +52,7 @@ void DeepSpinPT::init(const std::string& model, << std::endl; return; } - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::PyTorch); int gpu_num = torch::cuda::device_count(); if (gpu_num > 0) { gpu_id = gpu_rank % gpu_num; diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc index 9edd51474b..7ec4f0d0a7 100644 --- a/source/api_cc/src/DeepSpinPTExpt.cc +++ b/source/api_cc/src/DeepSpinPTExpt.cc @@ -67,7 +67,7 @@ void DeepSpinPTExpt::init(const std::string& model, // Load libdeepmd_op_pt.so so deepmd_export::* schemas are visible // to torch's dispatcher before the AOTI module loads. See // DeepPotPTExpt::init for the full rationale. 
- deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::PyTorchExportable); if (!file_content.empty()) { throw deepmd::deepmd_exception( diff --git a/source/api_cc/src/DeepSpinTF.cc b/source/api_cc/src/DeepSpinTF.cc index 091fca64c2..1981f025e3 100644 --- a/source/api_cc/src/DeepSpinTF.cc +++ b/source/api_cc/src/DeepSpinTF.cc @@ -431,7 +431,7 @@ void DeepSpinTF::init(const std::string& model, get_env_nthreads(num_intra_nthreads, num_inter_nthreads); options.config.set_inter_op_parallelism_threads(num_inter_nthreads); options.config.set_intra_op_parallelism_threads(num_intra_nthreads); - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::TensorFlow); if (file_content.size() == 0) { check_status(ReadBinaryProto(Env::Default(), model, graph_def)); diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc index ce9ca9dea2..172e65a9d9 100644 --- a/source/api_cc/src/DeepTensor.cc +++ b/source/api_cc/src/DeepTensor.cc @@ -3,12 +3,7 @@ #include -#ifdef BUILD_TENSORFLOW -#include "DeepTensorTF.h" -#endif -#ifdef BUILD_PYTORCH -#include "DeepTensorPT.h" -#endif +#include "BackendPlugin.h" #include "common.h" using namespace deepmd; @@ -34,20 +29,17 @@ void DeepTensor::init(const std::string& model, return; } const DPBackend backend = get_backend(model); - if (deepmd::DPBackend::TensorFlow == backend) { -#ifdef BUILD_TENSORFLOW - dt = std::make_shared(model, gpu_rank, name_scope_); -#else - throw deepmd::deepmd_exception("TensorFlow backend is not built."); -#endif - } else if (deepmd::DPBackend::PyTorch == backend) { -#ifdef BUILD_PYTORCH - dt = std::make_shared(model, gpu_rank, name_scope_); -#else - throw deepmd::deepmd_exception("PyTorch backend is not built."); -#endif + if (deepmd::DPBackend::TensorFlow == backend || + deepmd::DPBackend::PyTorch == backend) { + dt = create_deeptensor_backend_from_plugin(backend, model, gpu_rank, + name_scope_); } else if (deepmd::DPBackend::Paddle == backend) { throw 
deepmd::deepmd_exception("PaddlePaddle backend is not supported yet"); + } else if (deepmd::DPBackend::PyTorchExportable == backend) { + throw deepmd::deepmd_exception( + "PyTorch Exportable backend is not supported yet"); + } else if (deepmd::DPBackend::JAX == backend) { + throw deepmd::deepmd_exception("JAX backend is not supported yet"); } else { throw deepmd::deepmd_exception("Unknown file type"); } diff --git a/source/api_cc/src/DeepTensorPT.cc b/source/api_cc/src/DeepTensorPT.cc index 1636f3af95..8ed45d6893 100644 --- a/source/api_cc/src/DeepTensorPT.cc +++ b/source/api_cc/src/DeepTensorPT.cc @@ -74,7 +74,7 @@ void DeepTensorPT::init(const std::string& model, return; } name_scope = name_scope_; - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::PyTorch); int gpu_num = torch::cuda::device_count(); if (gpu_num > 0) { gpu_id = gpu_rank % gpu_num; diff --git a/source/api_cc/src/DeepTensorTF.cc b/source/api_cc/src/DeepTensorTF.cc index d17c248f7e..2f843cedd7 100644 --- a/source/api_cc/src/DeepTensorTF.cc +++ b/source/api_cc/src/DeepTensorTF.cc @@ -36,7 +36,7 @@ void DeepTensorTF::init(const std::string& model, get_env_nthreads(num_intra_nthreads, num_inter_nthreads); options.config.set_inter_op_parallelism_threads(num_inter_nthreads); options.config.set_intra_op_parallelism_threads(num_intra_nthreads); - deepmd::load_op_library(); + deepmd::load_op_library(deepmd::DPBackend::TensorFlow); int gpu_num = -1; #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM DPGetDeviceCount(gpu_num); // check current device environment diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 0f59bb0e04..1782608289 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -9,6 +9,9 @@ #include #include "AtomMap.h" +#ifndef BUILD_TENSORFLOW +#include "BackendPlugin.h" +#endif #include "device.h" #if defined(_WIN32) #if defined(_WIN32_WINNT) @@ -460,16 +463,7 @@ static inline void _load_single_op_library(std::string library_name) { 
_load_library_path(dso_path); } -void deepmd::load_op_library() { -#ifdef BUILD_TENSORFLOW - _load_single_op_library("deepmd_op"); -#endif -#ifdef BUILD_PYTORCH - _load_single_op_library("deepmd_op_pt"); -#endif -#ifdef BUILD_PADDLE - _load_single_op_library("deepmd_op_pd"); -#endif +static inline void _load_customized_plugins() { // load customized plugins const char* env_customized_plugins = std::getenv("DP_PLUGIN_PATH"); if (env_customized_plugins) { @@ -489,6 +483,31 @@ void deepmd::load_op_library() { } } +void deepmd::load_op_library() { +#ifdef BUILD_TENSORFLOW + _load_single_op_library("deepmd_op"); +#endif +#ifdef BUILD_PYTORCH + _load_single_op_library("deepmd_op_pt"); +#endif +#ifdef BUILD_PADDLE + _load_single_op_library("deepmd_op_pd"); +#endif + _load_customized_plugins(); +} + +void deepmd::load_op_library(DPBackend backend) { + if (deepmd::DPBackend::TensorFlow == backend) { + _load_single_op_library("deepmd_op"); + } else if (deepmd::DPBackend::PyTorch == backend || + deepmd::DPBackend::PyTorchExportable == backend) { + _load_single_op_library("deepmd_op_pt"); + } else if (deepmd::DPBackend::Paddle == backend) { + _load_single_op_library("deepmd_op_pd"); + } + _load_customized_plugins(); +} + std::string deepmd::name_prefix(const std::string& scope) { std::string prefix = ""; if (scope != "") { @@ -1258,8 +1277,7 @@ void deepmd::convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb) { std::ios::out | std::ios::trunc | std::ios::binary); graph_def.SerializeToOstream(&output); #else - throw deepmd::deepmd_exception( - "convert_pbtxt_to_pb: TensorFlow backend is not enabled."); + convert_pbtxt_to_pb_from_plugin(fn_pb_txt, fn_pb); #endif }