-
Notifications
You must be signed in to change notification settings - Fork 935
[ETVK] WebGPU runtime #18808
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[ETVK] WebGPU runtime #18808
Changes from all commits
ae9482d
576afdc
9548f74
cdb38bf
2a8a3cd
0c155b0
55e93f2
d7376e5
f5b4f3f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| # All rights reserved. | ||
| # | ||
| # This source code is licensed under the BSD-style license found in the | ||
| # LICENSE file in the root directory of this source tree. | ||
|
|
||
# Require CMake 3.19+ for the features used below.
cmake_minimum_required(VERSION 3.19)

# Locate the ExecuTorch repository root when this directory is configured
# standalone (EXECUTORCH_ROOT is normally set by the top-level build).
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

# Shared helpers (e.g. executorch_target_link_options_shared_lib used below).
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
|
|
||
# Ensure vulkan_schema is available even when EXECUTORCH_BUILD_VULKAN is OFF.
# The WebGPU backend reuses the Vulkan FlatBuffer serialization format.
if(NOT TARGET vulkan_schema)
  # Pull in the Vulkan CMakeLists.txt for its schema generation.
  # EXCLUDE_FROM_ALL keeps the other Vulkan targets out of the default
  # build; vulkan_schema still builds because webgpu_backend links it.
  # NOTE(review): this still *defines* the full Vulkan target set and may
  # require its toolchain at configure time — a schema-only CMake include
  # in backends/vulkan would be cleaner. TODO confirm.
  add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/../vulkan
    ${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema
    EXCLUDE_FROM_ALL
  )
endif()
|
|
||
# WebGPU backend runtime sources: delegate entry point, graph builder,
# delegate-header parsing, device management, and op registry/ops.
set(WEBGPU_SRCS
    runtime/WebGPUBackend.cpp
    runtime/WebGPUGraph.cpp
    runtime/WebGPUDelegateHeader.cpp
    runtime/WebGPUDevice.cpp
    runtime/ops/OperatorRegistry.cpp
    runtime/ops/add/BinaryOp.cpp
)

add_library(webgpu_backend ${WEBGPU_SRCS})

# Sources include headers as <executorch/...>, so expose the parent of the
# repository root (build tree only).
target_include_directories(
  webgpu_backend PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
)

# vulkan_schema supplies the generated FlatBuffer headers; executorch_core
# supplies the backend/runtime interfaces.
target_link_libraries(webgpu_backend PRIVATE vulkan_schema executorch_core)
|
|
||
# Native build: link against a prebuilt wgpu-native installation.
set(WGPU_NATIVE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third-party/wgpu-native"
    CACHE PATH "Path to wgpu-native installation")

# Fail early with an actionable message if the prebuilt library is missing.
if(NOT EXISTS "${WGPU_NATIVE_DIR}/lib/libwgpu_native.a")
  message(FATAL_ERROR
    "wgpu-native not found at ${WGPU_NATIVE_DIR}. "
    "Run: bash backends/webgpu/scripts/setup-wgpu-native.sh")
endif()

# Wrap the prebuilt static library in an imported target and attach its
# headers as a usage requirement so any consumer of wgpu_native inherits
# the include path automatically.
add_library(wgpu_native STATIC IMPORTED)
set_target_properties(wgpu_native PROPERTIES
  IMPORTED_LOCATION "${WGPU_NATIVE_DIR}/lib/libwgpu_native.a"
  INTERFACE_INCLUDE_DIRECTORIES "${WGPU_NATIVE_DIR}/include"
)

# Keep the PUBLIC include on webgpu_backend as well so downstream targets
# that link only webgpu_backend still see the wgpu headers (wgpu_native is
# linked PRIVATE below).
target_include_directories(webgpu_backend
  PUBLIC $<BUILD_INTERFACE:${WGPU_NATIVE_DIR}/include>
)
target_link_libraries(webgpu_backend PRIVATE wgpu_native)
|
|
||
# Platform-specific system dependencies required by the static wgpu-native
# library (Metal stack on macOS; dl/math/threads elsewhere).
if(APPLE)
  target_link_libraries(webgpu_backend PRIVATE
    "-framework Metal"
    "-framework QuartzCore"
    "-framework CoreGraphics"
    "-framework Foundation"
  )
else()
  # Portable spellings instead of raw -ldl/-lpthread link names.
  find_package(Threads REQUIRED)
  target_link_libraries(webgpu_backend PRIVATE
    ${CMAKE_DL_LIBS} m Threads::Threads
  )
endif()

# The runtime uses C++ exceptions internally.
target_compile_options(webgpu_backend PRIVATE -fexceptions)
|
|
||
# Link with --whole-archive so static registrations (backend + ops) are
# not dropped by the linker.
executorch_target_link_options_shared_lib(webgpu_backend)

# Require C++17; CXX_STANDARD_REQUIRED prevents a silent fallback to an
# older standard when the compiler lacks support.
set_target_properties(webgpu_backend PROPERTIES
  CXX_STANDARD 17
  CXX_STANDARD_REQUIRED ON
)

install(
  TARGETS webgpu_backend
  EXPORT ExecuTorchTargets
  DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
|
|
||
# Optional native smoke-test executable (loads a .pte and runs the backend).
if(EXECUTORCH_BUILD_WEBGPU_TEST)
  add_executable(webgpu_native_test test/test_webgpu_native.cpp)

  target_include_directories(webgpu_native_test
    PRIVATE
      $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
      "${WGPU_NATIVE_DIR}/include"
  )

  target_link_libraries(webgpu_native_test
    PRIVATE
      webgpu_backend
      wgpu_native
      executorch_core
      extension_module_static
      extension_data_loader
      extension_tensor
      portable_kernels
      portable_ops_lib
  )

  # Mirror the platform link set used by webgpu_backend (Foundation added
  # for consistency with the backend's framework list).
  if(APPLE)
    target_link_libraries(webgpu_native_test PRIVATE
      "-framework Metal"
      "-framework QuartzCore"
      "-framework CoreGraphics"
      "-framework Foundation"
    )
  else()
    find_package(Threads REQUIRED)
    target_link_libraries(webgpu_native_test PRIVATE
      ${CMAKE_DL_LIBS} m Threads::Threads
    )
  endif()

  target_compile_options(webgpu_native_test PRIVATE -fexceptions)
  set_target_properties(webgpu_native_test PROPERTIES
    CXX_STANDARD 17
    CXX_STANDARD_REQUIRED ON
  )
endif()
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,113 @@ | ||||||
| # WebGPU Backend | ||||||
|
|
||||||
| Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux/Windows). | ||||||
|
||||||
| Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux/Windows). | |
| Run ExecuTorch models on the GPU via [WebGPU](https://www.w3.org/TR/webgpu/). The backend compiles delegated subgraphs into WGSL compute shaders executed natively through [wgpu-native](https://github.com/gfx-rs/wgpu-native) (Metal on macOS, Vulkan on Linux). Windows is not supported yet in this prototype. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| # WebGPU Backend — TODO | ||
|
|
||
| ## Current State (Prototype) | ||
| - Single op: `aten.add.Tensor` (fp32, buffer storage) | ||
| - No Python AOT code — directly consumes Vulkan delegate (.pte exported via VulkanPartitioner) | ||
| - Reuses Vulkan FlatBuffer format (VH00 header + VK00 payload) | ||
| - Registers as `"VulkanBackend"` at runtime — mutually exclusive with Vulkan backend at link time | ||
| - Built-in WGSL shaders (not embedded in .pte) | ||
|
|
||
| ## Architecture | ||
| ``` | ||
| VulkanPartitioner (Python) → VkGraphBuilder → VK00 FlatBuffer → .pte | ||
| → WebGPU Runtime: registers as "VulkanBackend", parses VH00/VK00 | ||
| → WebGPUGraph::build → GPU buffers/pipelines/bind groups | ||
| → WebGPUGraph::execute → encode + submit compute passes | ||
| ``` | ||
|
|
||
| Adding a new op requires only C++ runtime work: | ||
| 1. WGSL shader + header | ||
| 2. C++ op implementation (read args from VkGraph, create pipeline, record dispatch) | ||
| 3. Register in CMakeLists.txt | ||
| 4. Test with VulkanPartitioner export | ||
|
|
||
| ## Performance: Command Encoding Overhead | ||
| WebGPU `GPUCommandBuffer` is single-use (no equivalent to Vulkan's cached command lists). | ||
| Per-dispatch API call cost adds up for large graphs. | ||
|
|
||
| **Primary mitigation: mega-kernel fusion.** Generate fused WGSL shaders for chains of | ||
| element-wise ops (add→relu→mul→clamp) at compile time. Embed via the existing | ||
| `shaders: [VkBytes]` field in schema.fbs. | ||
|
|
||
| ## Next Steps | ||
| 1. **More ops**: sub, mul, relu, linear (matmul), softmax, layer_norm | ||
| 2. **fp16 support**: Feature-detect `shader-f16`, fallback to fp32 | ||
| 3. **Buffer pooling**: Reuse GPU buffers to avoid OOM at scale | ||
| 4. **Pipeline caching**: Cache compiled pipelines across runs | ||
| 5. **Profiling**: Wire WebGPU timestamp queries into ETDump/EventTracer | ||
| 6. **LLM support**: KV cache management, Flash Attention in WGSL, quantized ops (int4/int8) | ||
| 7. **Browser/JS runtime**: Emscripten build, JS harness, browser test page |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`add_subdirectory(../vulkan ...)` pulls in the full Vulkan CMakeLists.txt, which currently unconditionally builds shader libraries and the `vulkan_backend` target (not just `vulkan_schema`). This makes `EXECUTORCH_BUILD_WEBGPU=ON` effectively require the full Vulkan toolchain (e.g., glslc) and can also introduce duplicate backend registration. Consider factoring `vulkan_schema` into a standalone CMake include, or adding a schema-only mode/guards in `backends/vulkan/CMakeLists.txt` so including it here does not build the full Vulkan backend.