From 6331eff2708b454b33df29f6591bedde6fe61132 Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 6 Apr 2026 01:05:53 +0800 Subject: [PATCH 1/3] feat: add webm support --- .gitmodules | 3 + CMakeLists.txt | 38 +++++++ docs/build.md | 16 ++- examples/cli/CMakeLists.txt | 3 + examples/cli/README.md | 4 +- examples/cli/main.cpp | 10 +- examples/common/media_io.cpp | 181 +++++++++++++++++++++++++++++++++ examples/common/media_io.h | 8 ++ examples/server/CMakeLists.txt | 3 + thirdparty/CMakeLists.txt | 14 +++ thirdparty/libwebm | 1 + 11 files changed, 271 insertions(+), 10 deletions(-) create mode 160000 thirdparty/libwebm diff --git a/.gitmodules b/.gitmodules index 91cde1f28..1807eb85f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "thirdparty/libwebp"] path = thirdparty/libwebp url = https://github.com/webmproject/libwebp.git +[submodule "thirdparty/libwebm"] + path = thirdparty/libwebm + url = https://github.com/webmproject/libwebm.git diff --git a/CMakeLists.txt b/CMakeLists.txt index e99cd32d7..ad67d969c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,6 +32,16 @@ else() set(SD_WEBP_DEFAULT ${SD_USE_SYSTEM_WEBP}) endif() +set(SD_SUBMODULE_WEBM FALSE) +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libwebm/CMakeLists.txt") + set(SD_SUBMODULE_WEBM TRUE) +endif() +if(SD_SUBMODULE_WEBM) + set(SD_WEBM_DEFAULT ON) +else() + set(SD_WEBM_DEFAULT ${SD_USE_SYSTEM_WEBM}) +endif() + # # Option list # @@ -41,6 +51,8 @@ endif() option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE}) option(SD_WEBP "sd: enable WebP image I/O support" ${SD_WEBP_DEFAULT}) option(SD_USE_SYSTEM_WEBP "sd: link against system libwebp" OFF) +option(SD_WEBM "sd: enable WebM video output support" ${SD_WEBM_DEFAULT}) +option(SD_USE_SYSTEM_WEBM "sd: link against system libwebm" OFF) option(SD_CUDA "sd: cuda backend" OFF) option(SD_HIPBLAS "sd: rocm backend" OFF) option(SD_METAL "sd: metal backend" OFF) @@ -114,6 +126,32 @@ if(SD_WEBP) add_compile_definitions(SD_USE_WEBP) endif() +if(SD_WEBM) + if(NOT SD_WEBP) + message(FATAL_ERROR "SD_WEBM requires SD_WEBP because WebM output reuses libwebp VP8 encoding.") + endif() + if(NOT SD_SUBMODULE_WEBM AND NOT SD_USE_SYSTEM_WEBM) + message(FATAL_ERROR "WebM support enabled but no source found. + Either initialize the submodule:\n git submodule update --init thirdparty/libwebm\n\n" + "Or link against system library:\n cmake (...) -DSD_USE_SYSTEM_WEBM=ON") + endif() + if(SD_USE_SYSTEM_WEBM) + find_path(WEBM_INCLUDE_DIR + NAMES mkvmuxer/mkvmuxer.h mkvparser/mkvparser.h common/webmids.h + PATH_SUFFIXES webm + REQUIRED) + find_library(WEBM_LIBRARY + NAMES webm libwebm + REQUIRED) + + add_library(webm UNKNOWN IMPORTED) + set_target_properties(webm PROPERTIES + IMPORTED_LOCATION "${WEBM_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${WEBM_INCLUDE_DIR}") + endif() + add_compile_definitions(SD_USE_WEBM) +endif() + set(SD_LIB stable-diffusion) file(GLOB SD_LIB_SOURCES diff --git a/docs/build.md b/docs/build.md index eabb51ac3..dbc87691e 100644 --- a/docs/build.md +++ b/docs/build.md @@ -16,15 +16,23 @@ git submodule init git submodule update ``` -## WebP Support in Examples +## WebP and WebM Support in Examples -The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default. +The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O, and `examples/cli` can also use `libwebm` for `.webm` video output. Both are enabled by default. WebM output currently reuses `libwebp` to encode each frame as VP8 before muxing with `libwebm`. -If you do not want WebP support, you can disable it at configure time: +If you do not want WebP/WebM support, you can disable them at configure time: ```shell mkdir build && cd build -cmake .. -DSD_WEBP=OFF +cmake .. -DSD_WEBP=OFF -DSD_WEBM=OFF +cmake --build . --config Release +``` + +If the submodules are not available, you can also link against system packages instead: + +```shell +mkdir build && cd build +cmake .. -DSD_USE_SYSTEM_WEBP=ON -DSD_USE_SYSTEM_WEBM=ON cmake --build . --config Release ``` diff --git a/examples/cli/CMakeLists.txt b/examples/cli/CMakeLists.txt index e4acaac87..5fc777869 100644 --- a/examples/cli/CMakeLists.txt +++ b/examples/cli/CMakeLists.txt @@ -11,4 +11,7 @@ target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS if(SD_WEBP) target_link_libraries(${TARGET} PRIVATE webp libwebpmux) endif() +if(SD_WEBM) + target_link_libraries(${TARGET} PRIVATE webm) +endif() target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17) diff --git a/examples/cli/README.md b/examples/cli/README.md index 25fcce692..289cb866a 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -5,8 +5,8 @@ usage: ./bin/sd-cli [options] CLI Options: -o, --output path to write result image to. you can use printf-style %d format specifiers for image sequences (default: - ./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp - --preview-path path to write preview image to (default: ./preview.png). Multi-frame previews support .avi and animated .webp + ./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi, .webm, and animated .webp + --preview-path path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp --preview-interval interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at every step) --output-begin-idx starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index b4a3c343e..ae7c34f53 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -58,7 +58,7 @@ struct SDCliParams { options.string_options = { {"-o", "--output", - "path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png)", + "path to write result image to. you can use printf-style %d format specifiers for image sequences (default: ./output.png) (eg. output_%03d.png). Single-file video outputs support .avi, .webm, and animated .webp", &output_path}, {"", "--image", @@ -70,7 +70,7 @@ struct SDCliParams { &metadata_format}, {"", "--preview-path", - "path to write preview image to (default: ./preview.png)", + "path to write preview image to (default: ./preview.png). Multi-frame previews support .avi, .webm, and animated .webp", &preview_path}, }; @@ -396,7 +396,9 @@ bool save_results(const SDCliParams& cli_params, if (!ext.empty()) { if (output_format == EncodedImageFormat::JPEG || output_format == EncodedImageFormat::PNG || - output_format == EncodedImageFormat::WEBP) { + output_format == EncodedImageFormat::WEBP || + ext_lower == ".avi" || + ext_lower == ".webm") { base_path.replace_extension(); } } @@ -438,7 +440,7 @@ bool save_results(const SDCliParams& cli_params, } if (cli_params.mode == VID_GEN && num_results > 1) { - if (ext_lower != ".avi" && ext_lower != ".webp") + if (ext_lower != ".avi" && ext_lower != ".webp" && ext_lower != ".webm") ext = ".avi"; fs::path video_path = base_path; video_path += ext; diff --git a/examples/common/media_io.cpp b/examples/common/media_io.cpp index a38513b9d..ba3965326 100644 --- a/examples/common/media_io.cpp +++ b/examples/common/media_io.cpp @@ -30,6 +30,11 @@ #include "webp/mux.h" #endif +#ifdef SD_USE_WEBM +#include "mkvmuxer/mkvmuxer.h" +#include "mkvmuxer/mkvwriter.h" +#endif + namespace fs = std::filesystem; namespace { @@ -71,6 +76,13 @@ bool write_binary_file_bytes(const std::string& path, const std::vector return true; } +uint32_t read_u32_le_bytes(const uint8_t* data) { + return static_cast(data[0]) | + (static_cast(data[1]) << 8) | + (static_cast(data[2]) << 16) | + (static_cast(data[3]) << 24); +} + int stbi_ext_write_png_to_func(stbi_write_func* func, void* context, int x, @@ -289,6 +301,76 @@ bool encode_webp_image_to_vector(const uint8_t* image, WebPMuxDelete(mux); return ok; } + +#ifdef SD_USE_WEBM +bool extract_vp8_frame_from_webp(const std::vector& webp_data, std::vector& vp8_frame) { + if (!is_webp_signature(webp_data.data(), webp_data.size())) { + return false; + } + + size_t offset = 12; + while (offset + 8 <= webp_data.size()) { + const uint8_t* chunk = webp_data.data() + offset; + const uint32_t chunk_len = read_u32_le_bytes(chunk + 4); + const size_t chunk_start = offset + 8; + const size_t padded_len = static_cast(chunk_len) + (chunk_len & 1u); + + if (chunk_start + chunk_len > webp_data.size()) { + return false; + } + + if (memcmp(chunk, "VP8 ", 4) == 0) { + vp8_frame.assign(webp_data.data() + chunk_start, + webp_data.data() + chunk_start + chunk_len); + return !vp8_frame.empty(); + } + + offset = chunk_start + padded_len; + } + + return false; +} + +bool encode_sd_image_to_vp8_frame(const sd_image_t& image, int quality, std::vector& vp8_frame) { + if (image.data == nullptr || image.width == 0 || image.height == 0) { + return false; + } + + const int width = static_cast(image.width); + const int height = static_cast(image.height); + const int input_channel = static_cast(image.channel); + if (input_channel != 1 && input_channel != 3 && input_channel != 4) { + return false; + } + + std::vector rgb_buffer; + const uint8_t* rgb_data = image.data; + if (input_channel == 1) { + rgb_buffer.resize(static_cast(width) * static_cast(height) * 3); + for (int i = 0; i < width * height; ++i) { + rgb_buffer[i * 3 + 0] = image.data[i]; + rgb_buffer[i * 3 + 1] = image.data[i]; + rgb_buffer[i * 3 + 2] = image.data[i]; + } + rgb_data = rgb_buffer.data(); + } else if (input_channel == 4) { + rgb_buffer.resize(static_cast(width) * static_cast(height) * 3); + for (int i = 0; i < width * height; ++i) { + rgb_buffer[i * 3 + 0] = image.data[i * 4 + 0]; + rgb_buffer[i * 3 + 1] = image.data[i * 4 + 1]; + rgb_buffer[i * 3 + 2] = image.data[i * 4 + 2]; + } + rgb_data = rgb_buffer.data(); + } + + std::vector encoded_webp; + if (!encode_webp_image_to_vector(rgb_data, width, height, 3, "", quality, encoded_webp)) { + return false; + } + + return extract_vp8_frame_from_webp(encoded_webp, vp8_frame); +} +#endif #endif uint8_t* load_image_common(bool from_memory, @@ -861,6 +943,99 @@ int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images } #endif +#ifdef SD_USE_WEBM +int create_webm_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { + if (num_images == 0) { + fprintf(stderr, "Error: Image array is empty.\n"); + return -1; + } + if (fps <= 0) { + fprintf(stderr, "Error: FPS must be positive.\n"); + return -1; + } + + const int width = static_cast(images[0].width); + const int height = static_cast(images[0].height); + if (width <= 0 || height <= 0) { + fprintf(stderr, "Error: Invalid frame dimensions.\n"); + return -1; + } + + mkvmuxer::MkvWriter writer; + if (!writer.Open(filename)) { + fprintf(stderr, "Error: Could not open WebM file for writing.\n"); + return -1; + } + + const int ret = [&]() -> int { + mkvmuxer::Segment segment; + if (!segment.Init(&writer)) { + fprintf(stderr, "Error: Failed to initialize WebM muxer.\n"); + return -1; + } + + segment.set_mode(mkvmuxer::Segment::kFile); + segment.OutputCues(true); + + const uint64_t track_number = segment.AddVideoTrack(width, height, 0); + if (track_number == 0) { + fprintf(stderr, "Error: Failed to add VP8 video track.\n"); + return -1; + } + if (!segment.CuesTrack(track_number)) { + fprintf(stderr, "Error: Failed to set WebM cues track.\n"); + return -1; + } + + mkvmuxer::VideoTrack* video_track = static_cast(segment.GetTrackByNumber(track_number)); + if (video_track != nullptr) { + video_track->set_display_width(static_cast(width)); + video_track->set_display_height(static_cast(height)); + video_track->set_frame_rate(static_cast(fps)); + } + segment.GetSegmentInfo()->set_writing_app("stable-diffusion.cpp"); + segment.GetSegmentInfo()->set_muxing_app("stable-diffusion.cpp"); + + const uint64_t frame_duration_ns = std::max( + 1, static_cast(std::llround(1000000000.0 / static_cast(fps)))); + uint64_t timestamp_ns = 0; + + for (int i = 0; i < num_images; ++i) { + const sd_image_t& image = images[i]; + if (static_cast(image.width) != width || static_cast(image.height) != height) { + fprintf(stderr, "Error: Frame dimensions do not match.\n"); + return -1; + } + + std::vector vp8_frame; + if (!encode_sd_image_to_vp8_frame(image, quality, vp8_frame)) { + fprintf(stderr, "Error: Failed to encode frame %d as VP8.\n", i); + return -1; + } + + if (!segment.AddFrame(vp8_frame.data(), + static_cast(vp8_frame.size()), + track_number, + timestamp_ns, + true)) { + fprintf(stderr, "Error: Failed to mux frame %d into WebM.\n", i); + return -1; + } + + timestamp_ns += frame_duration_ns; + } + + if (!segment.Finalize()) { + fprintf(stderr, "Error: Failed to finalize WebM output.\n"); + return -1; + } + return 0; + }(); + writer.Close(); + return ret; +} +#endif + int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) { std::string path = filename ? filename : ""; auto pos = path.find_last_of('.'); @@ -869,6 +1044,12 @@ int create_video_from_sd_images(const char* filename, sd_image_t* images, int nu ch = static_cast(tolower(static_cast(ch))); } +#ifdef SD_USE_WEBM + if (ext == ".webm") { + return create_webm_from_sd_images(filename, images, num_images, fps, quality); + } +#endif + #ifdef SD_USE_WEBP if (ext == ".webp") { return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality); diff --git a/examples/common/media_io.h b/examples/common/media_io.h index cb8302906..e6ca098d9 100644 --- a/examples/common/media_io.h +++ b/examples/common/media_io.h @@ -67,6 +67,14 @@ int create_animated_webp_from_sd_images(const char* filename, int quality = 90); #endif +#ifdef SD_USE_WEBM +int create_webm_from_sd_images(const char* filename, + sd_image_t* images, + int num_images, + int fps, + int quality = 90); +#endif + int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index bf2b252bb..61c6ab607 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -77,6 +77,9 @@ target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INI if(SD_WEBP) target_link_libraries(${TARGET} PRIVATE webp libwebpmux) endif() +if(SD_WEBM) + target_link_libraries(${TARGET} PRIVATE webm) +endif() # due to httplib; it contains a pragma for MSVC, but other things need explicit flags if(WIN32 AND NOT MSVC) diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 9261af75e..25137ee45 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -18,3 +18,17 @@ if(SD_WEBP AND NOT SD_USE_SYSTEM_WEBP) add_subdirectory(libwebp EXCLUDE_FROM_ALL) endif() + +if(SD_WEBM AND NOT SD_USE_SYSTEM_WEBM) + if(MSVC) + set(MSVC_RUNTIME dll) + endif() + set(ENABLE_WEBMTS OFF) + set(ENABLE_WEBMINFO OFF) + set(ENABLE_TESTS OFF) + set(ENABLE_WEBM_PARSER OFF) + set(ENABLE_SAMPLE_PROGRAMS OFF) + + add_subdirectory(libwebm EXCLUDE_FROM_ALL) + target_include_directories(webm INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/libwebm) +endif() diff --git a/thirdparty/libwebm b/thirdparty/libwebm new file mode 160000 index 000000000..5bf12267e --- /dev/null +++ b/thirdparty/libwebm @@ -0,0 +1 @@ +Subproject commit 5bf12267eea773a32fcf4949de52b0add158a8d5 From bb0e1da7b96aa7266ec2346b38caa4554b13a0a1 Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 6 Apr 2026 01:18:24 +0800 Subject: [PATCH 2/3] scope SD_USE_WEBP and SD_USE_WEBM to example targets to avoid submodule rebuilds --- CMakeLists.txt | 2 -- examples/cli/CMakeLists.txt | 2 ++ examples/server/CMakeLists.txt | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ad67d969c..a43c99f90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,7 +123,6 @@ if(SD_WEBP) ) endif() endif() - add_compile_definitions(SD_USE_WEBP) endif() if(SD_WEBM) @@ -149,7 +148,6 @@ if(SD_WEBM) IMPORTED_LOCATION "${WEBM_LIBRARY}" INTERFACE_INCLUDE_DIRECTORIES "${WEBM_INCLUDE_DIR}") endif() - add_compile_definitions(SD_USE_WEBM) endif() set(SD_LIB stable-diffusion) diff --git a/examples/cli/CMakeLists.txt b/examples/cli/CMakeLists.txt index 5fc777869..31ef39163 100644 --- a/examples/cli/CMakeLists.txt +++ b/examples/cli/CMakeLists.txt @@ -9,9 +9,11 @@ add_executable(${TARGET} install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT}) if(SD_WEBP) + target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP) target_link_libraries(${TARGET} PRIVATE webp libwebpmux) endif() if(SD_WEBM) + target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM) target_link_libraries(${TARGET} PRIVATE webm) endif() target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17) diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index 61c6ab607..ad219eb1c 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -75,9 +75,11 @@ endif() install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT}) if(SD_WEBP) + target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBP) target_link_libraries(${TARGET} PRIVATE webp libwebpmux) endif() if(SD_WEBM) + target_compile_definitions(${TARGET} PRIVATE SD_USE_WEBM) target_link_libraries(${TARGET} PRIVATE webm) endif() From e0018442661fea25e7d4e0ffad2871d794db3d20 Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 6 Apr 2026 01:38:45 +0800 Subject: [PATCH 3/3] fix ci --- .github/workflows/build.yml | 1 + thirdparty/CMakeLists.txt | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c3dca338a..9b42f8941 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -239,6 +239,7 @@ jobs: id: build-push uses: docker/build-push-action@v6 with: + context: . platforms: linux/amd64 push: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} file: Dockerfile.${{ matrix.variant }} diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 25137ee45..4dfdf0d29 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -29,6 +29,17 @@ if(SD_WEBM AND NOT SD_USE_SYSTEM_WEBM) set(ENABLE_WEBM_PARSER OFF) set(ENABLE_SAMPLE_PROGRAMS OFF) + set(SD_LIBWEBM_PARENT_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + add_subdirectory(libwebm EXCLUDE_FROM_ALL) + + # libwebm mutates the global CMAKE_CXX_FLAGS for non-MSVC compilers to force + # C++11. Restore the parent flags so the main project keeps its own C++17 + # requirements, then pin the libwebm targets to C++17 explicitly. + set(CMAKE_CXX_FLAGS "${SD_LIBWEBM_PARENT_CXX_FLAGS}" CACHE STRING "" FORCE) + target_compile_features(mkvmuxer PRIVATE cxx_std_17) + target_compile_features(mkvparser PRIVATE cxx_std_17) + target_compile_features(webm PRIVATE cxx_std_17) + target_include_directories(webm INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/libwebm) endif()