diff --git a/README.md b/README.md index b5bb49751..c36b0f0f6 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,7 @@ API and command-line option may change frequently.*** - `DPM++ 2M` - [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457) - `DPM++ 2S a` + - `ER-SDE` - [`LCM`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13952) - Cross-platform reproducibility - `--rng cuda`, default, consistent with the `stable-diffusion-webui GPU RNG` diff --git a/examples/cli/README.md b/examples/cli/README.md index 289cb866a..6429ffe7f 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -133,10 +133,10 @@ Generation Options: --disable-image-metadata do not embed generation metadata on image files -s, --seed RNG seed (default: 42, use random seed for < 0) --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, - tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a + tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise) --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, - ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan, + ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete diff --git a/examples/common/common.cpp b/examples/common/common.cpp index a8719da80..8ef60d1bd 100644 --- a/examples/common/common.cpp +++ b/examples/common/common.cpp @@ -1170,12 +1170,12 @@ ArgOptions SDGenerationParams::get_options() { on_seed_arg}, {"", "--sampling-method", - "sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s] " + "sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] " "(default: euler for Flux/SD3/Wan, euler_a otherwise)", on_sample_method_arg}, {"", "--high-noise-sampling-method", - "(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s]" + "(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde]" " default: euler for Flux/SD3/Wan, euler_a otherwise", on_high_noise_sample_method_arg}, {"", diff --git a/examples/server/README.md b/examples/server/README.md index 620586d2e..9da2fcfc5 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -208,10 +208,10 @@ Default Generation Options: --disable-image-metadata do not embed generation metadata on image files -s, --seed RNG seed (default: 42, use random seed for < 0) --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, - tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a + tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a otherwise) --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, - ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan, + ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h index f093bb56c..64ca401a4 100644 --- a/include/stable-diffusion.h +++ b/include/stable-diffusion.h @@ -50,6 +50,7 @@ enum sample_method_t { TCD_SAMPLE_METHOD, RES_MULTISTEP_SAMPLE_METHOD, RES_2S_SAMPLE_METHOD, + ER_SDE_SAMPLE_METHOD, SAMPLE_METHOD_COUNT }; diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 59b8c41b9..12e4707a4 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -818,6 +818,33 @@ static std::tuple get_ancestral_step_flow(float sigma_from, return {sigma_down, sigma_up, alpha_scale}; } +static float er_sde_flow_sigma(float sigma) { + sigma = std::max(sigma, 1e-6f); + sigma = std::min(sigma, 1.0f - 1e-4f); + return sigma; +} + +static float sigma_to_er_sde_lambda(float sigma, bool is_flow_denoiser) { + if (is_flow_denoiser) { + sigma = er_sde_flow_sigma(sigma); + return sigma / std::max(1.0f - sigma, 1e-6f); + } + return std::max(sigma, 1e-6f); +} + +static float sigma_to_er_sde_alpha(float sigma, bool is_flow_denoiser) { + if (is_flow_denoiser) { + sigma = er_sde_flow_sigma(sigma); + return 1.0f - sigma; + } + return 1.0f; +} + +static float er_sde_noise_scaler(float x) { + x = std::max(x, 0.0f); + return x * (std::exp(std::pow(x, 0.3f)) + 10.0f); +} + static sd::Tensor sample_euler_ancestral(denoise_cb_t model, sd::Tensor x, const std::vector& sigmas, @@ -1295,6 +1322,112 @@ static sd::Tensor sample_res_2s(denoise_cb_t model, return x; } +static sd::Tensor sample_er_sde(denoise_cb_t model, + sd::Tensor x, + std::vector sigmas, + std::shared_ptr rng, + bool is_flow_denoiser) { + constexpr int max_stage = 3; + constexpr int num_integration_points = 200; + constexpr float num_integration_points_f = 200.0f; + constexpr float s_noise = 1.0f; + + if (is_flow_denoiser) { + for (size_t i = 0; i + 1 < sigmas.size(); ++i) { + if (sigmas[i] > 1.0f) { + sigmas[i] = er_sde_flow_sigma(sigmas[i]); + } + } + } + + std::vector er_lambdas(sigmas.size(), 0.0f); + for (size_t i = 0; i < sigmas.size(); ++i) { + er_lambdas[i] = sigma_to_er_sde_lambda(sigmas[i], is_flow_denoiser); + } + + sd::Tensor old_denoised = x; + sd::Tensor old_denoised_d = x; + bool have_old_denoised = false; + bool have_old_denoised_d = false; + + int steps = static_cast(sigmas.size()) - 1; + for (int i = 0; i < steps; i++) { + sd::Tensor denoised = model(x, sigmas[i], i + 1); + if (denoised.empty()) { + return {}; + } + + int stage_used = std::min(max_stage, i + 1); + + if (sigmas[i + 1] == 0.0f) { + x = denoised; + } else { + float er_lambda_s = er_lambdas[i]; + float er_lambda_t = er_lambdas[i + 1]; + float alpha_s = sigma_to_er_sde_alpha(sigmas[i], is_flow_denoiser); + float alpha_t = sigma_to_er_sde_alpha(sigmas[i + 1], is_flow_denoiser); + float scaled_s = er_sde_noise_scaler(er_lambda_s); + float scaled_t = er_sde_noise_scaler(er_lambda_t); + float r_alpha = alpha_s > 0.0f ? alpha_t / alpha_s : 0.0f; + float r = scaled_s > 0.0f ? scaled_t / scaled_s : 0.0f; + + x = r_alpha * r * x + alpha_t * (1.0f - r) * denoised; + + if (stage_used >= 2 && have_old_denoised) { + float dt = er_lambda_t - er_lambda_s; + float lambda_step_size = -dt / num_integration_points_f; + float s = 0.0f; + float s_u = 0.0f; + + for (int p = 0; p < num_integration_points; ++p) { + float lambda_pos = er_lambda_t + p * lambda_step_size; + float scaled_pos = er_sde_noise_scaler(lambda_pos); + if (scaled_pos <= 0.0f) { + continue; + } + + s += 1.0f / scaled_pos; + if (stage_used >= 3 && have_old_denoised_d) { + s_u += (lambda_pos - er_lambda_s) / scaled_pos; + } + } + + s *= lambda_step_size; + + float denom_d = er_lambda_s - er_lambdas[i - 1]; + if (std::fabs(denom_d) > 1e-12f) { + float coeff_d = alpha_t * (dt + s * scaled_t); + sd::Tensor denoised_d = (denoised - old_denoised) / denom_d; + x += coeff_d * denoised_d; + + if (stage_used >= 3 && have_old_denoised_d) { + float denom_u = (er_lambda_s - er_lambdas[i - 2]) * 0.5f; + if (std::fabs(denom_u) > 1e-12f) { + s_u *= lambda_step_size; + float coeff_u = alpha_t * (0.5f * dt * dt + s_u * scaled_t); + sd::Tensor denoised_u = (denoised_d - old_denoised_d) / denom_u; + x += coeff_u * denoised_u; + } + } + + old_denoised_d = denoised_d; + have_old_denoised_d = true; + } + } + + float noise_scale_sq = er_lambda_t * er_lambda_t - er_lambda_s * er_lambda_s * r * r; + if (s_noise > 0.0f && noise_scale_sq > 0.0f) { + float noise_scale = alpha_t * std::sqrt(std::max(noise_scale_sq, 0.0f)); + x += sd::Tensor::randn_like(x, rng) * noise_scale; + } + } + + old_denoised = denoised; + have_old_denoised = true; + } + return x; +} + static sd::Tensor sample_ddim_trailing(denoise_cb_t model, sd::Tensor x, const std::vector& sigmas, @@ -1456,6 +1589,8 @@ static sd::Tensor sample_k_diffusion(sample_method_t method, return sample_res_multistep(model, std::move(x), sigmas, rng, eta); case RES_2S_SAMPLE_METHOD: return sample_res_2s(model, std::move(x), sigmas, rng, eta); + case ER_SDE_SAMPLE_METHOD: + return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser); case DDIM_TRAILING_SAMPLE_METHOD: return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta); case TCD_SAMPLE_METHOD: diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index 683a07d53..4176653e4 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -69,6 +69,7 @@ const char* sampling_methods_str[] = { "TCD", "Res Multistep", "Res 2s", + "ER-SDE", }; /*================================================== Helper Functions ================================================*/ @@ -1975,6 +1976,7 @@ const char* sample_method_to_str[] = { "tcd", "res_multistep", "res_2s", + "er_sde", }; const char* sd_sample_method_name(enum sample_method_t sample_method) {