Skip to content

Commit 7aa4d93

Browse files
committed
fix: adjust timestep calculations for DDIM and TCD
On img2img, the number of steps corresponds to the last precalculated sigma values, but the internal alphas_cumprod and compvis_sigmas were being computed over the entire step range. Also tweaks the prev_timestep calculation on DDIM to better match the current timestep calculation (as on TCD), to avoid inconsistencies due to rounding.
1 parent 10c6501 commit 7aa4d93

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

denoiser.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ static void sample_k_diffusion(sample_method_t method,
474474
ggml_context* work_ctx,
475475
ggml_tensor* x,
476476
std::vector<float> sigmas,
477+
int initial_step,
477478
std::shared_ptr<RNG> rng,
478479
float eta) {
479480
size_t steps = sigmas.size() - 1;
@@ -1060,10 +1061,14 @@ static void sample_k_diffusion(sample_method_t method,
10601061
// x_t"
10611062
// - pred_prev_sample -> "x_t-1"
10621063
int timestep =
1063-
roundf(TIMESTEPS -
1064-
i * ((float)TIMESTEPS / steps)) - 1;
1064+
TIMESTEPS - 1 -
1065+
(int)roundf((initial_step + i) *
1066+
(TIMESTEPS / float(initial_step + steps)));
10651067
// 1. get previous step value (=t-1)
1066-
int prev_timestep = timestep - TIMESTEPS / steps;
1068+
int prev_timestep =
1069+
TIMESTEPS - 1 -
1070+
(int)roundf((initial_step + i + 1) *
1071+
(TIMESTEPS / float(initial_step + steps)));
10671072
// The sigma here is chosen to cause the
10681073
// CompVisDenoiser to produce t = timestep
10691074
float sigma = compvis_sigmas[timestep];
@@ -1236,12 +1241,13 @@ static void sample_k_diffusion(sample_method_t method,
12361241
// Analytic form for TCD timesteps
12371242
int timestep = TIMESTEPS - 1 -
12381243
(TIMESTEPS / original_steps) *
1239-
(int)floor(i * ((float)original_steps / steps));
1244+
(int)floor((initial_step + i) *
1245+
((float)original_steps / (initial_step + steps)));
12401246
// 1. get previous step value
12411247
int prev_timestep = i >= steps - 1 ? 0 :
12421248
TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1243-
(int)floor((i + 1) *
1244-
((float)original_steps / steps));
1249+
(int)floor((initial_step + i + 1) *
1250+
((float)original_steps / (initial_step + steps)));
12451251
// Here timestep_s is tau_n' in Algorithm 4. The _s
12461252
// notation appears to be that from C. Lu,
12471253
// "DPM-Solver: A Fast ODE Solver for Diffusion

stable-diffusion.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ class StableDiffusionGGML {
798798
float eta,
799799
sample_method_t method,
800800
const std::vector<float>& sigmas,
801+
int initial_step,
801802
int start_merge_step,
802803
SDCondition id_cond,
803804
std::vector<int> skip_layers = {},
@@ -991,7 +992,7 @@ class StableDiffusionGGML {
991992
return denoised;
992993
};
993994

994-
sample_k_diffusion(method, denoise, work_ctx, x, sigmas, rng, eta);
995+
sample_k_diffusion(method, denoise, work_ctx, x, sigmas, initial_step, rng, eta);
995996

996997
x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x);
997998

@@ -1202,6 +1203,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12021203
int height,
12031204
enum sample_method_t sample_method,
12041205
const std::vector<float>& sigmas,
1206+
int initial_step,
12051207
int64_t seed,
12061208
int batch_count,
12071209
const sd_image_t* control_cond,
@@ -1464,6 +1466,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
14641466
eta,
14651467
sample_method,
14661468
sigmas,
1469+
initial_step,
14671470
start_merge_step,
14681471
id_cond,
14691472
skip_layers,
@@ -1611,6 +1614,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16111614
height,
16121615
sample_method,
16131616
sigmas,
1617+
0,
16141618
seed,
16151619
batch_count,
16161620
control_cond,
@@ -1775,8 +1779,9 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
17751779
if (t_enc == sample_steps)
17761780
t_enc--;
17771781
LOG_INFO("target t_enc is %zu steps", t_enc);
1782+
int initial_step = sample_steps - t_enc - 1;
17781783
std::vector<float> sigma_sched;
1779-
sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end());
1784+
sigma_sched.assign(sigmas.begin() + initial_step, sigmas.end());
17801785

17811786
sd_image_t* result_images = generate_image(sd_ctx,
17821787
work_ctx,
@@ -1791,6 +1796,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
17911796
height,
17921797
sample_method,
17931798
sigma_sched,
1799+
initial_step,
17941800
seed,
17951801
batch_count,
17961802
control_cond,
@@ -1903,6 +1909,7 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
19031909
0.f,
19041910
sample_method,
19051911
sigmas,
1912+
0,
19061913
-1,
19071914
SDCondition(NULL, NULL, NULL));
19081915

0 commit comments

Comments
 (0)