From e4aac890e58ec414005e54de2c129c915d6f408e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Sat, 26 Jul 2025 20:23:27 +0200 Subject: [PATCH] Change image dimensions requirement for DiT models --- examples/cli/main.cpp | 8 ++++---- stable-diffusion.cpp | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index b3ae569e..27e3f787 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -596,13 +596,13 @@ void parse_args(int argc, const char** argv, SDParams& params) { exit(1); } - if (params.width <= 0 || params.width % 64 != 0) { - fprintf(stderr, "error: the width must be a multiple of 64\n"); + if (params.width <= 0) { + fprintf(stderr, "error: the width must be greater than 0\n"); exit(1); } - if (params.height <= 0 || params.height % 64 != 0) { - fprintf(stderr, "error: the height must be a multiple of 64\n"); + if (params.height <= 0) { + fprintf(stderr, "error: the height must be greater than 0\n"); exit(1); } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 402585f1..2594ba2b 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1875,6 +1875,19 @@ ggml_tensor* generate_init_latent(sd_ctx_t* sd_ctx, sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params) { int width = sd_img_gen_params->width; int height = sd_img_gen_params->height; + /* sd_ctx is only NULL-checked further down, but the dimension validation reads sd_ctx->sd->version first, so bail out early on a NULL context. */ + if (sd_ctx == NULL) { + return NULL; + } + if (sd_version_is_dit(sd_ctx->sd->version)) { + if (width % 16 || height % 16) { + LOG_ERROR("Image dimensions must be a multiple of 16 on each axis for %s models. (Got %dx%d)", model_version_to_str[sd_ctx->sd->version], width, height); + return NULL; + } + } else if (width % 64 || height % 64) { + LOG_ERROR("Image dimensions must be a multiple of 64 on each axis for %s models. (Got %dx%d)", model_version_to_str[sd_ctx->sd->version], width, height); + return NULL; + } LOG_DEBUG("generate_image %dx%d", width, height); if (sd_ctx == NULL || sd_img_gen_params == NULL) { return NULL;