diff --git a/conditioner.hpp b/conditioner.hpp index cfd2b4ca..cdc08c0b 100644 --- a/conditioner.hpp +++ b/conditioner.hpp @@ -129,7 +129,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner { return true; } struct ggml_init_params params; - params.mem_size = 10 * 1024 * 1024; // max for custom embeddings 10 MB + params.mem_size = 100 * 1024 * 1024; // max for custom embeddings 100 MB params.mem_buffer = NULL; params.no_alloc = false; struct ggml_context* embd_ctx = ggml_init(params); diff --git a/preprocessing.hpp b/preprocessing.hpp index 4ea1dbab..c5ecfef9 100644 --- a/preprocessing.hpp +++ b/preprocessing.hpp @@ -164,7 +164,7 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) { struct ggml_init_params params; - params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 + params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10MB params.mem_buffer = NULL; params.no_alloc = false; struct ggml_context* work_ctx = ggml_init(params); diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 08894731..b809adca 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2178,19 +2178,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g } struct ggml_init_params params; - params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB - if (sd_version_is_sd3(sd_ctx->sd->version)) { - params.mem_size *= 3; - } - if (sd_version_is_flux(sd_ctx->sd->version)) { - params.mem_size *= 4; - } - if (sd_ctx->sd->stacked_id) { - params.mem_size += static_cast<size_t>(10 * 1024 * 1024); // 10 MB - } - params.mem_size += width * height * 3 * sizeof(float) * 3; - params.mem_size += width * height * 3 * sizeof(float) * 3 * sd_img_gen_params->ref_images_count; - params.mem_size *= sd_img_gen_params->batch_count; + params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G 
params.mem_buffer = NULL; params.no_alloc = false; // LOG_DEBUG("mem_size %u ", params.mem_size); @@ -2425,8 +2413,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s } struct ggml_init_params params; - params.mem_size = static_cast<size_t>(200 * 1024) * 1024; // 200 MB - params.mem_size += width * height * frames * 3 * sizeof(float) * 2; + params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1GB params.mem_buffer = NULL; params.no_alloc = false; // LOG_DEBUG("mem_size %u ", params.mem_size); diff --git a/upscaler.cpp b/upscaler.cpp index 4ab0b73c..e63a3ecb 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -69,8 +69,7 @@ struct UpscalerGGML { input_image.width, input_image.height, output_width, output_height); struct ggml_init_params params; - params.mem_size = output_width * output_height * 3 * sizeof(float) * 2; - params.mem_size += 2 * ggml_tensor_overhead(); + params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G params.mem_buffer = NULL; params.no_alloc = false; @@ -80,7 +79,7 @@ struct UpscalerGGML { LOG_ERROR("ggml_init() failed"); return upscaled_image; } - LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f); + // LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f); ggml_tensor* input_image_tensor = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, input_image.width, input_image.height, 3, 1); sd_image_to_tensor(input_image.data, input_image_tensor); diff --git a/wan.hpp b/wan.hpp index 48603a95..37c58f94 100644 --- a/wan.hpp +++ b/wan.hpp @@ -1219,7 +1219,7 @@ namespace WAN { void test() { struct ggml_init_params params; - params.mem_size = static_cast<size_t>(1000 * 1024 * 1024); // 10 MB + params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1G params.mem_buffer = NULL; params.no_alloc = false;