From 374ace4d5a73957094b8bafdadc2839935cf23c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20Szont=C3=A1gh?= Date: Mon, 5 Feb 2024 19:18:25 +0000 Subject: [PATCH 1/6] add progress callback, suppress others --- stable-diffusion.h | 2 ++ util.cpp | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/stable-diffusion.h b/stable-diffusion.h index a8c9f5329..3890eb3e1 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -88,8 +88,10 @@ enum sd_log_level_t { }; typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data); +typedef void (*sd_progress_cb_t)(int step,int steps,float time, void* data); SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data); +SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data); SD_API int32_t get_num_physical_cores(); SD_API const char* sd_get_system_info(); diff --git a/util.cpp b/util.cpp index c5f3f8610..a862ca87c 100644 --- a/util.cpp +++ b/util.cpp @@ -159,6 +159,9 @@ int32_t get_num_physical_cores() { return n_threads > 0 ? (n_threads <= 4 ? 
n_threads : n_threads / 2) : 4; } +static sd_progress_cb_t sd_progress_cb = NULL; +void* sd_progress_cb_data = NULL; + std::u32string utf8_to_utf32(const std::string& utf8_str) { std::wstring_convert, char32_t> converter; return converter.from_bytes(utf8_str); @@ -203,6 +206,10 @@ std::string path_join(const std::string& p1, const std::string& p2) { } void pretty_progress(int step, int steps, float time) { + if (sd_progress_cb) { + sd_progress_cb(step,steps,time, sd_progress_cb_data); + return; + } std::string progress = " |"; int max_progress = 50; int32_t current = (int32_t)(step * 1.f * max_progress / steps); @@ -243,8 +250,9 @@ std::string trim(const std::string& s) { return rtrim(ltrim(s)); } -static sd_log_cb_t sd_log_cb = NULL; -void* sd_log_cb_data = NULL; +static sd_log_cb_t sd_log_cb = NULL; +void* sd_log_cb_data = NULL; + #define LOG_BUFFER_SIZE 1024 @@ -281,7 +289,10 @@ void sd_set_log_callback(sd_log_cb_t cb, void* data) { sd_log_cb = cb; sd_log_cb_data = data; } - +void sd_set_progress_callback(sd_progress_cb_t cb, void* data) { + sd_progress_cb = cb; + sd_progress_cb_data = data; +} const char* sd_get_system_info() { static char buffer[1024]; std::stringstream ss; From 6ee1c65bfdf112d7183cc3a9a967deffd36e9df2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20Szont=C3=A1gh?= Date: Sat, 10 Feb 2024 15:46:03 +0100 Subject: [PATCH 2/6] some error handling for LoRAs and embeddings --- clip.hpp | 10 +++++++++- lora.hpp | 15 ++++++++++++--- stable-diffusion.cpp | 6 +++--- vae.hpp | 2 +- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/clip.hpp b/clip.hpp index 546e944b5..4c9f7d49e 100644 --- a/clip.hpp +++ b/clip.hpp @@ -694,8 +694,12 @@ struct CLIPTextModel { LOG_ERROR("embedding '%s' failed", embd_name.c_str()); return false; } + if (std::find(readed_embeddings.begin(), readed_embeddings.end(), embd_name) != readed_embeddings.end()) { + LOG_DEBUG("embedding already readed in: %s", embd_name.c_str()); + return false; + } struct 
ggml_init_params params; - params.mem_size = 32 * 1024; // max for custom embeddings 32 KB + params.mem_size = 256 * 1024; // max for custom embeddings 256 KB params.mem_buffer = NULL; params.no_alloc = false; struct ggml_context* embd_ctx = ggml_init(params); @@ -709,7 +713,11 @@ struct CLIPTextModel { *dst_tensor = embd; return true; }; + model_loader.load_tensors(on_load, NULL); + if (ggml_nbytes(token_embed_custom) <= (num_custom_embeddings * hidden_size * ggml_type_size(token_embed_custom->type)) + ggml_nbytes(embd)) { + return false; + } ggml_backend_tensor_set(token_embed_custom, embd->data, num_custom_embeddings * hidden_size * ggml_type_size(token_embed_custom->type), ggml_nbytes(embd)); readed_embeddings.push_back(embd_name); for (int i = 0; i < embd->ne[1]; i++) { diff --git a/lora.hpp b/lora.hpp index 5587b3af5..f2fb28a97 100644 --- a/lora.hpp +++ b/lora.hpp @@ -3,7 +3,7 @@ #include "ggml_extend.hpp" -#define LORA_GRAPH_SIZE 10240 +#define LORA_GRAPH_SIZE 40960 struct LoraModel : public GGMLModule { float multiplier = 1.0f; @@ -28,6 +28,15 @@ struct LoraModel : public GGMLModule { return model_loader.cal_mem_size(NULL); } + static inline int ggml_n_dims_t(const struct TensorStorage tensor) { + for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) { + if (tensor.ne[i] > 1) { + return i + 1; + } + } + return 1; + } + bool load_from_file(ggml_backend_t backend) { if (!alloc_params_buffer(backend)) { return false; @@ -44,7 +53,7 @@ struct LoraModel : public GGMLModule { auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool { const std::string& name = tensor_storage.name; - struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, tensor_storage.n_dims, tensor_storage.ne); + struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, this->ggml_n_dims_t(tensor_storage), tensor_storage.ne); ggml_allocr_alloc(alloc, real); *dst_tensor = real; @@ -54,7 +63,7 @@ struct LoraModel : public 
GGMLModule { }; model_loader.load_tensors(on_new_tensor_cb, backend); - + LOG_DEBUG("finished loaded lora"); ggml_allocr_free(alloc); return true; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 8dd5f16e5..eb9b1eca6 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -222,7 +222,7 @@ class StableDiffusionGGML { } struct ggml_init_params params; - params.mem_size = static_cast(10 * 1024) * 1024; // 10M + params.mem_size = static_cast(10 * 2048) * 1024; // 10M params.mem_buffer = NULL; params.no_alloc = false; // LOG_DEBUG("mem_size %u ", params.mem_size); @@ -1232,7 +1232,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, int64_t t1 = ggml_time_ms(); LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000); struct ggml_init_params params; - params.mem_size = static_cast(10 * 1024 * 1024); // 10 MB + params.mem_size = static_cast(20 * 1024 * 1024); // 10 MB params.mem_size += width * height * 3 * sizeof(float); params.mem_size *= batch_count; params.mem_buffer = NULL; @@ -1377,7 +1377,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end()); struct ggml_init_params params; - params.mem_size = static_cast(10 * 1024) * 1024; // 10 MB + params.mem_size = static_cast(20 * 1024) * 1024; // 10 MB params.mem_size += width * height * 3 * sizeof(float) * 2; params.mem_buffer = NULL; params.no_alloc = false; diff --git a/vae.hpp b/vae.hpp index f78777f95..0dca7b34b 100644 --- a/vae.hpp +++ b/vae.hpp @@ -6,7 +6,7 @@ /*================================================== AutoEncoderKL ===================================================*/ -#define VAE_GRAPH_SIZE 10240 +#define VAE_GRAPH_SIZE 20480 struct ResnetBlock { // network hparams From 0e834e75619953e265fb368f79db0294a78a6dac Mon Sep 17 00:00:00 2001 From: fszontagh Date: Sat, 10 Feb 2024 20:03:37 +0100 Subject: [PATCH 3/6] formatting mistakes --- clip.hpp | 2 +- lora.hpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff 
--git a/clip.hpp b/clip.hpp index 4c9f7d49e..d15f54e73 100644 --- a/clip.hpp +++ b/clip.hpp @@ -695,7 +695,7 @@ struct CLIPTextModel { return false; } if (std::find(readed_embeddings.begin(), readed_embeddings.end(), embd_name) != readed_embeddings.end()) { - LOG_DEBUG("embedding already readed in: %s", embd_name.c_str()); + LOG_DEBUG("embedding already read in: %s", embd_name.c_str()); return false; } struct ggml_init_params params; diff --git a/lora.hpp b/lora.hpp index f2fb28a97..716379d48 100644 --- a/lora.hpp +++ b/lora.hpp @@ -63,7 +63,6 @@ struct LoraModel : public GGMLModule { }; model_loader.load_tensors(on_new_tensor_cb, backend); - LOG_DEBUG("finished loaded lora"); ggml_allocr_free(alloc); return true; From fc8be44f16bf5db3179ed4e86a50368f189e7f2f Mon Sep 17 00:00:00 2001 From: fszontagh Date: Thu, 15 Feb 2024 07:36:55 +0100 Subject: [PATCH 4/6] #175 #154 #141 - remove the lora map that prevented RAM from being released --- stable-diffusion.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index eb9b1eca6..5e8791de5 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -72,7 +72,6 @@ class StableDiffusionGGML { std::string lora_model_dir; // lora_name => multiplier std::unordered_map curr_lora_state; - std::map loras; std::shared_ptr denoiser = std::make_shared(); ggml_backend_t backend = NULL; // general backend @@ -395,7 +394,6 @@ class StableDiffusionGGML { lora.multiplier = multiplier; lora.apply(tensors, n_threads); - loras[lora_name] = lora; lora.free_params_buffer(); int64_t t1 = ggml_time_ms(); From fecdbccab0b1e47265923cd5610e526a640daf70 Mon Sep 17 00:00:00 2001 From: fszontagh <51741446+fszontagh@users.noreply.github.com> Date: Mon, 26 Feb 2024 08:52:39 +0100 Subject: [PATCH 5/6] restored graphs and buffer sizes --- lora.hpp | 4 ++-- stable-diffusion.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lora.hpp b/lora.hpp index 785d36c1b..82f90a811 100644 --- 
a/lora.hpp +++ b/lora.hpp @@ -3,7 +3,7 @@ #include "ggml_extend.hpp" -#define LORA_GRAPH_SIZE 40960 +#define LORA_GRAPH_SIZE 10240 struct LoraModel : public GGMLModule { float multiplier = 1.0f; @@ -56,7 +56,7 @@ struct LoraModel : public GGMLModule { auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool { const std::string& name = tensor_storage.name; - struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, this->ggml_n_dims_t(tensor_storage), tensor_storage.ne); + struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, ggml_n_dims_t(tensor_storage), tensor_storage.ne); ggml_allocr_alloc(alloc, real); *dst_tensor = real; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index e6a40b415..be32f7f6e 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -238,7 +238,7 @@ class StableDiffusionGGML { } struct ggml_init_params params; - params.mem_size = static_cast(10 * 2048) * 1024; // 10M + params.mem_size = static_cast(10 * 1024) * 1024; // 10M params.mem_buffer = NULL; params.no_alloc = false; // LOG_DEBUG("mem_size %u ", params.mem_size); @@ -1332,7 +1332,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, int64_t t1 = ggml_time_ms(); LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000); struct ggml_init_params params; - params.mem_size = static_cast(20 * 1024 * 1024); // 10 MB + params.mem_size = static_cast(10 * 1024 * 1024); // 10 MB params.mem_size += width * height * 3 * sizeof(float); params.mem_size *= batch_count; params.mem_buffer = NULL; @@ -1491,7 +1491,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end()); struct ggml_init_params params; - params.mem_size = static_cast(20 * 1024) * 1024; // 10 MB + params.mem_size = static_cast(10 * 1024) * 1024; // 10 MB params.mem_size += width * height * 3 * sizeof(float) * 2; params.mem_buffer = NULL; params.no_alloc = false; From 
af449d301f9be6896c970824ff88191cfc8852f3 Mon Sep 17 00:00:00 2001 From: fszontagh <51741446+fszontagh@users.noreply.github.com> Date: Tue, 27 Feb 2024 17:55:12 +0100 Subject: [PATCH 6/6] removed ggml_n_dims_t --- lora.hpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/lora.hpp b/lora.hpp index e5dfc8e59..7eb42e100 100644 --- a/lora.hpp +++ b/lora.hpp @@ -33,14 +33,6 @@ struct LoraModel : public GGMLModule { return model_loader.get_params_mem_size(NULL); } - static inline int ggml_n_dims_t(const struct TensorStorage tensor) { - for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) { - if (tensor.ne[i] > 1) { - return i + 1; - } - } - return 1; - } bool load_from_file() { LOG_INFO("loading LoRA from '%s'", file_path.c_str()); @@ -57,7 +49,7 @@ struct LoraModel : public GGMLModule { if (dry_run) { struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, - ggml_n_dims_t(tensor_storage), + tensor_storage.n_dims, tensor_storage.ne); lora_tensors[name] = real; } else {