From 374ace4d5a73957094b8bafdadc2839935cf23c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ferenc=20Szont=C3=A1gh?= <szf@fsociety.hu>
Date: Mon, 5 Feb 2024 19:18:25 +0000
Subject: [PATCH 1/6] add progress callback, suppress others

---
 stable-diffusion.h |  2 ++
 util.cpp           | 17 ++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/stable-diffusion.h b/stable-diffusion.h
index a8c9f5329..3890eb3e1 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -88,8 +88,10 @@ enum sd_log_level_t {
 };
 
 typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
+typedef void (*sd_progress_cb_t)(int step,int steps,float time, void* data);
 
 SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
+SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
 SD_API int32_t get_num_physical_cores();
 SD_API const char* sd_get_system_info();
 
diff --git a/util.cpp b/util.cpp
index c5f3f8610..a862ca87c 100644
--- a/util.cpp
+++ b/util.cpp
@@ -159,6 +159,9 @@ int32_t get_num_physical_cores() {
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 
+static sd_progress_cb_t sd_progress_cb  = NULL;
+void* sd_progress_cb_data               = NULL;
+
 std::u32string utf8_to_utf32(const std::string& utf8_str) {
     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
     return converter.from_bytes(utf8_str);
@@ -203,6 +206,10 @@ std::string path_join(const std::string& p1, const std::string& p2) {
 }
 
 void pretty_progress(int step, int steps, float time) {
+    if (sd_progress_cb) {
+        sd_progress_cb(step,steps,time, sd_progress_cb_data);
+        return;
+    }
     std::string progress = "  |";
     int max_progress     = 50;
     int32_t current      = (int32_t)(step * 1.f * max_progress / steps);
@@ -243,8 +250,9 @@ std::string trim(const std::string& s) {
     return rtrim(ltrim(s));
 }
 
-static sd_log_cb_t sd_log_cb = NULL;
-void* sd_log_cb_data         = NULL;
+static sd_log_cb_t sd_log_cb            = NULL;
+void* sd_log_cb_data                    = NULL;
+
 
 #define LOG_BUFFER_SIZE 1024
 
@@ -281,7 +289,10 @@ void sd_set_log_callback(sd_log_cb_t cb, void* data) {
     sd_log_cb      = cb;
     sd_log_cb_data = data;
 }
-
+void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
+    sd_progress_cb      = cb;
+    sd_progress_cb_data = data;
+}
 const char* sd_get_system_info() {
     static char buffer[1024];
     std::stringstream ss;

From 6ee1c65bfdf112d7183cc3a9a967deffd36e9df2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ferenc=20Szont=C3=A1gh?= <szf@fsociety.hu>
Date: Sat, 10 Feb 2024 15:46:03 +0100
Subject: [PATCH 2/6] some error handling for loras and embeddings

---
 clip.hpp             | 10 +++++++++-
 lora.hpp             | 15 ++++++++++++---
 stable-diffusion.cpp |  6 +++---
 vae.hpp              |  2 +-
 4 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/clip.hpp b/clip.hpp
index 546e944b5..4c9f7d49e 100644
--- a/clip.hpp
+++ b/clip.hpp
@@ -694,8 +694,12 @@ struct CLIPTextModel {
             LOG_ERROR("embedding '%s' failed", embd_name.c_str());
             return false;
         }
+        if (std::find(readed_embeddings.begin(), readed_embeddings.end(), embd_name) != readed_embeddings.end()) {
+            LOG_DEBUG("embedding already readed in: %s", embd_name.c_str());
+            return false;
+        }
         struct ggml_init_params params;
-        params.mem_size               = 32 * 1024;  // max for custom embeddings 32 KB
+        params.mem_size               = 256 * 1024;  // max for custom embeddings 256 KB
         params.mem_buffer             = NULL;
         params.no_alloc               = false;
         struct ggml_context* embd_ctx = ggml_init(params);
@@ -709,7 +713,11 @@ struct CLIPTextModel {
             *dst_tensor = embd;
             return true;
         };
+
         model_loader.load_tensors(on_load, NULL);
+        if (ggml_nbytes(token_embed_custom) <= (num_custom_embeddings * hidden_size * ggml_type_size(token_embed_custom->type)) + ggml_nbytes(embd)) {
+            return false;
+        }
         ggml_backend_tensor_set(token_embed_custom, embd->data, num_custom_embeddings * hidden_size * ggml_type_size(token_embed_custom->type), ggml_nbytes(embd));
         readed_embeddings.push_back(embd_name);
         for (int i = 0; i < embd->ne[1]; i++) {
diff --git a/lora.hpp b/lora.hpp
index 5587b3af5..f2fb28a97 100644
--- a/lora.hpp
+++ b/lora.hpp
@@ -3,7 +3,7 @@
 
 #include "ggml_extend.hpp"
 
-#define LORA_GRAPH_SIZE 10240
+#define LORA_GRAPH_SIZE 40960
 
 struct LoraModel : public GGMLModule {
     float multiplier = 1.0f;
@@ -28,6 +28,15 @@ struct LoraModel : public GGMLModule {
         return model_loader.cal_mem_size(NULL);
     }
 
+    static inline int ggml_n_dims_t(const struct TensorStorage tensor) {
+        for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
+            if (tensor.ne[i] > 1) {
+                return i + 1;
+            }
+        }
+        return 1;
+    }
+
     bool load_from_file(ggml_backend_t backend) {
         if (!alloc_params_buffer(backend)) {
             return false;
@@ -44,7 +53,7 @@ struct LoraModel : public GGMLModule {
         auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
             const std::string& name = tensor_storage.name;
 
-            struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, tensor_storage.n_dims, tensor_storage.ne);
+            struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, this->ggml_n_dims_t(tensor_storage), tensor_storage.ne);
             ggml_allocr_alloc(alloc, real);
 
             *dst_tensor = real;
@@ -54,7 +63,7 @@ struct LoraModel : public GGMLModule {
         };
 
         model_loader.load_tensors(on_new_tensor_cb, backend);
-
+        
         LOG_DEBUG("finished loaded lora");
         ggml_allocr_free(alloc);
         return true;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 8dd5f16e5..eb9b1eca6 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -222,7 +222,7 @@ class StableDiffusionGGML {
         }
 
         struct ggml_init_params params;
-        params.mem_size   = static_cast<size_t>(10 * 1024) * 1024;  // 10M
+        params.mem_size   = static_cast<size_t>(10 * 2048) * 1024;  // 10M
         params.mem_buffer = NULL;
         params.no_alloc   = false;
         // LOG_DEBUG("mem_size %u ", params.mem_size);
@@ -1232,7 +1232,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
     int64_t t1 = ggml_time_ms();
     LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
     struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(10 * 1024 * 1024);  // 10 MB
+    params.mem_size = static_cast<size_t>(20 * 1024 * 1024);  // 10 MB
     params.mem_size += width * height * 3 * sizeof(float);
     params.mem_size *= batch_count;
     params.mem_buffer = NULL;
@@ -1377,7 +1377,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
     sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end());
 
     struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(10 * 1024) * 1024;  // 10 MB
+    params.mem_size = static_cast<size_t>(20 * 1024) * 1024;  // 10 MB
     params.mem_size += width * height * 3 * sizeof(float) * 2;
     params.mem_buffer = NULL;
     params.no_alloc   = false;
diff --git a/vae.hpp b/vae.hpp
index f78777f95..0dca7b34b 100644
--- a/vae.hpp
+++ b/vae.hpp
@@ -6,7 +6,7 @@
 
 /*================================================== AutoEncoderKL ===================================================*/
 
-#define VAE_GRAPH_SIZE 10240
+#define VAE_GRAPH_SIZE 20480
 
 struct ResnetBlock {
     // network hparams

From 0e834e75619953e265fb368f79db0294a78a6dac Mon Sep 17 00:00:00 2001
From: fszontagh <szf@fsociety.hu>
Date: Sat, 10 Feb 2024 20:03:37 +0100
Subject: [PATCH 3/6] formatting mistakes

---
 clip.hpp | 2 +-
 lora.hpp | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/clip.hpp b/clip.hpp
index 4c9f7d49e..d15f54e73 100644
--- a/clip.hpp
+++ b/clip.hpp
@@ -695,7 +695,7 @@ struct CLIPTextModel {
             return false;
         }
         if (std::find(readed_embeddings.begin(), readed_embeddings.end(), embd_name) != readed_embeddings.end()) {
-            LOG_DEBUG("embedding already readed in: %s", embd_name.c_str());
+            LOG_DEBUG("embedding already read in: %s", embd_name.c_str());
             return false;
         }
         struct ggml_init_params params;
diff --git a/lora.hpp b/lora.hpp
index f2fb28a97..716379d48 100644
--- a/lora.hpp
+++ b/lora.hpp
@@ -63,7 +63,6 @@ struct LoraModel : public GGMLModule {
         };
 
         model_loader.load_tensors(on_new_tensor_cb, backend);
-        
         LOG_DEBUG("finished loaded lora");
         ggml_allocr_free(alloc);
         return true;

From fc8be44f16bf5db3179ed4e86a50368f189e7f2f Mon Sep 17 00:00:00 2001
From: fszontagh <szf@fsociety.hu>
Date: Thu, 15 Feb 2024 07:36:55 +0100
Subject: [PATCH 4/6] #175 #154 #141 - remove lora mapping which prevents RAM
 from being released

---
 stable-diffusion.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index eb9b1eca6..5e8791de5 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -72,7 +72,6 @@ class StableDiffusionGGML {
     std::string lora_model_dir;
     // lora_name => multiplier
     std::unordered_map<std::string, float> curr_lora_state;
-    std::map<std::string, LoraModel> loras;
 
     std::shared_ptr<Denoiser> denoiser = std::make_shared<CompVisDenoiser>();
     ggml_backend_t backend             = NULL;  // general backend
@@ -395,7 +394,6 @@ class StableDiffusionGGML {
 
         lora.multiplier = multiplier;
         lora.apply(tensors, n_threads);
-        loras[lora_name] = lora;
         lora.free_params_buffer();
 
         int64_t t1 = ggml_time_ms();

From fecdbccab0b1e47265923cd5610e526a640daf70 Mon Sep 17 00:00:00 2001
From: fszontagh <51741446+fszontagh@users.noreply.github.com>
Date: Mon, 26 Feb 2024 08:52:39 +0100
Subject: [PATCH 5/6] restored graphs and buffer sizes

---
 lora.hpp             | 4 ++--
 stable-diffusion.cpp | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lora.hpp b/lora.hpp
index 785d36c1b..82f90a811 100644
--- a/lora.hpp
+++ b/lora.hpp
@@ -3,7 +3,7 @@
 
 #include "ggml_extend.hpp"
 
-#define LORA_GRAPH_SIZE 40960
+#define LORA_GRAPH_SIZE 10240
 
 struct LoraModel : public GGMLModule {
     float multiplier = 1.0f;
@@ -56,7 +56,7 @@ struct LoraModel : public GGMLModule {
         auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
             const std::string& name = tensor_storage.name;
 
-            struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, this->ggml_n_dims_t(tensor_storage), tensor_storage.ne);
+            struct ggml_tensor* real = ggml_new_tensor(params_ctx, tensor_storage.type, ggml_n_dims_t(tensor_storage), tensor_storage.ne);
             ggml_allocr_alloc(alloc, real);
 
             *dst_tensor = real;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index e6a40b415..be32f7f6e 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -238,7 +238,7 @@ class StableDiffusionGGML {
         }
 
         struct ggml_init_params params;
-        params.mem_size   = static_cast<size_t>(10 * 2048) * 1024;  // 10M
+        params.mem_size   = static_cast<size_t>(10 * 1024) * 1024;  // 10M
         params.mem_buffer = NULL;
         params.no_alloc   = false;
         // LOG_DEBUG("mem_size %u ", params.mem_size);
@@ -1332,7 +1332,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
     int64_t t1 = ggml_time_ms();
     LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
     struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(20 * 1024 * 1024);  // 10 MB
+    params.mem_size = static_cast<size_t>(10 * 1024 * 1024);  // 10 MB
     params.mem_size += width * height * 3 * sizeof(float);
     params.mem_size *= batch_count;
     params.mem_buffer = NULL;
@@ -1491,7 +1491,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
     sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end());
 
     struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(20 * 1024) * 1024;  // 10 MB
+    params.mem_size = static_cast<size_t>(10 * 1024) * 1024;  // 10 MB
     params.mem_size += width * height * 3 * sizeof(float) * 2;
     params.mem_buffer = NULL;
     params.no_alloc   = false;

From af449d301f9be6896c970824ff88191cfc8852f3 Mon Sep 17 00:00:00 2001
From: fszontagh <51741446+fszontagh@users.noreply.github.com>
Date: Tue, 27 Feb 2024 17:55:12 +0100
Subject: [PATCH 6/6] removed ggml_n_dims_t

---
 lora.hpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/lora.hpp b/lora.hpp
index e5dfc8e59..7eb42e100 100644
--- a/lora.hpp
+++ b/lora.hpp
@@ -33,14 +33,6 @@ struct LoraModel : public GGMLModule {
         return model_loader.get_params_mem_size(NULL);
     }
 
-    static inline int ggml_n_dims_t(const struct TensorStorage tensor) {
-        for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
-            if (tensor.ne[i] > 1) {
-                return i + 1;
-            }
-        }
-        return 1;
-    }
 
     bool load_from_file() {
         LOG_INFO("loading LoRA from '%s'", file_path.c_str());
@@ -57,7 +49,7 @@ struct LoraModel : public GGMLModule {
             if (dry_run) {
                 struct ggml_tensor* real = ggml_new_tensor(params_ctx,
                                                            tensor_storage.type,
-                                                           ggml_n_dims_t(tensor_storage),
+                                                           tensor_storage.n_dims,
                                                            tensor_storage.ne);
                 lora_tensors[name]       = real;
             } else {