From 22e48d802795e54e7784e5280775c6996bd6f2cc Mon Sep 17 00:00:00 2001 From: Green Sky Date: Mon, 26 Aug 2024 14:44:45 +0200 Subject: [PATCH 1/3] fix and improve: VAE tiling - properly handle the upper left corner interpolating both x and y - refactor out lerp - use smootherstep to preserve more detail and spend less area blending --- ggml_extend.hpp | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 09e4fcb21..f1b3752ac 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -349,6 +349,16 @@ __STATIC_INLINE__ void ggml_split_tensor_2d(struct ggml_tensor* input, } } +__STATIC_INLINE__ float ggml_lerp_f32(const float a, const float b, const float x) { + return (1 - x) * a + x * b; +} + +// unclamped -> expects x in the range [0-1] +__STATIC_INLINE__ float ggml_smootherstep_f32(const float x) { + GGML_ASSERT(x >= 0.f && x <= 1.f); + return x * x * x * (x * (6.0f * x - 15.0f) + 10.0f); +} + __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input, struct ggml_tensor* output, int x, @@ -364,12 +374,33 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input, float new_value = ggml_tensor_get_f32(input, ix, iy, k); if (overlap > 0) { // blend colors in overlapped area float old_value = ggml_tensor_get_f32(output, x + ix, y + iy, k); - if (x > 0 && ix < overlap) { // in overlapped horizontal - ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (ix / (1.0f * overlap)), x + ix, y + iy, k); + const bool inside_x_overlap = x > 0 && ix < overlap; + const bool inside_y_overlap = y > 0 && iy < overlap; + if (inside_x_overlap && inside_y_overlap) { + // upper left corner needs to be interpolated in both directions + const float x_f = ix / float(overlap); + const float y_f = iy / float(overlap); + // TODO: try `x+y - 1` + const float f = std::min(x_f, y_f); // min of both + ggml_tensor_set_f32( + output, + ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(f)), + x + ix, y + iy, k + ); continue; - } - if (y > 0 && iy < overlap) { // in overlapped vertical - ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (iy / (1.0f * overlap)), x + ix, y + iy, k); + } else if (inside_x_overlap) { + ggml_tensor_set_f32( + output, + ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(ix / float(overlap))), + x + ix, y + iy, k + ); + continue; + } else if (inside_y_overlap) { + ggml_tensor_set_f32( + output, + ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(iy / float(overlap))), + x + ix, y + iy, k + ); continue; } } From 03afc83a3ba31382c305e4e656b169d1d733088f Mon Sep 17 00:00:00 2001 From: Green Sky Date: Mon, 26 Aug 2024 19:59:31 +0200 Subject: [PATCH 2/3] actually fix vae tile merging Co-authored-by: stduhpf --- ggml_extend.hpp | 50 ++++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index f1b3752ac..cfa9b5e72 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -367,6 +367,10 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input, int64_t width = input->ne[0]; int64_t height = input->ne[1]; int64_t channels = input->ne[2]; + + int64_t img_width = output->ne[0]; + int64_t img_height = output->ne[1]; + GGML_ASSERT(input->type == GGML_TYPE_F32 && output->type == GGML_TYPE_F32); for (int iy = 0; iy < height; iy++) { for (int ix = 0; ix < width; ix++) { @@ -374,37 +378,23 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input, float new_value = ggml_tensor_get_f32(input, ix, iy, k); if (overlap > 0) { // blend colors in overlapped area float old_value = ggml_tensor_get_f32(output, x + ix, y + iy, k); - const bool inside_x_overlap = x > 0 && ix < overlap; - const bool inside_y_overlap = y > 0 && iy < overlap; - if (inside_x_overlap && inside_y_overlap) { - // upper left corner needs to be interpolated in both directions - const float x_f = ix / float(overlap); - const float y_f = iy / float(overlap); - // TODO: try `x+y - 1` - const float f = std::min(x_f, y_f); // min of both - ggml_tensor_set_f32( - output, - ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(f)), - x + ix, y + iy, k - ); - continue; - } else if (inside_x_overlap) { - ggml_tensor_set_f32( - output, - ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(ix / float(overlap))), - x + ix, y + iy, k - ); - continue; - } else if (inside_y_overlap) { - ggml_tensor_set_f32( - output, - ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(iy / float(overlap))), - x + ix, y + iy, k - ); - continue; - } + + const float x_f_0 = (x > 0) ? ix / float(overlap) : 1; + const float x_f_1 = (x < (img_width - width)) ? (width - ix) / float(overlap) : 1 ; + const float y_f_0 = (y > 0) ? iy / float(overlap) : 1; + const float y_f_1 = (y < (img_height - height)) ? (height - iy) / float(overlap) : 1; + + const float x_f = std::min(std::min(x_f_0, x_f_1), 1.f); + const float y_f = std::min(std::min(y_f_0, y_f_1), 1.f); + + ggml_tensor_set_f32( + output, + old_value + new_value * ggml_smootherstep_f32(y_f) * ggml_smootherstep_f32(x_f), + x + ix, y + iy, k + ); + } else { + ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k); } - ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k); } } } From b593aac504a9953096cef5e92b855c8e8c67ec4b Mon Sep 17 00:00:00 2001 From: Green Sky Date: Tue, 27 Aug 2024 09:01:47 +0200 Subject: [PATCH 3/3] remove the now unused lerp function --- ggml_extend.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index cfa9b5e72..ebb8382ba 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -349,10 +349,6 @@ __STATIC_INLINE__ void ggml_split_tensor_2d(struct ggml_tensor* input, } } -__STATIC_INLINE__ float ggml_lerp_f32(const float a, const float b, const float x) { - return (1 - x) * a + x * b; -} - // unclamped -> expects x in the range [0-1] __STATIC_INLINE__ float ggml_smootherstep_f32(const float x) { GGML_ASSERT(x >= 0.f && x <= 1.f);