Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 1f30b48

Browse files
committed
refactor: align ggml backend implementation
Signed-off-by: thxCode <[email protected]>
1 parent 17d2aa0 commit 1f30b48

File tree

8 files changed

+108
-37
lines changed

8 files changed

+108
-37
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,9 @@ jobs:
163163
- build: "avx512"
164164
defines: "-DGGML_AVX512=ON -DSD_BUILD_SHARED_LIBS=ON"
165165
- build: "cuda12"
166-
defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON"
166+
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON"
167167
- build: "rocm5.5"
168-
defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
168+
defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
169169
- build: 'vulkan'
170170
defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON"
171171
steps:

CMakeLists.txt

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
cmake_minimum_required(VERSION 3.12)
22
project("stable-diffusion")
33

4+
if (NOT TARGET ggml)
5+
cmake_policy(SET CMP0077 NEW)
6+
endif()
7+
48
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
59

610
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
@@ -24,19 +28,20 @@ endif()
2428
# general
2529
#option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
2630
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
27-
option(SD_CUBLAS "sd: cuda backend" OFF)
28-
option(SD_HIPBLAS "sd: rocm backend" OFF)
31+
option(SD_CUDA "sd: cuda backend" OFF)
32+
option(SD_HIP "sd: rocm backend" OFF)
2933
option(SD_METAL "sd: metal backend" OFF)
3034
option(SD_VULKAN "sd: vulkan backend" OFF)
3135
option(SD_SYCL "sd: sycl backend" OFF)
32-
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
36+
option(SD_CANN "sd: cann backend" OFF)
37+
option(SD_MUSA "sd: musa backend" OFF)
3338
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
3439
#option(SD_BUILD_SERVER "sd: build server example" ON)
3540

36-
if(SD_CUBLAS)
37-
message("-- Use CUBLAS as backend stable-diffusion")
41+
if(SD_CUDA)
42+
message("-- Use CUDA as backend stable-diffusion")
3843
set(GGML_CUDA ON)
39-
add_definitions(-DSD_USE_CUBLAS)
44+
add_definitions(-DSD_USE_CUDA)
4045
endif()
4146

4247
if(SD_METAL)
@@ -51,13 +56,22 @@ if (SD_VULKAN)
5156
add_definitions(-DSD_USE_VULKAN)
5257
endif ()
5358

54-
if (SD_HIPBLAS)
55-
message("-- Use HIPBLAS as backend stable-diffusion")
56-
set(GGML_HIPBLAS ON)
57-
add_definitions(-DSD_USE_CUBLAS)
58-
if(SD_FAST_SOFTMAX)
59-
set(GGML_CUDA_FAST_SOFTMAX ON)
60-
endif()
59+
if (SD_HIP)
60+
message("-- Use HIP as backend stable-diffusion")
61+
set(GGML_HIP ON)
62+
add_definitions(-DSD_USE_CUDA)
63+
endif ()
64+
65+
if (SD_CANN)
66+
message("-- Use CANN as backend stable-diffusion")
67+
set(GGML_CANN ON)
68+
add_definitions(-DSD_USE_CANN)
69+
endif ()
70+
71+
if (SD_MUSA)
72+
message("-- Use MUSA as backend stable-diffusion")
73+
set(GGML_MUSA ON)
74+
add_definitions(-DSD_USE_CUDA)
6175
endif ()
6276

6377
set(SD_LIB stable-diffusion)
@@ -98,8 +112,6 @@ if(SD_SYCL)
98112
target_compile_options(${SD_LIB} PRIVATE ${SYCL_COMPILE_OPTIONS})
99113
endif()
100114

101-
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
102-
103115
# see https://github.com/ggerganov/ggml/pull/682
104116
add_definitions(-DGGML_MAX_NAME=128)
105117

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ cmake --build . --config Release
118118
This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
119119
120120
```
121-
cmake .. -DSD_CUBLAS=ON
121+
cmake .. -DSD_CUDA=ON
122122
cmake --build . --config Release
123123
```
124124
@@ -128,7 +128,7 @@ This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure
128128
Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
129129
130130
```
131-
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
131+
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
132132
cmake --build . --config Release
133133
```
134134

docs/hipBLAS_on_Windows.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ set ninja=C:\Program Files\ninja\ninja.exe
4545
```
4646
## Building stable-diffusion.cpp
4747

48-
The thing different from the regular CPU build is `-DSD_HIPBLAS=ON` ,
48+
The thing different from the regular CPU build is `-DSD_HIP=ON` ,
4949
`-G "Ninja"`, `-DCMAKE_C_COMPILER=clang`, `-DCMAKE_CXX_COMPILER=clang++`, `-DAMDGPU_TARGETS=gfx1100`
5050

5151
>**Notice**: check the `clang` and `clang++` information:
@@ -78,7 +78,7 @@ option:
7878
```commandline
7979
mkdir build
8080
cd build
81-
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
81+
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
8282
cmake --build . --config Release
8383
```
8484

ggml_extend.hpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include "ggml-cpu.h"
2626
#include "ggml.h"
2727

28-
#ifdef SD_USE_CUBLAS
28+
#ifdef SD_USE_CUDA
2929
#include "ggml-cuda.h"
3030
#endif
3131

@@ -41,6 +41,14 @@
4141
#include "ggml-sycl.h"
4242
#endif
4343

44+
#ifdef SD_USE_CANN
45+
#include "ggml-cann.h"
46+
#endif
47+
48+
#ifdef SD_USE_MUSA
49+
#include "ggml-musa.h"
50+
#endif
51+
4452
#include "rng.hpp"
4553
#include "util.h"
4654

@@ -670,7 +678,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
670678
struct ggml_tensor* k,
671679
struct ggml_tensor* v,
672680
bool mask = false) {
673-
#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
681+
#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUDA) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL) && !defined(SD_USE_CANN) && !defined(SD_USE_MUSA)
674682
struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head]
675683
#else
676684
float d_head = (float)q->ne[0];
@@ -826,7 +834,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
826834
}
827835

828836
__STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
829-
#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
837+
#if defined(SD_USE_CUDA) || defined(SD_USE_SYCL)
830838
if (!ggml_backend_is_cpu(backend)) {
831839
ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
832840
ggml_backend_synchronize(backend);
@@ -1137,11 +1145,6 @@ struct GGMLRunner {
11371145
ggml_backend_cpu_set_n_threads(backend, n_threads);
11381146
}
11391147

1140-
#ifdef SD_USE_METAL
1141-
if (ggml_backend_is_metal(backend)) {
1142-
ggml_backend_metal_set_n_cb(backend, n_threads);
1143-
}
1144-
#endif
11451148
ggml_backend_graph_compute(backend, gf);
11461149

11471150
#ifdef GGML_PERF

model.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@
2626
#include "ggml-vulkan.h"
2727
#endif
2828

29+
#ifdef SD_USE_CANN
30+
#include "ggml-cann.h"
31+
#endif
32+
33+
#ifdef SD_USE_MUSA
34+
#include "ggml-musa.h"
35+
#endif
36+
2937
#define ST_HEADER_SIZE_LEN 8
3038

3139
uint64_t read_u64(uint8_t* buffer) {

stable-diffusion.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -159,27 +159,48 @@ class StableDiffusionGGML {
159159
bool vae_on_cpu,
160160
bool diffusion_flash_attn) {
161161
use_tiny_autoencoder = taesd_path.size() > 0;
162-
#ifdef SD_USE_CUBLAS
162+
#ifdef SD_USE_CUDA
163+
#ifdef SD_USE_HIP
164+
LOG_DEBUG("Using HIP backend");
165+
#else
166+
#ifdef SD_USE_MUSA
167+
LOG_DEBUG("Using MUSA backend");
168+
#else
163169
LOG_DEBUG("Using CUDA backend");
170+
#endif
171+
#endif
164172
backend = ggml_backend_cuda_init(0);
173+
if (!backend) {
174+
LOG_ERROR("CUDA backend init failed");
175+
}
165176
#endif
166177
#ifdef SD_USE_METAL
167178
LOG_DEBUG("Using Metal backend");
168-
ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
169179
backend = ggml_backend_metal_init();
180+
if (!backend) {
181+
LOG_ERROR("Metal backend init failed");
182+
}
170183
#endif
171184
#ifdef SD_USE_VULKAN
172185
LOG_DEBUG("Using Vulkan backend");
173-
for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
174-
backend = ggml_backend_vk_init(device);
175-
}
186+
backend = ggml_backend_vk_init(0);
176187
if (!backend) {
177-
LOG_WARN("Failed to initialize Vulkan backend");
188+
LOG_ERROR("Vulkan backend init failed");
178189
}
179190
#endif
180191
#ifdef SD_USE_SYCL
181192
LOG_DEBUG("Using SYCL backend");
182193
backend = ggml_backend_sycl_init(0);
194+
if (!backend) {
195+
LOG_ERROR("SYCL backend init failed");
196+
}
197+
#endif
198+
#ifdef SD_USE_CANN
199+
LOG_DEBUG("Using CANN backend");
200+
backend = ggml_backend_cann_init(0);
201+
if (!backend) {
202+
LOG_ERROR("CANN backend init failed");
203+
}
183204
#endif
184205

185206
if (!backend) {

upscaler.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,28 +15,55 @@ struct UpscalerGGML {
1515
}
1616

1717
bool load_from_file(const std::string& esrgan_path) {
18-
#ifdef SD_USE_CUBLAS
18+
#ifdef SD_USE_CUDA
19+
#ifdef SD_USE_HIP
20+
LOG_DEBUG("Using HIP backend");
21+
#else
22+
#ifdef SD_USE_MUSA
23+
LOG_DEBUG("Using MUSA backend");
24+
#else
1925
LOG_DEBUG("Using CUDA backend");
26+
#endif
27+
#endif
2028
backend = ggml_backend_cuda_init(0);
29+
if (!backend) {
30+
LOG_ERROR("CUDA backend init failed");
31+
}
2132
#endif
2233
#ifdef SD_USE_METAL
2334
LOG_DEBUG("Using Metal backend");
24-
ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
2535
backend = ggml_backend_metal_init();
36+
if (!backend) {
37+
LOG_ERROR("Metal backend init failed");
38+
}
2639
#endif
2740
#ifdef SD_USE_VULKAN
2841
LOG_DEBUG("Using Vulkan backend");
2942
backend = ggml_backend_vk_init(0);
43+
if (!backend) {
44+
LOG_ERROR("Vulkan backend init failed");
45+
}
3046
#endif
3147
#ifdef SD_USE_SYCL
3248
LOG_DEBUG("Using SYCL backend");
3349
backend = ggml_backend_sycl_init(0);
50+
if (!backend) {
51+
LOG_ERROR("SYCL backend init failed");
52+
}
53+
#endif
54+
#ifdef SD_USE_CANN
55+
LOG_DEBUG("Using CANN backend");
56+
backend = ggml_backend_cann_init(0);
57+
if (!backend) {
58+
LOG_ERROR("CANN backend init failed");
59+
}
3460
#endif
3561

3662
if (!backend) {
3763
LOG_DEBUG("Using CPU backend");
3864
backend = ggml_backend_cpu_init();
3965
}
66+
4067
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
4168
esrgan_upscaler = std::make_shared<ESRGAN>(backend, model_data_type);
4269
if (!esrgan_upscaler->load_from_file(esrgan_path)) {

0 commit comments

Comments
 (0)