Commit ee17ccb

feat: update Chroma model integration and streamline parameter initialization
1 parent 32a4cfb commit ee17ccb

4 files changed: +70 −152 lines changed

chroma.hpp

Lines changed: 7 additions & 51 deletions
@@ -181,13 +181,9 @@ struct Approximator_ggml : public UnaryBlock {
             blocks["layers." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new MLPEmbedder(hidden_dim, hidden_dim));
             blocks["norms." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new RMSNorm(hidden_dim));
         }
-        blocks["out_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim, out_dim));
+        blocks["out_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_dim , out_dim));
     }

-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") override {
-        // Rely on the base class to initialize nested blocks
-        UnaryBlock::init_params(ctx, tensor_types, prefix);
-    }

     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* timestep) {
         // Implement forward pass based on the pseudo-code in the plan (Phase 2.2)
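
The init_params overrides removed throughout this file only forwarded to their base class, so dropping them is behavior-preserving: a derived block with no override simply inherits the base implementation. A minimal, self-contained sketch of that C++ mechanic; the class names below are stand-ins, not the repository's actual GGMLBlock/UnaryBlock types:

    #include <iostream>
    #include <string>

    // Stand-in for the base class that owns the recursive parameter initialization.
    struct Base {
        virtual ~Base() = default;
        virtual void init_params(const std::string& prefix) {
            std::cout << "Base::init_params(\"" << prefix << "\")\n";
        }
    };

    // No init_params override here: calls dispatch to Base::init_params,
    // which is exactly what the deleted pass-through overrides did by hand.
    struct Derived : Base {};

    int main() {
        Derived d;
        d.init_params("distilled_guidance_layer.in_proj");
        return 0;
    }
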
@@ -243,10 +239,6 @@ struct QKNorm : public GGMLBlock {
         return norm->forward(ctx, x);
     }

-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") override {
-        // Rely on the base class to initialize nested blocks
-        GGMLBlock::init_params(ctx, tensor_types, prefix);
-    }
 };

 // Based on the plan (Phase 1.2 and 2.5)
@@ -262,12 +254,6 @@ struct SelfAttention : public GGMLBlock {
         blocks["norm"] = std::shared_ptr<GGMLBlock>(new QKNorm(head_dim));
         blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim));
     }
-
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") override {
-        // Rely on the base class to initialize nested blocks
-        GGMLBlock::init_params(ctx, tensor_types, prefix);
-    }
-
     std::vector<struct ggml_tensor*> pre_attention(struct ggml_context* ctx, struct ggml_tensor* x) {
         auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
         auto norm = std::dynamic_pointer_cast<QKNorm>(blocks["norm"]);
@@ -288,7 +274,7 @@ struct SelfAttention : public GGMLBlock {
         auto v = ggml_view_3d(ctx, qkv_split, dim, L, N, qkv_split->nb[1], qkv_split->nb[2], offset * 2); // [dim, L, N]

         // Reshape q, k, v for QKNorm and ggml_nn_attention_ext
-        // QKNorm expects [..., dim], ggml_nn_attention_ext expects [d_head, L, N*n_head] for q, k and [d_head, L, n_head, N] for v
+        // QKNorm expects [..., dim], ggml_nn_attention_ext expects [d_head, L, N*n_head] or similar depending on implementation
         // Let's reshape q, k, v to [d_head, L, N*n_head] and [d_head, L, n_head, N] respectively

         auto q_reshaped = ggml_reshape_3d(ctx, q, head_dim, L, N * num_heads); // [dim, L, N] -> [d_head, L, N*n_head]
@@ -470,10 +456,6 @@ struct SingleStreamBlock_ggml : public GGMLBlock {
         // Modulation block is created and called in the forward pass based on the plan
     }

-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") override {
-        GGMLBlock::init_params(ctx, tensor_types, prefix);
-    }
-
     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* pe, struct ModulationOut& mod, struct ggml_tensor* attn_mask = NULL) {
         // x: [N, L, hidden_size]
         // pe: Positional embeddings (shape TBD)
@@ -573,10 +555,6 @@ struct DoubleStreamBlock_ggml : public GGMLBlock { // DoubleStreamBlock forward
         blocks["txt_mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(mlp_hidden_dim, hidden_size));
     }

-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") override {
-        GGMLBlock::init_params(ctx, tensor_types, prefix);
-    }
-
     std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx, struct ggml_tensor* img, struct ggml_tensor* txt, struct ggml_tensor* pe, const std::vector<ModulationOut>& img_mods, const std::vector<ModulationOut>& txt_mods, struct ggml_tensor* attn_mask = NULL) {
         auto img_norm1 = std::dynamic_pointer_cast<LayerNorm>(blocks["img_norm1"]);
         auto img_attn = std::dynamic_pointer_cast<SelfAttention>(blocks["img_attn"]);
@@ -665,9 +643,6 @@ struct LastLayer_ggml : public GGMLBlock {
     }


-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") override {
-        GGMLBlock::init_params(ctx, tensor_types, prefix);
-    }

     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* shift, struct ggml_tensor* scale) {
         auto norm_final = std::dynamic_pointer_cast<LayerNorm>(blocks["norm_final"]);
@@ -718,7 +693,7 @@ struct ChromaUNet_ggml : public GGMLBlock {
         : hidden_size(hidden_size), num_heads(num_heads), mlp_ratio(mlp_ratio),
           depth(depth), single_depth(single_depth), in_channels(in_channels),
           out_channels(out_channels), flash_attn(flash_attn) {
-        blocks["approximator"] = std::shared_ptr<GGMLBlock>(new Approximator_ggml(1, hidden_size * 6, hidden_size)); // out_dim is hidden_size * 6 for 2 sets of scale/shift/gate
+        blocks["distilled_guidance_layer"] = std::shared_ptr<GGMLBlock>(new Approximator_ggml(64, hidden_size * 6 , hidden_size)); // out_dim is hidden_size * 6 for 2 sets of scale/shift/gate

         blocks["img_in"] = std::shared_ptr<GGMLBlock>(new Linear(in_channels, hidden_size, true));
         blocks["txt_in"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size, true)); // T5 embeddings are already hidden_size
@@ -734,9 +709,6 @@ struct ChromaUNet_ggml : public GGMLBlock {
         blocks["final_layer"] = std::shared_ptr<GGMLBlock>(new LastLayer_ggml(hidden_size, 1, out_channels)); // patch_size is 1 for Chroma
     }

-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") override {
-        GGMLBlock::init_params(ctx, tensor_types, prefix);
-    }

     struct ggml_tensor* forward(struct ggml_context* ctx,
                                 struct ggml_tensor* img_latent,
@@ -745,7 +717,7 @@ struct ChromaUNet_ggml : public GGMLBlock {
                                 struct ggml_tensor* pe,
                                 struct ggml_tensor* t5_padding_mask,
                                 std::vector<int> skip_layers = std::vector<int>()) {
-        auto approximator = std::dynamic_pointer_cast<Approximator_ggml>(blocks["approximator"]);
+        auto approximator = std::dynamic_pointer_cast<Approximator_ggml>(blocks["distilled_guidance_layer"]);
         auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
         auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
         auto final_layer = std::dynamic_pointer_cast<LastLayer_ggml>(blocks["final_layer"]);
@@ -804,10 +776,6 @@ struct ChromaUNet_ggml : public GGMLBlock {
         // This means approx_output needs to be split into one set of (scale, shift, gate).
         // This is inconsistent with the DoubleStreamBlock.

-        // Let's assume the `vec` for SingleStreamBlock is also from the Approximator.
-        // And we need a separate Modulation instance for it.
-        // This means the Approximator output needs to be even larger.
-
         // Re-reading the plan:
         // Approximator output: `conditioning signal (Tensor of shape [batch_size, 1, out_dim])`, likely split into scale, shift, gate.
         // SingleStreamBlock: `mod = vec`
@@ -817,19 +785,6 @@ struct ChromaUNet_ggml : public GGMLBlock {
         // Let's assume `approx_output` is the `vec` and it contains all necessary modulation parameters.
         // And the `Modulation` class will be used to extract them.

-        // Let's create a single Modulation instance for SingleStreamBlock.
-        // And pass the relevant part of approx_output to it.
-        // This means approx_output needs to be split into one set of (scale, shift, gate).
-
-        // This is getting complicated. Let's simplify.
-        // The `FluxModel` passes a single `vec` to all blocks.
-        // Let's assume `approx_output` is the single `vec` for all blocks.
-        // And each block will extract what it needs.
-
-        // For SingleStreamBlock, it needs one ModulationOut.
-        // So, we need to extract one ModulationOut from `approx_output`.
-        // This means `approx_output` should contain at least `hidden_size * 3`.
-
         // Let's assume `approx_output` is `hidden_size * 6` (for 2 sets of modulations).
         // And `SingleStreamBlock` will use the first set.
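
One plausible way to carve the packed hidden_size * 6 conditioning vector into two (shift, scale, gate) triples is with byte-offset views. The helper below is a sketch against ggml's view API only; the flat 1-D layout, the shift/scale/gate ordering, and the ModulationSet struct are assumptions for illustration, not something this diff pins down:

    #include "ggml.h"

    // Hypothetical container mirroring the ModulationOut idea in the comments above.
    struct ModulationSet {
        struct ggml_tensor* shift;
        struct ggml_tensor* scale;
        struct ggml_tensor* gate;
    };

    // Take modulation set 0 or 1 from an assumed packed [hidden_size * 6] vector.
    // ggml_view_1d takes an element count followed by a byte offset.
    static ModulationSet take_modulation(struct ggml_context* ctx,
                                         struct ggml_tensor* vec,
                                         int64_t hidden_size,
                                         int set_index) {
        size_t stride = hidden_size * ggml_element_size(vec);
        size_t base   = (size_t)set_index * 3 * stride;
        ModulationSet m;
        m.shift = ggml_view_1d(ctx, vec, hidden_size, base + 0 * stride);
        m.scale = ggml_view_1d(ctx, vec, hidden_size, base + 1 * stride);
        m.gate  = ggml_view_1d(ctx, vec, hidden_size, base + 2 * stride);
        return m;
    }

With such a layout, set_index 0 could feed the single-stream modulation and set_index 1 the second set, but the actual split used by this codebase may differ.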

@@ -909,7 +864,8 @@ struct ChromaRunner : public GGMLRunner {
     ChromaRunner(
         ggml_backend_t backend,
         std::map<std::string, enum ggml_type>& tensor_types,
-        bool flash_attn
+        const std::string prefix = "",
+        bool flash_attn = false
     ) :
         GGMLRunner(backend),
         chroma_params({
@@ -921,7 +877,7 @@ struct ChromaRunner : public GGMLRunner {
             chroma_params.in_channels, chroma_params.out_channels, chroma_params.flash_attn
         )
     {
-        chroma_unet.init_params(params_ctx, tensor_types);
+        chroma_unet.init(params_ctx, tensor_types,prefix);
     }

     std::string get_desc() override {
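
A possible call site for the widened constructor, using only the arguments visible in this hunk (both new parameters are defaulted); backend and tensor_types are assumed to come from the surrounding loader code:

    ChromaRunner chroma(backend, tensor_types, "model.diffusion_model", /*flash_attn=*/false);
    ChromaRunner chroma_defaults(backend, tensor_types);  // prefix "" and flash_attn false by default
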

diffusion_model.hpp

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@ struct ChromaModel : public DiffusionModel {
                 std::map<std::string, enum ggml_type>& tensor_types,
                 SDVersion version = VERSION_CHROMA,
                 bool flash_attn = false)
-        : chroma(backend, tensor_types,flash_attn) {
+        : chroma(backend, tensor_types, "model.diffusion_model",flash_attn) {
     }

     void alloc_params_buffer() {
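
Passing "model.diffusion_model" here is what lines the runner's tensor names up with the check added to model.cpp below. A rough sketch of the name composition this implies; the exact joining logic inside the block initialization path is an assumption:

    #include <iostream>
    #include <string>

    int main() {
        const std::string prefix = "model.diffusion_model";
        const std::string block  = "distilled_guidance_layer";
        const std::string param  = "in_proj.bias";
        // Assumed composition: prefix + "." + block + "." + param
        std::cout << prefix + "." + block + "." + param << "\n";
        // -> model.diffusion_model.distilled_guidance_layer.in_proj.bias
        return 0;
    }
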

model.cpp

Lines changed: 1 addition & 3 deletions
@@ -940,8 +940,6 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
         struct ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
         size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);

-        LOG_INFO("GGUF Tensor: %s, type: %s", name.c_str(), ggml_type_name(dummy->type)); // Added debug log
-
         // LOG_DEBUG("%s", name.c_str());

         TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
@@ -1492,7 +1490,7 @@ SDVersion ModelLoader::get_sd_version() {
 #define found_family (is_xl || is_flux)
     for (auto& tensor_storage : tensor_storages) {
         if (!found_family) {
-            if (tensor_storage.name.find("distilled_guidance_layer.in_proj.bias") != std::string::npos) {
+            if (tensor_storage.name.find("model.diffusion_model.distilled_guidance_layer.in_proj.bias") != std::string::npos) {
                 return VERSION_CHROMA;
             }
             if (tensor_storage.name.find("model.diffusion_model.double_blocks.") != std::string::npos) {
