@@ -935,240 +935,4 @@ struct ChromaT5Embedder : public Conditioner {
935
935
GGML_ASSERT (0 && " Not implemented yet!" );
936
936
return " " ;
937
937
}
938
- };
939
-
940
- -> #ifndef __DIFFUSION_MODEL_H__
941
- #define __DIFFUSION_MODEL_H__
942
-
943
- #include " flux.hpp"
944
- #include " mmdit.hpp"
945
- #include " unet.hpp"
946
- #include " chroma.hpp"
947
- #include " ggml_extend.hpp" // Required for set_timestep_embedding
948
-
949
// Abstract interface over the concrete denoiser backbones (UNet, MMDiT,
// Flux, Chroma). Callers interact with a model solely through this type.
struct DiffusionModel {
    // Polymorphic base: a virtual destructor is required so that deleting a
    // concrete model through a DiffusionModel* is well-defined.
    virtual ~DiffusionModel() = default;

    // Run one forward pass of the diffusion backbone.
    //   x                - input latent tensor
    //   timesteps        - diffusion timestep(s) for this step
    //   context          - conditioning tensor (presumably text embeddings — confirm with callers)
    //   c_concat         - channel-concat conditioning (may be unused by some backbones)
    //   y                - vector conditioning (e.g. pooled embedding / ADM input)
    //   guidance         - guidance tensor (only some backbones consume it)
    //   num_video_frames - frame count for video models; -1 for images
    //   controls         - ControlNet-style residuals, scaled by control_strength
    //   output           - out-param receiving the result tensor
    //   output_ctx       - ggml context the output is allocated in
    //   skip_layers      - layer indices to skip (SLG); ignored by some backbones
    virtual void compute(int n_threads,
                         struct ggml_tensor* x,
                         struct ggml_tensor* timesteps,
                         struct ggml_tensor* context,
                         struct ggml_tensor* c_concat,
                         struct ggml_tensor* y,
                         struct ggml_tensor* guidance,
                         int num_video_frames                      = -1,
                         std::vector<struct ggml_tensor*> controls = {},
                         float control_strength                    = 0.f,
                         struct ggml_tensor** output               = NULL,
                         struct ggml_context* output_ctx           = NULL,
                         std::vector<int> skip_layers              = std::vector<int>()) = 0;
    // Parameter-buffer lifecycle management, forwarded to the underlying runner.
    virtual void alloc_params_buffer()                                                   = 0;
    virtual void free_params_buffer()                                                    = 0;
    virtual void free_compute_buffer()                                                   = 0;
    // Collect this model's named weight tensors into `tensors` for loading.
    virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors)  = 0;
    virtual size_t get_params_buffer_size()                                              = 0;
    // Width of the ADM (class/pooled) conditioning input expected by the model.
    virtual int64_t get_adm_in_channels()                                                = 0;
};
970
-
971
- struct UNetModel : public DiffusionModel {
972
- UNetModelRunner unet;
973
-
974
- UNetModel (ggml_backend_t backend,
975
- std::map<std::string, enum ggml_type>& tensor_types,
976
- SDVersion version = VERSION_SD1,
977
- bool flash_attn = false )
978
- : unet(backend, tensor_types, " model.diffusion_model" , version, flash_attn) {
979
- }
980
-
981
- void alloc_params_buffer () {
982
- unet.alloc_params_buffer ();
983
- }
984
-
985
- void free_params_buffer () {
986
- unet.free_params_buffer ();
987
- }
988
-
989
- void free_compute_buffer () {
990
- unet.free_compute_buffer ();
991
- }
992
-
993
- void get_param_tensors (std::map<std::string, struct ggml_tensor *>& tensors) {
994
- unet.get_param_tensors (tensors, " model.diffusion_model" );
995
- }
996
-
997
- size_t get_params_buffer_size () {
998
- return unet.get_params_buffer_size ();
999
- }
1000
-
1001
- int64_t get_adm_in_channels () {
1002
- return unet.unet .adm_in_channels ;
1003
- }
1004
-
1005
- void compute (int n_threads,
1006
- struct ggml_tensor * x,
1007
- struct ggml_tensor * timesteps,
1008
- struct ggml_tensor * context,
1009
- struct ggml_tensor * c_concat,
1010
- struct ggml_tensor * y,
1011
- struct ggml_tensor * guidance,
1012
- int num_video_frames = -1 ,
1013
- std::vector<struct ggml_tensor *> controls = {},
1014
- float control_strength = 0 .f,
1015
- struct ggml_tensor ** output = NULL ,
1016
- struct ggml_context * output_ctx = NULL ,
1017
- std::vector<int > skip_layers = std::vector<int >()) {
1018
- (void )skip_layers; // SLG doesn't work with UNet models
1019
- return unet.compute (n_threads, x, timesteps, context, c_concat, y, num_video_frames, controls, control_strength, output, output_ctx);
1020
- }
1021
- };
1022
-
1023
- struct MMDiTModel : public DiffusionModel {
1024
- MMDiTRunner mmdit;
1025
-
1026
- MMDiTModel (ggml_backend_t backend,
1027
- std::map<std::string, enum ggml_type>& tensor_types)
1028
- : mmdit(backend, tensor_types, " model.diffusion_model" ) {
1029
- }
1030
-
1031
- void alloc_params_buffer () {
1032
- mmdit.alloc_params_buffer ();
1033
- }
1034
-
1035
- void free_params_buffer () {
1036
- mmdit.free_params_buffer ();
1037
- }
1038
-
1039
- void free_compute_buffer () {
1040
- mmdit.free_compute_buffer ();
1041
- }
1042
-
1043
- void get_param_tensors (std::map<std::string, struct ggml_tensor *>& tensors) {
1044
- mmdit.get_param_tensors (tensors, " model.diffusion_model" );
1045
- }
1046
-
1047
- size_t get_params_buffer_size () {
1048
- return mmdit.get_params_buffer_size ();
1049
- }
1050
-
1051
- int64_t get_adm_in_channels () {
1052
- return 768 + 1280 ;
1053
- }
1054
-
1055
- void compute (int n_threads,
1056
- struct ggml_tensor * x,
1057
- struct ggml_tensor * timesteps,
1058
- struct ggml_tensor * context,
1059
- struct ggml_tensor * c_concat,
1060
- struct ggml_tensor * y,
1061
- struct ggml_tensor * guidance,
1062
- int num_video_frames = -1 ,
1063
- std::vector<struct ggml_tensor *> controls = {},
1064
- float control_strength = 0 .f,
1065
- struct ggml_tensor ** output = NULL ,
1066
- struct ggml_context * output_ctx = NULL ,
1067
- std::vector<int > skip_layers = std::vector<int >()) {
1068
- return mmdit.compute (n_threads, x, timesteps, context, y, output, output_ctx, skip_layers);
1069
- }
1070
- };
1071
-
1072
- struct FluxModel : public DiffusionModel {
1073
- Flux::FluxRunner flux;
1074
-
1075
- FluxModel (ggml_backend_t backend,
1076
- std::map<std::string, enum ggml_type>& tensor_types,
1077
- SDVersion version = VERSION_FLUX,
1078
- bool flash_attn = false )
1079
- : flux(backend, tensor_types, " model.diffusion_model" , version, flash_attn) {
1080
- }
1081
-
1082
- void alloc_params_buffer () {
1083
- flux.alloc_params_buffer ();
1084
- }
1085
-
1086
- void free_params_buffer () {
1087
- flux.free_params_buffer ();
1088
- }
1089
-
1090
- void free_compute_buffer () {
1091
- flux.free_compute_buffer ();
1092
- }
1093
-
1094
- void get_param_tensors (std::map<std::string, struct ggml_tensor *>& tensors) {
1095
- flux.get_param_tensors (tensors, " model.diffusion_model" );
1096
- }
1097
-
1098
- size_t get_params_buffer_size () {
1099
- return flux.get_params_buffer_size ();
1100
- }
1101
-
1102
- int64_t get_adm_in_channels () {
1103
- return 768 ;
1104
- }
1105
-
1106
- void compute (int n_threads,
1107
- struct ggml_tensor * x,
1108
- struct ggml_tensor * timesteps,
1109
- struct ggml_tensor * context,
1110
- struct ggml_tensor * c_concat,
1111
- struct ggml_tensor * y,
1112
- struct ggml_tensor * guidance,
1113
- int num_video_frames = -1 ,
1114
- std::vector<struct ggml_tensor *> controls = {},
1115
- float control_strength = 0 .f,
1116
- struct ggml_tensor ** output = NULL ,
1117
- struct ggml_context * output_ctx = NULL ,
1118
- std::vector<int > skip_layers = std::vector<int >()) {
1119
- return flux.compute (n_threads, x, timesteps, context, c_concat, y, guidance, output, output_ctx, skip_layers);
1120
- }
1121
- };
1122
-
1123
- struct ChromaModel : public DiffusionModel {
1124
- Chroma::ChromaRunner chroma;
1125
-
1126
- ChromaModel (ggml_backend_t backend,
1127
- std::map<std::string, enum ggml_type>& tensor_types,
1128
- SDVersion version = VERSION_CHROMA,
1129
- bool flash_attn = false )
1130
- : chroma(backend, tensor_types, " model.diffusion_model" ,flash_attn) {
1131
- }
1132
-
1133
- void alloc_params_buffer () {
1134
- chroma.alloc_params_buffer ();
1135
- }
1136
-
1137
- void free_params_buffer () {
1138
- chroma.free_params_buffer ();
1139
- }
1140
-
1141
- void free_compute_buffer () {
1142
- chroma.free_compute_buffer ();
1143
- }
1144
-
1145
- void get_param_tensors (std::map<std::string, struct ggml_tensor *>& tensors) {
1146
- chroma.get_param_tensors (tensors, " model.diffusion_model" );
1147
- }
1148
-
1149
- size_t get_params_buffer_size () {
1150
- return chroma.get_params_buffer_size ();
1151
- }
1152
- int64_t get_adm_in_channels () {
1153
- return 768 ;
1154
- }
1155
-
1156
-
1157
- void compute (int n_threads,
1158
- struct ggml_tensor * x,
1159
- struct ggml_tensor * timesteps,
1160
- struct ggml_tensor * context,
1161
- struct ggml_tensor * c_concat,
1162
- struct ggml_tensor * y,
1163
- struct ggml_tensor * guidance,
1164
- int num_video_frames = -1 ,
1165
- std::vector<struct ggml_tensor *> controls = {},
1166
- float control_strength = 0 .f,
1167
- struct ggml_tensor ** output = NULL ,
1168
- struct ggml_context * output_ctx = NULL ,
1169
- std::vector<int > skip_layers = std::vector<int >()) {
1170
- return chroma.compute (n_threads, x, timesteps, context, c_concat, y, guidance, output, output_ctx, skip_layers);
1171
- }
1172
- };
1173
-
1174
- #endif
938
+ };
0 commit comments