@@ -97,6 +97,8 @@ struct SDParams {
97
97
bool clip_on_cpu = false ;
98
98
bool vae_on_cpu = false ;
99
99
bool diffusion_flash_attn = false ;
100
+ bool diffusion_conv_direct = false ;
101
+ bool vae_conv_direct = false ;
100
102
bool canny_preprocess = false ;
101
103
bool color = false ;
102
104
int upscale_repeats = 1 ;
@@ -142,6 +144,8 @@ void print_params(SDParams params) {
142
144
printf (" controlnet cpu: %s\n " , params.control_net_cpu ? " true" : " false" );
143
145
printf (" vae decoder on cpu:%s\n " , params.vae_on_cpu ? " true" : " false" );
144
146
printf (" diffusion flash attention:%s\n " , params.diffusion_flash_attn ? " true" : " false" );
147
+ printf (" diffusion Conv2d direct:%s\n " , params.diffusion_conv_direct ? " true" : " false" );
148
+ printf (" vae Conv2d direct:%s\n " , params.vae_conv_direct ? " true" : " false" );
145
149
printf (" strength(control): %.2f\n " , params.control_strength );
146
150
printf (" prompt: %s\n " , params.prompt .c_str ());
147
151
printf (" negative_prompt: %s\n " , params.negative_prompt .c_str ());
@@ -232,6 +236,10 @@ void print_usage(int argc, const char* argv[]) {
232
236
printf (" --diffusion-fa use flash attention in the diffusion model (for low vram)\n " );
233
237
printf (" Might lower quality, since it implies converting k and v to f16.\n " );
234
238
printf (" This might crash if it is not supported by the backend.\n " );
239
+ printf (" --diffusion-conv-direct use Conv2d direct in the diffusion model" );
240
+ printf (" This might crash if it is not supported by the backend.\n " );
241
+ printf (" --vae-conv-direct use Conv2d direct in the vae model (should improve the performance)" );
242
+ printf (" This might crash if it is not supported by the backend.\n " );
235
243
printf (" --control-net-cpu keep controlnet in cpu (for low vram)\n " );
236
244
printf (" --canny apply canny preprocessor (edge detection)\n " );
237
245
printf (" --color colors the logging tags according to level\n " );
@@ -422,6 +430,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
422
430
{" " , " --clip-on-cpu" , " " , true , &params.clip_on_cpu },
423
431
{" " , " --vae-on-cpu" , " " , true , &params.vae_on_cpu },
424
432
{" " , " --diffusion-fa" , " " , true , &params.diffusion_flash_attn },
433
+ {" " , " --diffusion-conv-direct" , " " , true , &params.diffusion_conv_direct },
434
+ {" " , " --vae-conv-direct" , " " , true , &params.vae_conv_direct },
425
435
{" " , " --canny" , " " , true , &params.canny_preprocess },
426
436
{" -v" , " --verbos" , " " , true , &params.verbose },
427
437
{" " , " --color" , " " , true , &params.color },
@@ -901,6 +911,8 @@ int main(int argc, const char* argv[]) {
901
911
params.control_net_cpu ,
902
912
params.vae_on_cpu ,
903
913
params.diffusion_flash_attn ,
914
+ params.diffusion_conv_direct ,
915
+ params.vae_conv_direct ,
904
916
params.chroma_use_dit_mask ,
905
917
params.chroma_use_t5_mask ,
906
918
params.chroma_t5_mask_pad ,
@@ -1012,7 +1024,8 @@ int main(int argc, const char* argv[]) {
1012
1024
int upscale_factor = 4 ; // unused for RealESRGAN_x4plus_anime_6B.pth
1013
1025
if (params.esrgan_path .size () > 0 && params.upscale_repeats > 0 ) {
1014
1026
upscaler_ctx_t * upscaler_ctx = new_upscaler_ctx (params.esrgan_path .c_str (),
1015
- params.n_threads );
1027
+ params.n_threads ,
1028
+ params.diffusion_conv_direct );
1016
1029
1017
1030
if (upscaler_ctx == NULL ) {
1018
1031
printf (" new_upscaler_ctx failed\n " );
0 commit comments