[pull] master from comfyanonymous:master #11

Merged
merged 9 commits into master from comfyanonymous:master on Aug 9, 2024
37 changes: 37 additions & 0 deletions .github/workflows/pullrequest-ci-run.yml
@@ -0,0 +1,37 @@
# This is the GitHub Workflow that drives full-GPU-enabled tests of pull requests to ComfyUI, when the 'Run-CI-Test' label is added
# Results are reported as checkmarks on the commits, as well as onto https://ci.comfy.org/
name: Pull Request CI Workflow Runs
on:
  pull_request_target:
    types: [labeled]

jobs:
  pr-test-stable:
    if: ${{ github.event.label.name == 'Run-CI-Test' }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos, linux, windows]
        python_version: ["3.9", "3.10", "3.11", "3.12"]
        cuda_version: ["12.1"]
        torch_version: ["stable"]
        include:
          - os: macos
            runner_label: [self-hosted, macOS]
            flags: "--use-pytorch-cross-attention"
          - os: linux
            runner_label: [self-hosted, Linux]
            flags: ""
          - os: windows
            runner_label: [self-hosted, win]
            flags: ""
    runs-on: ${{ matrix.runner_label }}
    steps:
      - name: Test Workflows
        uses: comfy-org/comfy-action@main
        with:
          os: ${{ matrix.os }}
          python_version: ${{ matrix.python_version }}
          torch_version: ${{ matrix.torch_version }}
          google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
          comfyui_flags: ${{ matrix.flags }}
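
A side note, not part of the workflow file above: strategy.matrix fans the job out across every os x python_version combination, and each include entry layers a runner_label and ComfyUI flags onto the matching os. A rough Python sketch of that expansion, with illustrative names only:

```python
# Rough sketch of how the pr-test-stable matrix fans out into individual jobs.
from itertools import product

oses = ["macos", "linux", "windows"]
pythons = ["3.9", "3.10", "3.11", "3.12"]
extras = {  # mirrors the `include:` entries above
    "macos": {"runner_label": ["self-hosted", "macOS"], "flags": "--use-pytorch-cross-attention"},
    "linux": {"runner_label": ["self-hosted", "Linux"], "flags": ""},
    "windows": {"runner_label": ["self-hosted", "win"], "flags": ""},
}

jobs = [{"os": o, "python_version": p, **extras[o]} for o, p in product(oses, pythons)]
print(len(jobs))  # -> 12 jobs per labeled pull request (cuda/torch each have one value)
```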
95 changes: 95 additions & 0 deletions .github/workflows/test-ci.yml
@@ -0,0 +1,95 @@
# This is the GitHub Workflow that drives automatic full-GPU-enabled tests of all new commits to the master branch of ComfyUI
# Results are reported as checkmarks on the commits, as well as onto https://ci.comfy.org/
name: Full Comfy CI Workflow Runs
on:
  push:
    branches:
      - master
    paths-ignore:
      - 'app/**'
      - 'input/**'
      - 'output/**'
      - 'notebooks/**'
      - 'script_examples/**'
      - '.github/**'
      - 'web/**'
  workflow_dispatch:

jobs:
  test-stable:
    strategy:
      fail-fast: false
      matrix:
        os: [macos, linux, windows]
        python_version: ["3.9", "3.10", "3.11", "3.12"]
        cuda_version: ["12.1"]
        torch_version: ["stable"]
        include:
          - os: macos
            runner_label: [self-hosted, macOS]
            flags: "--use-pytorch-cross-attention"
          - os: linux
            runner_label: [self-hosted, Linux]
            flags: ""
          - os: windows
            runner_label: [self-hosted, win]
            flags: ""
    runs-on: ${{ matrix.runner_label }}
    steps:
      - name: Test Workflows
        uses: comfy-org/comfy-action@main
        with:
          os: ${{ matrix.os }}
          python_version: ${{ matrix.python_version }}
          torch_version: ${{ matrix.torch_version }}
          google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
          comfyui_flags: ${{ matrix.flags }}

  test-win-nightly:
    strategy:
      fail-fast: true
      matrix:
        os: [windows]
        python_version: ["3.9", "3.10", "3.11", "3.12"]
        cuda_version: ["12.1"]
        torch_version: ["nightly"]
        include:
          - os: windows
            runner_label: [self-hosted, win]
            flags: ""
    runs-on: ${{ matrix.runner_label }}
    steps:
      - name: Test Workflows
        uses: comfy-org/comfy-action@main
        with:
          os: ${{ matrix.os }}
          python_version: ${{ matrix.python_version }}
          torch_version: ${{ matrix.torch_version }}
          google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
          comfyui_flags: ${{ matrix.flags }}

  test-unix-nightly:
    strategy:
      fail-fast: false
      matrix:
        os: [macos, linux]
        python_version: ["3.11"]
        cuda_version: ["12.1"]
        torch_version: ["nightly"]
        include:
          - os: macos
            runner_label: [self-hosted, macOS]
            flags: "--use-pytorch-cross-attention"
          - os: linux
            runner_label: [self-hosted, Linux]
            flags: ""
    runs-on: ${{ matrix.runner_label }}
    steps:
      - name: Test Workflows
        uses: comfy-org/comfy-action@main
        with:
          os: ${{ matrix.os }}
          python_version: ${{ matrix.python_version }}
          torch_version: ${{ matrix.torch_version }}
          google_credentials: ${{ secrets.GCS_SERVICE_ACCOUNT_JSON }}
          comfyui_flags: ${{ matrix.flags }}
71 changes: 64 additions & 7 deletions comfy/model_management.py
@@ -1,3 +1,21 @@
"""
This file is part of ComfyUI.
Copyright (C) 2024 Comfy

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""

import psutil
import logging
from enum import Enum
@@ -273,6 +291,9 @@ def __init__(self, model):
     def model_memory(self):
         return self.model.model_size()
 
+    def model_offloaded_memory(self):
+        return self.model.model_size() - self.model.loaded_size()
+
     def model_memory_required(self, device):
         if device == self.model.current_loaded_device():
             return 0
@@ -308,15 +329,37 @@ def should_reload_model(self, force_patch_weights=False):
                 return True
         return False
 
-    def model_unload(self, unpatch_weights=True):
+    def model_unload(self, memory_to_free=None, unpatch_weights=True):
+        if memory_to_free is not None:
+            if memory_to_free < self.model.loaded_size():
+                self.model.partially_unload(self.model.offload_device, memory_to_free)
+                return False
         self.model.unpatch_model(self.model.offload_device, unpatch_weights=unpatch_weights)
         self.model.model_patches_to(self.model.offload_device)
         self.weights_loaded = self.weights_loaded and not unpatch_weights
         self.real_model = None
+        return True
+
+    def model_use_more_vram(self, extra_memory):
+        return self.model.partially_load(self.device, extra_memory)
 
     def __eq__(self, other):
         return self.model is other.model
 
+def use_more_memory(extra_memory, loaded_models, device):
+    for m in loaded_models:
+        if m.device == device:
+            extra_memory -= m.model_use_more_vram(extra_memory)
+            if extra_memory <= 0:
+                break
+
+def offloaded_memory(loaded_models, device):
+    offloaded_mem = 0
+    for m in loaded_models:
+        if m.device == device:
+            offloaded_mem += m.model_offloaded_memory()
+    return offloaded_mem
+
 def minimum_inference_memory():
     return (1024 * 1024 * 1024) * 1.2

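An illustrative aside, not part of the diff: with the new memory_to_free parameter, model_unload can shed only part of a model instead of evicting it outright, returning False so the model stays in current_loaded_models. The sketch below shows that call pattern with a hypothetical StubPatcher standing in for ComfyUI's ModelPatcher (which is what actually implements loaded_size/partially_unload):

```python
# Illustrative sketch only -- StubPatcher is a hypothetical stand-in that mimics
# just the size/partial-unload interface used by model_unload above.
class StubPatcher:
    def __init__(self, size):
        self._size = size          # total weight bytes
        self._loaded = size        # bytes currently resident on the GPU

    def model_size(self):
        return self._size

    def loaded_size(self):
        return self._loaded

    def partially_unload(self, device, memory_to_free):
        freed = min(memory_to_free, self._loaded)
        self._loaded -= freed      # pretend this much was moved to the offload device
        return freed

# If the deficit is smaller than what is loaded, only part of the model is
# offloaded and the model is kept in the loaded list (model_unload returns False).
patcher = StubPatcher(size=4 * 1024**3)       # 4 GiB model, fully loaded
need = 1 * 1024**3                            # need to free 1 GiB
if need < patcher.loaded_size():
    patcher.partially_unload("cpu", need)     # partial unload, rest stays on the GPU
print(patcher.loaded_size() / 1024**3)        # -> 3.0 GiB still resident
```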
@@ -363,11 +406,15 @@ def free_memory(memory_required, device, keep_loaded=[]):
 
     for x in sorted(can_unload):
         i = x[-1]
+        memory_to_free = None
         if not DISABLE_SMART_MEMORY:
-            if get_free_memory(device) > memory_required:
+            free_mem = get_free_memory(device)
+            if free_mem > memory_required:
                 break
-        current_loaded_models[i].model_unload()
-        unloaded_model.append(i)
+            memory_to_free = memory_required - free_mem
+        logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
+        if current_loaded_models[i].model_unload(memory_to_free):
+            unloaded_model.append(i)
 
     for i in sorted(unloaded_model, reverse=True):
         unloaded_models.append(current_loaded_models.pop(i))
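
For clarity (hypothetical numbers, not from the diff): free_memory now computes how far short of the request the device is and passes that deficit to model_unload, so a model is only popped from current_loaded_models when it reports a full unload:

```python
# Hypothetical numbers, just to show the deficit arithmetic used in the loop above.
memory_required = 6 * 1024**3        # caller wants 6 GiB free on the device
free_mem = 4 * 1024**3               # device currently has 4 GiB free
memory_to_free = memory_required - free_mem
print(memory_to_free / 1024**3)      # -> 2.0: ask the next candidate model to shed ~2 GiB
```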
@@ -385,11 +432,11 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
     global vram_state
 
     inference_memory = minimum_inference_memory()
-    extra_mem = max(inference_memory, memory_required)
+    extra_mem = max(inference_memory, memory_required) + 100 * 1024 * 1024
     if minimum_memory_required is None:
         minimum_memory_required = extra_mem
     else:
-        minimum_memory_required = max(inference_memory, minimum_memory_required)
+        minimum_memory_required = max(inference_memory, minimum_memory_required) + 100 * 1024 * 1024
 
     models = set(models)

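As a hedged illustration of the change above: both extra_mem and minimum_memory_required now carry a fixed 100 MiB margin on top of the 1.2 GiB minimum inference reserve.

```python
# Illustrative only: the default reservation after this change, in bytes.
minimum_inference = (1024 * 1024 * 1024) * 1.2           # minimum_inference_memory()
memory_required = 0                                       # default argument
extra_mem = max(minimum_inference, memory_required) + 100 * 1024 * 1024
print(round(extra_mem / (1024 * 1024)))                   # -> 1329 MiB reserved before loading
```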
@@ -422,12 +469,14 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
         devs = set(map(lambda a: a.device, models_already_loaded))
         for d in devs:
             if d != torch.device("cpu"):
-                free_memory(extra_mem, d, models_already_loaded)
+                free_memory(extra_mem + offloaded_memory(models_already_loaded, d), d, models_already_loaded)
                 free_mem = get_free_memory(d)
                 if free_mem < minimum_memory_required:
                     logging.info("Unloading models for lowram load.") #TODO: partial model unloading when this case happens, also handle the opposite case where models can be unlowvramed.
                     models_to_load = free_memory(minimum_memory_required, d)
                     logging.info("{} models unloaded.".format(len(models_to_load)))
+                else:
+                    use_more_memory(free_mem - minimum_memory_required, models_already_loaded, d)
         if len(models_to_load) == 0:
             return

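Another illustrative aside: when everything requested is already resident and the device still has headroom, use_more_memory hands the surplus back so partially offloaded models can move weights onto the GPU again. A minimal sketch with hypothetical stand-ins for LoadedModel:

```python
# Sketch of how the surplus is distributed; StubLoaded is a hypothetical stand-in.
# Each model takes what it can and the remainder flows to the next one.
class StubLoaded:
    def __init__(self, device, offloaded):
        self.device = device
        self._offloaded = offloaded            # bytes still sitting on the offload device

    def model_use_more_vram(self, extra_memory):
        used = min(extra_memory, self._offloaded)
        self._offloaded -= used                # pretend this much was re-loaded onto the GPU
        return used

loaded = [StubLoaded("cuda:0", 512 * 1024**2), StubLoaded("cuda:0", 2 * 1024**3)]
surplus = 1 * 1024**3                          # 1 GiB of free VRAM above the minimum
for m in loaded:
    if m.device == "cuda:0":
        surplus -= m.model_use_more_vram(surplus)
        if surplus <= 0:
            break
print([m._offloaded // 1024**2 for m in loaded])   # -> [0, 1536]: first model fully reloaded
```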
@@ -467,6 +516,14 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
 
         cur_loaded_model = loaded_model.model_load(lowvram_model_memory, force_patch_weights=force_patch_weights)
         current_loaded_models.insert(0, loaded_model)
+
+
+    devs = set(map(lambda a: a.device, models_already_loaded))
+    for d in devs:
+        if d != torch.device("cpu"):
+            free_mem = get_free_memory(d)
+            if free_mem > minimum_memory_required:
+                use_more_memory(free_mem - minimum_memory_required, models_already_loaded, d)
     return