From 50cba9a15e797cfb9b5b141863000992f1011a13 Mon Sep 17 00:00:00 2001
From: GAD-cell <gadsinoue@gmail.com>
Date: Mon, 16 Jun 2025 17:12:55 +0200
Subject: [PATCH 1/3] Fall back to logits in requires_grad post hook when loss is absent

---
 unsloth_zoo/peft_utils.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/unsloth_zoo/peft_utils.py b/unsloth_zoo/peft_utils.py
index 8aff264f..56c25491 100644
--- a/unsloth_zoo/peft_utils.py
+++ b/unsloth_zoo/peft_utils.py
@@ -194,11 +194,15 @@ def requires_grad_post_hook(module, input, output):
         if type_output is torch.Tensor:
             output.requires_grad_(True)
         else:
-            try:
-                # Output in huggingface generally a dataclass with loss, try to add to that
-                output.loss.requires_grad_(True)
-            except Exception as _:
-                raise RuntimeError("Unsloth: Failed to make output require gradients!")
+            try: # For dataclass from HF, try on loss or logits 
+                if hasattr(output, "loss") and output.loss is not None:
+                    output.loss.requires_grad_(True)
+                elif hasattr(output, "logits") and output.logits is not None: #with RL like GRPO there are no loss as you don't provide labels
+                    output.logits.requires_grad_(True)
+                else:
+                    raise ValueError("Neither loss nor logits are available for grad post hook.")
+            except Exception as e:
+                raise RuntimeError(f"Unsloth: Failed to make output require gradients: {e}")
     pass
 
     def requires_grad_pre_hook(module, input):

From c54e91cc2898ce0ed10d888e8f02a1dbbfe7d242 Mon Sep 17 00:00:00 2001
From: GAD-cell <gadsinoue@gmail.com>
Date: Tue, 17 Jun 2025 09:58:41 +0200
Subject: [PATCH 2/3] Whitespace-only touch-up to requires_grad post hook comment

---
 unsloth_zoo/peft_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsloth_zoo/peft_utils.py b/unsloth_zoo/peft_utils.py
index 56c25491..23ccf13a 100644
--- a/unsloth_zoo/peft_utils.py
+++ b/unsloth_zoo/peft_utils.py
@@ -197,7 +197,7 @@ def requires_grad_post_hook(module, input, output):
             try: # For dataclass from HF, try on loss or logits 
                 if hasattr(output, "loss") and output.loss is not None:
                     output.loss.requires_grad_(True)
-                elif hasattr(output, "logits") and output.logits is not None: #with RL like GRPO there are no loss as you don't provide labels
+                elif hasattr(output, "logits") and output.logits is not None: #with RL like GRPO there are no loss as you don't provide labels 
                     output.logits.requires_grad_(True)
                 else:
                     raise ValueError("Neither loss nor logits are available for grad post hook.")

From ba43cd4939735efd112ad2df31998cce0840d08d Mon Sep 17 00:00:00 2001
From: GAD-cell <gadsinoue@gmail.com>
Date: Thu, 3 Jul 2025 00:28:31 +0200
Subject: [PATCH 3/3] Add support for vision inputs in grpo_accumulated_loss

---
 unsloth_zoo/rl_replacements.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/unsloth_zoo/rl_replacements.py b/unsloth_zoo/rl_replacements.py
index ab30ac78..1f901fba 100644
--- a/unsloth_zoo/rl_replacements.py
+++ b/unsloth_zoo/rl_replacements.py
@@ -315,7 +315,9 @@ def grpo_accumulated_loss(
 ):
     # All Unsloth Zoo code licensed under LGPLv3
     bsz, qlen = input_ids.shape
-
+    pixel_values = kwargs.get('pixel_values',None)
+    image_grid_thw = kwargs.get('image_grid_thw',None)
+    
     # Find closest multiple
     factors = [i for i in range(1, bsz + 1) if bsz % i == 0]
     if n_chunks == -1: n_chunks = bsz
@@ -335,15 +337,23 @@ def grpo_accumulated_loss(
             ref_hidden_states = trainer.model(
                 input_ids = input_ids,
                 attention_mask = attention_mask,
+                pixel_values = pixel_values,
+                image_grid_thw = image_grid_thw,
                 logits_to_keep = logits_to_keep + 1,
             ).logits
         pass
         new_hidden_states = trainer.model(
             input_ids = input_ids,
             attention_mask = attention_mask,
+            pixel_values = pixel_values,
+            image_grid_thw = image_grid_thw,
             logits_to_keep = logits_to_keep + 1,
         ).logits
 
+        if ref_hidden_states.size(1) != logits_to_keep + 1 : # Some models like Qwen VL don't have logits_to_keep parameter so you need to trim the output manually
+            ref_hidden_states = ref_hidden_states[:,-(logits_to_keep + 1):,:]
+            new_hidden_states = new_hidden_states[:,-(logits_to_keep + 1):,:]
+
         loss, completion_length, mean_kl = UnslothEfficientGRPO.apply(
             new_hidden_states,
             old_hidden_states,