From 50cba9a15e797cfb9b5b141863000992f1011a13 Mon Sep 17 00:00:00 2001 From: GAD-cell Date: Mon, 16 Jun 2025 17:12:55 +0200 Subject: [PATCH 1/3] Add logits hook post grad --- unsloth_zoo/peft_utils.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/unsloth_zoo/peft_utils.py b/unsloth_zoo/peft_utils.py index 8aff264f..56c25491 100644 --- a/unsloth_zoo/peft_utils.py +++ b/unsloth_zoo/peft_utils.py @@ -194,11 +194,15 @@ def requires_grad_post_hook(module, input, output): if type_output is torch.Tensor: output.requires_grad_(True) else: - try: - # Output in huggingface generally a dataclass with loss, try to add to that - output.loss.requires_grad_(True) - except Exception as _: - raise RuntimeError("Unsloth: Failed to make output require gradients!") + try: # For dataclass from HF, try on loss or logits + if hasattr(output, "loss") and output.loss is not None: + output.loss.requires_grad_(True) + elif hasattr(output, "logits") and output.logits is not None: #with RL like GRPO there are no loss as you don't provide labels + output.logits.requires_grad_(True) + else: + raise ValueError("Neither loss nor logits are available for grad post hook.") + except Exception as e: + raise RuntimeError(f"Unsloth: Failed to make output require gradients: {e}") pass def requires_grad_pre_hook(module, input): From c54e91cc2898ce0ed10d888e8f02a1dbbfe7d242 Mon Sep 17 00:00:00 2001 From: GAD-cell Date: Tue, 17 Jun 2025 09:58:41 +0200 Subject: [PATCH 2/3] grad post hook update --- unsloth_zoo/peft_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth_zoo/peft_utils.py b/unsloth_zoo/peft_utils.py index 56c25491..23ccf13a 100644 --- a/unsloth_zoo/peft_utils.py +++ b/unsloth_zoo/peft_utils.py @@ -197,7 +197,7 @@ def requires_grad_post_hook(module, input, output): try: # For dataclass from HF, try on loss or logits if hasattr(output, "loss") and output.loss is not None: output.loss.requires_grad_(True) - elif hasattr(output, "logits") and output.logits is not None: #with RL like GRPO there are no loss as you don't provide labels + elif hasattr(output, "logits") and output.logits is not None: #with RL like GRPO there are no loss as you don't provide labels output.logits.requires_grad_(True) else: raise ValueError("Neither loss nor logits are available for grad post hook.") From ba43cd4939735efd112ad2df31998cce0840d08d Mon Sep 17 00:00:00 2001 From: GAD-cell Date: Thu, 3 Jul 2025 00:28:31 +0200 Subject: [PATCH 3/3] added support for vision input in grpo_accumulated --- unsloth_zoo/rl_replacements.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/unsloth_zoo/rl_replacements.py b/unsloth_zoo/rl_replacements.py index ab30ac78..1f901fba 100644 --- a/unsloth_zoo/rl_replacements.py +++ b/unsloth_zoo/rl_replacements.py @@ -315,7 +315,9 @@ def grpo_accumulated_loss( ): # All Unsloth Zoo code licensed under LGPLv3 bsz, qlen = input_ids.shape - + pixel_values = kwargs.get('pixel_values',None) + image_grid_thw = kwargs.get('image_grid_thw',None) + # Find closest multiple factors = [i for i in range(1, bsz + 1) if bsz % i == 0] if n_chunks == -1: n_chunks = bsz @@ -335,15 +337,23 @@ def grpo_accumulated_loss( ref_hidden_states = trainer.model( input_ids = input_ids, attention_mask = attention_mask, + pixel_values = pixel_values, + image_grid_thw = image_grid_thw, logits_to_keep = logits_to_keep + 1, ).logits pass new_hidden_states = trainer.model( input_ids = input_ids, attention_mask = attention_mask, + pixel_values = pixel_values, + image_grid_thw = image_grid_thw, logits_to_keep = logits_to_keep + 1, ).logits + if ref_hidden_states.size(1) != logits_to_keep + 1 : # Some models like Qwen VL don't have logits_to_keep parameter so you need to trim the output manually + ref_hidden_states = ref_hidden_states[:,-(logits_to_keep + 1):,:] + new_hidden_states = new_hidden_states[:,-(logits_to_keep + 1):,:] + loss, completion_length, mean_kl = UnslothEfficientGRPO.apply( new_hidden_states, old_hidden_states,