From 64a9cdf6b93bc0fac199304bc61aeb3c35d179f5 Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Fri, 20 Dec 2024 15:06:14 +0800
Subject: [PATCH 01/11] bump version

---
 swift/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/version.py b/swift/version.py
index 9a09a91539..ef9ab31e9f 100644
--- a/swift/version.py
+++ b/swift/version.py
@@ -1,5 +1,5 @@
 # Make sure to modify __release_datetime__ to release time when making official release.
-__version__ = '3.0.0.dev0'
+__version__ = '3.0.0'
 # default release datetime for branches under active development is set
 # to be a time far-far-away-into-the-future
 __release_datetime__ = '2099-10-13 08:56:12'

From 4896c6fb67b7bbc0d02bce4f9475225c7a461237 Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Sun, 29 Dec 2024 15:32:37 +0800
Subject: [PATCH 02/11] bump version

---
 swift/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/version.py b/swift/version.py
index f8a3cd8922..f44966a0f5 100644
--- a/swift/version.py
+++ b/swift/version.py
@@ -1,5 +1,5 @@
 # Make sure to modify __release_datetime__ to release time when making official release.
-__version__ = '3.0.1'
+__version__ = '3.0.1.post1'
 # default release datetime for branches under active development is set
 # to be a time far-far-away-into-the-future
 __release_datetime__ = '2099-10-13 08:56:12'

From e9cc74067f826bcd1fdef5f9533611ce6b959cc4 Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Mon, 6 Jan 2025 11:31:13 +0800
Subject: [PATCH 03/11] bump version

---
 swift/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/version.py b/swift/version.py
index f44966a0f5..f0d9c4725a 100644
--- a/swift/version.py
+++ b/swift/version.py
@@ -1,5 +1,5 @@
 # Make sure to modify __release_datetime__ to release time when making official release.
-__version__ = '3.0.1.post1'
+__version__ = '3.0.2'
 # default release datetime for branches under active development is set
 # to be a time far-far-away-into-the-future
 __release_datetime__ = '2099-10-13 08:56:12'

From d307635b24bccd31837f422166164bcdb08ecc0e Mon Sep 17 00:00:00 2001
From: Jintao <huangjintao.hjt@alibaba-inc.com>
Date: Wed, 8 Jan 2025 12:35:59 +0800
Subject: [PATCH 04/11] update qlora shell (#2880)

---
 examples/train/qlora/awq.sh  | 28 ++++++++++++++++++++++++++++
 examples/train/qlora/bnb.sh  | 34 ++++++++++++++++++++++++++++++++++
 examples/train/qlora/gptq.sh | 28 ++++++++++++++++++++++++++++
 examples/train/qlora/hqq.sh  | 31 +++++++++++++++++++++++++++++++
 tests/llm/test_run.py        |  2 +-
 5 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 examples/train/qlora/awq.sh
 create mode 100644 examples/train/qlora/bnb.sh
 create mode 100644 examples/train/qlora/gptq.sh
 create mode 100644 examples/train/qlora/hqq.sh

diff --git a/examples/train/qlora/awq.sh b/examples/train/qlora/awq.sh
new file mode 100644
index 0000000000..f13fb39108
--- /dev/null
+++ b/examples/train/qlora/awq.sh
@@ -0,0 +1,28 @@
+# 10GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct-AWQ \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot
diff --git a/examples/train/qlora/bnb.sh b/examples/train/qlora/bnb.sh
new file mode 100644
index 0000000000..15e9af0105
--- /dev/null
+++ b/examples/train/qlora/bnb.sh
@@ -0,0 +1,34 @@
+# 10GB
+# pip install bitsandbytes
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --bnb_4bit_compute_dtype bfloat16 \
+    --bnb_4bit_quant_type nf4 \
+    --bnb_4bit_use_double_quant true \
+    --quant_method bnb \
+    --quant_bits 4 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot
diff --git a/examples/train/qlora/gptq.sh b/examples/train/qlora/gptq.sh
new file mode 100644
index 0000000000..7cb4eefc44
--- /dev/null
+++ b/examples/train/qlora/gptq.sh
@@ -0,0 +1,28 @@
+# 9GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4 \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot
diff --git a/examples/train/qlora/hqq.sh b/examples/train/qlora/hqq.sh
new file mode 100644
index 0000000000..8d25afea67
--- /dev/null
+++ b/examples/train/qlora/hqq.sh
@@ -0,0 +1,31 @@
+# 10GB
+# pip install hqq
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --quant_method hqq \
+    --quant_bits 4 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot
diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py
index 783f2ae0d4..862710357f 100644
--- a/tests/llm/test_run.py
+++ b/tests/llm/test_run.py
@@ -27,7 +27,7 @@
 kwargs = {
     'per_device_train_batch_size': 2,
     'per_device_eval_batch_size': 2,
-    'save_steps': 10,
+    'save_steps': 5,
     'gradient_accumulation_steps': 4,
     'num_train_epochs': 1,
 }

From 33ab2bda10db9f758917b65ec831ce12d21206cd Mon Sep 17 00:00:00 2001
From: Jintao <huangjintao.hjt@alibaba-inc.com>
Date: Wed, 8 Jan 2025 17:04:24 +0800
Subject: [PATCH 05/11] fix docs (#2882)

---
 ...\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md" | 2 +-
 docs/source_en/Instruction/Inference-and-deployment.md          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git "a/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md" "b/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md"
index 76a574b957..ca887b619b 100644
--- "a/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md"
+++ "b/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md"
@@ -15,7 +15,7 @@ SWIFT支持以命令行、Python代码和界面方式进行推理和部署：
 - `single-line`命令 切换到单行模式
 - `clear`命令 清除history
 - `exit`命令 退出
-- 如果query中带有多模态数据，添加<image>/<video>/<audio>等标签，例如输入`<image>What is in the image?`，即可在接下来输入图片地址
+- 如果query中带有多模态数据，添加`<image>/<video>/<audio>`等标签，例如输入`<image>What is in the image?`，即可在接下来输入图片地址
 
 ## 推理加速后端
 
diff --git a/docs/source_en/Instruction/Inference-and-deployment.md b/docs/source_en/Instruction/Inference-and-deployment.md
index 1229ba3590..c7c0fa2e08 100644
--- a/docs/source_en/Instruction/Inference-and-deployment.md
+++ b/docs/source_en/Instruction/Inference-and-deployment.md
@@ -16,7 +16,7 @@ The command line inference can be referred to via the link provided in the secon
 - The `single-line` command switches to single-line mode.
 - The `clear` command clears the history.
 - The `exit` command exits the application.
-If the query involves multimodal data, add tags like <image>/<video>/<audio>. For example, input `<image>What is in the image?`, and you can then input the image address.
+If the query involves multimodal data, add tags like `<image>/<video>/<audio>`. For example, input `<image>What is in the image?`, and you can then input the image address.
 
 ## Inference Acceleration Backend
 You can perform inference and deployment using `swift infer/deploy`. Currently, SWIFT supports three inference frameworks: pt (native torch), vLLM, and LMDeploy. You can switch between them using `--infer_backend pt/vllm/lmdeploy`. Apart from pt, both vLLM and LMDeploy have their own model support ranges. Please refer to their official documentation to verify availability and prevent runtime errors.

From fc9cc05dbc7a3b1151d6dec9a5945853dfb9909a Mon Sep 17 00:00:00 2001
From: Jintao <huangjintao.hjt@alibaba-inc.com>
Date: Thu, 9 Jan 2025 18:44:26 +0800
Subject: [PATCH 06/11] Fix qwen vl eval (#2892)

---
 swift/llm/__init__.py               | 23 +++++++++++++----
 swift/llm/dataset/__init__.py       | 22 +++--------------
 swift/llm/template/base.py          | 38 ++++++++++++++++-------------
 swift/llm/template/template/qwen.py |  6 +++++
 swift/llm/utils.py                  | 22 +++++++++++++++++
 5 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/swift/llm/__init__.py b/swift/llm/__init__.py
index 8279dca957..99bd05d605 100644
--- a/swift/llm/__init__.py
+++ b/swift/llm/__init__.py
@@ -26,7 +26,7 @@
                           LazyLLMDataset, ConstantLengthDataset, standard_keys, load_dataset, DATASET_TYPE,
                           sample_dataset, RowPreprocessor, DatasetMeta)
     from .utils import (deep_getattr, to_device, History, Messages, history_to_messages, messages_to_history, Processor,
-                        save_checkpoint, ProcessorMixin)
+                        save_checkpoint, ProcessorMixin, get_temporary_cache_files_directory)
     from .base import SwiftPipeline
 else:
     _extra_objects = {k: v for k, v in globals().items() if not k.startswith('_')}
@@ -57,13 +57,26 @@
             'load_by_unsloth', 'git_clone_github', 'get_matched_model_meta'
         ],
         'dataset': [
-            'AlpacaPreprocessor', 'MessagesPreprocessor', 'DATASET_MAPPING', 'MediaResource', 'register_dataset',
-            'register_dataset_info', 'EncodePreprocessor', 'LazyLLMDataset', 'ConstantLengthDataset', 'standard_keys',
-            'load_dataset', 'DATASET_TYPE', 'sample_dataset', 'RowPreprocessor', 'ResponsePreprocessor', 'DatasetMeta'
+            'AlpacaPreprocessor',
+            'MessagesPreprocessor',
+            'DATASET_MAPPING',
+            'MediaResource',
+            'register_dataset',
+            'register_dataset_info',
+            'EncodePreprocessor',
+            'LazyLLMDataset',
+            'ConstantLengthDataset',
+            'standard_keys',
+            'load_dataset',
+            'DATASET_TYPE',
+            'sample_dataset',
+            'RowPreprocessor',
+            'ResponsePreprocessor',
+            'DatasetMeta',
         ],
         'utils': [
             'deep_getattr', 'to_device', 'History', 'Messages', 'history_to_messages', 'messages_to_history',
-            'Processor', 'save_checkpoint', 'ProcessorMixin'
+            'Processor', 'save_checkpoint', 'ProcessorMixin', 'get_temporary_cache_files_directory'
         ],
         'base': ['SwiftPipeline'],
     }
diff --git a/swift/llm/dataset/__init__.py b/swift/llm/dataset/__init__.py
index 9c344dbddd..7258f0a1e5 100644
--- a/swift/llm/dataset/__init__.py
+++ b/swift/llm/dataset/__init__.py
@@ -1,14 +1,9 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-
-import os
-import tempfile
-
-import datasets.config
 import datasets.fingerprint
 from datasets import disable_caching
-from modelscope.hub.utils.utils import get_cache_dir
 
 from swift.utils.torch_utils import _find_local_mac
+from ..utils import get_temporary_cache_files_directory
 from . import dataset
 from .loader import DATASET_TYPE, load_dataset
 from .media import MediaResource
@@ -32,20 +27,9 @@ def _update_fingerprint_mac(*args, **kwargs):
     return fp
 
 
-def _new_get_temporary_cache_files_directory(*args, **kwargs):
-    global DATASET_TEMP_DIR
-    if DATASET_TEMP_DIR is None:
-        tmp_dir = os.path.join(get_cache_dir(), 'tmp')
-        os.makedirs(tmp_dir, exist_ok=True)
-        DATASET_TEMP_DIR = tempfile.TemporaryDirectory(prefix=datasets.config.TEMP_CACHE_DIR_PREFIX, dir=tmp_dir)
-
-    return DATASET_TEMP_DIR.name
-
-
 datasets.fingerprint.update_fingerprint = _update_fingerprint_mac
 datasets.arrow_dataset.update_fingerprint = _update_fingerprint_mac
-datasets.fingerprint.get_temporary_cache_files_directory = _new_get_temporary_cache_files_directory
-datasets.arrow_dataset.get_temporary_cache_files_directory = _new_get_temporary_cache_files_directory
-DATASET_TEMP_DIR = None
+datasets.fingerprint.get_temporary_cache_files_directory = get_temporary_cache_files_directory
+datasets.arrow_dataset.get_temporary_cache_files_directory = get_temporary_cache_files_directory
 register_dataset_info()
 disable_caching()
diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py
index d2a2ae84a8..0a7a1e540b 100644
--- a/swift/llm/template/base.py
+++ b/swift/llm/template/base.py
@@ -12,6 +12,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from modelscope.hub.utils.utils import get_cache_dir
 from peft import PeftModel
 from PIL import Image
 from torch.nn.utils.rnn import pad_sequence
@@ -116,22 +117,21 @@ def __init__(
         self._deepspeed_initialize = None
 
     @staticmethod
-    def _load_images(images, load_images: bool) -> None:
-        for i, image in enumerate(images):
-            if load_images:
-                if isinstance(image, dict) and 'bytes' in image:
-                    image = image['bytes'] or image['path']
+    def _load_image(image, load_images: bool):
+        if load_images:
+            if isinstance(image, dict) and 'bytes' in image:
+                image = image['bytes'] or image['path']
+            image = load_image(image)
+        else:
+            if isinstance(image, dict):
+                path = image['path']
+                if path and (path.startswith('http') or os.path.exists(path)):
+                    image = path
+                else:
+                    image = load_image(image['bytes'])
+            elif not isinstance(image, str):
                 image = load_image(image)
-            else:
-                if isinstance(image, dict):
-                    path = image['path']
-                    if path and (path.startswith('http') or os.path.exists(path)):
-                        image = path
-                    else:
-                        image = load_image(image['bytes'])
-                elif not isinstance(image, str):
-                    image = load_image(image)
-            images[i] = image
+        return image
 
     def _preprocess_inputs(
         self,
@@ -143,7 +143,8 @@ def _preprocess_inputs(
         if self.max_pixels is not None or inputs.objects:
             load_images = True
         if images:
-            self._load_images(images, load_images)
+            for i, image in enumerate(images):
+                images[i] = self._load_image(images[i], load_images)
         if self.max_pixels is not None:
             assert self.grounding_type != 'real', 'not support'  # TODO:check
             images = [rescale_image(img, self.max_pixels) for img in images]
@@ -298,7 +299,10 @@ def prepare_generate_kwargs(self, generate_kwargs: Dict[str, Any], *, model=None
     def _save_pil_image(image: Image.Image) -> str:
         img_bytes = image.tobytes()
         img_hash = hashlib.sha256(img_bytes).hexdigest()
-        img_path = os.path.join('tmp', f'{img_hash}.png')
+        tmp_dir = os.path.join(get_cache_dir(), 'tmp', 'images')
+        logger.info_once(f'create tmp_dir: {tmp_dir}')
+        os.makedirs(tmp_dir, exist_ok=True)
+        img_path = os.path.join(tmp_dir, f'{img_hash}.png')
         if not os.path.exists(img_path):
             image.save(img_path)
         return img_path
diff --git a/swift/llm/template/template/qwen.py b/swift/llm/template/template/qwen.py
index c740236af6..f117f57c8c 100644
--- a/swift/llm/template/template/qwen.py
+++ b/swift/llm/template/template/qwen.py
@@ -50,6 +50,12 @@ class QwqTemplateMeta(QwenTemplateMeta):
 class QwenVLTemplate(Template):
     load_images = False
 
+    @staticmethod
+    def _load_image(image, load_images: bool):
+        if not load_images and isinstance(image, str) and (image.startswith('data:') or len(image) > 200):
+            load_images = True
+        return Template._load_image(image, load_images)
+
     def replace_tag(self, media_type: Literal['image', 'video', 'audio'], index: int,
                     inputs: StdTemplateInputs) -> List[Context]:
         assert media_type == 'image'
diff --git a/swift/llm/utils.py b/swift/llm/utils.py
index 4e734559d6..d9b93ffc70 100644
--- a/swift/llm/utils.py
+++ b/swift/llm/utils.py
@@ -2,11 +2,13 @@
 import inspect
 import os
 import shutil
+import tempfile
 from types import MethodType
 from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
+from modelscope.hub.utils.utils import get_cache_dir
 from transformers import FeatureExtractionMixin, GenerationConfig, PreTrainedModel, PreTrainedTokenizerBase
 from transformers import ProcessorMixin as HfProcessorMixin
 
@@ -223,3 +225,23 @@ def save_checkpoint(model: Optional[PreTrainedModel],
             elif os.path.isdir(src_path):
                 shutil.copytree(src_path, tgt_path)
                 break
+
+
+TEMP_DIR_POOL = {}
+
+
+def get_temporary_cache_files_directory(prefix=None):
+    if prefix is None:
+        import datasets.config
+        prefix = datasets.config.TEMP_CACHE_DIR_PREFIX
+    global TEMP_DIR_POOL
+    if prefix in TEMP_DIR_POOL:
+        TEMP_DIR = TEMP_DIR_POOL[prefix]
+    else:
+        tmp_dir = os.path.join(get_cache_dir(), 'tmp')
+        os.makedirs(tmp_dir, exist_ok=True)
+        TEMP_DIR = tempfile.TemporaryDirectory(prefix=prefix, dir=tmp_dir)
+        logger.info(f'create tmp_dir: {TEMP_DIR.name}')
+        TEMP_DIR_POOL[prefix] = TEMP_DIR
+
+    return TEMP_DIR.name

From c3386bd9f51d1d1bf67cba70bcb4c5901c309634 Mon Sep 17 00:00:00 2001
From: Jintao <huangjintao.hjt@alibaba-inc.com>
Date: Thu, 9 Jan 2025 23:51:34 +0800
Subject: [PATCH 07/11] fix infer engine (#2898)

---
 swift/llm/infer/infer_engine/infer_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/llm/infer/infer_engine/infer_engine.py b/swift/llm/infer/infer_engine/infer_engine.py
index fe6057b383..1d106af764 100644
--- a/swift/llm/infer/infer_engine/infer_engine.py
+++ b/swift/llm/infer/infer_engine/infer_engine.py
@@ -151,7 +151,7 @@ def _gen_wrapper():
                 result += res
                 i += max_batch_size
                 prog_bar.update(len(tasks_samples))
-            return self._update_metrics(res, metrics)
+            return self._update_metrics(result, metrics)
 
     def _get_toolcall(self,
                       response: Union[str, List[Dict[str, Any]]],

From 9ca4419afd10572b523d7b2fc1e3fa1e791a2bcb Mon Sep 17 00:00:00 2001
From: tastelikefeet <58414341+tastelikefeet@users.noreply.github.com>
Date: Fri, 10 Jan 2025 01:25:01 +0800
Subject: [PATCH 08/11] Add phi4 (#2895)

---
 ...22\214\346\225\260\346\215\256\351\233\206.md" |  1 +
 .../Instruction/Supported-models-and-datasets.md  |  1 +
 swift/llm/model/constant.py                       |  1 +
 swift/llm/model/model/microsoft.py                | 15 +++++++++++++++
 swift/llm/template/constant.py                    |  1 +
 swift/llm/template/template/microsoft.py          | 15 +++++++++++++++
 tests/test_align/test_template/test_llm.py        | 10 +++++++++-
 7 files changed, 43 insertions(+), 1 deletion(-)

diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
index 3d19bf2cda..f36055b293 100644
--- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
+++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
@@ -413,6 +413,7 @@
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
+|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
index 1b946760be..cc3ad9ed3c 100644
--- a/docs/source_en/Instruction/Supported-models-and-datasets.md
+++ b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -413,6 +413,7 @@ The table below introduces the models integrated with ms-swift:
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
+|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py
index b10d688c8c..0c8879b7c7 100644
--- a/swift/llm/model/constant.py
+++ b/swift/llm/model/constant.py
@@ -75,6 +75,7 @@ class LLMModelType:
     phi3_small = 'phi3_small'
     phi3 = 'phi3'
     phi3_moe = 'phi3_moe'
+    phi4 = 'phi4'
 
     gemma = 'gemma'
     gemma2 = 'gemma2'
diff --git a/swift/llm/model/model/microsoft.py b/swift/llm/model/model/microsoft.py
index 97ee990a53..2b76f4c3c1 100644
--- a/swift/llm/model/model/microsoft.py
+++ b/swift/llm/model/model/microsoft.py
@@ -173,6 +173,21 @@ def get_model_tokenizer_phi(model_dir: str,
         model_arch=ModelArch.phi3,
     ))
 
+register_model(
+    ModelMeta(
+        LLMModelType.phi4,
+        [
+            ModelGroup([
+                Model('LLM-Research/phi-4', 'microsoft/phi-4'),
+            ]),
+        ],
+        TemplateType.phi4,
+        get_model_tokenizer_with_flash_attn,
+        architectures=['Phi3ForCausalLM'],
+        requires=['transformers>=4.36'],
+        model_arch=ModelArch.phi3,
+    ))
+
 register_model(
     ModelMeta(
         LLMModelType.phi3_moe,
diff --git a/swift/llm/template/constant.py b/swift/llm/template/constant.py
index a66955a500..9e921dff1f 100644
--- a/swift/llm/template/constant.py
+++ b/swift/llm/template/constant.py
@@ -59,6 +59,7 @@ class LLMTemplateType:
     wizardlm2_moe = 'wizardlm2_moe'
     gemma = 'gemma'
     phi3 = 'phi3'
+    phi4 = 'phi4'
 
     yuan = 'yuan'
     xverse = 'xverse'
diff --git a/swift/llm/template/template/microsoft.py b/swift/llm/template/template/microsoft.py
index e7a46d9f62..4de4ef4ac1 100644
--- a/swift/llm/template/template/microsoft.py
+++ b/swift/llm/template/template/microsoft.py
@@ -103,6 +103,21 @@ class Phi3TemplateMeta(TemplateMeta):
 register_template(Phi3TemplateMeta(LLMTemplateType.phi3))
 
 
+@dataclass
+class Phi4TemplateMeta(TemplateMeta):
+    prefix: Prompt = field(default_factory=list)
+    prompt: Prompt = field(
+        default_factory=lambda: ['<|im_start|>user<|im_sep|>{{QUERY}}<|im_end|><|im_start|>assistant<|im_sep|>'])
+    chat_sep: Optional[Prompt] = field(default_factory=lambda: ['<|im_end|>'])
+    suffix: Prompt = field(default_factory=lambda: ['<|im_end|>'])
+    system_prefix: Optional[Prompt] = field(
+        default_factory=lambda: ['<|im_start|>system<|im_sep|>{{SYSTEM}}<|im_end|>'])
+    auto_add_bos: bool = True
+
+
+register_template(Phi4TemplateMeta(LLMTemplateType.phi4))
+
+
 class Phi3VisionTemplate(Template):
     image_placeholder = ['<|image|><s>\n']  # <|image|>\n
 
diff --git a/tests/test_align/test_template/test_llm.py b/tests/test_align/test_template/test_llm.py
index 0b29120dfe..ed0cfcca16 100644
--- a/tests/test_align/test_template/test_llm.py
+++ b/tests/test_align/test_template/test_llm.py
@@ -35,6 +35,13 @@ def test_qwen2_5():
     _infer_model(pt_engine)
 
 
+def test_phi4():
+    pt_engine = PtEngine('LLM-Research/phi-4')
+    _infer_model(pt_engine)
+    pt_engine.default_template.template_backend = 'jinja'
+    _infer_model(pt_engine)
+
+
 def test_qwen1half():
     pt_engine = PtEngine('Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4')
     _infer_model(pt_engine)
@@ -267,4 +274,5 @@ def test_skywork_reward():
     # test_internlm2_reward()
     # test_qwen2_reward()
     # test_qwen2_5_math()
-    test_skywork_reward()
+    # test_skywork_reward()
+    test_phi4()

From b82b55a17d5b0fcfcc29d9669f95d2a7371366b0 Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Fri, 10 Jan 2025 14:29:56 +0800
Subject: [PATCH 09/11] bump version

---
 swift/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/version.py b/swift/version.py
index f0d9c4725a..6efd94899b 100644
--- a/swift/version.py
+++ b/swift/version.py
@@ -1,5 +1,5 @@
 # Make sure to modify __release_datetime__ to release time when making official release.
-__version__ = '3.0.2'
+__version__ = '3.0.2.post1'
 # default release datetime for branches under active development is set
 # to be a time far-far-away-into-the-future
 __release_datetime__ = '2099-10-13 08:56:12'

From fbe6f74924e407c8f40f77b697f14986b0b16846 Mon Sep 17 00:00:00 2001
From: Jintao <huangjintao.hjt@alibaba-inc.com>
Date: Fri, 10 Jan 2025 21:17:25 +0800
Subject: [PATCH 10/11] fix link & bug (#2902)

---
 ...\222\214\346\225\260\346\215\256\351\233\206.md" |  8 ++++----
 .../Instruction/Supported-models-and-datasets.md    |  8 ++++----
 swift/llm/argument/base_args/base_args.py           | 10 +++++++++-
 swift/llm/model/model/llava.py                      |  6 +++---
 swift/llm/train/rlhf.py                             | 13 +++++++++++--
 tests/llm/test_run.py                               |  5 +++--
 6 files changed, 34 insertions(+), 16 deletions(-)

diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
index f36055b293..c84077a542 100644
--- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
+++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
@@ -413,7 +413,7 @@
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
-|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
@@ -593,9 +593,9 @@
 |[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers)|llava_llama3_hf|llava_llama3_hf|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
 |[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b)|llava1_6_mistral|llava1_6_mistral|transformers>=4.34|vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b)|llava1_6_yi|llava1_6_yi|transformers>=4.34|vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
-|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
-|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|[AI-ModelScope/llava-next-72b](https://modelscope.cn/models/AI-ModelScope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|[AI-ModelScope/llava-next-110b](https://modelscope.cn/models/AI-ModelScope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|[AI-ModelScope/llama3-llava-next-8b](https://modelscope.cn/models/AI-ModelScope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
 |[deepseek-ai/deepseek-vl-1.3b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-1.3b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)|
 |[deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)|
 |[deepseek-ai/deepseek-vl2-tiny](https://modelscope.cn/models/deepseek-ai/deepseek-vl2-tiny)|deepseek_vl2|deepseek_vl2|transformers<4.42|vision|[deepseek-ai/deepseek-vl2-tiny](https://huggingface.co/deepseek-ai/deepseek-vl2-tiny)|
diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
index cc3ad9ed3c..04aebafe5e 100644
--- a/docs/source_en/Instruction/Supported-models-and-datasets.md
+++ b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -413,7 +413,7 @@ The table below introduces the models integrated with ms-swift:
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
-|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
@@ -593,9 +593,9 @@ The table below introduces the models integrated with ms-swift:
 |[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers)|llava_llama3_hf|llava_llama3_hf|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
 |[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b)|llava1_6_mistral|llava1_6_mistral|transformers>=4.34|vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b)|llava1_6_yi|llava1_6_yi|transformers>=4.34|vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
-|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
-|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|[AI-ModelScope/llava-next-72b](https://modelscope.cn/models/AI-ModelScope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|[AI-ModelScope/llava-next-110b](https://modelscope.cn/models/AI-ModelScope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|[AI-ModelScope/llama3-llava-next-8b](https://modelscope.cn/models/AI-ModelScope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
 |[deepseek-ai/deepseek-vl-1.3b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-1.3b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)|
 |[deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)|
 |[deepseek-ai/deepseek-vl2-tiny](https://modelscope.cn/models/deepseek-ai/deepseek-vl2-tiny)|deepseek_vl2|deepseek_vl2|transformers<4.42|vision|[deepseek-ai/deepseek-vl2-tiny](https://huggingface.co/deepseek-ai/deepseek-vl2-tiny)|
diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py
index a39bac4ad3..82f3e82989 100644
--- a/swift/llm/argument/base_args/base_args.py
+++ b/swift/llm/argument/base_args/base_args.py
@@ -241,7 +241,14 @@ def get_template(self, processor: 'Processor') -> 'Template':
         logger.info(f'default_system: {template.template_meta.default_system}')
         return template
 
-    def get_model_processor(self, *, model=None, model_type=None, model_revision=None, task_type=None, **kwargs):
+    def get_model_processor(self,
+                            *,
+                            model=None,
+                            model_type=None,
+                            model_revision=None,
+                            task_type=None,
+                            num_labels=None,
+                            **kwargs):
         if self.tuner_backend == 'unsloth':
             return load_by_unsloth(self)
         kwargs.update(self.get_model_kwargs())
@@ -250,5 +257,6 @@ def get_model_processor(self, *, model=None, model_type=None, model_revision=Non
         kwargs['model_type'] = model_type or self.model_type
         kwargs['model_revision'] = model_revision or self.model_revision
         kwargs['task_type'] = task_type or self.task_type
+        kwargs['num_labels'] = num_labels or self.num_labels
 
         return get_model_tokenizer(**kwargs)
diff --git a/swift/llm/model/model/llava.py b/swift/llm/model/model/llava.py
index 0b5c29e51b..811a49b793 100644
--- a/swift/llm/model/model/llava.py
+++ b/swift/llm/model/model/llava.py
@@ -334,7 +334,7 @@ def _new_forward(*args, **kwargs):
         MLLMModelType.llama3_llava_next,
         [
             ModelGroup([
-                Model('AI-Modelscope/llama3-llava-next-8b', 'lmms-lab/llama3-llava-next-8b'),
+                Model('AI-ModelScope/llama3-llava-next-8b', 'lmms-lab/llama3-llava-next-8b'),
             ], ),
         ],
         TemplateType.llama3_llava_next,
@@ -379,8 +379,8 @@ def _new_forward(*args, **kwargs):
     ModelMeta(
         MLLMModelType.llava_next_qwen, [
             ModelGroup([
-                Model('AI-Modelscope/llava-next-72b', 'lmms-lab/llava-next-72b'),
-                Model('AI-Modelscope/llava-next-110b', 'lmms-lab/llava-next-110b'),
+                Model('AI-ModelScope/llava-next-72b', 'lmms-lab/llava-next-72b'),
+                Model('AI-ModelScope/llava-next-110b', 'lmms-lab/llava-next-110b'),
             ], ),
         ],
         TemplateType.llava_next_qwen,
diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py
index feffd4e65c..aedc0bd017 100644
--- a/swift/llm/train/rlhf.py
+++ b/swift/llm/train/rlhf.py
@@ -30,10 +30,19 @@ def _prepare_model_tokenizer(self):
             model_type = getattr(args, f'{key}_model_type')
             model_revision = getattr(args, f'{key}_model_revision')
             adapters = args.adapters if key == 'ref' else args.reward_adapters
-            task_type = args.task_type if origin_key == 'ref' else 'seq_cls'
+            if origin_key == 'ref':
+                task_type = args.task_type
+                num_labels = None
+            else:
+                task_type = 'seq_cls'
+                num_labels = 1
             # Be aware of the unexpected behavior caused by double monkey patching.
             model = args.get_model_processor(
-                model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type)[0]
+                model=model_id_or_path,
+                model_type=model_type,
+                model_revision=model_revision,
+                task_type=task_type,
+                num_labels=num_labels)[0]
 
             model = prepare_adapter(args, model, adapters)
             if origin_key in {'ref', 'reward'}:
diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py
index 862710357f..b5ef6330d5 100644
--- a/tests/llm/test_run.py
+++ b/tests/llm/test_run.py
@@ -1,6 +1,6 @@
 if __name__ == '__main__':
     import os
-    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
 
 import os
@@ -242,13 +242,14 @@ def test_rlhf(self):
                        if rlhf_type != 'kto' else 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#100')
             train_kwargs = {}
             if rlhf_type == 'ppo':
-                train_kwargs['reward_model_type'] = 'Qwen/Qwen2-1.5B-Instruct'
+                train_kwargs['reward_model'] = 'Qwen/Qwen2-1.5B-Instruct'
             output = rlhf_main(
                 RLHFArguments(
                     rlhf_type=rlhf_type,
                     model='Qwen/Qwen2-1.5B-Instruct',
                     dataset=dataset,
                     eval_steps=5,
+                    split_dataset_ratio=0.05,
                     **train_kwargs,
                     **kwargs))
             if rlhf_type == 'ppo':

From 84a6dff662d1261e7900cb3aa2b0f5484437f4f9 Mon Sep 17 00:00:00 2001
From: Jintao Huang <huangjintao.hjt@alibaba-inc.com>
Date: Mon, 20 Jan 2025 17:42:49 +0800
Subject: [PATCH 11/11] bump version

---
 swift/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swift/version.py b/swift/version.py
index 6efd94899b..367b9007fd 100644
--- a/swift/version.py
+++ b/swift/version.py
@@ -1,5 +1,5 @@
 # Make sure to modify __release_datetime__ to release time when making official release.
-__version__ = '3.0.2.post1'
+__version__ = '3.0.3'
 # default release datetime for branches under active development is set
 # to be a time far-far-away-into-the-future
 __release_datetime__ = '2099-10-13 08:56:12'