configs/examples/grpo_verl_countdown/gcp_job.yaml (2 changes: 1 addition & 1 deletion)
@@ -18,7 +18,7 @@ name: grpo-verl-countdown

resources:
cloud: gcp
accelerators: "H100:8"
accelerators: "A100-80GB:2"
use_spot: false

working_dir: .
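With the job now requesting two GPUs ("A100-80GB:2"), the train.yaml diff below drops trainer.n_gpus_per_node to 2 to match. A minimal sketch for keeping the two files in sync, assuming PyYAML is installed and the repository paths are as shown in this diff; the find_key helper is illustrative, not part of Oumi:

import yaml

# Sketch: check that the GPU count requested in gcp_job.yaml matches
# n_gpus_per_node in train.yaml. Keys are located with a recursive search
# so the exact nesting of the configs does not need to be hard-coded.
def find_key(node, key):
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key:
                return v
            hit = find_key(v, key)
            if hit is not None:
                return hit
    elif isinstance(node, list):
        for item in node:
            hit = find_key(item, key)
            if hit is not None:
                return hit
    return None

with open("configs/examples/grpo_verl_countdown/gcp_job.yaml") as f:
    job = yaml.safe_load(f)
with open("configs/examples/grpo_verl_countdown/train.yaml") as f:
    train = yaml.safe_load(f)

accelerators = find_key(job, "accelerators")          # e.g. "A100-80GB:2"
requested = int(str(accelerators).rsplit(":", 1)[1])
configured = find_key(train, "n_gpus_per_node")
assert requested == configured, f"GPU mismatch: {accelerators} vs n_gpus_per_node={configured}"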
configs/examples/grpo_verl_countdown/train.yaml (46 changes: 20 additions & 26 deletions)
@@ -18,19 +18,19 @@ model:
data:
train:
datasets:
- dataset_name: "d1shs0ap/countdown-final"
- dataset_name: "d1shs0ap/countdown"
split: "train"
validation:
datasets:
- dataset_name: "d1shs0ap/countdown-final"
- dataset_name: "d1shs0ap/countdown"
split: "test"

training:
trainer_type: "VERL_GRPO"
num_train_epochs: 5
save_steps: 150
num_train_epochs: 1
save_steps: -1
eval_strategy: "steps"
eval_steps: 12
eval_steps: 50

learning_rate: 1.0e-6
enable_gradient_checkpointing: True
@@ -40,44 +40,38 @@ training:
grpo:
max_completion_length: 1024
use_vllm: True
temperature: 0.6
vllm_gpu_memory_utilization: 0.7
temperature: 1.0
vllm_gpu_memory_utilization: 0.4

verl_config_overrides:
data:
train_files: "/home/cmu/countdown-curriculum/data/countdown/train-3-3.parquet"
val_files: "/home/cmu/countdown-curriculum/data/countdown/test-3-10.parquet"
train_batch_size: 128
val_batch_size: 256
train_batch_size: 64
val_batch_size: 640
max_prompt_length: 256
max_extrapolation_length: 2048
shuffle: False
actor_rollout_ref:
model:
use_remove_padding: True
actor:
ppo_mini_batch_size: 32
use_dynamic_bsz: True
gradients: "normal"
use_kl_loss: True
kl_loss_coef: 0.001
kl_loss_type: "low_var_kl"
entropy_coeff: 0
ppo_mini_batch_size: 16
ppo_micro_batch_size: 4
rollout:
log_prob_micro_batch_size: 4
tensor_model_parallel_size: 2
n: 8
enforce_eager: False
free_cache_engine: False
val_kwargs:
temperature: 0.6
n: 8
do_sample: True
max_num_batched_tokens: 16384
n: 16
ref:
log_prob_micro_batch_size: 2
fsdp_config:
param_offload: True
algorithm:
kl_ctrl:
kl_coef: 0.001
trainer:
n_gpus_per_node: 8
critic_warmup: 0
val_before_train: False
n_gpus_per_node: 2
nnodes: 1

output_dir: "output/grpo_verl_countdown"
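A rough sketch of how the updated batch settings interact, assuming the usual verl reading of these fields (train_batch_size counts prompts per step, rollout.n completions are sampled per prompt, ppo_mini_batch_size is one slice of the batch per gradient update, and ppo_micro_batch_size caps a single forward/backward pass); the numbers come from this diff, while the reading of each field is an assumption:

# Back-of-the-envelope arithmetic for the new countdown GRPO settings.
# Assumes the verl semantics described above; note that whether micro-batch
# sizes are global or per-GPU differs across verl versions.
train_batch_size = 64       # prompts sampled per training step
rollout_n = 16              # completions generated per prompt (rollout.n)
ppo_mini_batch_size = 16    # prompts per PPO mini-batch update
ppo_micro_batch_size = 4    # prompts per forward/backward pass

completions_per_step = train_batch_size * rollout_n             # 1024
updates_per_step = train_batch_size // ppo_mini_batch_size      # 4
grad_accum_steps = ppo_mini_batch_size // ppo_micro_batch_size  # 4

print(f"{completions_per_step=} {updates_per_step=} {grad_accum_steps=}")

Presumably, the lower vllm_gpu_memory_utilization (0.4) is meant to leave headroom for the FSDP training state that shares the same two GPUs with the vLLM rollout engine, which spans both of them via tensor_model_parallel_size: 2.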
configs/examples/grpo_verl_geometry3k/gcp_job.yaml (5 changes: 5 additions & 0 deletions)
@@ -36,6 +36,11 @@ envs:
setup: |
set -e
pip install uv && uv pip install oumi[gpu] huggingface_hub[hf_xet] mathruler
# TODO: OPE-1331 - Fix the bug that is blocking the upgrade to verl 0.4.0.
# Note that qwen_vl_utils needs to be installed manually.
# In the meantime, we need to use this specific commit to support vLLM 0.8.3:
# https://github.com/volcengine/verl/pull/912
pip install git+https://github.com/volcengine/verl.git@1ee730163f6326e9679644db62eb32c8d1947c7f
pip install -U flash-attn --no-build-isolation

run: |
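Because the setup installs verl from a pinned commit rather than a release, it can be worth confirming what actually got installed once setup finishes; a small sketch using only the standard library (the exact version strings will vary):

# Sketch: report the verl and vllm builds the setup step installed.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("verl", "vllm"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")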
pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -120,7 +120,7 @@ gpu = [
"bitsandbytes>=0.45.0,<0.46", # Used for QLora, and PagedAdam implementation
# When updating verl version, make sure to also update the default config:
# src/oumi/core/trainers/verl_trainer_config.yaml.
"verl>=0.3.0,<0.4", # Used for the VERL_GRPO trainer.
"verl>=0.4.0,<0.5", # Used for the VERL_GRPO trainer.
"vllm>=0.8.3,<0.9", # For VLLMInferenceEngine, and vLLM-powered GRPO training.
]

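The comment above the pin asks for src/oumi/core/trainers/verl_trainer_config.yaml to be kept in sync whenever the verl range changes. A small sketch of that check, assuming PyYAML and the packaging package are available and the script runs from the repository root:

# Sketch: verify the installed verl falls in the new range and that the
# default trainer config referenced in pyproject.toml still parses.
from importlib.metadata import version
from packaging.specifiers import SpecifierSet
import yaml

installed = version("verl")
print("verl", installed, "satisfies >=0.4.0,<0.5:", installed in SpecifierSet(">=0.4.0,<0.5"))

with open("src/oumi/core/trainers/verl_trainer_config.yaml") as f:
    cfg = yaml.safe_load(f)
print("default verl trainer config parses:", cfg is not None)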