Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 3c47296

Browse files
committed
fix
Signed-off-by: Chen Cui <[email protected]>
1 parent f6b1c7b commit 3c47296

1 file changed

Lines changed: 4 additions & 3 deletions

File tree

nemo/collections/llm/gpt/model/gpt_oss.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
from typing import TYPE_CHECKING, Annotated, Callable, List, Literal, Optional, Tuple, Union
2121

2222
import torch
23-
from megatron.core.transformer.enums import AttnBackend
2423
from safetensors import safe_open
2524
from torch import nn
2625
from transformers import AutoModelForCausalLM, GenerationConfig
@@ -63,11 +62,13 @@ class GPTOSSConfig(GPTConfig):
6362

6463
position_embedding_type: str = "yarn"
6564
rotary_base: int = 150000
66-
rotary_scaling_factor: float = 32.0
67-
yarn_original_max_position_embeddings: int = 131072
65+
yarn_rotary_scaling_factor: float = 32.0
66+
yarn_original_max_position_embeddings: int = 4096
6867
yarn_beta_fast: float = 32.0
6968
yarn_beta_slow: float = 1.0
7069
yarn_correction_range_round_to_int: bool = False
70+
yarn_mscale: float = 1.0
71+
yarn_mscale_all_dim: float = 1.0
7172

7273
moe_router_topk: int = 4
7374
moe_router_pre_softmax: bool = False

0 commit comments

Comments
 (0)