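fix: update GPT-OSS YaRN config fields (rename rotary_scaling_factor to yarn_rotary_scaling_factor, add yarn_mscale fields, set yarn_original_max_position_embeddings to 4096) and drop the AttnBackend import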

cuichenx · cuichenx · commit 3c47296f08d6 · 2025-09-30T17:16:03.000-07:00
Signed-off-by: Chen Cui <chcui@nvidia.com>
diff --git a/nemo/collections/llm/gpt/model/gpt_oss.py b/nemo/collections/llm/gpt/model/gpt_oss.py
@@ -20,7 +20,6 @@
 from typing import TYPE_CHECKING, Annotated, Callable, List, Literal, Optional, Tuple, Union
 
 import torch
-from megatron.core.transformer.enums import AttnBackend
 from safetensors import safe_open
 from torch import nn
 from transformers import AutoModelForCausalLM, GenerationConfig
@@ -63,11 +62,13 @@ class GPTOSSConfig(GPTConfig):
 
     position_embedding_type: str = "yarn"
     rotary_base: int = 150000
-    rotary_scaling_factor: float = 32.0
-    yarn_original_max_position_embeddings: int = 131072
+    yarn_rotary_scaling_factor: float = 32.0
+    yarn_original_max_position_embeddings: int = 4096
     yarn_beta_fast: float = 32.0
     yarn_beta_slow: float = 1.0
     yarn_correction_range_round_to_int: bool = False
+    yarn_mscale: float = 1.0
+    yarn_mscale_all_dim: float = 1.0
 
     moe_router_topk: int = 4
     moe_router_pre_softmax: bool = False