@@ -2628,7 +2628,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
-@Model.register("BertModel", "CamembertModel", "RobertaModel")
+@Model.register("BertModel", "CamembertModel")
 class BertModel(Model):
     model_arch = gguf.MODEL_ARCH.BERT
 
@@ -2701,6 +2701,51 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@Model.register("RobertaModel")
+class RobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # we need the pad_token_id to know how to chop down position_embd matrix
+        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
+            self._position_offset = 1 + pad_token_id
+            if "max_position_embeddings" in self.hparams:
+                self.hparams["max_position_embeddings"] -= self._position_offset
+        else:
+            self._position_offset = None
+
+    def set_vocab(self):
+        """Support BPE tokenizers for roberta models"""
+        bpe_tok_path = self.dir_model / "tokenizer.json"
+        if bpe_tok_path.exists():
+            self._set_vocab_gpt2()
+            self.gguf_writer.add_add_bos_token(True)
+            self.gguf_writer.add_add_eos_token(True)
+
+            # we need this to validate the size of the token_type embeddings
+            # though currently we are passing all zeros to the token_type embeddings
+            # "Sequence A" or "Sequence B"
+            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
+
+        else:
+            return super().set_vocab()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # if name starts with "roberta.", remove the prefix
+        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
+        if name.startswith("roberta."):
+            name = name[8:]
+
+        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
+        if name == "embeddings.position_embeddings.weight":
+            if self._position_offset is not None:
+                data_torch = data_torch[self._position_offset:,:]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @Model.register("NomicBertModel")
 class NomicBertModel(BertModel):
     model_arch = gguf.MODEL_ARCH.NOMIC_BERT
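
For context, a minimal standalone sketch of the position-offset trimming that the new RobertaModel class performs. The values below are illustrative (pad_token_id = 1 and max_position_embeddings = 514 are typical of RoBERTa-style configs, but the shapes and numbers are made up, not read from any specific checkpoint):

# sketch only: shows why the first (1 + pad_token_id) rows of the position
# embedding matrix are dropped and max_position_embeddings is reduced
import torch

hparams = {"pad_token_id": 1, "max_position_embeddings": 514, "hidden_size": 8}

# positions start at pad_token_id + 1, so the leading rows are never used at inference
position_offset = 1 + hparams["pad_token_id"]
position_embd = torch.randn(hparams["max_position_embeddings"], hparams["hidden_size"])

trimmed = position_embd[position_offset:, :]   # drop the reserved leading rows
effective_positions = hparams["max_position_embeddings"] - position_offset

print(trimmed.shape)          # torch.Size([512, 8])
print(effective_positions)    # 512, matching the adjusted max_position_embeddings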