Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit fe2543d

Browse files
author
gongenlei
authored
Improve CodeGen (PaddlePaddle#3371)
1 parent 18e2e11 commit fe2543d

3 files changed

Lines changed: 19 additions & 5 deletions

File tree

examples/code_generation/codegen/run_clm.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -252,7 +252,7 @@ def do_train(args):
252252
block_size)
253253
dev_set = process_ds(dev_set, tokenizer, args.overwrite_cache, block_size)
254254

255-
batchify_fn = DataCollatorWithPadding(tokenizer)
255+
batchify_fn = DataCollatorWithPadding(tokenizer, return_attention_mask=True)
256256

257257
train_batch_sampler = DistributedBatchSampler(
258258
train_set, batch_size=args.train_batch_size, shuffle=True)

paddlenlp/data/data_collator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ class DataCollatorWithPadding:
192192
max_length: Optional[int] = None
193193
pad_to_multiple_of: Optional[int] = None
194194
return_tensors: str = "pd"
195+
return_attention_mask: Optional[bool] = None
195196

196197
def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
197198
batch = self.tokenizer.pad(
@@ -200,7 +201,7 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
200201
max_length=self.max_length,
201202
pad_to_multiple_of=self.pad_to_multiple_of,
202203
return_tensors=self.return_tensors,
203-
)
204+
return_attention_mask=self.return_attention_mask)
204205
if "label" in batch:
205206
batch["labels"] = batch["label"]
206207
del batch["label"]

paddlenlp/transformers/codegen/modeling.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,7 @@ def forward(
412412
self,
413413
input_ids=None,
414414
attention_mask=None,
415+
token_type_ids=None,
415416
use_cache=False,
416417
cache=None,
417418
):
@@ -472,9 +473,15 @@ def forward(
472473
if attention_mask is None:
473474
assert input_ids is not None, "input_ids should be " \
474475
"specified when generating attention_mask"
475-
attention_mask = paddle.cast(
476-
input_ids == self.pad_token_id,
477-
dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e4
476+
if batch_size == 1 and past_length != 0:
477+
batch_size, seq_len = input_shape
478+
attention_mask = paddle.ones(
479+
[batch_size, 1, 1, seq_len + past_length],
480+
dtype=paddle.get_default_dtype())
481+
else:
482+
attention_mask = paddle.cast(
483+
input_ids == self.pad_token_id,
484+
dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e4
478485
# For 2D attention_mask from tokenizer
479486
elif attention_mask.ndim == 2:
480487
attention_mask = paddle.unsqueeze(
@@ -483,6 +490,10 @@ def forward(
483490
attention_mask.stop_gradient = True
484491

485492
inputs_embeds = self.wte(input_ids)
493+
if token_type_ids is not None:
494+
token_type_embeds = self.wte(token_type_ids)
495+
inputs_embeds = inputs_embeds + token_type_embeds
496+
486497
hidden_states = self.drop(inputs_embeds)
487498
output_shape = input_shape[:] + [hidden_states.shape[-1]]
488499

@@ -579,6 +590,7 @@ def prepare_inputs_for_generation(self, input_ids, cache=None, **kwargs):
579590
def forward(self,
580591
input_ids=None,
581592
attention_mask=None,
593+
token_type_ids=None,
582594
use_cache=False,
583595
cache=None):
584596
r"""
@@ -613,6 +625,7 @@ def forward(self,
613625

614626
transformer_outputs = self.transformer(input_ids,
615627
attention_mask=attention_mask,
628+
token_type_ids=token_type_ids,
616629
use_cache=use_cache,
617630
cache=cache)
618631

0 commit comments

Comments
 (0)