Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 131eff7

Browse files
authored
update reader (PaddlePaddle#134)
1 parent 599e3f1 commit 131eff7

1 file changed

Lines changed: 4 additions & 4 deletions

File tree

  • examples/machine_translation/transformer

examples/machine_translation/transformer/reader.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323
import paddle.distributed as dist
2424
from paddlenlp.data import Pad, Vocab
2525
from paddlenlp.datasets import load_dataset
26-
from paddlenlp.datasets import WMT14ende
2726
from paddlenlp.data.sampler import SamplerHelper
2827

2928

@@ -125,12 +124,13 @@ def convert_samples(sample):
125124

126125

127126
def adapt_vocab_size(args):
128-
root = None if args.root == "None" else args.root
129-
(src_vocab, trg_vocab) = WMT14ende.get_vocab(root=root)
127+
dataset = load_dataset('wmt14ende', splits=('test'))
128+
src_vocab = Vocab.load_vocabulary(**dataset.vocab_info["bpe"])
129+
trg_vocab = src_vocab
130+
130131
padding_vocab = (
131132
lambda x: (x + args.pad_factor - 1) // args.pad_factor * args.pad_factor
132133
)
133-
134134
args.src_vocab_size = padding_vocab(len(src_vocab))
135135
args.trg_vocab_size = padding_vocab(len(trg_vocab))
136136

0 commit comments

Comments
 (0)