Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit bc11e8f

Browse files
authored
Updated URLs for dataset (#1955)
1 parent faf2c47 commit bc11e8f

1 file changed

Lines changed: 7 additions & 1 deletion

File tree

beginner_source/translation_transformer.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,15 @@
2424

2525
from torchtext.data.utils import get_tokenizer
2626
from torchtext.vocab import build_vocab_from_iterator
27-
from torchtext.datasets import Multi30k
27+
from torchtext.datasets import multi30k, Multi30k
2828
from typing import Iterable, List
2929

3030

31+
# We need to modify the URLs for the dataset since the links to the original dataset are broken
32+
# Refer to https://github.com/pytorch/text/issues/1756#issuecomment-1163664163 for more info
33+
multi30k.URL["train"] = "https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/training.tar.gz"
34+
multi30k.URL["valid"] = "https://raw.githubusercontent.com/neychev/small_DL_repo/master/datasets/Multi30k/validation.tar.gz"
35+
3136
SRC_LANGUAGE = 'de'
3237
TGT_LANGUAGE = 'en'
3338

@@ -37,6 +42,7 @@
3742

3843

3944
# Create source and target language tokenizers. Make sure to install the dependencies.
45+
# pip install -U torchdata
4046
# pip install -U spacy
4147
# python -m spacy download en_core_web_sm
4248
# python -m spacy download de_core_news_sm

0 commit comments

Comments
 (0)