

Commit e924720

Stevenjin8 and brianjo authored
Improvements to word embeddings tutorial. (#1667)
* Improvements to word embeddings tutorial.
* More generic comments.

Co-authored-by: Brian Johnson <[email protected]>
1 parent e5a7d53 commit e924720

1 file changed (20 additions, 12 deletions)


beginner_source/nlp/word_embeddings_tutorial.py

@@ -207,11 +207,17 @@
 This were to be new made when thou art old,
 And see thy blood warm when thou feel'st it cold.""".split()
 # we should tokenize the input, but we will ignore that for now
-# build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word)
-trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
-            for i in range(len(test_sentence) - 2)]
-# print the first 3, just so you can see what they look like
-print(trigrams[:3])
+# build a list of tuples.
+# Each tuple is ([ word_i-CONTEXT_SIZE, ..., word_i-1 ], target word)
+ngrams = [
+    (
+        [test_sentence[i - j - 1] for j in range(CONTEXT_SIZE)],
+        test_sentence[i]
+    )
+    for i in range(CONTEXT_SIZE, len(test_sentence))
+]
+# Print the first 3, just so you can see what they look like.
+print(ngrams[:3])
 
 vocab = set(test_sentence)
 word_to_ix = {word: i for i, word in enumerate(vocab)}
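Reviewer note on the hunk above: the rewritten comprehension generalizes the old hard-coded trigram pairs to any CONTEXT_SIZE, and it emits each context list nearest-word-first, since j counts backwards from the target. A minimal sketch that runs on its own and shows the shape of the output (the short sentence here is a made-up stand-in for the tutorial's Shakespeare text):

CONTEXT_SIZE = 2  # same value the tutorial uses
test_sentence = "When forty winters shall besiege thy brow".split()
ngrams = [
    (
        [test_sentence[i - j - 1] for j in range(CONTEXT_SIZE)],
        test_sentence[i]
    )
    for i in range(CONTEXT_SIZE, len(test_sentence))
]
print(ngrams[0])
# (['forty', 'When'], 'winters') -- the context runs nearest-word-first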
@@ -240,7 +246,7 @@ def forward(self, inputs):
 
 for epoch in range(10):
     total_loss = 0
-    for context, target in trigrams:
+    for context, target in ngrams:
 
         # Step 1. Prepare the inputs to be passed to the model (i.e, turn the words
         # into integer indices and wrap them in tensors)
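For context, the loop this hunk renames iterates over (context, target) pairs and runs a standard PyTorch training step. A self-contained sketch of that step, with a toy vocabulary and a stand-in TinyModeler in place of the tutorial's n-gram model (all names and sizes below are assumptions, not part of this commit):

import torch
import torch.nn as nn
import torch.optim as optim

# Toy stand-ins so the sketch runs on its own.
vocab = ["when", "forty", "winters"]
word_to_ix = {w: i for i, w in enumerate(vocab)}
ngrams = [(["forty", "when"], "winters")]

class TinyModeler(nn.Module):
    def __init__(self, vocab_size, embedding_dim, context_size):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear = nn.Linear(context_size * embedding_dim, vocab_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).view(1, -1)  # concatenate context embeddings
        return torch.log_softmax(self.linear(embeds), dim=1)

model = TinyModeler(len(vocab), embedding_dim=4, context_size=2)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

for context, target in ngrams:
    # Step 1: turn the context words into a tensor of integer indices.
    context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)
    model.zero_grad()
    log_probs = model(context_idxs)
    loss = loss_function(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))
    loss.backward()
    optimizer.step()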
@@ -290,7 +296,7 @@ def forward(self, inputs):
 # and :math:`w_{i+1}, \dots, w_{i+N}`, referring to all context words
 # collectively as :math:`C`, CBOW tries to minimize
 #
-# .. math:: -\log p(w_i | C) = -\log \text{Softmax}(A(\sum_{w \in C} q_w) + b)
+# .. math:: -\log p(w_i | C) = -\log \text{Softmax}\left(A(\sum_{w \in C} q_w) + b\right)
 #
 # where :math:`q_w` is the embedding for word :math:`w`.
 #
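The \left/\right fix above only resizes the parentheses; the CBOW objective itself is unchanged: sum the context embeddings q_w, apply one affine map, take a log-softmax. A few lines of PyTorch that mirror the formula, with made-up sizes (A here bundles the matrix A and the bias b into a single nn.Linear):

import torch
import torch.nn as nn

vocab_size, embedding_dim = 10, 5
q = nn.Embedding(vocab_size, embedding_dim)   # rows play the role of q_w
A = nn.Linear(embedding_dim, vocab_size)      # affine map: A(.) + b
C = torch.tensor([1, 3, 4, 7])                # indices of the context words
log_probs = torch.log_softmax(A(q(C).sum(dim=0)), dim=0)
nll = -log_probs[2]   # -log p(w_i | C) for a target word with index 2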
@@ -316,9 +322,11 @@ def forward(self, inputs):
 
 word_to_ix = {word: i for i, word in enumerate(vocab)}
 data = []
-for i in range(2, len(raw_text) - 2):
-    context = [raw_text[i - 2], raw_text[i - 1],
-               raw_text[i + 1], raw_text[i + 2]]
+for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
+    context = (
+        [raw_text[i - j - 1] for j in range(CONTEXT_SIZE)]
+        + [raw_text[i + j + 1] for j in range(CONTEXT_SIZE)]
+    )
     target = raw_text[i]
     data.append((context, target))
 print(data[:5])
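The hunk above makes the CBOW window symmetric in CONTEXT_SIZE rather than hard-coding two words per side. A quick check of what the new expression yields, using a hypothetical raw_text and CONTEXT_SIZE = 2 (words left of the target come out nearest-first, mirroring the n-gram hunk earlier):

CONTEXT_SIZE = 2
raw_text = "We are about to study the idea of a computational process".split()
i = 2  # target word "about"
context = (
    [raw_text[i - j - 1] for j in range(CONTEXT_SIZE)]
    + [raw_text[i + j + 1] for j in range(CONTEXT_SIZE)]
)
print(context, "->", raw_text[i])
# ['are', 'We', 'to', 'study'] -> about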
@@ -332,8 +340,8 @@ def __init__(self):
     def forward(self, inputs):
         pass
 
-# create your model and train. here are some functions to help you make
-# the data ready for use by your module
+# Create your model and train. Here are some functions to help you make
+# the data ready for use by your module.
 
 
 def make_context_vector(context, word_to_ix):
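The diff cuts off at the make_context_vector signature, so its body is not shown in this commit. A plausible one-liner matching how the tutorial uses the helper (the body below is an assumption, not taken from the diff):

import torch

def make_context_vector(context, word_to_ix):
    # Look up each context word's index and wrap them in a LongTensor.
    return torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)

word_to_ix = {"the": 0, "idea": 1, "of": 2, "a": 3}  # toy vocabulary
print(make_context_vector(["the", "idea", "of", "a"], word_to_ix))
# tensor([0, 1, 2, 3])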
