 This were to be new made when thou art old,
 And see thy blood warm when thou feel'st it cold.""".split()
 # we should tokenize the input, but we will ignore that for now
-# build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word)
-trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
-            for i in range(len(test_sentence) - 2)]
-# print the first 3, just so you can see what they look like
-print(trigrams[:3])
+# build a list of tuples.
+# Each tuple is ([ word_i-CONTEXT_SIZE, ..., word_i-1 ], target word)
+ngrams = [
+    (
+        [test_sentence[i - j - 1] for j in range(CONTEXT_SIZE)],
+        test_sentence[i]
+    )
+    for i in range(CONTEXT_SIZE, len(test_sentence))
+]
+# Print the first 3, just so you can see what they look like.
+print(ngrams[:3])
 
 vocab = set(test_sentence)
 word_to_ix = {word: i for i, word in enumerate(vocab)}
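For intuition, the new comprehension pairs each word with its CONTEXT_SIZE predecessors, and the predecessors come out in reverse order (nearest word first). A quick standalone check, assuming CONTEXT_SIZE = 2 as in the tutorial:

# Minimal check of the n-gram construction on a toy sentence
# (assumes CONTEXT_SIZE = 2, the value the tutorial uses).
CONTEXT_SIZE = 2
toy = "When forty winters shall besiege thy brow".split()
ngrams = [
    ([toy[i - j - 1] for j in range(CONTEXT_SIZE)], toy[i])
    for i in range(CONTEXT_SIZE, len(toy))
]
print(ngrams[0])  # (['forty', 'When'], 'winters'): nearest context word first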
@@ -240,7 +246,7 @@ def forward(self, inputs):
 
 for epoch in range(10):
     total_loss = 0
-    for context, target in trigrams:
+    for context, target in ngrams:
 
         # Step 1. Prepare the inputs to be passed to the model (i.e., turn the words
         # into integer indices and wrap them in tensors)
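The preparation step named here conventionally maps the context words through word_to_ix and wraps the indices in a LongTensor; a minimal sketch of what Step 1 typically looks like (assuming the word_to_ix dict built above):

import torch

# Map each context word to its integer index, then wrap in a tensor
# so it can be fed to the model's embedding layer.
context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)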
@@ -290,7 +296,7 @@ def forward(self, inputs):
 # and :math:`w_{i+1}, \dots, w_{i+N}`, referring to all context words
 # collectively as :math:`C`, CBOW tries to minimize
 #
-# .. math:: -\log p(w_i | C) = -\log \text{Softmax}(A(\sum_{w \in C} q_w) + b)
+# .. math:: -\log p(w_i | C) = -\log \text{Softmax}\left(A(\sum_{w \in C} q_w) + b\right)
 #
 # where :math:`q_w` is the embedding for word :math:`w`.
 #
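In code, the objective above is a sum of context embeddings q_w, one affine map (A, b), and a log-softmax whose output feeds an NLL loss. A minimal sketch of that computation (the class and layer names are illustrative, not the exercise's required structure):

import torch.nn as nn
import torch.nn.functional as F

class CBOWSketch(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)  # q_w lookup
        self.linear = nn.Linear(embedding_dim, vocab_size)         # A and b

    def forward(self, inputs):
        # sum_{w in C} q_w over the context word indices
        summed = self.embeddings(inputs).sum(dim=0)
        # log Softmax(A(...) + b); pair with nn.NLLLoss to get -log p(w_i | C)
        return F.log_softmax(self.linear(summed), dim=-1)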
@@ -316,9 +322,11 @@ def forward(self, inputs):
 
 word_to_ix = {word: i for i, word in enumerate(vocab)}
 data = []
-for i in range(2, len(raw_text) - 2):
-    context = [raw_text[i - 2], raw_text[i - 1],
-               raw_text[i + 1], raw_text[i + 2]]
+for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
+    context = (
+        [raw_text[i - j - 1] for j in range(CONTEXT_SIZE)]
+        + [raw_text[i + j + 1] for j in range(CONTEXT_SIZE)]
+    )
     target = raw_text[i]
     data.append((context, target))
 print(data[:5])
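The rewritten window is symmetric around the target; note that, as with the n-gram change, the left half is generated nearest-first. A toy check, again assuming CONTEXT_SIZE = 2:

CONTEXT_SIZE = 2
toy = "We are about to study the idea".split()
i = 2  # target word "about"
context = (
    [toy[i - j - 1] for j in range(CONTEXT_SIZE)]    # left half, nearest first
    + [toy[i + j + 1] for j in range(CONTEXT_SIZE)]  # right half, in order
)
print(context)  # ['are', 'We', 'to', 'study']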
@@ -332,8 +340,8 @@ def __init__(self):
     def forward(self, inputs):
         pass
 
-# create your model and train. here are some functions to help you make
-# the data ready for use by your module
+# Create your model and train. Here are some functions to help you make
+# the data ready for use by your module.
 
 
 def make_context_vector(context, word_to_ix):
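The hunk ends at this signature; a helper like this conventionally turns the context words into a tensor of indices. A minimal sketch of one possible body (an assumption, not necessarily what the file contains):

import torch

def make_context_vector(context, word_to_ix):
    # Map words to indices and wrap them in a LongTensor for nn.Embedding.
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)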