@@ -266,6 +266,7 @@ def get_batch(source, i):
266266 world_size = 1 ,
267267 rpc_backend_options = rpc .TensorPipeRpcBackendOptions (
268268 init_method = "file://{}" .format (tmpfile .name ),
269+ _transports = ["ibv" , "uv" ],
269270 )
270271)
271272
@@ -409,10 +410,49 @@ def evaluate(eval_model, data_source):
409410# Evaluate the model with the test dataset
410411# -------------------------------------
411412#
413+
414+
415+ ######################################################################
412416# Apply the best model to check the result with the test dataset.
413417
414418test_loss = evaluate (best_model , test_data )
415419print ('=' * 89 )
416420print ('| End of training | test loss {:5.2f} | test ppl {:8.2f}' .format (
417421 test_loss , math .exp (test_loss )))
418422print ('=' * 89 )
423+
424+
425+ ######################################################################
426+ # Output
427+ # ------
428+ #
429+
430+
431+ ######################################################################
432+ # .. code-block:: py
433+ #
434+ #    Total parameters in model: 1,847,087,215
435+ #    | epoch 1 | 10/ 50 batches | lr 5.00 | ms/batch 2387.45 | loss 42.16 | ppl 2036775646369743616.00
436+ #    | epoch 1 | 20/ 50 batches | lr 5.00 | ms/batch 2150.93 | loss 48.24 | ppl 891334049215401558016.00
437+ #    | epoch 1 | 30/ 50 batches | lr 5.00 | ms/batch 2155.23 | loss 34.66 | ppl 1125676483188404.62
438+ #    | epoch 1 | 40/ 50 batches | lr 5.00 | ms/batch 2158.42 | loss 38.87 | ppl 76287208340888368.00
439+ #    -----------------------------------------------------------------------------------------
440+ #    | end of epoch 1 | time: 119.65s | valid loss 2.95 | valid ppl 19.15
441+ #    -----------------------------------------------------------------------------------------
442+ #    | epoch 2 | 10/ 50 batches | lr 4.51 | ms/batch 2376.16 | loss 34.92 | ppl 1458001430957104.00
443+ #    | epoch 2 | 20/ 50 batches | lr 4.51 | ms/batch 2160.96 | loss 34.75 | ppl 1232463826541886.50
444+ #    | epoch 2 | 30/ 50 batches | lr 4.51 | ms/batch 2160.66 | loss 28.10 | ppl 1599598251136.51
445+ #    | epoch 2 | 40/ 50 batches | lr 4.51 | ms/batch 2160.07 | loss 20.25 | ppl 621174306.77
446+ #    -----------------------------------------------------------------------------------------
447+ #    | end of epoch 2 | time: 119.76s | valid loss 0.87 | valid ppl 2.38
448+ #    -----------------------------------------------------------------------------------------
449+ #    | epoch 3 | 10/ 50 batches | lr 4.29 | ms/batch 2376.49 | loss 13.20 | ppl 537727.23
450+ #    | epoch 3 | 20/ 50 batches | lr 4.29 | ms/batch 2160.12 | loss 10.98 | ppl 58548.58
451+ #    | epoch 3 | 30/ 50 batches | lr 4.29 | ms/batch 2160.05 | loss 12.01 | ppl 164152.79
452+ #    | epoch 3 | 40/ 50 batches | lr 4.29 | ms/batch 2160.03 | loss 10.63 | ppl 41348.00
453+ #    -----------------------------------------------------------------------------------------
454+ #    | end of epoch 3 | time: 119.76s | valid loss 0.78 | valid ppl 2.17
455+ #    -----------------------------------------------------------------------------------------
456+ #    =========================================================================================
457+ #    | End of training | test loss 0.69 | test ppl 1.99
458+ #    =========================================================================================
0 commit comments