

Commit fbd0782

Add timestamped_allocator and pending_cap GPU config options to tf_cnn_benchmarks.

PiperOrigin-RevId: 233087377

1 parent fdd6041 · commit fbd0782

File tree

1 file changed, +12 -0 lines changed


scripts/tf_cnn_benchmarks/benchmark_cnn.py

Lines changed: 12 additions & 0 deletions
@@ -364,6 +364,13 @@
 flags.DEFINE_boolean('use_unified_memory', None,
                      'If True, allocate unified memory enabling larger models '
                      'to fit in available device RAM.')
+flags.DEFINE_boolean('timestamped_allocator', False,
+                     'If True marks free BFCAllocator::Chunks with time '
+                     'at which they are freed which can allow more efficient '
+                     'memory allocation in cases like RDMA networking.')
+flags.DEFINE_integer('gpu_pending_cap', 0, 'If > 0 then the number of pending '
+                     '(queued but not yet known to have terminated) kernels '
+                     'per GPU device will be capped to this number.')
 flags.DEFINE_boolean('use_tf_layers', True,
                      'If True, use tf.layers for neural network layers. This '
                      'should not affect performance or accuracy in any way.')
@@ -742,6 +749,11 @@ def create_config_proto(params):
   if params.use_unified_memory:
     config.gpu_options.experimental.use_unified_memory = (
         params.use_unified_memory)
+  if params.timestamped_allocator:
+    config.gpu_options.experimental.timestamped_allocator = (
+        params.timestamped_allocator)
+  if params.gpu_pending_cap > 0:
+    config.gpu_options.experimental.pending_cap = params.gpu_pending_cap
   if params.xla:
     config.graph_options.optimizer_options.global_jit_level = (
         tf.OptimizerOptions.ON_1)
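
For context, here is a minimal sketch (not part of the commit) of how these two experimental GPU options end up in the session configuration, mirroring the create_config_proto() change above. The gpu_options.experimental field names follow the diff; whether they exist depends on the TensorFlow build this commit targets, and the values shown are illustrative.

# Sketch only: build a session config with the new experimental GPU options.
import tensorflow as tf

config = tf.ConfigProto()
# Tag freed BFCAllocator chunks with the time at which they were freed.
config.gpu_options.experimental.timestamped_allocator = True
# Cap the number of pending (queued but not yet known to have terminated)
# kernels per GPU device.
config.gpu_options.experimental.pending_cap = 64

with tf.Session(config=config) as sess:
    pass  # run the benchmark graph under this config

From the benchmark itself, the same behavior would be requested on the command line, e.g. by passing --timestamped_allocator=True --gpu_pending_cap=64 to tf_cnn_benchmarks.py (the cap of 64 is purely illustrative).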
