 flags.DEFINE_boolean('use_unified_memory', None,
                      'If True, allocate unified memory enabling larger models '
                      'to fit in available device RAM.')
+flags.DEFINE_boolean('timestamped_allocator', False,
+                     'If True, marks free BFCAllocator::Chunks with the time '
+                     'at which they are freed, which can allow more efficient '
+                     'memory allocation in cases like RDMA networking.')
+flags.DEFINE_integer('gpu_pending_cap', 0, 'If > 0 then the number of pending '
+                     '(queued but not yet known to have terminated) kernels '
+                     'per GPU device will be capped to this number.')
 flags.DEFINE_boolean('use_tf_layers', True,
                      'If True, use tf.layers for neural network layers. This '
                      'should not affect performance or accuracy in any way.')
@@ -742,6 +749,11 @@ def create_config_proto(params):
   if params.use_unified_memory:
     config.gpu_options.experimental.use_unified_memory = (
         params.use_unified_memory)
+  if params.timestamped_allocator:
+    config.gpu_options.experimental.timestamped_allocator = (
+        params.timestamped_allocator)
+  if params.gpu_pending_cap > 0:
+    config.gpu_options.experimental.pending_cap = params.gpu_pending_cap
   if params.xla:
     config.graph_options.optimizer_options.global_jit_level = (
         tf.OptimizerOptions.ON_1)
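
For reference, a minimal sketch of what the two new options set on a session config, assuming a TF 1.x build whose GPUOptions.Experimental proto exposes the timestamped_allocator and pending_cap fields used in this diff; the cap value of 64 is illustrative only:

import tensorflow as tf

config = tf.ConfigProto()
# Mark free BFCAllocator::Chunks with the time at which they were freed, so
# memory that may still be referenced (e.g. by an in-flight RDMA transfer)
# is not handed out again too early.
config.gpu_options.experimental.timestamped_allocator = True
# Cap the number of kernels per GPU that are queued but not yet known to
# have terminated.
config.gpu_options.experimental.pending_cap = 64  # illustrative value
sess = tf.Session(config=config)

Driven through the benchmark's own flags, the equivalent invocation would be along the lines of:

python tf_cnn_benchmarks.py --model=resnet50 \
    --timestamped_allocator=True --gpu_pending_cap=64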