
Commit 21ec0e1

Benchmark update (tensorflow#4034)
* Update the benchmark logger to have default logging:
  1. Create a global instance of the benchmark logger, which logs to tf.logging.info by default.
  2. Allow the user to configure the logging location.
  3. Fix nits in code and comments.
* Fix lint and test errors.
* Address review comments.
* Remove the duplicated print statement.
1 parent 823da31 commit 21ec0e1
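
The headline change is that benchmark logging now always has a usable default. A minimal sketch of the new behavior (not part of the diff itself; assumes the official.utils.logs package is importable):

    from official.utils.logs import logger

    # With no prior configuration, a BaseBenchmarkLogger is installed
    # lazily and metrics go to tf.logging.info instead of raising.
    logger.get_benchmark_logger().log_metric("accuracy", 0.999, global_step=1000)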

File tree

6 files changed (+160, −66 lines)


official/resnet/resnet_run_loop.py

Lines changed: 3 additions & 8 deletions
@@ -398,11 +398,8 @@ def resnet_main(flags, model_function, input_function, shape=None):
       'dtype': flags.dtype
   })

-  if flags.benchmark_log_dir is not None:
-    benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
-    benchmark_logger.log_run_info('resnet')
-  else:
-    benchmark_logger = None
+  benchmark_logger = logger.config_benchmark_logger(flags.benchmark_log_dir)
+  benchmark_logger.log_run_info('resnet')

   for _ in range(flags.train_epochs // flags.epochs_between_evals):
     train_hooks = hooks_helper.get_train_hooks(

@@ -434,10 +431,8 @@ def input_fn_eval():
     # global_step count.
     eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                        steps=flags.max_train_steps)
-    print(eval_results)

-    if benchmark_logger:
-      benchmark_logger.log_estimator_evaluation_result(eval_results)
+    benchmark_logger.log_evaluation_result(eval_results)

     if model_helpers.past_stop_threshold(
         flags.stop_threshold, eval_results['accuracy']):
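
The run loop no longer branches on whether a log directory was supplied. A hedged sketch of the resulting call pattern (the None argument and the eval_results dict are illustrative; eval_results normally comes from classifier.evaluate):

    from official.utils.logs import logger

    # config_benchmark_logger always returns a usable logger: a
    # BenchmarkFileLogger when a directory is given, otherwise the
    # STDOUT-backed BaseBenchmarkLogger, so no None checks remain.
    benchmark_logger = logger.config_benchmark_logger(None)
    benchmark_logger.log_run_info('resnet')

    eval_results = {'global_step': 1000, 'accuracy': 0.76}  # illustrative
    benchmark_logger.log_evaluation_result(eval_results)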

official/utils/logs/hooks_helper.py

Lines changed: 3 additions & 3 deletions
@@ -27,6 +27,7 @@
 import tensorflow as tf  # pylint: disable=g-bad-import-order

 from official.utils.logs import hooks
+from official.utils.logs import logger
 from official.utils.logs import metric_hook

 _TENSORS_TO_LOG = dict((x, x) for x in ['learning_rate',

@@ -140,13 +141,12 @@ def get_logging_metric_hook(benchmark_log_dir=None,
   Returns a ProfilerHook that writes out timelines that can be loaded into
   profiling tools like chrome://tracing.
   """
-  if benchmark_log_dir is None:
-    raise ValueError("metric_log_dir should be provided to use metric logger")
+  logger.config_benchmark_logger(benchmark_log_dir)
   if tensors_to_log is None:
     tensors_to_log = _TENSORS_TO_LOG
   return metric_hook.LoggingMetricHook(
       tensors=tensors_to_log,
-      log_dir=benchmark_log_dir,
+      metric_logger=logger.get_benchmark_logger(),
       every_n_secs=every_n_secs)
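
With this change, get_logging_metric_hook no longer rejects a missing log directory. A sketch of the new usage (the every_n_secs value is illustrative):

    from official.utils.logs import hooks_helper

    # benchmark_log_dir=None now configures the default STDOUT logger
    # rather than raising ValueError; the hook receives the shared
    # metric_logger instead of a raw directory path.
    hook = hooks_helper.get_logging_metric_hook(benchmark_log_dir=None,
                                                every_n_secs=600)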

official/utils/logs/logger.py

Lines changed: 95 additions & 25 deletions
@@ -27,6 +27,7 @@
 import multiprocessing
 import numbers
 import os
+import threading

 import tensorflow as tf
 from tensorflow.python.client import device_lib

@@ -36,27 +37,48 @@
 _DATE_TIME_FORMAT_PATTERN = "%Y-%m-%dT%H:%M:%S.%fZ"


-class BenchmarkLogger(object):
-  """Class to log the benchmark information to local disk."""
+# Don't use it directly. Use get_benchmark_logger to access a logger.
+_benchmark_logger = None
+_logger_lock = threading.Lock()

-  def __init__(self, logging_dir):
-    self._logging_dir = logging_dir
-    if not tf.gfile.IsDirectory(self._logging_dir):
-      tf.gfile.MakeDirs(self._logging_dir)

-  def log_estimator_evaluation_result(self, eval_results):
-    """Log the evaluation result for a estimator.
+def config_benchmark_logger(logging_dir):
+  """Config the global benchmark logger"""
+  _logger_lock.acquire()
+  try:
+    global _benchmark_logger
+    if logging_dir:
+      _benchmark_logger = BenchmarkFileLogger(logging_dir)
+    else:
+      _benchmark_logger = BaseBenchmarkLogger()
+  finally:
+    _logger_lock.release()
+  return _benchmark_logger
+
+
+def get_benchmark_logger():
+  if not _benchmark_logger:
+    config_benchmark_logger(None)
+
+  return _benchmark_logger
+
+
+class BaseBenchmarkLogger(object):
+  """Class to log the benchmark information to STDOUT."""
+
+  def log_evaluation_result(self, eval_results):
+    """Log the evaluation result.

-    The evaluate result is a directory that contains metrics defined in
+    The evaluate result is a dictionary that contains metrics defined in
     model_fn. It also contains a entry for global_step which contains the value
     of the global step when evaluation was performed.

     Args:
-      eval_results: dict, the result of evaluate() from a estimator.
+      eval_results: dict, the result of evaluate.
     """
     if not isinstance(eval_results, dict):
-      tf.logging.warning("eval_results should be directory for logging. Got %s",
-                         type(eval_results))
+      tf.logging.warning("eval_results should be dictionary for logging. "
+                         "Got %s", type(eval_results))
       return
     global_step = eval_results[tf.GraphKeys.GLOBAL_STEP]
     for key in sorted(eval_results):

@@ -81,10 +103,45 @@ def log_metric(self, name, value, unit=None, global_step=None, extras=None):
       tf.logging.warning(
           "Metric value to log should be a number. Got %s", type(value))
       return
-    if extras:
-      extras = [{"name": k, "value": v} for k, v in sorted(extras.items())]
-    else:
-      extras = []
+    extras = _convert_to_json_dict(extras)
+
+    tf.logging.info("Benchmark metric: "
+                    "Name %s, value %d, unit %s, global_step %d, extras %s",
+                    name, value, unit, global_step, extras)
+
+  def log_run_info(self, model_name):
+    tf.logging.info("Benchmark run: %s", _gather_run_info(model_name))
+
+
+class BenchmarkFileLogger(BaseBenchmarkLogger):
+  """Class to log the benchmark information to local disk."""
+
+  def __init__(self, logging_dir):
+    super(BenchmarkFileLogger, self).__init__()
+    self._logging_dir = logging_dir
+    if not tf.gfile.IsDirectory(self._logging_dir):
+      tf.gfile.MakeDirs(self._logging_dir)
+
+  def log_metric(self, name, value, unit=None, global_step=None, extras=None):
+    """Log the benchmark metric information to local file.
+
+    Currently the logging is done in a synchronized way. This should be updated
+    to log asynchronously.
+
+    Args:
+      name: string, the name of the metric to log.
+      value: number, the value of the metric. The value will not be logged if it
+        is not a number type.
+      unit: string, the unit of the metric, E.g "image per second".
+      global_step: int, the global_step when the metric is logged.
+      extras: map of string:string, the extra information about the metric.
+    """
+    if not isinstance(value, numbers.Number):
+      tf.logging.warning(
+          "Metric value to log should be a number. Got %s", type(value))
+      return
+    extras = _convert_to_json_dict(extras)
+
     with tf.gfile.GFile(
         os.path.join(self._logging_dir, METRIC_LOG_FILE_NAME), "a") as f:
       metric = {

@@ -110,15 +167,7 @@ def log_run_info(self, model_name):
     Args:
       model_name: string, the name of the model.
     """
-    run_info = {
-        "model_name": model_name,
-        "machine_config": {},
-        "run_date": datetime.datetime.now().strftime(_DATE_TIME_FORMAT_PATTERN)}
-    _collect_tensorflow_info(run_info)
-    _collect_tensorflow_environment_variables(run_info)
-    _collect_cpu_info(run_info)
-    _collect_gpu_info(run_info)
-    _collect_memory_info(run_info)
+    run_info = _gather_run_info(model_name)

     with tf.gfile.GFile(os.path.join(
         self._logging_dir, BENCHMARK_RUN_LOG_FILE_NAME), "w") as f:

@@ -130,6 +179,20 @@ def log_run_info(self, model_name):
           e)


+def _gather_run_info(model_name):
+  """Collect the benchmark run information for the local environment."""
+  run_info = {
+      "model_name": model_name,
+      "machine_config": {},
+      "run_date": datetime.datetime.now().strftime(_DATE_TIME_FORMAT_PATTERN)}
+  _collect_tensorflow_info(run_info)
+  _collect_tensorflow_environment_variables(run_info)
+  _collect_cpu_info(run_info)
+  _collect_gpu_info(run_info)
+  _collect_memory_info(run_info)
+  return run_info
+
+
 def _collect_tensorflow_info(run_info):
   run_info["tensorflow_version"] = {
       "version": tf.VERSION, "git_hash": tf.GIT_VERSION}

@@ -194,3 +257,10 @@ def _parse_gpu_model(physical_device_desc):
     if k.strip() == "name":
       return v.strip()
   return None
+
+
+def _convert_to_json_dict(input_dict):
+  if input_dict:
+    return [{"name": k, "value": v} for k, v in sorted(input_dict.items())]
+  else:
+    return []
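
Taken together, the module now behaves as a thread-safe singleton. A minimal sketch of the two entry points (the "/tmp/benchmark" path, metric name, and extras are hypothetical):

    from official.utils.logs import logger

    # Configuring with a directory installs a BenchmarkFileLogger that
    # appends metric records to metric.log under that directory.
    file_logger = logger.config_benchmark_logger("/tmp/benchmark")
    file_logger.log_metric("accuracy", 0.999, global_step=10000,
                           extras={"dataset": "imagenet"})

    # Every later call site observes the same instance; reconfiguration
    # is guarded by _logger_lock.
    assert logger.get_benchmark_logger() is file_logger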

official/utils/logs/logger_test.py

Lines changed: 53 additions & 11 deletions
@@ -31,16 +31,58 @@

 class BenchmarkLoggerTest(tf.test.TestCase):

+  def test_get_default_benchmark_logger(self):
+    self.assertIsInstance(logger.get_benchmark_logger(),
+                          logger.BaseBenchmarkLogger)
+
+  def test_config_base_benchmark_logger(self):
+    logger.config_benchmark_logger("")
+    self.assertIsInstance(logger.get_benchmark_logger(),
+                          logger.BaseBenchmarkLogger)
+
+  def test_config_benchmark_file_logger(self):
+    logger.config_benchmark_logger("/tmp/abc")
+    self.assertIsInstance(logger.get_benchmark_logger(),
+                          logger.BenchmarkFileLogger)
+
+
+class BaseBenchmarkLoggerTest(tf.test.TestCase):
+
+  def setUp(self):
+    super(BaseBenchmarkLoggerTest, self).setUp()
+    self._actual_log = tf.logging.info
+    self.logged_message = None
+
+    def mock_log(*args, **kwargs):
+      self.logged_message = args
+      self._actual_log(*args, **kwargs)
+
+    tf.logging.info = mock_log
+
+  def tearDown(self):
+    super(BaseBenchmarkLoggerTest, self).tearDown()
+    tf.logging.info = self._actual_log
+
+  def test_log_metric(self):
+    log = logger.BaseBenchmarkLogger()
+    log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})
+
+    expected_log_prefix = "Benchmark metric:"
+    self.assertRegexpMatches(str(self.logged_message), expected_log_prefix)
+
+
+class BenchmarkFileLoggerTest(tf.test.TestCase):
+
   def setUp(self):
-    super(BenchmarkLoggerTest, self).setUp()
+    super(BenchmarkFileLoggerTest, self).setUp()
     # Avoid pulling extra env vars from test environment which affects the test
     # result, eg. Kokoro test has a TF_PKG env which affect the test case
     # test_collect_tensorflow_environment_variables()
     self.original_environ = dict(os.environ)
     os.environ.clear()

   def tearDown(self):
-    super(BenchmarkLoggerTest, self).tearDown()
+    super(BenchmarkFileLoggerTest, self).tearDown()
     tf.gfile.DeleteRecursively(self.get_temp_dir())
     os.environ.clear()
     os.environ.update(self.original_environ)

@@ -49,12 +91,12 @@ def test_create_logging_dir(self):
     non_exist_temp_dir = os.path.join(self.get_temp_dir(), "unknown_dir")
     self.assertFalse(tf.gfile.IsDirectory(non_exist_temp_dir))

-    logger.BenchmarkLogger(non_exist_temp_dir)
+    logger.BenchmarkFileLogger(non_exist_temp_dir)
     self.assertTrue(tf.gfile.IsDirectory(non_exist_temp_dir))

   def test_log_metric(self):
     log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkLogger(log_dir)
+    log = logger.BenchmarkFileLogger(log_dir)
     log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})

     metric_log = os.path.join(log_dir, "metric.log")

@@ -69,7 +111,7 @@ def test_log_metric(self):

   def test_log_multiple_metrics(self):
     log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkLogger(log_dir)
+    log = logger.BenchmarkFileLogger(log_dir)
     log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})
     log.log_metric("loss", 0.02, global_step=1e4)

@@ -90,9 +132,9 @@ def test_log_multiple_metrics(self):
     self.assertEqual(loss["global_step"], 1e4)
     self.assertEqual(loss["extras"], [])

-  def test_log_non_nubmer_value(self):
+  def test_log_non_number_value(self):
     log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkLogger(log_dir)
+    log = logger.BenchmarkFileLogger(log_dir)
     const = tf.constant(1)
     log.log_metric("accuracy", const)

@@ -104,8 +146,8 @@ def test_log_evaluation_result(self):
         "global_step": 207082,
         "accuracy": 0.9285}
     log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkLogger(log_dir)
-    log.log_estimator_evaluation_result(eval_result)
+    log = logger.BenchmarkFileLogger(log_dir)
+    log.log_evaluation_result(eval_result)

     metric_log = os.path.join(log_dir, "metric.log")
     self.assertTrue(tf.gfile.Exists(metric_log))

@@ -125,8 +167,8 @@ def test_log_evaluation_result(self):
   def test_log_evaluation_result_with_invalid_type(self):
     eval_result = "{'loss': 0.46237424, 'global_step': 207082}"
     log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    log = logger.BenchmarkLogger(log_dir)
-    log.log_estimator_evaluation_result(eval_result)
+    log = logger.BenchmarkFileLogger(log_dir)
+    log.log_evaluation_result(eval_result)

     metric_log = os.path.join(log_dir, "metric.log")
     self.assertFalse(tf.gfile.Exists(metric_log))

official/utils/logs/metric_hook.py

Lines changed: 5 additions & 14 deletions
@@ -20,8 +20,6 @@

 import tensorflow as tf  # pylint: disable=g-bad-import-order

-from official.utils.logs import logger
-

 class LoggingMetricHook(tf.train.LoggingTensorHook):
   """Hook to log benchmark metric information.

@@ -35,17 +33,15 @@ class LoggingMetricHook(tf.train.LoggingTensorHook):
   whose evaluation produces a side effect such as consuming additional inputs.
   """

-  def __init__(self, tensors, log_dir=None, metric_logger=None,
+  def __init__(self, tensors, metric_logger=None,
                every_n_iter=None, every_n_secs=None, at_end=False):
     """Initializer for LoggingMetricHook.

     Args:
       tensors: `dict` that maps string-valued tags to tensors/tensor names,
         or `iterable` of tensors/tensor names.
-      log_dir: `string`, directory path that metric hook should write log to.
       metric_logger: instance of `BenchmarkLogger`, the benchmark logger that
-        hook should use to write the log. Exactly one of the `log_dir` and
-        `metric_logger` should be provided.
+        hook should use to write the log.
       every_n_iter: `int`, print the values of `tensors` once every N local
         steps taken on the current worker.
       every_n_secs: `int` or `float`, print the values of `tensors` once every N

@@ -66,14 +62,9 @@ def __init__(self, tensors, log_dir=None, metric_logger=None,
         every_n_secs=every_n_secs,
         at_end=at_end)

-    if (log_dir is None) == (metric_logger is None):
-      raise ValueError(
-          "exactly one of log_dir and metric_logger should be provided.")
-
-    if log_dir is not None:
-      self._logger = logger.BenchmarkLogger(log_dir)
-    else:
-      self._logger = metric_logger
+    if metric_logger is None:
+      raise ValueError("metric_logger should be provided.")
+    self._logger = metric_logger

   def begin(self):
     super(LoggingMetricHook, self).begin()
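
The hook now takes its logger by injection only. A hedged construction sketch (the tensor tag 'loss' and the interval are illustrative):

    from official.utils.logs import logger, metric_hook

    # Passing no metric_logger raises ValueError; the shared global
    # logger is the natural thing to inject.
    hook = metric_hook.LoggingMetricHook(
        tensors={'loss': 'loss'},
        metric_logger=logger.get_benchmark_logger(),
        every_n_iter=100)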

official/utils/logs/metric_hook_test.py

Lines changed: 1 addition & 5 deletions
@@ -64,12 +64,8 @@ def test_illegal_args(self):
           tensors=['t'], every_n_iter=5, every_n_secs=5)
     with self.assertRaisesRegexp(ValueError, 'xactly one of'):
       metric_hook.LoggingMetricHook(tensors=['t'])
-    with self.assertRaisesRegexp(ValueError, 'log_dir and metric_logger'):
+    with self.assertRaisesRegexp(ValueError, 'metric_logger'):
       metric_hook.LoggingMetricHook(tensors=['t'], every_n_iter=5)
-    with self.assertRaisesRegexp(ValueError, 'log_dir and metric_logger'):
-      metric_hook.LoggingMetricHook(
-          tensors=['t'], every_n_iter=5, log_dir=self._log_dir,
-          metric_logger=self._logger)

   def test_print_at_end_only(self):
     with tf.Graph().as_default(), tf.Session() as sess:
