Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 13c9de3

Browse files
committed
fix code style problem and add option "last_layers_contain_logits_only"
1 parent 17ba1ca commit 13c9de3

9 files changed

+47
-34
lines changed

research/deeplab/datasets/build_ade20k_data.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,15 @@
1313
# limitations under the License.
1414
# ==============================================================================
1515

16-
import glob
1716
import math
1817
import os
1918
import random
2019
import string
2120
import sys
22-
from PIL import Image
2321
import build_data
2422
import tensorflow as tf
2523

2624
FLAGS = tf.app.flags.FLAGS
27-
flags = tf.app.flags
2825

2926
tf.app.flags.DEFINE_string(
3027
'train_image_folder',
@@ -52,18 +49,18 @@
5249
_NUM_SHARDS = 4
5350

5451
def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
55-
""" Convert the ADE20k dataset into into tfrecord format (SSTable).
52+
""" Converts the ADE20k dataset into tfrecord format (SSTable).
5653
5754
Args:
58-
dataset_split: dataset split (e.g., train, val)
59-
dataset_dir: dir in which the dataset locates
60-
dataset_label_dir: dir in which the annotations locates
55+
dataset_split: Dataset split (e.g., train, val).
56+
dataset_dir: Dir in which the dataset locates.
57+
dataset_label_dir: Dir in which the annotations locates.
6158
6259
Raises:
6360
RuntimeError: If loaded image and label have different shape.
6461
"""
6562

66-
img_names = glob.glob(os.path.join(dataset_dir, '*.jpg'))
63+
img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
6764
random.shuffle(img_names)
6865
seg_names = []
6966
for f in img_names:
@@ -74,7 +71,7 @@ def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
7471
seg_names.append(seg)
7572

7673
num_images = len(img_names)
77-
num_per_shard = int(math.ceil(num_images) / float(_NUM_SHARDS))
74+
num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
7875

7976
image_reader = build_data.ImageReader('jpeg', channels=3)
8077
label_reader = build_data.ImageReader('png', channels=1)

research/deeplab/datasets/build_voc2012_data.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
image/segmentation/class/encoded: encoded semantic segmentation content.
5151
image/segmentation/class/format: semantic segmentation file format.
5252
"""
53-
import glob
5453
import math
5554
import os.path
5655
import sys
@@ -133,7 +132,7 @@ def _convert_dataset(dataset_split):
133132

134133

135134
def main(unused_argv):
136-
dataset_splits = glob.glob(os.path.join(FLAGS.list_folder, '*.txt'))
135+
dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
137136
for dataset_split in dataset_splits:
138137
_convert_dataset(dataset_split)
139138

research/deeplab/datasets/download_and_convert_ade20k.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,27 +39,27 @@ set -e
3939

4040
CURRENT_DIR=$(pwd)
4141
WORK_DIR="./ADE20K"
42-
mkdir -p ${WORK_DIR}
43-
cd ${WORK_DIR}
42+
mkdir -p "${WORK_DIR}"
43+
cd "${WORK_DIR}"
4444

4545
# Helper function to download and unpack ADE20K dataset.
4646
download_and_uncompress() {
4747
local BASE_URL=${1}
4848
local FILENAME=${2}
4949

50-
if [ ! -f ${FILENAME} ]; then
50+
if [ ! -f "${FILENAME}" ]; then
5151
echo "Downloading ${FILENAME} to ${WORK_DIR}"
5252
wget -nd -c "${BASE_URL}/${FILENAME}"
5353
fi
5454
echo "Uncompressing ${FILENAME}"
55-
unzip ${FILENAME}
55+
unzip "${FILENAME}"
5656
}
5757

5858
# Download the images.
5959
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
6060
FILENAME="ADEChallengeData2016.zip"
6161

62-
download_and_uncompress ${BASE_URL} ${FILENAME}
62+
download_and_uncompress "${BASE_URL}" "${FILENAME}"
6363

6464
cd "${CURRENT_DIR}"
6565

research/deeplab/datasets/download_and_convert_voc2012.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,27 +37,27 @@ set -e
3737

3838
CURRENT_DIR=$(pwd)
3939
WORK_DIR="./pascal_voc_seg"
40-
mkdir -p ${WORK_DIR}
41-
cd ${WORK_DIR}
40+
mkdir -p "${WORK_DIR}"
41+
cd "${WORK_DIR}"
4242

4343
# Helper function to download and unpack VOC 2012 dataset.
4444
download_and_uncompress() {
4545
local BASE_URL=${1}
4646
local FILENAME=${2}
4747

48-
if [ ! -f ${FILENAME} ]; then
48+
if [ ! -f "${FILENAME}" ]; then
4949
echo "Downloading ${FILENAME} to ${WORK_DIR}"
5050
wget -nd -c "${BASE_URL}/${FILENAME}"
5151
fi
5252
echo "Uncompressing ${FILENAME}"
53-
tar -xf ${FILENAME}
53+
tar -xf "${FILENAME}"
5454
}
5555

5656
# Download the images.
5757
BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
5858
FILENAME="VOCtrainval_11-May-2012.tar"
5959

60-
download_and_uncompress ${BASE_URL} ${FILENAME}
60+
download_and_uncompress "${BASE_URL}" "${FILENAME}"
6161

6262
cd "${CURRENT_DIR}"
6363

research/deeplab/datasets/segmentation_dataset.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
3232
and so on) for urban street scenes.
3333
34+
3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)
35+
36+
The ADE20K dataset contains 150 semantic labels for both urban street scenes and
37+
indoor scenes.
38+
3439
References:
3540
M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
3641
and A. Zisserman, The pascal visual object classes challenge a retrospective.
@@ -39,6 +44,9 @@
3944
M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
4045
U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
4146
scene understanding," In Proc. of CVPR, 2016.
47+
48+
B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, A. Torralba, "Scene Parsing
49+
through ADE20K dataset", In Proc. of CVPR, 2017.
4250
"""
4351
import collections
4452
import os.path
@@ -87,12 +95,10 @@
8795

8896
# These numbers (i.e., 'train'/'test') seem to have to be hard coded
8997
# You are required to figure it out for your training/testing example.
90-
# Is there a way to automatically figure it out ?
9198
_ADE20K_INFORMATION = DatasetDescriptor(
9299
splits_to_sizes = {
93100
'train': 20210, # num of samples in images/training
94101
'val': 2000, # num of samples in images/validation
95-
'eval': 2,
96102
},
97103
num_classes=150,
98104
ignore_label=255,

research/deeplab/g3doc/ade20k.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ python deeplab/train.py \
6767
--fine_tune_batch_norm=False \
6868
--dataset="ade20k" \
6969
--initialize_last_layer=False \
70+
--last_layers_contain_logits_only=True \
7071
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
7172
--train_logdir=${PATH_TO_TRAIN_DIR}\
7273
--dataset_dir=${PATH_TO_DATASET}
@@ -90,7 +91,7 @@ which the ADE20K dataset resides (the `tfrecord` above)
9091
fine_tune_batch_norm = False.
9192

9293
2. User should fine tune the `min_resize_value` and `max_resize_value` to get
93-
better result. Note that `resize_factor` has to equals to `output_stride`.
94+
better result. Note that `resize_factor` has to be equal to `output_stride`.
9495

9596
2. The users should change atrous_rates from [6, 12, 18] to [12, 24, 36] if
9697
setting output_stride=8.

research/deeplab/model.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,19 +64,26 @@
6464
_DECODER_SCOPE = 'decoder'
6565

6666

67-
def get_extra_layer_scopes():
67+
def get_extra_layer_scopes(last_layers_contain_logits_only=False):
6868
"""Gets the scopes for extra layers.
6969
70+
Args:
71+
last_layers_contain_logits_only: Boolean, True if only considering logits as
72+
the last layer (i.e., exclude ASPP module, decoder module and so on)
73+
7074
Returns:
7175
A list of scopes for extra layers.
7276
"""
73-
return [
74-
_LOGITS_SCOPE_NAME,
75-
_IMAGE_POOLING_SCOPE,
76-
_ASPP_SCOPE,
77-
_CONCAT_PROJECTION_SCOPE,
78-
_DECODER_SCOPE,
79-
]
77+
if last_layers_contain_logits_only:
78+
return [_LOGITS_SCOPE_NAME]
79+
else:
80+
return [
81+
_LOGITS_SCOPE_NAME,
82+
_IMAGE_POOLING_SCOPE,
83+
_ASPP_SCOPE,
84+
_CONCAT_PROJECTION_SCOPE,
85+
_DECODER_SCOPE,
86+
]
8087

8188

8289
def predict_labels_multi_scale(images,

research/deeplab/train.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@
118118
flags.DEFINE_boolean('initialize_last_layer', True,
119119
'Initialize the last layer.')
120120

121+
flags.DEFINE_boolean('last_layers_contain_logits_only', False,
122+
'Only consider logits as last layers or not.')
123+
121124
flags.DEFINE_integer('slow_start_step', 0,
122125
'Training model with small learning rate for few steps.')
123126

@@ -292,7 +295,7 @@ def main(unused_argv):
292295
summaries.add(tf.summary.scalar('total_loss', total_loss))
293296

294297
# Modify the gradients for biases and last layer variables.
295-
last_layers = model.get_extra_layer_scopes()
298+
last_layers = model.get_extra_layer_scopes(FLAGS.last_layers_contain_logits_only)
296299
grad_mult = train_utils.get_model_gradient_multipliers(
297300
last_layers, FLAGS.last_layer_gradient_multiplier)
298301
if grad_mult:

research/deeplab/utils/train_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def get_model_init_fn(train_logdir,
9999
tf.logging.info('Initializing model from path: %s', tf_initial_checkpoint)
100100

101101
# Variables that will not be restored.
102-
exclude_list = ['global_step', 'logits']
102+
exclude_list = ['global_step']
103103
if not initialize_last_layer:
104104
exclude_list.extend(last_layers)
105105

0 commit comments

Comments
 (0)