Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 78aaae7

Browse files
author
YaoYao
committed
Fix the 0.5-pixel offset between image coordinates and pixel coordinates when applying the TensorFlow transform for homography
1 parent 0dfdab5 commit 78aaae7

File tree

3 files changed

+54
-21
lines changed

3 files changed

+54
-21
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
* Enter the ``MVSNet/mvsnet`` folder, in ``test.py``, set ``pretrained_model_ckpt_path`` to ``MODEL_FOLDER/model.ckpt``
4242

4343
* To run MVSNet (GTX1080Ti):
44-
``python test.py --dense_folder /data/dtu/github_data/scan9/ --regularization '3DCNNs' --max_w 1152 --max_h 864 --max_d 192 --interval_scale 1.06``
44+
``python test.py --dense_folder TEST_DATA_FOLDER --regularization '3DCNNs' --max_w 1152 --max_h 864 --max_d 192 --interval_scale 1.06``
4545
* To run R-MVSNet (GTX1080Ti):
46-
``python test.py --dense_folder /data/dtu/github_data/scan9/ --regularization 'GRU' --max_w 1600 --max_h 1200 --max_d 256 --interval_scale 0.8``
46+
``python test.py --dense_folder TEST_DATA_FOLDER --regularization 'GRU' --max_w 1600 --max_h 1200 --max_d 256 --interval_scale 0.8``
4747
* Inspect the .pfm format outputs in ``TEST_DATA_FOLDER/depths_mvsnet`` using ``python visualize.py .pfm``. For example the depth map and probability map for image `00000012` should look like:
4848

4949
<img src="doc/image.png" width="250"> | <img src="doc/depth_example.png" width="250"> | <img src="doc/probability_example.png" width="250">
@@ -59,7 +59,7 @@ To run the post-processing:
5959
* Check out the modified version fusibile ```git clone https://github.com/YoYo000/fusibile```
6060
* Install fusibile by ```cmake .``` and ```make```, which will generate the executable at ``FUSIBILE_EXE_PATH``
6161
* Run post-processing:
62-
``python depthfusion.py --dense_folder TEST_DATA_FOLDER --fusibile_exe_path FUSIBILE_EXE_PATH``
62+
``python depthfusion.py --dense_folder TEST_DATA_FOLDER --fusibile_exe_path FUSIBILE_EXE_PATH --prob_threshold 0.3``
6363
* The final point cloud is stored in `TEST_DATA_FOLDER/points_mvsnet/consistencyCheck-TIME/final3d_model.ply`.
6464

6565
We observe that the point cloud output of ``depthfusion.py`` is very similar to our own implementation. For detailed differences, please refer to [MVSNet paper](https://arxiv.org/abs/1804.02505) and [Galliani's paper](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Galliani_Massively_Parallel_Multiview_ICCV_2015_paper.pdf). The point cloud for `scan9` should look like:

mvsnet/homography_warping.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,4 +200,49 @@ def homography_warping(input_image, homography):
200200

201201
# return input_image
202202
return warped_image
203+
def tf_transform_homography(input_image, homography):
    """Warp ``input_image`` by ``homography`` using tf.contrib.image.transform.

    tf.contrib.image.transform operates in pixel coordinates, whereas the
    homography coefficients here are expressed in image coordinates
    (x_pixel = x_image + 0.5).  The matrix is therefore conjugated by the
    half-pixel translation, H_pixel = T(-0.5) * H * T(+0.5), before being
    handed to the transform op.

    Args:
        input_image: batched image tensor to warp.
        homography: per-batch 3x3 homography, any shape reshapable to [-1, 9].

    Returns:
        The bilinearly warped image tensor.
    """
    # Flatten each 3x3 homography into a row of nine coefficients
    # [a0 a1 a2 b0 b1 b2 c0 c1 c2] and pull the columns apart.
    flat = tf.reshape(homography, [-1, 9])
    a0, a1, a2, b0, b1, b2, c0, c1, c2 = tf.split(flat, 9, axis=1)

    # Conjugate by the half-pixel shift.  The bottom row's scale entries
    # (c0, c1) are unchanged; only the affine rows and the translation-like
    # entries pick up correction terms.
    new_a0 = a0 - c0 / 2
    new_a1 = a1 - c1 / 2
    new_a2 = (a0 + a1) / 2 + a2 - (c0 + c1) / 4 - c2 / 2
    new_b0 = b0 - c0 / 2
    new_b1 = b1 - c1 / 2
    new_b2 = (b0 + b1) / 2 + b2 - (c0 + c1) / 4 - c2 / 2
    new_c2 = c2 + (c0 + c1) / 2
    pixel_homography = tf.concat(
        [new_a0, new_a1, new_a2, new_b0, new_b1, new_b2, c0, c1, new_c2],
        axis=1)

    # tf.contrib.image.transform expects the first eight coefficients of the
    # projective transform, normalized so the ninth entry equals 1.
    linear_part = tf.slice(pixel_homography, begin=[0, 0], size=[-1, 8])
    divisor = tf.tile(
        tf.slice(pixel_homography, begin=[0, 8], size=[-1, 1]), [1, 8])
    normalized = tf.div(linear_part, divisor)

    warped_image = tf.contrib.image.transform(
        input_image, normalized, interpolation='BILINEAR')
    return warped_image
203248

mvsnet/model.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
sys.path.append("../")
1313
from cnn_wrapper.mvsnet import *
1414
from convgru import ConvGRUCell
15-
from homography_warping import get_homographies, get_homographies_inv_depth, homography_warping
15+
from homography_warping import *
1616

1717
FLAGS = tf.app.flags.FLAGS
1818

@@ -108,7 +108,8 @@ def inference(images, cams, depth_num, depth_start, depth_interval, is_master_gp
108108
for view in range(0, FLAGS.view_num - 1):
109109
homography = tf.slice(view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3])
110110
homography = tf.squeeze(homography, axis=1)
111-
warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
111+
# warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
112+
warped_view_feature = tf_transform_homography(view_towers[view].get_output(), homography)
112113
ave_feature = ave_feature + warped_view_feature
113114
ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
114115
ave_feature = ave_feature / FLAGS.view_num
@@ -201,16 +202,8 @@ def body(view, ave_feature, ave_feature2):
201202
"""Loop body."""
202203
homography = tf.slice(view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3])
203204
homography = tf.squeeze(homography, axis=1)
204-
205205
# warped_view_feature = homography_warping(view_features[view], homography)
206-
########## tf.contrib.image.transform #############
207-
homography = tf.reshape(homography, [-1, 9])
208-
homography_linear = tf.slice(homography, begin=[0, 0], size=[-1, 8])
209-
homography_linear_div = tf.tile(tf.slice(homography, begin=[0, 8], size=[-1, 1]), [1, 8])
210-
homography_linear = tf.div(homography_linear, homography_linear_div)
211-
warped_view_feature = tf.contrib.image.transform(
212-
view_features[view], homography_linear, interpolation='BILINEAR')
213-
206+
warped_view_feature = tf_transform_homography(view_features[view], homography)
214207
ave_feature = tf.assign_add(ave_feature, warped_view_feature)
215208
ave_feature2 = tf.assign_add(ave_feature2, tf.square(warped_view_feature))
216209
view = tf.add(view, 1)
@@ -327,13 +320,8 @@ def body(depth_index, state1, state2, state3, depth_image, max_prob_image, exp_s
327320
homographies = view_homographies[view]
328321
homographies = tf.transpose(homographies, perm=[1, 0, 2, 3])
329322
homography = homographies[depth_index]
330-
homography = tf.reshape(homography, [-1, 9])
331-
homography_linear = tf.slice(homography, begin=[0, 0], size=[-1, 8])
332-
homography_linear_div = tf.tile(tf.slice(homography, begin=[0, 8], size=[-1, 1]), [1, 8])
333-
homography_linear = tf.div(homography_linear, homography_linear_div)
334-
warped_view_feature = tf.contrib.image.transform(
335-
view_towers[view].get_output(), homography_linear, interpolation='BILINEAR')
336-
323+
# warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
324+
warped_view_feature = tf_transform_homography(view_towers[view].get_output(), homography)
337325
ave_feature = ave_feature + warped_view_feature
338326
ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
339327
ave_feature = ave_feature / FLAGS.view_num

0 commit comments

Comments
 (0)