From e46a7ba46897d8e39d99840695613147c046e044 Mon Sep 17 00:00:00 2001 From: "xuezhi.zhang" Date: Fri, 20 Apr 2018 10:41:00 -0400 Subject: [PATCH 01/38] config gpu id used --- main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.py b/main.py index 8711558..b30f8ad 100644 --- a/main.py +++ b/main.py @@ -37,6 +37,9 @@ args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() +# set gpu id used +os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" + torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) From 3adf7c22ad3d250eac468c8e9da6f6e2785eb1cc Mon Sep 17 00:00:00 2001 From: "xuezhi.zhang" Date: Fri, 20 Apr 2018 10:59:19 -0400 Subject: [PATCH 02/38] mkdir dataset, and some subfolders. which can help users to understand where the dataset is, and which name is used for different datasets. --- dataset/Readme.md | 22 ++++++++++++++++++++++ main.py | 2 +- run.sh | 4 ++-- 3 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 dataset/Readme.md diff --git a/dataset/Readme.md b/dataset/Readme.md new file mode 100644 index 0000000..b0bcf10 --- /dev/null +++ b/dataset/Readme.md @@ -0,0 +1,22 @@ +* SceneFlow includes three datasets: FlyingThings3D, Driving and Monkaa. +* You can train PSMNet with some of the three datasets, or all of them. +* The following describes the six subfolders. +``` +# the disp folder of Driving dataset +driving_disparity +# the image folder of Driving dataset +driving_frames_cleanpass + +# the disp folder of FlyingThings3D dataset +frames_disparity +# the image folder of FlyingThings3D dataset +frames_cleanpass + +# the disp folder of Monkaa dataset +monkaa_disparity +# the image folder of Monkaa dataset +monkaa_frames_cleanpass +``` +* Download the datasets from [this page](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) and unzip them into the corresponding folders. + +* `data_scene_flow_2015` is the folder for kitti15. You can unzip kitti15 to this folder.
This will be used in **test** phase. diff --git a/main.py b/main.py index b30f8ad..d41c081 100644 --- a/main.py +++ b/main.py @@ -22,7 +22,7 @@ help='maxium disparity') parser.add_argument('--model', default='stackhourglass', help='select model') -parser.add_argument('--datapath', default='/media/jiaren/ImageNet/SceneFlowData/', +parser.add_argument('--datapath', default='dataset/', help='datapath') parser.add_argument('--epochs', type=int, default=10, help='number of epochs to train') diff --git a/run.sh b/run.sh index ce059ea..847c84d 100644 --- a/run.sh +++ b/run.sh @@ -2,7 +2,7 @@ python main.py --maxdisp 192 \ --model stackhourglass \ - --datapath /media/jiaren/ImageNet/SceneFlowData/ \ + --datapath dataset/ \ --epochs 0 \ --loadmodel ./trained/checkpoint_10.tar \ --savemodel ./trained/ @@ -12,7 +12,7 @@ python main.py --maxdisp 192 \ python finetune.py --maxdisp 192 \ --model stackhourglass \ --datatype 2015 \ - --datapath /media/jiaren/ImageNet/data_scene_flow_2015/training/ \ + --datapath dataset/data_scene_flow_2015/training/ \ --epochs 300 \ --loadmodel ./trained/checkpoint_10.tar \ --savemodel ./trained/ From 9161f2589d9dc807e12c885d9347926774bc6276 Mon Sep 17 00:00:00 2001 From: H1Gdev Date: Tue, 6 Nov 2018 17:16:47 +0900 Subject: [PATCH 03/38] Fix typo in README.md. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 437a852..b088381 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ python main.py --maxdisp 192 \ --model stackhourglass \ --datapath (your scene flow data folder)\ --epochs 10 \ - --loadmodel (optional)\ + --loadmodel (optional)\ --savemodel (path for saving model) ``` @@ -66,7 +66,7 @@ python finetune.py --maxdisp 192 \ --loadmodel (pretrained PSMNet) \ --savemodel (path for saving model) ``` -You can alse see those example in run.sh +You can also see those examples in run.sh.
### Evaluation Use the following command to evaluate the trained PSMNet on KITTI 2015 test data @@ -91,10 +91,10 @@ Update: 2018/9/6 We released the pre-trained KITTI 2012 model. ## Results -### Evalutation of PSMNet with different settings +### Evaluation of PSMNet with different settings -※Note that the reported 3-px validation errors were calculated using KITTI's offical matlab code, not our code. +※Note that the reported 3-px validation errors were calculated using KITTI's official matlab code, not our code. ### Results on KITTI 2015 leaderboard [Leaderboard Link](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo) From b6520ff9b77168e3dfe3485f8bf6a2a798361d85 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 7 Mar 2019 16:14:41 +0800 Subject: [PATCH 04/38] Update stackhourglass.py --- models/stackhourglass.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/stackhourglass.py b/models/stackhourglass.py index 8430242..991ba16 100644 --- a/models/stackhourglass.py +++ b/models/stackhourglass.py @@ -149,6 +149,9 @@ def forward(self, left, right): cost3 = F.upsample(cost3, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') cost3 = torch.squeeze(cost3,1) pred3 = F.softmax(cost3,dim=1) + #For your information: This formulation 'softmax(c)' learned "similarity" + #while 'softmax(-c)' learned 'matching cost' as mentioned in the paper. + #However, 'c' or '-c' do not affect the performance because feature-based cost volume provided flexibility. 
 pred3 = disparityregression(self.maxdisp)(pred3) if self.training: From 14f162d454574fbfb07c252ebc857894aa962506 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Fri, 7 Jun 2019 21:12:17 +0800 Subject: [PATCH 05/38] Update README.md add notice --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index fa9f9e8..f3d5b8f 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,10 @@ Download RGB cleanpass images and its disparity for three subset: FlyingThings3D Put them in the same folder. And rename the folder as: "driving_frames_cleanpass", "driving_disparity", "monkaa_frames_cleanpass", "monkaa_disparity", "frames_cleanpass", "frames_disparity". ``` +### Notice +1. Warning of upsample function in PyTorch 0.4.1+: add "align_corners=True" to upsample functions. +2. Output disparity may be better with multiplying by 1.17. Reported in issues [#135](https://github.com/JiaRenChang/PSMNet/issues/135) and [#113](https://github.com/JiaRenChang/PSMNet/issues/113). +3. With torchvision > 0.2.0, RGB images should be loaded without adding ".astype('float32')" ### Train As an example, use the following command to train a PSMNet on Scene Flow From 09bb5d25aefefadd53db83361cb4dc514f30b9c5 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 20 Jun 2019 23:02:47 +0800 Subject: [PATCH 06/38] Update download link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f3d5b8f..d4d9be5 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ Update: 2018/9/6 We released the pre-trained KITTI 2012 model.
| KITTI 2015 | Scene Flow | KITTI 2012| |---|---|---| -|[Google Drive](https://drive.google.com/file/d/1pHWjmhKMG4ffCrpcsp_MTXMJXhgl3kF9/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1xoqkQ2NXik1TML_FMUTNZJFAHrhLdKZG/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1p4eJ2xDzvQxaqB20A_MmSP9-KORBX1pZ/view)| +|[Google Drive](https://drive.google.com/file/d/1pHWjmhKMG4ffCrpcsp_MTXMJXhgl3kF9/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1xoqkQ2NXik1TML_FMUTNZJFAHrhLdKZG/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1p4eJ2xDzvQxaqB20A_MmSP9-KORBX1pZ/view?usp=sharing)| ## Results From 622ea49c84f3d474826a67e29c8ca1d7a749b56c Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Mon, 9 Sep 2019 15:26:05 +0800 Subject: [PATCH 07/38] Test_img.py A script to inference on any size of image pairs --- Test_img.py | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 Test_img.py diff --git a/Test_img.py b/Test_img.py new file mode 100644 index 0000000..5804ab6 --- /dev/null +++ b/Test_img.py @@ -0,0 +1,141 @@ +from __future__ import print_function +import argparse +import os +import random +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim as optim +import torch.utils.data +from torch.autograd import Variable +import torch.nn.functional as F +import skimage +import skimage.io +import skimage.transform +import numpy as np +import time +import math +from utils import preprocess +from models import * +import cv2 + +# 2012 data /media/jiaren/ImageNet/data_scene_flow_2012/testing/ + +parser = argparse.ArgumentParser(description='PSMNet') +parser.add_argument('--KITTI', default='2015', + help='KITTI version') +parser.add_argument('--datapath', default='/media/jiaren/ImageNet/data_scene_flow_2015/testing/', + help='select model') +parser.add_argument('--loadmodel', 
default='./trained/pretrained_model_KITTI2015.tar', + help='loading model') +parser.add_argument('--leftimg', default= None, + help='load model') +parser.add_argument('--rightimg', default= None, + help='load model') +parser.add_argument('--isgray', default= False, + help='load model') +parser.add_argument('--model', default='stackhourglass', + help='select model') +parser.add_argument('--maxdisp', type=int, default=192, + help='maxium disparity') +parser.add_argument('--no-cuda', action='store_true', default=False, + help='enables CUDA training') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + +torch.manual_seed(args.seed) +if args.cuda: + torch.cuda.manual_seed(args.seed) +#test_left_img, test_right_img = DA.dataloader(args.datapath) + +if args.model == 'stackhourglass': + model = stackhourglass(args.maxdisp) +elif args.model == 'basic': + model = basic(args.maxdisp) +else: + print('no model') + +model = nn.DataParallel(model, device_ids=[0]) +model.cuda() + + +if args.loadmodel is not None: + print('load PSMNet') + state_dict = torch.load(args.loadmodel) + model.load_state_dict(state_dict['state_dict']) + +print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) + +def test(imgL,imgR): + model.eval() + + if args.cuda: + imgL = torch.FloatTensor(imgL).cuda() + imgR = torch.FloatTensor(imgR).cuda() + + imgL, imgR= Variable(imgL), Variable(imgR) + + with torch.no_grad(): + disp = model(imgL,imgR) + + disp = torch.squeeze(disp) + pred_disp = disp.data.cpu().numpy() + + return pred_disp + + +def main(): + processed = preprocess.get_transform(augment=False) + if args.isgray: + imgL_o = cv2.cvtColor(cv2.imread(args.leftimg,0), cv2.COLOR_GRAY2RGB) + imgR_o = cv2.cvtColor(cv2.imread(args.rightimg,0), cv2.COLOR_GRAY2RGB) + else: + imgL_o = 
(skimage.io.imread(args.leftimg).astype('float32')) + imgR_o = (skimage.io.imread(args.rightimg).astype('float32')) + + imgL = processed(imgL_o).numpy() + imgR = processed(imgR_o).numpy() + imgL = np.reshape(imgL,[1,3,imgL.shape[1],imgL.shape[2]]) + imgR = np.reshape(imgR,[1,3,imgR.shape[1],imgR.shape[2]]) + + # pad to width and hight to 16 times + if imgL.shape[2] % 16 != 0: + times = imgL.shape[2]//16 + top_pad = (times+1)*16 -imgL.shape[2] + else: + top_pad = 0 + if imgL.shape[3] % 16 != 0: + times = imgL.shape[3]//16 + left_pad = (times+1)*16-imgL.shape[3] + else: + left_pad = 0 + imgL = np.lib.pad(imgL,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) + imgR = np.lib.pad(imgR,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) + + start_time = time.time() + pred_disp = test(imgL,imgR) + print('time = %.2f' %(time.time() - start_time)) + if top_pad !=0 or left_pad != 0: + img = pred_disp[top_pad:,:-left_pad] + else: + img = pred_disp + img = (img*256).astype('uint16') + skimage.io.imsave('disparity.png',img) + + #img = np.concatenate((imgL_o, imgR_o),axis=1) + #img = cv2.line(img, (0, 240), (1504, 240), (0, 0, 255), 2) + #img = cv2.line(img, (0, 210), (1504, 210), (0, 0, 255), 2) + #img = cv2.line(img, (0, 270), (1504, 270), (0, 0, 255), 2) + #skimage.io.imsave('test.png',img) + +if __name__ == '__main__': + main() + + + + + + From b8e1da0a35b60d0ec5753f1fab211aa8b9251ea1 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Mon, 9 Sep 2019 15:29:38 +0800 Subject: [PATCH 08/38] New script to test images --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d4d9be5..c5d39d6 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,10 @@ Update: 2018/9/6 We released the pre-trained KITTI 2012 model. 
|---|---|---| |[Google Drive](https://drive.google.com/file/d/1pHWjmhKMG4ffCrpcsp_MTXMJXhgl3kF9/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1xoqkQ2NXik1TML_FMUTNZJFAHrhLdKZG/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1p4eJ2xDzvQxaqB20A_MmSP9-KORBX1pZ/view?usp=sharing)| +### Test on your own stereo pair +``` +python Test_img.py --leftimg ./left.png --rightimg ./right.png --isgray False +``` ## Results From 92614984ae582740aabd8a4dee9e60aa9e4c821f Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Mon, 9 Sep 2019 15:33:04 +0800 Subject: [PATCH 09/38] New script to test --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c5d39d6..195f886 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ Update: 2018/9/6 We released the pre-trained KITTI 2012 model. ### Test on your own stereo pair ``` -python Test_img.py --leftimg ./left.png --rightimg ./right.png --isgray False +python Test_img.py --loadmodel (finetuned PSMNet) --leftimg ./left.png --rightimg ./right.png --isgray False ``` ## Results From 909168465aca37aba2cbbcdd37f002574534962e Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Mon, 20 Jan 2020 11:11:31 +0800 Subject: [PATCH 10/38] Version --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 195f886..0138d03 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Network](https://arxiv.org/abs/1803.08669)" paper (CVPR 2018) by [Jia-Ren Chang](https://jiarenchang.github.io/) and [Yong-Sheng Chen](https://people.cs.nctu.edu.tw/~yschen/). +## ※We are working on moving these codes to Python 3.7 and torch 1.4. 
+ ### Citation ``` @inproceedings{chang2018pyramid, From f80a50f2117da341b46542aa024b8fce5ed798b7 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 15:52:51 +0800 Subject: [PATCH 11/38] Update Test_img.py --- Test_img.py | 141 ---------------------------------------------------- 1 file changed, 141 deletions(-) delete mode 100644 Test_img.py diff --git a/Test_img.py b/Test_img.py deleted file mode 100644 index 5804ab6..0000000 --- a/Test_img.py +++ /dev/null @@ -1,141 +0,0 @@ -from __future__ import print_function -import argparse -import os -import random -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -from torch.autograd import Variable -import torch.nn.functional as F -import skimage -import skimage.io -import skimage.transform -import numpy as np -import time -import math -from utils import preprocess -from models import * -import cv2 - -# 2012 data /media/jiaren/ImageNet/data_scene_flow_2012/testing/ - -parser = argparse.ArgumentParser(description='PSMNet') -parser.add_argument('--KITTI', default='2015', - help='KITTI version') -parser.add_argument('--datapath', default='/media/jiaren/ImageNet/data_scene_flow_2015/testing/', - help='select model') -parser.add_argument('--loadmodel', default='./trained/pretrained_model_KITTI2015.tar', - help='loading model') -parser.add_argument('--leftimg', default= None, - help='load model') -parser.add_argument('--rightimg', default= None, - help='load model') -parser.add_argument('--isgray', default= False, - help='load model') -parser.add_argument('--model', default='stackhourglass', - help='select model') -parser.add_argument('--maxdisp', type=int, default=192, - help='maxium disparity') -parser.add_argument('--no-cuda', action='store_true', default=False, - help='enables CUDA training') -parser.add_argument('--seed', type=int, default=1, metavar='S', - help='random seed (default: 1)') -args = 
parser.parse_args() -args.cuda = not args.no_cuda and torch.cuda.is_available() - -torch.manual_seed(args.seed) -if args.cuda: - torch.cuda.manual_seed(args.seed) -#test_left_img, test_right_img = DA.dataloader(args.datapath) - -if args.model == 'stackhourglass': - model = stackhourglass(args.maxdisp) -elif args.model == 'basic': - model = basic(args.maxdisp) -else: - print('no model') - -model = nn.DataParallel(model, device_ids=[0]) -model.cuda() - - -if args.loadmodel is not None: - print('load PSMNet') - state_dict = torch.load(args.loadmodel) - model.load_state_dict(state_dict['state_dict']) - -print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) - -def test(imgL,imgR): - model.eval() - - if args.cuda: - imgL = torch.FloatTensor(imgL).cuda() - imgR = torch.FloatTensor(imgR).cuda() - - imgL, imgR= Variable(imgL), Variable(imgR) - - with torch.no_grad(): - disp = model(imgL,imgR) - - disp = torch.squeeze(disp) - pred_disp = disp.data.cpu().numpy() - - return pred_disp - - -def main(): - processed = preprocess.get_transform(augment=False) - if args.isgray: - imgL_o = cv2.cvtColor(cv2.imread(args.leftimg,0), cv2.COLOR_GRAY2RGB) - imgR_o = cv2.cvtColor(cv2.imread(args.rightimg,0), cv2.COLOR_GRAY2RGB) - else: - imgL_o = (skimage.io.imread(args.leftimg).astype('float32')) - imgR_o = (skimage.io.imread(args.rightimg).astype('float32')) - - imgL = processed(imgL_o).numpy() - imgR = processed(imgR_o).numpy() - imgL = np.reshape(imgL,[1,3,imgL.shape[1],imgL.shape[2]]) - imgR = np.reshape(imgR,[1,3,imgR.shape[1],imgR.shape[2]]) - - # pad to width and hight to 16 times - if imgL.shape[2] % 16 != 0: - times = imgL.shape[2]//16 - top_pad = (times+1)*16 -imgL.shape[2] - else: - top_pad = 0 - if imgL.shape[3] % 16 != 0: - times = imgL.shape[3]//16 - left_pad = (times+1)*16-imgL.shape[3] - else: - left_pad = 0 - imgL = np.lib.pad(imgL,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) - imgR = 
np.lib.pad(imgR,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) - - start_time = time.time() - pred_disp = test(imgL,imgR) - print('time = %.2f' %(time.time() - start_time)) - if top_pad !=0 or left_pad != 0: - img = pred_disp[top_pad:,:-left_pad] - else: - img = pred_disp - img = (img*256).astype('uint16') - skimage.io.imsave('disparity.png',img) - - #img = np.concatenate((imgL_o, imgR_o),axis=1) - #img = cv2.line(img, (0, 240), (1504, 240), (0, 0, 255), 2) - #img = cv2.line(img, (0, 210), (1504, 210), (0, 0, 255), 2) - #img = cv2.line(img, (0, 270), (1504, 270), (0, 0, 255), 2) - #skimage.io.imsave('test.png',img) - -if __name__ == '__main__': - main() - - - - - - From d535b353fcf9876e3a4b65f99ae264ceef8f4efe Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 15:53:30 +0800 Subject: [PATCH 12/38] Update Test_img.py --- Test_img.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 Test_img.py diff --git a/Test_img.py b/Test_img.py new file mode 100644 index 0000000..948dc73 --- /dev/null +++ b/Test_img.py @@ -0,0 +1,128 @@ +from __future__ import print_function +import argparse +import os +import random +import torch +import torch.nn as nn +import torchvision.transforms as transforms +import torch.nn.functional as F +import numpy as np +import time +import math +from models import * +import cv2 +from PIL import Image + +# 2012 data /media/jiaren/ImageNet/data_scene_flow_2012/testing/ + +parser = argparse.ArgumentParser(description='PSMNet') +parser.add_argument('--KITTI', default='2015', + help='KITTI version') +parser.add_argument('--datapath', default='/media/jiaren/ImageNet/data_scene_flow_2015/testing/', + help='select model') +parser.add_argument('--loadmodel', default='./trained/pretrained_model_KITTI2015.tar', + help='loading model') +parser.add_argument('--leftimg', default= './VO04_L.png', + help='load model') +parser.add_argument('--rightimg', 
default= './VO04_R.png', + help='load model') +parser.add_argument('--model', default='stackhourglass', + help='select model') +parser.add_argument('--maxdisp', type=int, default=192, + help='maxium disparity') +parser.add_argument('--no-cuda', action='store_true', default=False, + help='enables CUDA training') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + +torch.manual_seed(args.seed) +if args.cuda: + torch.cuda.manual_seed(args.seed) + +if args.model == 'stackhourglass': + model = stackhourglass(args.maxdisp) +elif args.model == 'basic': + model = basic(args.maxdisp) +else: + print('no model') + +model = nn.DataParallel(model, device_ids=[0]) +model.cuda() + +if args.loadmodel is not None: + print('load PSMNet') + state_dict = torch.load(args.loadmodel) + model.load_state_dict(state_dict['state_dict']) + +print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) + +def test(imgL,imgR): + model.eval() + + if args.cuda: + imgL = imgL.cuda() + imgR = imgR.cuda() + + with torch.no_grad(): + disp = model(imgL,imgR) + + disp = torch.squeeze(disp) + pred_disp = disp.data.cpu().numpy() + + return pred_disp + + +def main(): + + normal_mean_var = {'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225]} + infer_transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize(**normal_mean_var)]) + + imgL_o = Image.open(args.leftimg).convert('RGB') + imgR_o = Image.open(args.rightimg).convert('RGB') + + imgL = infer_transform(imgL_o) + imgR = infer_transform(imgR_o) + + + # pad to width and hight to 16 times + if imgL.shape[1] % 16 != 0: + times = imgL.shape[1]//16 + top_pad = (times+1)*16 -imgL.shape[1] + else: + top_pad = 0 + + if imgL.shape[2] % 16 != 0: + times = imgL.shape[2]//16 + right_pad = (times+1)*16-imgL.shape[2] + else: + right_pad = 0 + + imgL = 
F.pad(imgL,(0,right_pad, top_pad,0)).unsqueeze(0) + imgR = F.pad(imgR,(0,right_pad, top_pad,0)).unsqueeze(0) + + start_time = time.time() + pred_disp = test(imgL,imgR) + print('time = %.2f' %(time.time() - start_time)) + + + if top_pad !=0 or right_pad != 0: + img = pred_disp[top_pad:,:-right_pad] + else: + img = pred_disp + + img = (img*256).astype('uint16') + img = Image.fromarray(img) + img.save('Test_disparity.png') + +if __name__ == '__main__': + main() + + + + + + From de72bbd6cb59586921755e1718f1dead3ab5e588 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 15:57:49 +0800 Subject: [PATCH 13/38] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0138d03..8f659c6 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Network](https://arxiv.org/abs/1803.08669)" paper (CVPR 2018) by [Jia-Ren Chang](https://jiarenchang.github.io/) and [Yong-Sheng Chen](https://people.cs.nctu.edu.tw/~yschen/). ## ※We are working on moving these codes to Python 3.7 and torch 1.4. +#### changlog +2020/04/16 Update Test_img.py: now support torch 1.4/torchvision 0.50 and python 3.7 for inference ### Citation ``` @@ -96,7 +98,7 @@ Update: 2018/9/6 We released the pre-trained KITTI 2012 model. 
### Test on your own stereo pair ``` -python Test_img.py --loadmodel (finetuned PSMNet) --leftimg ./left.png --rightimg ./right.png --isgray False +python Test_img.py --loadmodel (finetuned PSMNet) --leftimg ./left.png --rightimg ./right.png ``` ## Results From 2aecdf7294621dc03eb6a111ee29211f1085e0b0 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 15:58:37 +0800 Subject: [PATCH 14/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f659c6..8831047 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Net ## ※We are working on moving these codes to Python 3.7 and torch 1.4. #### changlog -2020/04/16 Update Test_img.py: now support torch 1.4/torchvision 0.50 and python 3.7 for inference +2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference ### Citation ``` From 842ce3adfdc40b8e296a5bacb73b367791229adc Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 16:15:01 +0800 Subject: [PATCH 15/38] update submiss.py --- submission.py | 121 -------------------------------------------------- 1 file changed, 121 deletions(-) delete mode 100644 submission.py diff --git a/submission.py b/submission.py deleted file mode 100644 index ab74207..0000000 --- a/submission.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import print_function -import argparse -import os -import random -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -from torch.autograd import Variable -import torch.nn.functional as F -import skimage -import skimage.io -import skimage.transform -import numpy as np -import time -import math -from utils import preprocess -from models import * - -# 2012 data /media/jiaren/ImageNet/data_scene_flow_2012/testing/ - -parser = 
argparse.ArgumentParser(description='PSMNet') -parser.add_argument('--KITTI', default='2015', - help='KITTI version') -parser.add_argument('--datapath', default='/media/jiaren/ImageNet/data_scene_flow_2015/testing/', - help='select model') -parser.add_argument('--loadmodel', default=None, - help='loading model') -parser.add_argument('--model', default='stackhourglass', - help='select model') -parser.add_argument('--maxdisp', type=int, default=192, - help='maxium disparity') -parser.add_argument('--no-cuda', action='store_true', default=False, - help='enables CUDA training') -parser.add_argument('--seed', type=int, default=1, metavar='S', - help='random seed (default: 1)') -args = parser.parse_args() -args.cuda = not args.no_cuda and torch.cuda.is_available() - -torch.manual_seed(args.seed) -if args.cuda: - torch.cuda.manual_seed(args.seed) - -if args.KITTI == '2015': - from dataloader import KITTI_submission_loader as DA -else: - from dataloader import KITTI_submission_loader2012 as DA - - -test_left_img, test_right_img = DA.dataloader(args.datapath) - -if args.model == 'stackhourglass': - model = stackhourglass(args.maxdisp) -elif args.model == 'basic': - model = basic(args.maxdisp) -else: - print('no model') - -model = nn.DataParallel(model, device_ids=[0]) -model.cuda() - -if args.loadmodel is not None: - state_dict = torch.load(args.loadmodel) - model.load_state_dict(state_dict['state_dict']) - -print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) - -def test(imgL,imgR): - model.eval() - - if args.cuda: - imgL = torch.FloatTensor(imgL).cuda() - imgR = torch.FloatTensor(imgR).cuda() - - imgL, imgR= Variable(imgL), Variable(imgR) - - with torch.no_grad(): - output = model(imgL,imgR) - output = torch.squeeze(output) - pred_disp = output.data.cpu().numpy() - - return pred_disp - - -def main(): - processed = preprocess.get_transform(augment=False) - - for inx in range(len(test_left_img)): - - imgL_o = 
(skimage.io.imread(test_left_img[inx]).astype('float32')) - imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32')) - imgL = processed(imgL_o).numpy() - imgR = processed(imgR_o).numpy() - imgL = np.reshape(imgL,[1,3,imgL.shape[1],imgL.shape[2]]) - imgR = np.reshape(imgR,[1,3,imgR.shape[1],imgR.shape[2]]) - - # pad to (384, 1248) - top_pad = 384-imgL.shape[2] - left_pad = 1248-imgL.shape[3] - imgL = np.lib.pad(imgL,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) - imgR = np.lib.pad(imgR,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) - - start_time = time.time() - pred_disp = test(imgL,imgR) - print('time = %.2f' %(time.time() - start_time)) - - top_pad = 384-imgL_o.shape[0] - left_pad = 1248-imgL_o.shape[1] - img = pred_disp[top_pad:,:-left_pad] - skimage.io.imsave(test_left_img[inx].split('/')[-1],(img*256).astype('uint16')) - -if __name__ == '__main__': - main() - - - - - - From af5cf81f22b5f31e0f4f836bc5b8534915ab17d6 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 16:15:28 +0800 Subject: [PATCH 16/38] update submission.py --- submission.py | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 submission.py diff --git a/submission.py b/submission.py new file mode 100644 index 0000000..e80ab02 --- /dev/null +++ b/submission.py @@ -0,0 +1,123 @@ +from __future__ import print_function +import argparse +import os +import random +import torch +import torch.nn as nn +import torchvision.transforms as transforms +import torch.nn.functional as F +import numpy as np +import time +import math +from models import * +from PIL import Image + +parser = argparse.ArgumentParser(description='PSMNet') +parser.add_argument('--KITTI', default='2015', + help='KITTI version') +parser.add_argument('--datapath', default='/media/jiaren/ImageNet/data_scene_flow_2015/testing/', + help='select model') +parser.add_argument('--loadmodel', 
default='./trained/pretrained_model_KITTI2015.tar', + help='loading model') +parser.add_argument('--model', default='stackhourglass', + help='select model') +parser.add_argument('--maxdisp', default=192, + help='maxium disparity') +parser.add_argument('--no-cuda', action='store_true', default=False, + help='enables CUDA training') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + +torch.manual_seed(args.seed) +if args.cuda: + torch.cuda.manual_seed(args.seed) + +if args.KITTI == '2015': + from dataloader import KITTI_submission_loader as DA +else: + from dataloader import KITTI_submission_loader2012 as DA + +test_left_img, test_right_img = DA.dataloader(args.datapath) + +if args.model == 'stackhourglass': + model = stackhourglass(args.maxdisp) +elif args.model == 'basic': + model = basic(args.maxdisp) +else: + print('no model') + +model = nn.DataParallel(model, device_ids=[0]) +model.cuda() + +if args.loadmodel is not None: + state_dict = torch.load(args.loadmodel) + model.load_state_dict(state_dict['state_dict']) + +print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) + +def test(imgL,imgR): + model.eval() + + if args.cuda: + imgL = imgL.cuda() + imgR = imgR.cuda() + + with torch.no_grad(): + output = model(imgL,imgR) + output = torch.squeeze(output).data.cpu().numpy() + return output + +def main(): + normal_mean_var = {'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225]} + infer_transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize(**normal_mean_var)]) + + for inx in range(len(test_left_img)): + + imgL_o = Image.open(test_left_img[inx]).convert('RGB') + imgR_o = Image.open(test_right_img[inx]).convert('RGB') + + imgL = infer_transform(imgL_o) + imgR = infer_transform(imgR_o) + + # pad to width and hight to 16 times + if imgL.shape[1] % 16 != 0: 
+ times = imgL.shape[1]//16 + top_pad = (times+1)*16 -imgL.shape[1] + else: + top_pad = 0 + + if imgL.shape[2] % 16 != 0: + times = imgL.shape[2]//16 + right_pad = (times+1)*16-imgL.shape[2] + else: + right_pad = 0 + + imgL = F.pad(imgL,(0,right_pad, top_pad,0)).unsqueeze(0) + imgR = F.pad(imgR,(0,right_pad, top_pad,0)).unsqueeze(0) + + start_time = time.time() + pred_disp = test(imgL,imgR) + print('time = %.2f' %(time.time() - start_time)) + + if top_pad !=0 or right_pad != 0: + img = pred_disp[top_pad:,:-right_pad] + else: + img = pred_disp + + img = (img*256).astype('uint16') + img = Image.fromarray(img) + img.save(test_left_img[inx].split('/')[-1]) + + +if __name__ == '__main__': + main() + + + + + + From 2cac9622922cb1d2aa3331917d8be350d144f3fd Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 16:16:18 +0800 Subject: [PATCH 17/38] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 8831047..8a0fd30 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Net ## ※We are working on moving these codes to Python 3.7 and torch 1.4. #### changlog 2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference +2020/04/16: Update submission.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference ### Citation ``` From 8c385073f252eb9b84a123e4efede9a2039acd4e Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 16:17:06 +0800 Subject: [PATCH 18/38] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 8a0fd30..5c85f6b 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Net ## ※We are working on moving these codes to Python 3.7 and torch 1.4. 
#### changlog 2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference + 2020/04/16: Update submission.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference ### Citation From 7d710eddcef967550ba449faf7fbed28b5afa5bc Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:31:30 +0800 Subject: [PATCH 19/38] Delete SecenFlowLoader.py --- dataloader/SecenFlowLoader.py | 79 ----------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 dataloader/SecenFlowLoader.py diff --git a/dataloader/SecenFlowLoader.py b/dataloader/SecenFlowLoader.py deleted file mode 100644 index af6f30a..0000000 --- a/dataloader/SecenFlowLoader.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -import torch -import torch.utils.data as data -import torch -import torchvision.transforms as transforms -import random -from PIL import Image, ImageOps -import preprocess -import listflowfile as lt -import readpfm as rp -import numpy as np - -IMG_EXTENSIONS = [ - '.jpg', '.JPG', '.jpeg', '.JPEG', - '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', -] - -def is_image_file(filename): - return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) - -def default_loader(path): - return Image.open(path).convert('RGB') - -def disparity_loader(path): - return rp.readPFM(path) - - -class myImageFloder(data.Dataset): - def __init__(self, left, right, left_disparity, training, loader=default_loader, dploader= disparity_loader): - - self.left = left - self.right = right - self.disp_L = left_disparity - self.loader = loader - self.dploader = dploader - self.training = training - - def __getitem__(self, index): - left = self.left[index] - right = self.right[index] - disp_L= self.disp_L[index] - - - left_img = self.loader(left) - right_img = self.loader(right) - dataL, scaleL = self.dploader(disp_L) - dataL = np.ascontiguousarray(dataL,dtype=np.float32) - - - - if self.training: - w, h = left_img.size - th, tw 
= 256, 512 - - x1 = random.randint(0, w - tw) - y1 = random.randint(0, h - th) - - left_img = left_img.crop((x1, y1, x1 + tw, y1 + th)) - right_img = right_img.crop((x1, y1, x1 + tw, y1 + th)) - - dataL = dataL[y1:y1 + th, x1:x1 + tw] - - processed = preprocess.get_transform(augment=False) - left_img = processed(left_img) - right_img = processed(right_img) - - return left_img, right_img, dataL - else: - w, h = left_img.size - left_img = left_img.crop((w-960, h-544, w, h)) - right_img = right_img.crop((w-960, h-544, w, h)) - processed = preprocess.get_transform(augment=False) - left_img = processed(left_img) - right_img = processed(right_img) - - return left_img, right_img, dataL - - def __len__(self): - return len(self.left) From 2f07238b79b88994e45829c0547c0f8f542d4a89 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:31:41 +0800 Subject: [PATCH 20/38] Delete readpfm.py --- dataloader/readpfm.py | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 dataloader/readpfm.py diff --git a/dataloader/readpfm.py b/dataloader/readpfm.py deleted file mode 100644 index c4b1536..0000000 --- a/dataloader/readpfm.py +++ /dev/null @@ -1,42 +0,0 @@ -import re -import numpy as np -import sys - - -def readPFM(file): - file = open(file, 'rb') - - color = None - width = None - height = None - scale = None - endian = None - - header = file.readline().rstrip() - if header == 'PF': - color = True - elif header == 'Pf': - color = False - else: - raise Exception('Not a PFM file.') - - dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) - if dim_match: - width, height = map(int, dim_match.groups()) - else: - raise Exception('Malformed PFM header.') - - scale = float(file.readline().rstrip()) - if scale < 0: # little-endian - endian = '<' - scale = -scale - else: - endian = '>' # big-endian - - data = np.fromfile(file, endian + 'f') - shape = (height, width, 3) if color else (height, width) - - data = np.reshape(data, 
shape) - data = np.flipud(data) - return data, scale - From 3a584bdd649c470529bcf0eb27c1b3861593c98a Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:32:07 +0800 Subject: [PATCH 21/38] Add files via upload --- dataloader/SecenFlowLoader.py | 72 +++++++++++++++++++++++++++++++++++ dataloader/readpfm.py | 45 ++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 dataloader/SecenFlowLoader.py create mode 100644 dataloader/readpfm.py diff --git a/dataloader/SecenFlowLoader.py b/dataloader/SecenFlowLoader.py new file mode 100644 index 0000000..a6ba537 --- /dev/null +++ b/dataloader/SecenFlowLoader.py @@ -0,0 +1,72 @@ +import os +import torch +import torch.utils.data as data +import torch +import torchvision.transforms as transforms +import random +from PIL import Image, ImageOps +from . import preprocess +from . import listflowfile as lt +from . import readpfm as rp +import numpy as np + +IMG_EXTENSIONS = [ + '.jpg', '.JPG', '.jpeg', '.JPEG', + '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', +] + +def is_image_file(filename): + return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) + +def default_loader(path): + return Image.open(path).convert('RGB') + +def disparity_loader(path): + return rp.readPFM(path) + +class myImageFloder(data.Dataset): + def __init__(self, left, right, left_disparity, training, loader=default_loader, dploader= disparity_loader): + + self.left = left + self.right = right + self.disp_L = left_disparity + self.loader = loader + self.dploader = dploader + self.training = training + + def __getitem__(self, index): + left = self.left[index] + right = self.right[index] + disp_L= self.disp_L[index] + + + left_img = self.loader(left) + right_img = self.loader(right) + dataL, scaleL = self.dploader(disp_L) + dataL = np.ascontiguousarray(dataL,dtype=np.float32) + + if self.training: + w, h = left_img.size + th, tw = 256, 512 + + x1 = random.randint(0, w - tw) + y1 = random.randint(0, h - th) + + 
left_img = left_img.crop((x1, y1, x1 + tw, y1 + th)) + right_img = right_img.crop((x1, y1, x1 + tw, y1 + th)) + + dataL = dataL[y1:y1 + th, x1:x1 + tw] + + processed = preprocess.get_transform(augment=False) + left_img = processed(left_img) + right_img = processed(right_img) + + return left_img, right_img, dataL + else: + processed = preprocess.get_transform(augment=False) + left_img = processed(left_img) + right_img = processed(right_img) + return left_img, right_img, dataL + + def __len__(self): + return len(self.left) diff --git a/dataloader/readpfm.py b/dataloader/readpfm.py new file mode 100644 index 0000000..6223627 --- /dev/null +++ b/dataloader/readpfm.py @@ -0,0 +1,45 @@ +import re +import numpy as np +import sys +import chardet + +def readPFM(file): + file = open(file, 'rb') + + color = None + width = None + height = None + scale = None + endian = None + + header = file.readline().rstrip() + encode_type = chardet.detect(header) + header = header.decode(encode_type['encoding']) + if header == 'PF': + color = True + elif header == 'Pf': + color = False + else: + raise Exception('Not a PFM file.') + + dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline().decode(encode_type['encoding'])) + if dim_match: + width, height = map(int, dim_match.groups()) + else: + raise Exception('Malformed PFM header.') + + scale = float(file.readline().rstrip().decode(encode_type['encoding'])) + if scale < 0: # little-endian + endian = '<' + scale = -scale + else: + endian = '>' # big-endian + + data = np.fromfile(file, endian + 'f') + shape = (height, width, 3) if color else (height, width) + + data = np.reshape(data, shape) + data = np.flipud(data) + return data, scale + + From dbf3923f6b098b81fe969efb6110fc6da0ac4e1d Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:34:25 +0800 Subject: [PATCH 22/38] Update README.md --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5c85f6b..2729a67 
100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Net #### changlog 2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference -2020/04/16: Update submission.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference +2020/04/16: Update submission.py, SecenFlowLoader.py, readpfm.py, main.py ### Citation ``` @@ -36,9 +36,9 @@ Recent work has shown that depth estimation from a stereo pair of images can be ### Dependencies -- [Python2.7](https://www.python.org/downloads/) -- [PyTorch(0.4.0+)](http://pytorch.org) -- torchvision 0.2.0 (higher version may cause issues) +- [Python 3.7](https://www.python.org/downloads/) +- [PyTorch(1.4.0+)](http://pytorch.org) +- torchvision 0.5.0 - [KITTI Stereo](http://www.cvlibs.net/datasets/kitti/eval_stereo.php) - [Scene Flow](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) @@ -51,7 +51,6 @@ And rename the folder as: "driving_frames_cleanpass", "driving_disparity", "monk ### Notice 1. Warning of upsample function in PyTorch 0.4.1+: add "align_corners=True" to upsample functions. 2. Output disparity may be better with multipling by 1.17. Reported from issues [#135](https://github.com/JiaRenChang/PSMNet/issues/135) and [#113](https://github.com/JiaRenChang/PSMNet/issues/113). -3. 
with torchvision > 0.2.0, RGB images should be loaded without adding ".astype('float32'))" ### Train As an example, use the following command to train a PSMNet on Scene Flow From 481f240f19585a9420eba5492023cd7da23aeb5c Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:34:46 +0800 Subject: [PATCH 23/38] Delete main.py --- main.py | 183 -------------------------------------------------------- 1 file changed, 183 deletions(-) delete mode 100644 main.py diff --git a/main.py b/main.py deleted file mode 100644 index 0933c74..0000000 --- a/main.py +++ /dev/null @@ -1,183 +0,0 @@ -from __future__ import print_function -import argparse -import os -import random -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torch.utils.data -from torch.autograd import Variable -import torch.nn.functional as F -import numpy as np -import time -import math -from dataloader import listflowfile as lt -from dataloader import SecenFlowLoader as DA -from models import * - -parser = argparse.ArgumentParser(description='PSMNet') -parser.add_argument('--maxdisp', type=int ,default=192, - help='maxium disparity') -parser.add_argument('--model', default='stackhourglass', - help='select model') -parser.add_argument('--datapath', default='dataset/', - help='datapath') -parser.add_argument('--epochs', type=int, default=10, - help='number of epochs to train') -parser.add_argument('--loadmodel', default= None, - help='load model') -parser.add_argument('--savemodel', default='./', - help='save model') -parser.add_argument('--no-cuda', action='store_true', default=False, - help='enables CUDA training') -parser.add_argument('--seed', type=int, default=1, metavar='S', - help='random seed (default: 1)') -args = parser.parse_args() -args.cuda = not args.no_cuda and torch.cuda.is_available() - -# set gpu id used -os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" - -torch.manual_seed(args.seed) -if args.cuda: - 
torch.cuda.manual_seed(args.seed) - -all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp = lt.dataloader(args.datapath) - -TrainImgLoader = torch.utils.data.DataLoader( - DA.myImageFloder(all_left_img,all_right_img,all_left_disp, True), - batch_size= 12, shuffle= True, num_workers= 8, drop_last=False) - -TestImgLoader = torch.utils.data.DataLoader( - DA.myImageFloder(test_left_img,test_right_img,test_left_disp, False), - batch_size= 8, shuffle= False, num_workers= 4, drop_last=False) - - -if args.model == 'stackhourglass': - model = stackhourglass(args.maxdisp) -elif args.model == 'basic': - model = basic(args.maxdisp) -else: - print('no model') - -if args.cuda: - model = nn.DataParallel(model) - model.cuda() - -if args.loadmodel is not None: - state_dict = torch.load(args.loadmodel) - model.load_state_dict(state_dict['state_dict']) - -print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) - -optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999)) - -def train(imgL,imgR, disp_L): - model.train() - imgL = Variable(torch.FloatTensor(imgL)) - imgR = Variable(torch.FloatTensor(imgR)) - disp_L = Variable(torch.FloatTensor(disp_L)) - - if args.cuda: - imgL, imgR, disp_true = imgL.cuda(), imgR.cuda(), disp_L.cuda() - - #--------- - mask = disp_true < args.maxdisp - mask.detach_() - #---- - optimizer.zero_grad() - - if args.model == 'stackhourglass': - output1, output2, output3 = model(imgL,imgR) - output1 = torch.squeeze(output1,1) - output2 = torch.squeeze(output2,1) - output3 = torch.squeeze(output3,1) - loss = 0.5*F.smooth_l1_loss(output1[mask], disp_true[mask], size_average=True) + 0.7*F.smooth_l1_loss(output2[mask], disp_true[mask], size_average=True) + F.smooth_l1_loss(output3[mask], disp_true[mask], size_average=True) - elif args.model == 'basic': - output = model(imgL,imgR) - output = torch.squeeze(output,1) - loss = F.smooth_l1_loss(output[mask], disp_true[mask], 
size_average=True) - - loss.backward() - optimizer.step() - - return loss.data[0] - -def test(imgL,imgR,disp_true): - model.eval() - imgL = Variable(torch.FloatTensor(imgL)) - imgR = Variable(torch.FloatTensor(imgR)) - if args.cuda: - imgL, imgR = imgL.cuda(), imgR.cuda() - - #--------- - mask = disp_true < 192 - #---- - - with torch.no_grad(): - output3 = model(imgL,imgR) - - output = torch.squeeze(output3.data.cpu(),1)[:,4:,:] - - if len(disp_true[mask])==0: - loss = 0 - else: - loss = torch.mean(torch.abs(output[mask]-disp_true[mask])) # end-point-error - - return loss - -def adjust_learning_rate(optimizer, epoch): - lr = 0.001 - print(lr) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def main(): - - start_full_time = time.time() - for epoch in range(1, args.epochs+1): - print('This is %d-th epoch' %(epoch)) - total_train_loss = 0 - adjust_learning_rate(optimizer,epoch) - - ## training ## - for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(TrainImgLoader): - start_time = time.time() - - loss = train(imgL_crop,imgR_crop, disp_crop_L) - print('Iter %d training loss = %.3f , time = %.2f' %(batch_idx, loss, time.time() - start_time)) - total_train_loss += loss - print('epoch %d total training loss = %.3f' %(epoch, total_train_loss/len(TrainImgLoader))) - - #SAVE - savefilename = args.savemodel+'/checkpoint_'+str(epoch)+'.tar' - torch.save({ - 'epoch': epoch, - 'state_dict': model.state_dict(), - 'train_loss': total_train_loss/len(TrainImgLoader), - }, savefilename) - - print('full training time = %.2f HR' %((time.time() - start_full_time)/3600)) - - #------------- TEST ------------------------------------------------------------ - total_test_loss = 0 - for batch_idx, (imgL, imgR, disp_L) in enumerate(TestImgLoader): - test_loss = test(imgL,imgR, disp_L) - print('Iter %d test loss = %.3f' %(batch_idx, test_loss)) - total_test_loss += test_loss - - print('total test loss = %.3f' %(total_test_loss/len(TestImgLoader))) - 
#---------------------------------------------------------------------------------- - #SAVE test information - savefilename = args.savemodel+'testinformation.tar' - torch.save({ - 'test_loss': total_test_loss/len(TestImgLoader), - }, savefilename) - - -if __name__ == '__main__': - main() - From 48e3523846833daca0e14a1b2065b69156f720ad Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:35:09 +0800 Subject: [PATCH 24/38] Add files via upload --- main.py | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 main.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..f33ef94 --- /dev/null +++ b/main.py @@ -0,0 +1,193 @@ +from __future__ import print_function +import argparse +import os +import random +import torch +import torch.nn as nn +import torch.optim as optim +from torch.autograd import Variable +import torch.nn.functional as F +import numpy as np +import time +import math +from dataloader import listflowfile as lt +from dataloader import SecenFlowLoader as DA +from models import * + +parser = argparse.ArgumentParser(description='PSMNet') +parser.add_argument('--maxdisp', type=int ,default=192, + help='maxium disparity') +parser.add_argument('--model', default='stackhourglass', + help='select model') +parser.add_argument('--datapath', default='/media/jiaren/ImageNet/SceneFlowData/', + help='datapath') +parser.add_argument('--epochs', type=int, default=0, + help='number of epochs to train') +parser.add_argument('--loadmodel', default= './trained/pretrained_sceneflow.tar', + help='load model') +parser.add_argument('--savemodel', default='./', + help='save model') +parser.add_argument('--no-cuda', action='store_true', default=False, + help='enables CUDA training') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + 
+torch.manual_seed(args.seed) +if args.cuda: + torch.cuda.manual_seed(args.seed) + +all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp = lt.dataloader(args.datapath) + +TrainImgLoader = torch.utils.data.DataLoader( + DA.myImageFloder(all_left_img,all_right_img,all_left_disp, True), + batch_size= 12, shuffle= True, num_workers= 8, drop_last=False) + +TestImgLoader = torch.utils.data.DataLoader( + DA.myImageFloder(test_left_img,test_right_img,test_left_disp, False), + batch_size= 8, shuffle= False, num_workers= 4, drop_last=False) + + +if args.model == 'stackhourglass': + model = stackhourglass(args.maxdisp) +elif args.model == 'basic': + model = basic(args.maxdisp) +else: + print('no model') + +if args.cuda: + model = nn.DataParallel(model) + model.cuda() + +if args.loadmodel is not None: + print('Load pretrained model') + pretrain_dict = torch.load(args.loadmodel) + model.load_state_dict(pretrain_dict['state_dict']) + +print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) + +optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999)) + +def train(imgL,imgR, disp_L): + model.train() + + if args.cuda: + imgL, imgR, disp_true = imgL.cuda(), imgR.cuda(), disp_L.cuda() + + #--------- + mask = disp_true < args.maxdisp + mask.detach_() + #---- + optimizer.zero_grad() + + if args.model == 'stackhourglass': + output1, output2, output3 = model(imgL,imgR) + output1 = torch.squeeze(output1,1) + output2 = torch.squeeze(output2,1) + output3 = torch.squeeze(output3,1) + loss = 0.5*F.smooth_l1_loss(output1[mask], disp_true[mask], size_average=True) + 0.7*F.smooth_l1_loss(output2[mask], disp_true[mask], size_average=True) + F.smooth_l1_loss(output3[mask], disp_true[mask], size_average=True) + elif args.model == 'basic': + output = model(imgL,imgR) + output = torch.squeeze(output,1) + loss = F.smooth_l1_loss(output[mask], disp_true[mask], size_average=True) + + loss.backward() + 
optimizer.step() + + return loss.data + +def test(imgL,imgR,disp_true): + + model.eval() + + if args.cuda: + imgL, imgR, disp_true = imgL.cuda(), imgR.cuda(), disp_true.cuda() + #--------- + mask = disp_true < 192 + #---- + + if imgL.shape[2] % 16 != 0: + times = imgL.shape[2]//16 + top_pad = (times+1)*16 -imgL.shape[2] + else: + top_pad = 0 + + if imgL.shape[3] % 16 != 0: + times = imgL.shape[3]//16 + right_pad = (times+1)*16-imgL.shape[3] + else: + right_pad = 0 + + imgL = F.pad(imgL,(0,right_pad, top_pad,0)) + imgR = F.pad(imgR,(0,right_pad, top_pad,0)) + + with torch.no_grad(): + output3 = model(imgL,imgR) + output3 = torch.squeeze(output3) + + if top_pad !=0: + img = output3[:,top_pad:,:] + else: + img = output3 + + if len(disp_true[mask])==0: + loss = 0 + else: + loss = F.l1_loss(img[mask],disp_true[mask]) #torch.mean(torch.abs(img[mask]-disp_true[mask])) # end-point-error + + return loss.data.cpu() + +def adjust_learning_rate(optimizer, epoch): + lr = 0.001 + print(lr) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def main(): + + start_full_time = time.time() + for epoch in range(0, args.epochs): + print('This is %d-th epoch' %(epoch)) + total_train_loss = 0 + adjust_learning_rate(optimizer,epoch) + + ## training ## + for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(TrainImgLoader): + start_time = time.time() + + loss = train(imgL_crop,imgR_crop, disp_crop_L) + print('Iter %d training loss = %.3f , time = %.2f' %(batch_idx, loss, time.time() - start_time)) + total_train_loss += loss + print('epoch %d total training loss = %.3f' %(epoch, total_train_loss/len(TrainImgLoader))) + + #SAVE + savefilename = args.savemodel+'/checkpoint_'+str(epoch)+'.tar' + torch.save({ + 'epoch': epoch, + 'state_dict': model.state_dict(), + 'train_loss': total_train_loss/len(TrainImgLoader), + }, savefilename) + + print('full training time = %.2f HR' %((time.time() - start_full_time)/3600)) + + #------------- TEST 
------------------------------------------------------------ + total_test_loss = 0 + for batch_idx, (imgL, imgR, disp_L) in enumerate(TestImgLoader): + test_loss = test(imgL,imgR, disp_L) + print('Iter %d test loss = %.3f' %(batch_idx, test_loss)) + total_test_loss += test_loss + + print('total test loss = %.3f' %(total_test_loss/len(TestImgLoader))) + #---------------------------------------------------------------------------------- + #SAVE test information + savefilename = args.savemodel+'testinformation.tar' + torch.save({ + 'test_loss': total_test_loss/len(TestImgLoader), + }, savefilename) + + +if __name__ == '__main__': + main() + From cefdd754ef19b033ea8648479f7d7b766a6af3ef Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:35:59 +0800 Subject: [PATCH 25/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2729a67..ac9f415 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Network](https://arxiv.org/abs/1803.08669)" paper (CVPR 2018) by [Jia-Ren Chang](https://jiarenchang.github.io/) and [Yong-Sheng Chen](https://people.cs.nctu.edu.tw/~yschen/). -## ※We are working on moving these codes to Python 3.7 and torch 1.4. +## ※We are working on moving these codes to python 3.7 and torch 1.4. 
#### changlog 2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference From 6757107a81cdb52ec7d6a7741fd1d59ce604599e Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Thu, 16 Apr 2020 17:47:40 +0800 Subject: [PATCH 26/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ac9f415..1cbee08 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Network](https://arxiv.org/abs/1803.08669)" paper (CVPR 2018) by [Jia-Ren Chang](https://jiarenchang.github.io/) and [Yong-Sheng Chen](https://people.cs.nctu.edu.tw/~yschen/). ## ※We are working on moving these codes to python 3.7 and torch 1.4. -#### changlog +#### changelog 2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference 2020/04/16: Update submission.py, SecenFlowLoader.py, readpfm.py, main.py From a5c27041b34785bcaaf3589cf9724428053e60f7 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Sun, 3 May 2020 13:03:32 +0800 Subject: [PATCH 27/38] Update stackhourglass.py --- models/stackhourglass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/stackhourglass.py b/models/stackhourglass.py index 991ba16..fce9f29 100644 --- a/models/stackhourglass.py +++ b/models/stackhourglass.py @@ -107,9 +107,9 @@ def forward(self, left, right): #matching - cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp/4, refimg_fea.size()[2], refimg_fea.size()[3]).zero_()).cuda() + cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp//4, refimg_fea.size()[2], refimg_fea.size()[3]).zero_()).cuda() - for i in range(self.maxdisp/4): + for i in range(self.maxdisp//4): if i > 0 : cost[:, :refimg_fea.size()[1], i, :,i:] = refimg_fea[:,:,:,i:] cost[:, refimg_fea.size()[1]:, i, :,i:] = 
targetimg_fea[:,:,:,:-i] From ea7a9f57f6c15536c6b0b15226265c345782ac07 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Sun, 20 Dec 2020 14:52:39 +0800 Subject: [PATCH 28/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1cbee08..f46d5d2 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Network](https://arxiv.org/abs/1803.08669)" paper (CVPR 2018) by [Jia-Ren Chang](https://jiarenchang.github.io/) and [Yong-Sheng Chen](https://people.cs.nctu.edu.tw/~yschen/). -## ※We are working on moving these codes to python 3.7 and torch 1.4. +## ※We are working on moving these codes to python 3.7 and lastest PyTorch #### changelog 2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference From 70943082cdcaf381906868e45df7920002a8f220 Mon Sep 17 00:00:00 2001 From: jiaren Date: Sun, 20 Dec 2020 15:44:43 +0800 Subject: [PATCH 29/38] updatePSM --- Test_img.py | 6 +- dataloader/KITTI_submission_loader.py | 12 +- dataloader/KITTI_submission_loader2012.py | 12 +- dataloader/KITTIloader2012.py | 26 ++--- dataloader/KITTIloader2015.py | 32 +++--- dataloader/listflowfile.py | 127 +++++++++++----------- main.py | 4 +- models/basic.py | 2 +- models/stackhourglass.py | 24 ++-- models/submodule.py | 16 +-- 10 files changed, 127 insertions(+), 134 deletions(-) diff --git a/Test_img.py b/Test_img.py index 948dc73..31340ef 100644 --- a/Test_img.py +++ b/Test_img.py @@ -109,8 +109,12 @@ def main(): print('time = %.2f' %(time.time() - start_time)) - if top_pad !=0 or right_pad != 0: + if top_pad !=0 and right_pad != 0: img = pred_disp[top_pad:,:-right_pad] + elif top_pad ==0 and right_pad != 0: + img = pred_disp[:,:-right_pad] + elif top_pad !=0 and right_pad == 0: + img = pred_disp[top_pad:,:] else: img = pred_disp diff --git a/dataloader/KITTI_submission_loader.py 
b/dataloader/KITTI_submission_loader.py index ad73745..cd4252e 100644 --- a/dataloader/KITTI_submission_loader.py +++ b/dataloader/KITTI_submission_loader.py @@ -16,14 +16,14 @@ def is_image_file(filename): def dataloader(filepath): - left_fold = 'image_2/' - right_fold = 'image_3/' + left_fold = 'image_2/' + right_fold = 'image_3/' - image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] + image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] - left_test = [filepath+left_fold+img for img in image] - right_test = [filepath+right_fold+img for img in image] + left_test = [filepath+left_fold+img for img in image] + right_test = [filepath+right_fold+img for img in image] - return left_test, right_test + return left_test, right_test diff --git a/dataloader/KITTI_submission_loader2012.py b/dataloader/KITTI_submission_loader2012.py index 7ca9859..0767ab6 100644 --- a/dataloader/KITTI_submission_loader2012.py +++ b/dataloader/KITTI_submission_loader2012.py @@ -16,14 +16,14 @@ def is_image_file(filename): def dataloader(filepath): - left_fold = 'colored_0/' - right_fold = 'colored_1/' + left_fold = 'colored_0/' + right_fold = 'colored_1/' - image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] + image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] - left_test = [filepath+left_fold+img for img in image] - right_test = [filepath+right_fold+img for img in image] + left_test = [filepath+left_fold+img for img in image] + right_test = [filepath+right_fold+img for img in image] - return left_test, right_test + return left_test, right_test diff --git a/dataloader/KITTIloader2012.py b/dataloader/KITTIloader2012.py index d651a89..6a0f944 100644 --- a/dataloader/KITTIloader2012.py +++ b/dataloader/KITTIloader2012.py @@ -16,22 +16,22 @@ def is_image_file(filename): def dataloader(filepath): - left_fold = 'colored_0/' - right_fold = 'colored_1/' - disp_noc = 'disp_occ/' + left_fold = 
'colored_0/' + right_fold = 'colored_1/' + disp_noc = 'disp_occ/' - image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] + image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] - train = image[:] - val = image[160:] + train = image[:] + val = image[160:] - left_train = [filepath+left_fold+img for img in train] - right_train = [filepath+right_fold+img for img in train] - disp_train = [filepath+disp_noc+img for img in train] + left_train = [filepath+left_fold+img for img in train] + right_train = [filepath+right_fold+img for img in train] + disp_train = [filepath+disp_noc+img for img in train] - left_val = [filepath+left_fold+img for img in val] - right_val = [filepath+right_fold+img for img in val] - disp_val = [filepath+disp_noc+img for img in val] + left_val = [filepath+left_fold+img for img in val] + right_val = [filepath+right_fold+img for img in val] + disp_val = [filepath+disp_noc+img for img in val] - return left_train, right_train, disp_train, left_val, right_val, disp_val + return left_train, right_train, disp_train, left_val, right_val, disp_val diff --git a/dataloader/KITTIloader2015.py b/dataloader/KITTIloader2015.py index c443189..0eb1cf4 100644 --- a/dataloader/KITTIloader2015.py +++ b/dataloader/KITTIloader2015.py @@ -16,24 +16,24 @@ def is_image_file(filename): def dataloader(filepath): - left_fold = 'image_2/' - right_fold = 'image_3/' - disp_L = 'disp_occ_0/' - disp_R = 'disp_occ_1/' + left_fold = 'image_2/' + right_fold = 'image_3/' + disp_L = 'disp_occ_0/' + disp_R = 'disp_occ_1/' - image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] + image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] - train = image[:160] - val = image[160:] + train = image[:160] + val = image[160:] - left_train = [filepath+left_fold+img for img in train] - right_train = [filepath+right_fold+img for img in train] - disp_train_L = [filepath+disp_L+img for img in train] - 
#disp_train_R = [filepath+disp_R+img for img in train] + left_train = [filepath+left_fold+img for img in train] + right_train = [filepath+right_fold+img for img in train] + disp_train_L = [filepath+disp_L+img for img in train] + #disp_train_R = [filepath+disp_R+img for img in train] - left_val = [filepath+left_fold+img for img in val] - right_val = [filepath+right_fold+img for img in val] - disp_val_L = [filepath+disp_L+img for img in val] - #disp_val_R = [filepath+disp_R+img for img in val] + left_val = [filepath+left_fold+img for img in val] + right_val = [filepath+right_fold+img for img in val] + disp_val_L = [filepath+disp_L+img for img in val] + #disp_val_R = [filepath+disp_R+img for img in val] - return left_train, right_train, disp_train_L, left_val, right_val, disp_val_L + return left_train, right_train, disp_train_L, left_val, right_val, disp_val_L diff --git a/dataloader/listflowfile.py b/dataloader/listflowfile.py index d16556e..fa606bb 100644 --- a/dataloader/listflowfile.py +++ b/dataloader/listflowfile.py @@ -15,93 +15,94 @@ def is_image_file(filename): def dataloader(filepath): - classes = [d for d in os.listdir(filepath) if os.path.isdir(os.path.join(filepath, d))] - image = [img for img in classes if img.find('frames_cleanpass') > -1] - disp = [dsp for dsp in classes if dsp.find('disparity') > -1] + classes = [d for d in os.listdir(filepath) if os.path.isdir(os.path.join(filepath, d))] + image = [img for img in classes if img.find('frames_cleanpass') > -1] + disp = [dsp for dsp in classes if dsp.find('disparity') > -1] - monkaa_path = filepath + [x for x in image if 'monkaa' in x][0] - monkaa_disp = filepath + [x for x in disp if 'monkaa' in x][0] + monkaa_path = filepath + [x for x in image if 'monkaa' in x][0] + monkaa_disp = filepath + [x for x in disp if 'monkaa' in x][0] - - monkaa_dir = os.listdir(monkaa_path) - all_left_img=[] - all_right_img=[] - all_left_disp = [] - test_left_img=[] - test_right_img=[] - test_left_disp = [] + monkaa_dir = 
os.listdir(monkaa_path) + all_left_img=[] + all_right_img=[] + all_left_disp = [] + test_left_img=[] + test_right_img=[] + test_left_disp = [] - for dd in monkaa_dir: - for im in os.listdir(monkaa_path+'/'+dd+'/left/'): - if is_image_file(monkaa_path+'/'+dd+'/left/'+im): - all_left_img.append(monkaa_path+'/'+dd+'/left/'+im) - all_left_disp.append(monkaa_disp+'/'+dd+'/left/'+im.split(".")[0]+'.pfm') - for im in os.listdir(monkaa_path+'/'+dd+'/right/'): - if is_image_file(monkaa_path+'/'+dd+'/right/'+im): - all_right_img.append(monkaa_path+'/'+dd+'/right/'+im) + for dd in monkaa_dir: + for im in os.listdir(monkaa_path+'/'+dd+'/left/'): + if is_image_file(monkaa_path+'/'+dd+'/left/'+im): + all_left_img.append(monkaa_path+'/'+dd+'/left/'+im) + all_left_disp.append(monkaa_disp+'/'+dd+'/left/'+im.split(".")[0]+'.pfm') - flying_path = filepath + [x for x in image if x == 'frames_cleanpass'][0] - flying_disp = filepath + [x for x in disp if x == 'frames_disparity'][0] - flying_dir = flying_path+'/TRAIN/' - subdir = ['A','B','C'] + for im in os.listdir(monkaa_path+'/'+dd+'/right/'): + if is_image_file(monkaa_path+'/'+dd+'/right/'+im): + all_right_img.append(monkaa_path+'/'+dd+'/right/'+im) - for ss in subdir: - flying = os.listdir(flying_dir+ss) + flying_path = filepath + [x for x in image if x == 'frames_cleanpass'][0] + flying_disp = filepath + [x for x in disp if x == 'frames_disparity'][0] + flying_dir = flying_path+'/TRAIN/' + subdir = ['A','B','C'] - for ff in flying: - imm_l = os.listdir(flying_dir+ss+'/'+ff+'/left/') - for im in imm_l: - if is_image_file(flying_dir+ss+'/'+ff+'/left/'+im): - all_left_img.append(flying_dir+ss+'/'+ff+'/left/'+im) + for ss in subdir: + flying = os.listdir(flying_dir+ss) - all_left_disp.append(flying_disp+'/TRAIN/'+ss+'/'+ff+'/left/'+im.split(".")[0]+'.pfm') + for ff in flying: + imm_l = os.listdir(flying_dir+ss+'/'+ff+'/left/') + for im in imm_l: + if is_image_file(flying_dir+ss+'/'+ff+'/left/'+im): + 
all_left_img.append(flying_dir+ss+'/'+ff+'/left/'+im) + + all_left_disp.append(flying_disp+'/TRAIN/'+ss+'/'+ff+'/left/'+im.split(".")[0]+'.pfm') - if is_image_file(flying_dir+ss+'/'+ff+'/right/'+im): - all_right_img.append(flying_dir+ss+'/'+ff+'/right/'+im) + if is_image_file(flying_dir+ss+'/'+ff+'/right/'+im): + all_right_img.append(flying_dir+ss+'/'+ff+'/right/'+im) - flying_dir = flying_path+'/TEST/' + flying_dir = flying_path+'/TEST/' - subdir = ['A','B','C'] + subdir = ['A','B','C'] - for ss in subdir: - flying = os.listdir(flying_dir+ss) + for ss in subdir: + flying = os.listdir(flying_dir+ss) - for ff in flying: - imm_l = os.listdir(flying_dir+ss+'/'+ff+'/left/') - for im in imm_l: - if is_image_file(flying_dir+ss+'/'+ff+'/left/'+im): - test_left_img.append(flying_dir+ss+'/'+ff+'/left/'+im) + for ff in flying: + imm_l = os.listdir(flying_dir+ss+'/'+ff+'/left/') + for im in imm_l: + if is_image_file(flying_dir+ss+'/'+ff+'/left/'+im): + test_left_img.append(flying_dir+ss+'/'+ff+'/left/'+im) + + test_left_disp.append(flying_disp+'/TEST/'+ss+'/'+ff+'/left/'+im.split(".")[0]+'.pfm') - test_left_disp.append(flying_disp+'/TEST/'+ss+'/'+ff+'/left/'+im.split(".")[0]+'.pfm') + if is_image_file(flying_dir+ss+'/'+ff+'/right/'+im): + test_right_img.append(flying_dir+ss+'/'+ff+'/right/'+im) - if is_image_file(flying_dir+ss+'/'+ff+'/right/'+im): - test_right_img.append(flying_dir+ss+'/'+ff+'/right/'+im) + driving_dir = filepath + [x for x in image if 'driving' in x][0] + '/' + driving_disp = filepath + [x for x in disp if 'driving' in x][0] - driving_dir = filepath + [x for x in image if 'driving' in x][0] + '/' - driving_disp = filepath + [x for x in disp if 'driving' in x][0] + subdir1 = ['35mm_focallength','15mm_focallength'] + subdir2 = ['scene_backwards','scene_forwards'] + subdir3 = ['fast','slow'] - subdir1 = ['35mm_focallength','15mm_focallength'] - subdir2 = ['scene_backwards','scene_forwards'] - subdir3 = ['fast','slow'] + for i in subdir1: + for j in subdir2: + 
for k in subdir3: + imm_l = os.listdir(driving_dir+i+'/'+j+'/'+k+'/left/') + for im in imm_l: + if is_image_file(driving_dir+i+'/'+j+'/'+k+'/left/'+im): + all_left_img.append(driving_dir+i+'/'+j+'/'+k+'/left/'+im) - for i in subdir1: - for j in subdir2: - for k in subdir3: - imm_l = os.listdir(driving_dir+i+'/'+j+'/'+k+'/left/') - for im in imm_l: - if is_image_file(driving_dir+i+'/'+j+'/'+k+'/left/'+im): - all_left_img.append(driving_dir+i+'/'+j+'/'+k+'/left/'+im) - all_left_disp.append(driving_disp+'/'+i+'/'+j+'/'+k+'/left/'+im.split(".")[0]+'.pfm') + all_left_disp.append(driving_disp+'/'+i+'/'+j+'/'+k+'/left/'+im.split(".")[0]+'.pfm') - if is_image_file(driving_dir+i+'/'+j+'/'+k+'/right/'+im): - all_right_img.append(driving_dir+i+'/'+j+'/'+k+'/right/'+im) + if is_image_file(driving_dir+i+'/'+j+'/'+k+'/right/'+im): + all_right_img.append(driving_dir+i+'/'+j+'/'+k+'/right/'+im) - return all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp + return all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp diff --git a/main.py b/main.py index f33ef94..e3f6c7e 100644 --- a/main.py +++ b/main.py @@ -21,9 +21,9 @@ help='select model') parser.add_argument('--datapath', default='/media/jiaren/ImageNet/SceneFlowData/', help='datapath') -parser.add_argument('--epochs', type=int, default=0, +parser.add_argument('--epochs', type=int, default=10, help='number of epochs to train') -parser.add_argument('--loadmodel', default= './trained/pretrained_sceneflow.tar', +parser.add_argument('--loadmodel', default= None, help='load model') parser.add_argument('--savemodel', default='./', help='save model') diff --git a/models/basic.py b/models/basic.py index 5ee5a59..bbaefd5 100644 --- a/models/basic.py +++ b/models/basic.py @@ -5,7 +5,7 @@ from torch.autograd import Variable import torch.nn.functional as F import math -from submodule import * +from .submodule import * class PSMNet(nn.Module): def __init__(self, 
maxdisp): diff --git a/models/stackhourglass.py b/models/stackhourglass.py index fce9f29..48238de 100644 --- a/models/stackhourglass.py +++ b/models/stackhourglass.py @@ -5,7 +5,7 @@ from torch.autograd import Variable import torch.nn.functional as F import math -from submodule import * +from .submodule import * class hourglass(nn.Module): def __init__(self, inplanes): @@ -135,23 +135,23 @@ def forward(self, left, right): cost3 = self.classif3(out3) + cost2 if self.training: - cost1 = F.upsample(cost1, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') - cost2 = F.upsample(cost2, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') + cost1 = F.upsample(cost1, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') + cost2 = F.upsample(cost2, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') - cost1 = torch.squeeze(cost1,1) - pred1 = F.softmax(cost1,dim=1) - pred1 = disparityregression(self.maxdisp)(pred1) + cost1 = torch.squeeze(cost1,1) + pred1 = F.softmax(cost1,dim=1) + pred1 = disparityregression(self.maxdisp)(pred1) - cost2 = torch.squeeze(cost2,1) - pred2 = F.softmax(cost2,dim=1) - pred2 = disparityregression(self.maxdisp)(pred2) + cost2 = torch.squeeze(cost2,1) + pred2 = F.softmax(cost2,dim=1) + pred2 = disparityregression(self.maxdisp)(pred2) cost3 = F.upsample(cost3, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') cost3 = torch.squeeze(cost3,1) pred3 = F.softmax(cost3,dim=1) - #For your information: This formulation 'softmax(c)' learned "similarity" - #while 'softmax(-c)' learned 'matching cost' as mentioned in the paper. - #However, 'c' or '-c' do not affect the performance because feature-based cost volume provided flexibility. + #For your information: This formulation 'softmax(c)' learned "similarity" + #while 'softmax(-c)' learned 'matching cost' as mentioned in the paper. + #However, 'c' or '-c' do not affect the performance because feature-based cost volume provided flexibility. 
pred3 = disparityregression(self.maxdisp)(pred3) if self.training: diff --git a/models/submodule.py b/models/submodule.py index a7c8d57..d7e1408 100644 --- a/models/submodule.py +++ b/models/submodule.py @@ -42,25 +42,13 @@ def forward(self, x): return out -class matchshifted(nn.Module): - def __init__(self): - super(matchshifted, self).__init__() - - def forward(self, left, right, shift): - batch, filters, height, width = left.size() - shifted_left = F.pad(torch.index_select(left, 3, Variable(torch.LongTensor([i for i in range(shift,width)])).cuda()),(shift,0,0,0)) - shifted_right = F.pad(torch.index_select(right, 3, Variable(torch.LongTensor([i for i in range(width-shift)])).cuda()),(shift,0,0,0)) - out = torch.cat((shifted_left,shifted_right),1).view(batch,filters*2,1,height,width) - return out - class disparityregression(nn.Module): def __init__(self, maxdisp): super(disparityregression, self).__init__() - self.disp = Variable(torch.Tensor(np.reshape(np.array(range(maxdisp)),[1,maxdisp,1,1])).cuda(), requires_grad=False) + self.disp = torch.Tensor(np.reshape(np.array(range(maxdisp+1)),[1, maxdisp+1,1,1])).cuda() def forward(self, x): - disp = self.disp.repeat(x.size()[0],1,x.size()[2],x.size()[3]) - out = torch.sum(x*disp,1) + out = torch.sum(x*self.disp.data,2, keepdim=True) return out class feature_extraction(nn.Module): From d1597415149cb6428422c28bb2cd99ebefa935cd Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Sun, 20 Dec 2020 15:49:28 +0800 Subject: [PATCH 30/38] Update README.md Update PSMNet --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f46d5d2..546eb25 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,8 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Network](https://arxiv.org/abs/1803.08669)" paper (CVPR 2018) by [Jia-Ren Chang](https://jiarenchang.github.io/) and [Yong-Sheng Chen](https://people.cs.nctu.edu.tw/~yschen/). 
-## ※We are working on moving these codes to python 3.7 and lastest PyTorch #### changelog -2020/04/16: Update Test_img.py: now support torch 1.4 / torchvision 0.5.0 and python 3.7 for inference - -2020/04/16: Update submission.py, SecenFlowLoader.py, readpfm.py, main.py +2020/12/20: Update PSMNet: now support torch 1.6.0 / torchvision 0.5.0 and python 3.7, Removed inconsistent indentation. ### Citation ``` @@ -37,7 +34,7 @@ Recent work has shown that depth estimation from a stereo pair of images can be ### Dependencies - [Python 3.7](https://www.python.org/downloads/) -- [PyTorch(1.4.0+)](http://pytorch.org) +- [PyTorch(1.6.0+)](http://pytorch.org) - torchvision 0.5.0 - [KITTI Stereo](http://www.cvlibs.net/datasets/kitti/eval_stereo.php) - [Scene Flow](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) From 15ebe30c3b6867e8050970f4ee4fd9fe2846d6f1 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Sun, 20 Dec 2020 16:49:46 +0800 Subject: [PATCH 31/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 546eb25..e904d33 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Net #### changelog 2020/12/20: Update PSMNet: now support torch 1.6.0 / torchvision 0.5.0 and python 3.7, Removed inconsistent indentation. - +2020/12/20: Our proposed Real-Time Stereo can be found here [Real-time Stereo] (https://github.com/JiaRenChang/RealtimeStereo). 
### Citation ``` @inproceedings{chang2018pyramid, From 7d042aa2d8889d3bfe91d4509b5349311fff5462 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Sun, 20 Dec 2020 16:50:07 +0800 Subject: [PATCH 32/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e904d33..120bb75 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Net #### changelog 2020/12/20: Update PSMNet: now support torch 1.6.0 / torchvision 0.5.0 and python 3.7, Removed inconsistent indentation. -2020/12/20: Our proposed Real-Time Stereo can be found here [Real-time Stereo] (https://github.com/JiaRenChang/RealtimeStereo). +2020/12/20: Our proposed Real-Time Stereo can be found here [Real-time Stereo](https://github.com/JiaRenChang/RealtimeStereo). ### Citation ``` @inproceedings{chang2018pyramid, From de44aed54757b70bed726a1a92c68a55c06d6b8e Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Sun, 20 Dec 2020 16:50:28 +0800 Subject: [PATCH 33/38] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 120bb75..e022d66 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ This repository contains the code (in PyTorch) for "[Pyramid Stereo Matching Net #### changelog 2020/12/20: Update PSMNet: now support torch 1.6.0 / torchvision 0.5.0 and python 3.7, Removed inconsistent indentation. + 2020/12/20: Our proposed Real-Time Stereo can be found here [Real-time Stereo](https://github.com/JiaRenChang/RealtimeStereo). 
### Citation ``` From c31ab73de3de7afd9cb198c3e7b22a95021e9ae2 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Tue, 22 Dec 2020 19:55:31 +0800 Subject: [PATCH 34/38] Update submodule.py correction disparity regression --- models/submodule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/submodule.py b/models/submodule.py index d7e1408..d76f744 100644 --- a/models/submodule.py +++ b/models/submodule.py @@ -45,7 +45,7 @@ def forward(self, x): class disparityregression(nn.Module): def __init__(self, maxdisp): super(disparityregression, self).__init__() - self.disp = torch.Tensor(np.reshape(np.array(range(maxdisp+1)),[1, maxdisp+1,1,1])).cuda() + self.disp = torch.Tensor(np.reshape(np.array(range(maxdisp)),[1, maxdisp,1,1])).cuda() def forward(self, x): out = torch.sum(x*self.disp.data,2, keepdim=True) From ce82dfd9e6ead62e290b606f1197fcc768a67eae Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Tue, 22 Dec 2020 20:06:45 +0800 Subject: [PATCH 35/38] Update submodule.py --- models/submodule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/submodule.py b/models/submodule.py index d76f744..2953bfd 100644 --- a/models/submodule.py +++ b/models/submodule.py @@ -48,7 +48,7 @@ def __init__(self, maxdisp): self.disp = torch.Tensor(np.reshape(np.array(range(maxdisp)),[1, maxdisp,1,1])).cuda() def forward(self, x): - out = torch.sum(x*self.disp.data,2, keepdim=True) + out = torch.sum(x*self.disp.data,1, keepdim=True) return out class feature_extraction(nn.Module): From eb91b0113411075cb3d2fd4d700730941a1f0c37 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Fri, 2 Apr 2021 08:31:22 +0800 Subject: [PATCH 36/38] fix point to same memory --- finetune.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/finetune.py b/finetune.py index c944c9a..27a76f9 100644 --- a/finetune.py +++ b/finetune.py @@ -16,6 +16,7 @@ import numpy as np import time import math +import copy from dataloader import KITTIloader2015 
as ls from dataloader import KITTILoader as DA @@ -125,7 +126,7 @@ def test(imgL,imgR,disp_true): pred_disp = output3.data.cpu() #computing 3-px error# - true_disp = disp_true + true_disp = copy.deepcopy(disp_true) index = np.argwhere(true_disp>0) disp_true[index[0][:], index[1][:], index[2][:]] = np.abs(true_disp[index[0][:], index[1][:], index[2][:]]-pred_disp[index[0][:], index[1][:], index[2][:]]) correct = (disp_true[index[0][:], index[1][:], index[2][:]] < 3)|(disp_true[index[0][:], index[1][:], index[2][:]] < true_disp[index[0][:], index[1][:], index[2][:]]*0.05) From 02f7c0b92e3e3918d1843fcec782099c2c28d23c Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Wed, 22 Sep 2021 16:53:50 +0800 Subject: [PATCH 37/38] Update README.md update new pretrained model --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e022d66..009bf93 100644 --- a/README.md +++ b/README.md @@ -90,10 +90,11 @@ python submission.py --maxdisp 192 \ ※NOTE: The pretrained model were saved in .tar; however, you don't need to untar it. Use torch.load() to load it. Update: 2018/9/6 We released the pre-trained KITTI 2012 model. 
+Update: 2021/9/22 a pretrained model using torch 1.8.1 (the previous model weight are trained torch 0.4.1) -| KITTI 2015 | Scene Flow | KITTI 2012| -|---|---|---| -|[Google Drive](https://drive.google.com/file/d/1pHWjmhKMG4ffCrpcsp_MTXMJXhgl3kF9/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1xoqkQ2NXik1TML_FMUTNZJFAHrhLdKZG/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1p4eJ2xDzvQxaqB20A_MmSP9-KORBX1pZ/view?usp=sharing)| +| KITTI 2015 | Scene Flow | KITTI 2012 | Scene Flow (torch 1.8.1) +|---|---|---|---| +|[Google Drive](https://drive.google.com/file/d/1pHWjmhKMG4ffCrpcsp_MTXMJXhgl3kF9/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1xoqkQ2NXik1TML_FMUTNZJFAHrhLdKZG/view?usp=sharing)|[Google Drive](https://drive.google.com/file/d/1p4eJ2xDzvQxaqB20A_MmSP9-KORBX1pZ/view?usp=sharing)| [Google Drive](https://drive.google.com/file/d/1NDKrWHkwgMKtDwynXVU12emK3G5d5kkp/view?usp=sharing) ### Test on your own stereo pair ``` From 87ac9093afbf6545c093bd9d26c5ffd66e49a7b8 Mon Sep 17 00:00:00 2001 From: Jia-Ren Chang Date: Wed, 22 Sep 2021 17:09:08 +0800 Subject: [PATCH 38/38] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 009bf93..38fdc7f 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,7 @@ python submission.py --maxdisp 192 \ ※NOTE: The pretrained model were saved in .tar; however, you don't need to untar it. Use torch.load() to load it. Update: 2018/9/6 We released the pre-trained KITTI 2012 model. + Update: 2021/9/22 a pretrained model using torch 1.8.1 (the previous model weight are trained torch 0.4.1) | KITTI 2015 | Scene Flow | KITTI 2012 | Scene Flow (torch 1.8.1)