From 4baec0019c524c8bc66060073bac492b7bae2244 Mon Sep 17 00:00:00 2001 From: kgajdamo Date: Wed, 31 Aug 2022 11:17:23 +0200 Subject: [PATCH 1/3] add neighbor sampler benchmark --- benchmark/sampler/neighbor.py | 62 +++++++++++++++++++++++++++++++++++ pyg_lib/testing.py | 31 ++++++++++++++---- 2 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 benchmark/sampler/neighbor.py diff --git a/benchmark/sampler/neighbor.py b/benchmark/sampler/neighbor.py new file mode 100644 index 000000000..afc24a130 --- /dev/null +++ b/benchmark/sampler/neighbor.py @@ -0,0 +1,62 @@ +import argparse +import time + +import torch +# uncomment below to enable torch.ops.torch_sparse.neighbor_sample +# import torch_sparse +from tqdm import tqdm + +import pyg_lib +from pyg_lib.testing import withDataset, withSeed + +argparser = argparse.ArgumentParser('Neighbor Sampler benchmark') +argparser.add_argument('--batch-sizes', nargs='+', + default=[512, 1024, 2048, 4096, 8192], type=int) +argparser.add_argument('--num_neighbors', default=[[-1], [15, 10, 5], + [20, 15, 10]], type=int) +argparser.add_argument('--replace', default=False, type=bool) +argparser.add_argument('--directed', default=True, type=bool) + +args = argparser.parse_args() + + +@withSeed +@withDataset('DIMACS10', 'citationCiteseer', True) +def test_neighbor(dataset, **kwargs): + (rowptr, col, colptr, row), num_nodes = dataset, dataset[0].size(0) - 1 + + for num_neighbors in args.num_neighbors: + for batch_size in args.batch_sizes: + # pyg-lib neighbor sampler + start = time.perf_counter() + for node in tqdm(range(num_nodes - batch_size)): + seed = torch.arange(node, node + batch_size) + pyg_lib.sampler.neighbor_sample(rowptr, col, seed, + num_neighbors, args.replace, + args.directed, disjoint=False, + return_edge_id=True) + stop = time.perf_counter() + print('-------------------------') + print('pyg-lib neighbor sample') + print(f'Batch size={batch_size}, ' + f'Num_neighbors={num_neighbors}, ' + f'Inference time={stop-start:.3f} seconds\n') + + # pytorch-sparse neighbor sampler + start = time.perf_counter() + for node in tqdm(range(num_nodes - batch_size)): + seed = torch.arange(node, node + batch_size) + torch.ops.torch_sparse.neighbor_sample(colptr, row, seed, + num_neighbors, + args.replace, + args.directed) + stop = time.perf_counter() + print('-------------------------') + print('pytorch_sparse neighbor sample') + print(f'Batch size={batch_size}, ' + f'Num_neighbors={num_neighbors}, ' + f'Inference time={stop-start:.3f} seconds\n') + + +if __name__ == '__main__': + test_neighbor() diff --git a/pyg_lib/testing.py b/pyg_lib/testing.py index 6a4daef35..de9918651 100644 --- a/pyg_lib/testing.py +++ b/pyg_lib/testing.py @@ -27,7 +27,8 @@ def wrapper(*args, **kwargs): return wrapper -def withDataset(group: str, name: str) -> Callable: +def withDataset(group: str, name: str, + return_csc: Optional[bool] = False) -> Callable: def decorator(func: Callable) -> Callable: def wrapper(*args, **kwargs): dataset = get_sparse_matrix( @@ -35,6 +36,7 @@ def wrapper(*args, **kwargs): name, dtype=kwargs.get('dtype', torch.long), device=kwargs.get('device', None), + return_csc=return_csc, ) func(*args, dataset=dataset, **kwargs) @@ -52,9 +54,12 @@ def get_sparse_matrix( name: str, dtype: torch.dtype = torch.long, device: Optional[torch.device] = None, -) -> Tuple[Tensor, Tensor]: + return_csc: Optional[bool] = False, +) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]: r"""Returns a sparse matrix :obj:`(rowptr, col)` from the `Suite Sparse Matrix Collection `_. + In addition, may return a sparse matrix in CSC format, + then output will be :obj:`(rowptr, col, colptr, row)`. Args: group (string): The group of the sparse matrix. @@ -63,10 +68,14 @@ def get_sparse_matrix( tensors. (default: :obj:`torch.long`) device (torch.device, optional): the desired device of returned tensors. (default: :obj:`None`) + return_csc (bool, optional): If set to :obj:`True`, will additionaly + return a sparse matrix in CSC format. (default: :obj:`False`) Returns: - (torch.Tensor, torch.Tensor): Compressed source node indices and target - node indices of the sparse matrix. + (torch.Tensor, torch.Tensor, Optional[torch.Tensor], + Optional[torch.Tensor]): Compressed source node indices and target node + indices of the sparse matrix. In addition, may return a sparse matrix + in CSC format. """ path = osp.join(get_home_dir(), f'{name}.mat') if not osp.exists(path): @@ -81,10 +90,18 @@ def get_sparse_matrix( print(' Done!') from scipy.io import loadmat - mat = loadmat(path)['Problem'][0][0][2].tocsr() + csr_mat = loadmat(path)['Problem'][0][0][2].tocsr() - rowptr = torch.from_numpy(mat.indptr).to(device, dtype) - col = torch.from_numpy(mat.indices).to(device, dtype) + rowptr = torch.from_numpy(csr_mat.indptr).to(device, dtype) + col = torch.from_numpy(csr_mat.indices).to(device, dtype) + + if return_csc: + csc_mat = loadmat(path)['Problem'][0][0][2].tocsc() + + colptr = torch.from_numpy(csc_mat.indptr).to(device, dtype) + row = torch.from_numpy(csc_mat.indices).to(device, dtype) + + return rowptr, col, colptr, row return rowptr, col From 4ec08ebcfd04be6edbf6ff803eb5b25a40bf7640 Mon Sep 17 00:00:00 2001 From: kgajdamo Date: Thu, 1 Sep 2022 11:42:50 +0200 Subject: [PATCH 2/3] apply code review comments --- benchmark/sampler/neighbor.py | 34 +++++++++++++++++++++------------- pyg_lib/testing.py | 31 +++++++------------------------ 2 files changed, 28 insertions(+), 37 deletions(-) diff --git a/benchmark/sampler/neighbor.py b/benchmark/sampler/neighbor.py index afc24a130..dffb5664e 100644 --- a/benchmark/sampler/neighbor.py +++ b/benchmark/sampler/neighbor.py @@ -2,8 +2,12 @@ import time import torch -# uncomment below to enable torch.ops.torch_sparse.neighbor_sample -# import torch_sparse + +try: + import torch_sparse # noqa + baseline_neighbor_sample = torch.ops.torch_sparse.neighbor_sample +except ImportError: + baseline_neighbor_sample = None from tqdm import tqdm import pyg_lib @@ -14,23 +18,25 @@ default=[512, 1024, 2048, 4096, 8192], type=int) argparser.add_argument('--num_neighbors', default=[[-1], [15, 10, 5], [20, 15, 10]], type=int) -argparser.add_argument('--replace', default=False, type=bool) -argparser.add_argument('--directed', default=True, type=bool) +argparser.add_argument('--replace', action='store_true') +argparser.add_argument('--directed', action='store_true') args = argparser.parse_args() @withSeed -@withDataset('DIMACS10', 'citationCiteseer', True) +@withDataset('DIMACS10', 'citationCiteseer') def test_neighbor(dataset, **kwargs): - (rowptr, col, colptr, row), num_nodes = dataset, dataset[0].size(0) - 1 + (rowptr, col), num_nodes = dataset, dataset[0].size(0) - 1 for num_neighbors in args.num_neighbors: for batch_size in args.batch_sizes: # pyg-lib neighbor sampler start = time.perf_counter() - for node in tqdm(range(num_nodes - batch_size)): - seed = torch.arange(node, node + batch_size) + for node in tqdm(range(0, num_nodes, batch_size)): + last_seed_node = node + batch_size \ + if node + batch_size < num_nodes else num_nodes + seed = torch.arange(node, last_seed_node) pyg_lib.sampler.neighbor_sample(rowptr, col, seed, num_neighbors, args.replace, args.directed, disjoint=False, @@ -40,13 +46,15 @@ def test_neighbor(dataset, **kwargs): print('pyg-lib neighbor sample') print(f'Batch size={batch_size}, ' f'Num_neighbors={num_neighbors}, ' - f'Inference time={stop-start:.3f} seconds\n') + f'Time={stop-start:.3f} seconds\n') # pytorch-sparse neighbor sampler start = time.perf_counter() - for node in tqdm(range(num_nodes - batch_size)): - seed = torch.arange(node, node + batch_size) - torch.ops.torch_sparse.neighbor_sample(colptr, row, seed, + for node in tqdm(range(0, num_nodes, batch_size)): + last_seed_node = node + batch_size \ + if node + batch_size < num_nodes else num_nodes + seed = torch.arange(node, last_seed_node) + torch.ops.torch_sparse.neighbor_sample(rowptr, col, seed, num_neighbors, args.replace, args.directed) @@ -55,7 +63,7 @@ def test_neighbor(dataset, **kwargs): print('pytorch_sparse neighbor sample') print(f'Batch size={batch_size}, ' f'Num_neighbors={num_neighbors}, ' - f'Inference time={stop-start:.3f} seconds\n') + f'Time={stop-start:.3f} seconds\n') if __name__ == '__main__': diff --git a/pyg_lib/testing.py b/pyg_lib/testing.py index de9918651..6a4daef35 100644 --- a/pyg_lib/testing.py +++ b/pyg_lib/testing.py @@ -27,8 +27,7 @@ def wrapper(*args, **kwargs): return wrapper -def withDataset(group: str, name: str, - return_csc: Optional[bool] = False) -> Callable: +def withDataset(group: str, name: str) -> Callable: def decorator(func: Callable) -> Callable: def wrapper(*args, **kwargs): dataset = get_sparse_matrix( @@ -36,7 +35,6 @@ def wrapper(*args, **kwargs): name, dtype=kwargs.get('dtype', torch.long), device=kwargs.get('device', None), - return_csc=return_csc, ) func(*args, dataset=dataset, **kwargs) @@ -54,12 +52,9 @@ def get_sparse_matrix( name: str, dtype: torch.dtype = torch.long, device: Optional[torch.device] = None, - return_csc: Optional[bool] = False, -) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]: +) -> Tuple[Tensor, Tensor]: r"""Returns a sparse matrix :obj:`(rowptr, col)` from the `Suite Sparse Matrix Collection `_. - In addition, may return a sparse matrix in CSC format, - then output will be :obj:`(rowptr, col, colptr, row)`. Args: group (string): The group of the sparse matrix. @@ -68,14 +63,10 @@ def get_sparse_matrix( tensors. (default: :obj:`torch.long`) device (torch.device, optional): the desired device of returned tensors. (default: :obj:`None`) - return_csc (bool, optional): If set to :obj:`True`, will additionaly - return a sparse matrix in CSC format. (default: :obj:`False`) Returns: - (torch.Tensor, torch.Tensor, Optional[torch.Tensor], - Optional[torch.Tensor]): Compressed source node indices and target node - indices of the sparse matrix. In addition, may return a sparse matrix - in CSC format. + (torch.Tensor, torch.Tensor): Compressed source node indices and target + node indices of the sparse matrix. """ path = osp.join(get_home_dir(), f'{name}.mat') if not osp.exists(path): @@ -90,18 +81,10 @@ def get_sparse_matrix( print(' Done!') from scipy.io import loadmat - csr_mat = loadmat(path)['Problem'][0][0][2].tocsr() + mat = loadmat(path)['Problem'][0][0][2].tocsr() - rowptr = torch.from_numpy(csr_mat.indptr).to(device, dtype) - col = torch.from_numpy(csr_mat.indices).to(device, dtype) - - if return_csc: - csc_mat = loadmat(path)['Problem'][0][0][2].tocsc() - - colptr = torch.from_numpy(csc_mat.indptr).to(device, dtype) - row = torch.from_numpy(csc_mat.indices).to(device, dtype) - - return rowptr, col, colptr, row + rowptr = torch.from_numpy(mat.indptr).to(device, dtype) + col = torch.from_numpy(mat.indices).to(device, dtype) return rowptr, col From 0236587863e0447fb80985af53954ebd60bf0919 Mon Sep 17 00:00:00 2001 From: kgajdamo Date: Thu, 1 Sep 2022 12:02:29 +0200 Subject: [PATCH 3/3] update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2da547e9..a5bfd3310 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `grouped_matmul` and `segment_matmul` CUDA implementations via `cutlass` ([#51](https://github.com/pyg-team/pyg-lib/pull/51), [#56](https://github.com/pyg-team/pyg-lib/pull/56), [#61](https://github.com/pyg-team/pyg-lib/pull/61), [#64](https://github.com/pyg-team/pyg-lib/pull/64), [#69](https://github.com/pyg-team/pyg-lib/pull/69)) - Added `pyg::sampler::neighbor_sample` implementation ([#54](https://github.com/pyg-team/pyg-lib/pull/54), [#76](https://github.com/pyg-team/pyg-lib/pull/76), [#77](https://github.com/pyg-team/pyg-lib/pull/77), [#78](https://github.com/pyg-team/pyg-lib/pull/78)) - Added `pyg::sampler::Mapper` utility for mapping global to local node indices ([#45](https://github.com/pyg-team/pyg-lib/pull/45))) -- Added benchmark script ([#45](https://github.com/pyg-team/pyg-lib/pull/45)) +- Added benchmark script ([#45](https://github.com/pyg-team/pyg-lib/pull/45), [#79](https://github.com/pyg-team/pyg-lib/pull/79)) - Added download script for benchmark data ([#44](https://github.com/pyg-team/pyg-lib/pull/44)) - Added `biased sampling` utils ([#38](https://github.com/pyg-team/pyg-lib/pull/38)) - Added `CHANGELOG.md` ([#39](https://github.com/pyg-team/pyg-lib/pull/39))