diff --git a/CHANGELOG.md b/CHANGELOG.md index d928ed41c..34fc020e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `grouped_matmul` and `segment_matmul` CUDA implementations via `cutlass` ([#51](https://github.com/pyg-team/pyg-lib/pull/51), [#56](https://github.com/pyg-team/pyg-lib/pull/56), [#61](https://github.com/pyg-team/pyg-lib/pull/61), [#64](https://github.com/pyg-team/pyg-lib/pull/64), [#69](https://github.com/pyg-team/pyg-lib/pull/69)) - Added `pyg::sampler::neighbor_sample` implementation ([#54](https://github.com/pyg-team/pyg-lib/pull/54), [#76](https://github.com/pyg-team/pyg-lib/pull/76), [#77](https://github.com/pyg-team/pyg-lib/pull/77), [#78](https://github.com/pyg-team/pyg-lib/pull/78), [#80](https://github.com/pyg-team/pyg-lib/pull/80), [#81](https://github.com/pyg-team/pyg-lib/pull/81), [#85](https://github.com/pyg-team/pyg-lib/pull/85), [#86](https://github.com/pyg-team/pyg-lib/pull/86), [#87](https://github.com/pyg-team/pyg-lib/pull/87), [#89](https://github.com/pyg-team/pyg-lib/pull/89)) - Added `pyg::sampler::Mapper` utility for mapping global to local node indices ([#45](https://github.com/pyg-team/pyg-lib/pull/45), [#83](https://github.com/pyg-team/pyg-lib/pull/83)) -- Added benchmark script ([#45](https://github.com/pyg-team/pyg-lib/pull/45), [#79](https://github.com/pyg-team/pyg-lib/pull/79), [#82](https://github.com/pyg-team/pyg-lib/pull/82), [#91](https://github.com/pyg-team/pyg-lib/pull/91)) +- Added benchmark script ([#45](https://github.com/pyg-team/pyg-lib/pull/45), [#79](https://github.com/pyg-team/pyg-lib/pull/79), [#82](https://github.com/pyg-team/pyg-lib/pull/82), [#91](https://github.com/pyg-team/pyg-lib/pull/91), [#93](https://github.com/pyg-team/pyg-lib/pull/93)) - Added download script for benchmark data ([#44](https://github.com/pyg-team/pyg-lib/pull/44)) - Added `biased sampling` utils 
([#38](https://github.com/pyg-team/pyg-lib/pull/38)) - Added `CHANGELOG.md` ([#39](https://github.com/pyg-team/pyg-lib/pull/39)) diff --git a/benchmark/sampler/neighbor.py b/benchmark/sampler/neighbor.py index 01eef7a7f..2e575125e 100644 --- a/benchmark/sampler/neighbor.py +++ b/benchmark/sampler/neighbor.py @@ -2,18 +2,14 @@ import ast import time +import dgl import torch +import torch_sparse # noqa from tqdm import tqdm import pyg_lib from pyg_lib.testing import withDataset, withSeed -try: - import torch_sparse # noqa - baseline_neighbor_sample = torch.ops.torch_sparse.neighbor_sample -except ImportError: - baseline_neighbor_sample = None - argparser = argparse.ArgumentParser() argparser.add_argument('--batch-sizes', nargs='+', type=int, default=[ 512, @@ -37,6 +33,7 @@ @withDataset('DIMACS10', 'citationCiteseer') def test_neighbor(dataset, **kwargs): (rowptr, col), num_nodes = dataset, dataset[0].size(0) - 1 + dgl_graph = dgl.graph(('csc', (rowptr, col, torch.arange(col.size(0))))) if args.shuffle: node_perm = torch.randperm(num_nodes) @@ -45,8 +42,7 @@ def test_neighbor(dataset, **kwargs): for num_neighbors in args.num_neighbors: for batch_size in args.batch_sizes: - print(f'pyg-lib (batch_size={batch_size}, ' - f'num_neighbors={num_neighbors}):') + print(f'batch_size={batch_size}, num_neighbors={num_neighbors}):') t = time.perf_counter() for seed in tqdm(node_perm.split(batch_size)): pyg_lib.sampler.neighbor_sample( @@ -59,11 +55,8 @@ def test_neighbor(dataset, **kwargs): disjoint=False, return_edge_id=True, ) - print(f'time={time.perf_counter()-t:.3f} seconds') - print('-------------------------') + pyg_lib_duration = time.perf_counter() - t - print(f'torch-sparse (batch_size={batch_size}, ' - f'num_neighbors={num_neighbors}):') t = time.perf_counter() for seed in tqdm(node_perm.split(batch_size)): torch.ops.torch_sparse.neighbor_sample( @@ -74,8 +67,27 @@ def test_neighbor(dataset, **kwargs): args.replace, args.directed, ) - 
print(f'time={time.perf_counter()-t:.3f} seconds') - print('-------------------------') + torch_sparse_duration = time.perf_counter() - t + + dgl_sampler = dgl.dataloading.NeighborSampler( + num_neighbors, + replace=args.replace, + ) + dgl_loader = dgl.dataloading.DataLoader( + dgl_graph, + node_perm, + dgl_sampler, + batch_size=batch_size, + ) + t = time.perf_counter() + for _ in tqdm(dgl_loader): + pass + dgl_duration = time.perf_counter() - t + + print(f' pyg-lib={pyg_lib_duration:.3f} seconds') + print(f'torch-sparse={torch_sparse_duration:.3f} seconds') + print(f' dgl={dgl_duration:.3f} seconds') + print() if __name__ == '__main__':