Commit 8dfd51d

mdouze authored and facebook-github-bot committed
Moved pytorch interop code to contrib
Summary: The pytorch interop code was in a test until now. However, it is better if people can rely on it to be updated when the API is updated. Therefore, we move it into contrib. Also added a README.md

Reviewed By: wickedfoo

Differential Revision: D23392962

fbshipit-source-id: 9b7c0e388a7ea3c0b73dc0018322138f49191673
1 parent f849680 commit 8dfd51d

File tree

3 files changed: +148 -96 lines changed

contrib/README.md

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
# The contrib modules

The contrib directory contains helper modules for Faiss for various tasks.

## Code structure

The contrib directory gets compiled into the module faiss.contrib.
Note that although some of the modules may depend on additional modules (e.g. GPU Faiss, pytorch, hdf5), they are not necessarily compiled in, to avoid adding dependencies. It is the user's responsibility to provide them.

In contrib, we are progressively dropping python2 support.

## List of contrib modules

### rpc.py

A very simple Remote Procedure Call library, where function parameters and results are pickled, for use with client_server.py.

### client_server.py

The server handles requests to a Faiss index. The client calls the remote index.
This is mainly intended to shard datasets over several machines, see [Distributed index](https://github.com/facebookresearch/faiss/wiki/Indexes-that-do-not-fit-in-RAM#distributed-index).
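For illustration, a hypothetical sketch of a sharded setup. The entry points `run_index_server` and `ClientIndex` and their signatures are assumptions, not confirmed by this commit:

```python
# Hypothetical sketch -- run_index_server / ClientIndex are assumed names.
import numpy as np
import faiss
from faiss.contrib.client_server import run_index_server, ClientIndex

SERVER = False  # set to True on the machines that serve a shard

if SERVER:
    # serve one shard of the dataset; this call blocks
    run_index_server(faiss.read_index('shard0.index'), 12010)
else:
    # on the client, the remote shards behave like one big local index
    client = ClientIndex([('server0', 12010), ('server1', 12010)])
    xq = np.random.rand(100, 64).astype('float32')
    D, I = client.search(xq, 10)
```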
### ondisk.py

Encloses the main logic to merge indexes into an on-disk index.
See [On-disk storage](https://github.com/facebookresearch/faiss/wiki/Indexes-that-do-not-fit-in-RAM#on-disk-storage)
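As a sketch of the merge step; the `merge_ondisk(trained_index, shard_fnames, ivfdata_fname)` name and signature follow the on-disk demo and should be treated as assumptions:

```python
# Sketch, assuming merge_ondisk(trained_index, shard_fnames, ivfdata_fname)
import faiss
from faiss.contrib.ondisk import merge_ondisk

index = faiss.read_index('trained.index')          # trained, empty IVF index
block_fnames = ['block_0.index', 'block_1.index']  # shards with vectors added
merge_ondisk(index, block_fnames, 'merged.ivfdata')
faiss.write_index(index, 'populated.index')        # inverted lists stay on disk
```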
### exhaustive_search.py

Computes the ground-truth search results for a dataset that possibly does not fit in RAM. Uses GPU if available.
Tested in `tests/test_contrib.TestComputeGT`
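A minimal sketch, assuming the module exposes a `knn_ground_truth(xq, db_iterator, k)` helper (name and signature are assumptions):

```python
import numpy as np
from faiss.contrib.exhaustive_search import knn_ground_truth  # assumed name

xq = np.random.rand(100, 32).astype('float32')   # queries

def block_iterator():
    # yield the database in blocks so it never has to fit in RAM
    for _ in range(10):
        yield np.random.rand(10000, 32).astype('float32')

D, I = knn_ground_truth(xq, block_iterator(), 10)
```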
### gpu.py

(requires GPU Faiss)

Interoperability functions for pytorch and Faiss: pass GPU data without copying back to CPU.
Tested in `gpu/test/test_pytorch_faiss`
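A minimal usage sketch of the interop path added in this commit (GPU Faiss and a CUDA-enabled pytorch build assumed):

```python
import torch
import faiss
from faiss.contrib.pytorch_tensors import search_raw_array_pytorch

res = faiss.StandardGpuResources()
xb = torch.rand(10000, 64, device='cuda')   # database vectors stay on GPU
xq = torch.rand(100, 64, device='cuda')     # query vectors stay on GPU
# brute-force k-nearest-neighbor search without copying back to CPU
D, I = search_raw_array_pytorch(res, xb, xq, 10)
```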
### datasets.py

(may require h5py)

Definition of how to access data for some standard datasets.

contrib/pytorch_tensors.py

Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
import faiss
import torch

def swig_ptr_from_FloatTensor(x):
    """ gets a Faiss SWIG pointer from a pytorch tensor (on CPU or GPU) """
    assert x.is_contiguous()
    assert x.dtype == torch.float32
    # the storage offset is in elements; float32 is 4 bytes wide
    return faiss.cast_integer_to_float_ptr(
        x.storage().data_ptr() + x.storage_offset() * 4)

def swig_ptr_from_LongTensor(x):
    """ gets a Faiss SWIG pointer from a pytorch tensor (on CPU or GPU) """
    assert x.is_contiguous()
    assert x.dtype == torch.int64, 'dtype=%s' % x.dtype
    # the storage offset is in elements; int64 is 8 bytes wide
    return faiss.cast_integer_to_long_ptr(
        x.storage().data_ptr() + x.storage_offset() * 8)


def search_index_pytorch(index, x, k, D=None, I=None):
    """call the search function of an index with pytorch tensor I/O (CPU
    and GPU supported)"""
    assert x.is_contiguous()
    n, d = x.size()
    assert d == index.d

    if D is None:
        D = torch.empty((n, k), dtype=torch.float32, device=x.device)
    else:
        assert D.size() == (n, k)

    if I is None:
        I = torch.empty((n, k), dtype=torch.int64, device=x.device)
    else:
        assert I.size() == (n, k)
    torch.cuda.synchronize()
    xptr = swig_ptr_from_FloatTensor(x)
    Iptr = swig_ptr_from_LongTensor(I)
    Dptr = swig_ptr_from_FloatTensor(D)
    index.search_c(n, xptr,
                   k, Dptr, Iptr)
    torch.cuda.synchronize()
    return D, I


def search_raw_array_pytorch(res, xb, xq, k, D=None, I=None,
                             metric=faiss.METRIC_L2):
    """search xq in xb, without building an index"""
    assert xb.device == xq.device

    nq, d = xq.size()
    if xq.is_contiguous():
        xq_row_major = True
    elif xq.t().is_contiguous():
        xq = xq.t()    # I initially wrote xq:t(), Lua is still haunting me :-)
        xq_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')

    xq_ptr = swig_ptr_from_FloatTensor(xq)

    nb, d2 = xb.size()
    assert d2 == d
    if xb.is_contiguous():
        xb_row_major = True
    elif xb.t().is_contiguous():
        xb = xb.t()
        xb_row_major = False
    else:
        raise TypeError('matrix should be row or column-major')
    xb_ptr = swig_ptr_from_FloatTensor(xb)

    if D is None:
        D = torch.empty(nq, k, device=xb.device, dtype=torch.float32)
    else:
        assert D.shape == (nq, k)
        assert D.device == xb.device

    if I is None:
        I = torch.empty(nq, k, device=xb.device, dtype=torch.int64)
    else:
        assert I.shape == (nq, k)
        assert I.device == xb.device

    D_ptr = swig_ptr_from_FloatTensor(D)
    I_ptr = swig_ptr_from_LongTensor(I)

    # fill in the GpuDistanceParams struct that drives the brute-force kNN
    args = faiss.GpuDistanceParams()
    args.metric = metric
    args.k = k
    args.dims = d
    args.vectors = xb_ptr
    args.vectorsRowMajor = xb_row_major
    args.numVectors = nb
    args.queries = xq_ptr
    args.queriesRowMajor = xq_row_major
    args.numQueries = nq
    args.outDistances = D_ptr
    args.outIndices = I_ptr
    faiss.bfKnn(res, args)

    return D, I
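For reference, a minimal usage sketch of `search_index_pytorch` with illustrative data; a CUDA-enabled pytorch build is assumed, since the helper calls `torch.cuda.synchronize()`:

```python
import torch
import faiss
from faiss.contrib.pytorch_tensors import search_index_pytorch

d, nb, nq, k = 64, 10000, 100, 10
index = faiss.IndexFlatL2(d)
index.add(torch.rand(nb, d).numpy())       # populate the index via numpy
xq = torch.rand(nq, d)                     # contiguous float32 queries
D, I = search_index_pytorch(index, xq, k)  # results come back as torch tensors
```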

faiss/gpu/test/test_pytorch_faiss.py

Lines changed: 1 addition & 96 deletions
@@ -10,102 +10,7 @@
import faiss
import torch

(96 deleted lines: the swig_ptr_from_FloatTensor, swig_ptr_from_LongTensor,
search_index_pytorch and search_raw_array_pytorch helpers, moved verbatim to
contrib/pytorch_tensors.py above)

from faiss.contrib.pytorch_tensors import search_index_pytorch, search_raw_array_pytorch

def to_column_major(x):
    if hasattr(torch, 'contiguous_format'):
