Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "diskannpy"
version = "0.7.0rc1"
version = "0.7.0rc2"

description = "DiskANN Python extension module"
readme = "python/README.md"
Expand Down
6 changes: 3 additions & 3 deletions python/src/_static_disk_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def __init__(
does not exist, you are required to provide it.
- **index_prefix**: The prefix of the index files. Defaults to "ann".
"""
index_prefix = _valid_index_prefix(index_directory, index_prefix)
index_prefix_path = _valid_index_prefix(index_directory, index_prefix)
vector_dtype, metric, _, _ = _ensure_index_metadata(
index_prefix,
index_prefix_path,
vector_dtype,
distance_metric,
1, # it doesn't matter because we don't need it in this context anyway
Expand All @@ -101,7 +101,7 @@ def __init__(
_index = _native_dap.StaticDiskFloatIndex
self._index = _index(
distance_metric=dap_metric,
index_path_prefix=index_prefix,
index_path_prefix=index_prefix_path,
num_threads=num_threads,
num_nodes_to_cache=num_nodes_to_cache,
cache_mechanism=cache_mechanism,
Expand Down
10 changes: 5 additions & 5 deletions python/src/_static_memory_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,22 @@ def __init__(
does not exist, you are required to provide it.
- **enable_filters**: Indexes built with filters can also be used for filtered search.
"""
index_prefix = _valid_index_prefix(index_directory, index_prefix)
index_prefix_path = _valid_index_prefix(index_directory, index_prefix)
self._labels_map = {}
self._labels_metadata = {}
if enable_filters:
try:
with open(index_prefix + "_labels_map.txt", "r") as labels_map_if:
with open(f"{index_prefix_path}_labels_map.txt", "r") as labels_map_if:
for line in labels_map_if:
(key, val) = line.split("\t")
self._labels_map[key] = int(val)
with open(f"{index_prefix}_label_metadata.json", "r") as labels_metadata_if:
with open(f"{index_prefix_path}_label_metadata.json", "r") as labels_metadata_if:
self._labels_metadata = json.load(labels_metadata_if)
except: # noqa: E722
# exceptions are basically presumed to be either file not found or file not formatted correctly
raise RuntimeException("Filter labels file was unable to be processed.")
vector_dtype, metric, num_points, dims = _ensure_index_metadata(
index_prefix,
index_prefix_path,
vector_dtype,
distance_metric,
1, # it doesn't matter because we don't need it in this context anyway
Expand All @@ -119,7 +119,7 @@ def __init__(
distance_metric=dap_metric,
num_points=num_points,
dimensions=dims,
index_path=os.path.join(index_directory, index_prefix),
index_path=index_prefix_path,
num_threads=num_threads,
initial_search_complexity=initial_search_complexity,
)
Expand Down
32 changes: 31 additions & 1 deletion python/tests/test_static_disk_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import shutil
import unittest
from pathlib import Path
from tempfile import mkdtemp

import diskannpy as dap
Expand Down Expand Up @@ -165,4 +166,33 @@ def test_zero_threads(self):
k = 5
ids, dists = index.batch_search(
query_vectors, k_neighbors=k, complexity=5, beam_width=2, num_threads=0
)
)

def test_relative_paths(self):
# Issue 483 and 491 both fixed errors that were somehow slipping past our unit tests
# os.path.join() acts as a semi-merge if you give it two paths that look absolute.
# since our unit tests are using absolute paths via tempfile.mkdtemp(), the double os.path.join() was never
# caught by our tests, but was very easy to trip when using relative paths
rel_dir = "tmp"
Path(rel_dir).mkdir(exist_ok=True)
try:
tiny_index_vecs = random_vectors(20, 10, dtype=np.float32, seed=12345)
dap.build_disk_index(
data=tiny_index_vecs,
distance_metric="l2",
index_directory=rel_dir,
graph_degree=16,
complexity=32,
search_memory_maximum=0.00003,
build_memory_maximum=1,
num_threads=0,
pq_disk_bytes=0,
)
index = dap.StaticDiskIndex(
index_directory=rel_dir,
num_threads=16,
num_nodes_to_cache=10,
)

finally:
shutil.rmtree(rel_dir, ignore_errors=True)
28 changes: 28 additions & 0 deletions python/tests/test_static_memory_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import shutil
import unittest

from pathlib import Path
from tempfile import mkdtemp

import diskannpy as dap
Expand Down Expand Up @@ -191,6 +192,33 @@ def test_zero_threads(self):
k = 5
ids, dists = index.batch_search(query_vectors, k_neighbors=k, complexity=5, num_threads=0)

def test_relative_paths(self):
# Issue 483 and 491 both fixed errors that were somehow slipping past our unit tests
# os.path.join() acts as a semi-merge if you give it two paths that look absolute.
# since our unit tests are using absolute paths via tempfile.mkdtemp(), the double os.path.join() was never
# caught by our tests, but was very easy to trip when using relative paths
rel_dir = "tmp"
Path(rel_dir).mkdir(exist_ok=True)
try:
tiny_index_vecs = random_vectors(20, 10, dtype=np.float32, seed=12345)
dap.build_memory_index(
data=tiny_index_vecs,
distance_metric="l2",
index_directory=rel_dir,
graph_degree=16,
complexity=32,
num_threads=0,
)
index = dap.StaticMemoryIndex(
index_directory=rel_dir,
num_threads=0,
initial_search_complexity=32,
)

finally:
shutil.rmtree(rel_dir, ignore_errors=True)



class TestFilteredStaticMemoryIndex(unittest.TestCase):
def test_simple_scenario(self):
Expand Down