Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 00703be

Browse files
authored
fix: index stats reports incorrect partition size (#4847)
fix #4843 Signed-off-by: BubbleCal <[email protected]>
1 parent 8691134 commit 00703be

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed

python/python/tests/test_vector_index.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,30 @@ def test_create_ivf_pq_with_target_partition_size(dataset, tmp_path):
609609
assert ann_ds.stats.index_stats("vector_idx")["indices"][0]["num_partitions"] == 2
610610

611611

612+
def test_index_size_stats(tmp_path: Path):
613+
num_rows = 512
614+
dims = 32
615+
schema = pa.schema([pa.field("a", pa.list_(pa.float32(), dims), False)])
616+
values = pc.random(num_rows * dims).cast("float32")
617+
table = pa.Table.from_pydict(
618+
{"a": pa.FixedSizeListArray.from_arrays(values, dims)}, schema=schema
619+
)
620+
621+
base_dir = tmp_path / "test"
622+
623+
dataset = lance.write_dataset(table, base_dir)
624+
625+
index_name = "vec_idx"
626+
dataset.create_index(
627+
"a", "IVF_PQ", name=index_name, num_partitions=2, num_sub_vectors=1
628+
)
629+
630+
# Regression check for #4843: partition sizes used to be reported as zero; they must be non-zero and sum to num_rows
631+
stats = dataset.stats.index_stats(index_name)
632+
stats = stats["indices"][0]
633+
assert stats["partitions"][0]["size"] + stats["partitions"][1]["size"] == num_rows
634+
635+
612636
def test_ivf_flat_over_binary_vector(tmp_path):
613637
dim = 128
614638
nvec = 1000

rust/lance-index/src/vector/storage.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ impl<Q: Quantization> IvfQuantizationStorage<Q> {
248248
self.reader.num_rows()
249249
}
250250

251+
pub fn partition_size(&self, part_id: usize) -> usize {
252+
self.ivf.partition_size(part_id)
253+
}
254+
251255
pub fn quantizer(&self) -> Result<Quantizer> {
252256
let metadata = self.metadata();
253257
Q::from_metadata(metadata, self.distance_type)

rust/lance/src/index/vector/ivf/v2.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ impl<S: IvfSubIndex + 'static, Q: Quantization + 'static> Index for IVFIndex<S,
376376
fn statistics(&self) -> Result<serde_json::Value> {
377377
let partitions_statistics = (0..self.ivf.num_partitions())
378378
.map(|part_id| IvfIndexPartitionStatistics {
379-
size: self.ivf.partition_size(part_id) as u32,
379+
size: self.storage.partition_size(part_id) as u32,
380380
})
381381
.collect::<Vec<_>>();
382382

0 commit comments

Comments
 (0)