From f107bf0eff2b99c1758113a72ea662cb326db0da Mon Sep 17 00:00:00 2001 From: Dmitry Kolesnikov Date: Sat, 16 Mar 2024 18:22:00 +0200 Subject: [PATCH 1/2] Fix NaN for Cosine similarity on Zero Vector --- internal/noasm/cosine.go | 4 ++++ internal/pure/cosine.go | 4 ++++ vector_test.go | 45 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/internal/noasm/cosine.go b/internal/noasm/cosine.go index 31f5f53..411f2bd 100644 --- a/internal/noasm/cosine.go +++ b/internal/noasm/cosine.go @@ -22,6 +22,7 @@ func CosineF32(a, b []float32) (d float32) { panic("vector length must be multiple of 4") } + e := float32(1e-8) ab := float32(0.0) aa := float32(0.0) bb := float32(0.0) @@ -53,6 +54,9 @@ func CosineF32(a, b []float32) (d float32) { } s := math32.Sqrt(aa) * math32.Sqrt(bb) + if s < e { + s = e + } d = (1 - ab/s) / 2 diff --git a/internal/pure/cosine.go b/internal/pure/cosine.go index 4150ab4..dbd5547 100644 --- a/internal/pure/cosine.go +++ b/internal/pure/cosine.go @@ -20,6 +20,7 @@ func CosineF32(a, b []float32) (d float32) { panic("vectors must have equal lengths") } + e := float32(1e-8) ab := float32(0.0) aa := float32(0.0) bb := float32(0.0) @@ -31,6 +32,9 @@ func CosineF32(a, b []float32) (d float32) { } s := math32.Sqrt(aa) * math32.Sqrt(bb) + if s < e { + s = e + } d = (1 - ab/s) / 2 diff --git a/vector_test.go b/vector_test.go index 9dac45d..c54dae5 100644 --- a/vector_test.go +++ b/vector_test.go @@ -9,6 +9,7 @@ package vector_test import ( + "math" "math/rand" "testing" @@ -45,6 +46,14 @@ func init() { n2 = Node{ID: 2, Vector: b} } +func zeroF32() vector.F32 { + v := make(vector.F32, n) + for i := 0; i < n; i++ { + v[i] = 0.0 + } + return v +} + func randF32() vector.F32 { v := make(vector.F32, n) for i := 0; i < n; i++ { @@ -133,6 +142,34 @@ func TestNoAsmCosineF32(t *testing.T) { } } +func TestPureCosineZeroF32(t *testing.T) { + sut := pure.Cosine(0) + + for i := 0; i < n*100; i++ { + a := zeroF32() + b := randF32() + + d := sut.Distance(a, b) + if math.IsNaN(float64(d)) { + t.Errorf("failed distance") + } + } +} + +func TestNoAsmCosineZeroF32(t *testing.T) { + sut := noasm.Cosine(0) + + for i := 0; i < n*100; i++ { + a := zeroF32() + b := randF32() + + d := sut.Distance(a, b) + if math.IsNaN(float64(d)) { + t.Errorf("failed distance") + } + } +} + // // Benchmark // @@ -153,6 +190,14 @@ func BenchmarkNoAsmEuclideanF32(t *testing.B) { } } +func BenchmarkNoAsmEuclideanUn(t *testing.B) { + euc := noasm.EuclideanU(0) + + for i := t.N; i > 0; i-- { + d = euc.Distance(a, b) + } +} + func BenchmarkSIMDEuclideanF32(t *testing.B) { euc := simd.Euclidean{} From 761646306f97e9bd93e1bc70241d5dc8a7c45aae Mon Sep 17 00:00:00 2001 From: Dmitry Kolesnikov Date: Sat, 16 Mar 2024 19:00:29 +0200 Subject: [PATCH 2/2] clean up code --- vector_test.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vector_test.go b/vector_test.go index c54dae5..8c14001 100644 --- a/vector_test.go +++ b/vector_test.go @@ -190,14 +190,6 @@ func BenchmarkNoAsmEuclideanF32(t *testing.B) { } } -func BenchmarkNoAsmEuclideanUn(t *testing.B) { - euc := noasm.EuclideanU(0) - - for i := t.N; i > 0; i-- { - d = euc.Distance(a, b) - } -} - func BenchmarkSIMDEuclideanF32(t *testing.B) { euc := simd.Euclidean{}