Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2506f4f
Added backend class for SparseEncoder and also SentenceTransformersSp…
Ryzhtus Jul 3, 2025
abd7ea5
Added SentenceTransformersSparseDocumentEmbedder
Ryzhtus Jul 3, 2025
82b87c2
Created a separate _SentenceTransformersSparseEmbeddingBackendFactory…
Ryzhtus Aug 3, 2025
73eaa97
Remove unused parameter
Ryzhtus Aug 3, 2025
74c222e
Wrapped output into SparseEmbedding dataclass + fix tests
Ryzhtus Aug 3, 2025
4ddde78
Return correct SparseEmbedding, imports and tests
Ryzhtus Aug 22, 2025
3ed6005
Merge branch 'main' into feat/support_sparse_models_in_sentence_trans…
Ryzhtus Aug 22, 2025
341767b
Merge branch 'main' into st-sparse
anakin87 Aug 28, 2025
71950af
fix fmt
anakin87 Aug 28, 2025
3c08b33
Merge branch 'deepset-ai:main' into feat/support_sparse_models_in_sen…
Ryzhtus Sep 6, 2025
a469c8f
Style changes and fixes
Ryzhtus Sep 6, 2025
be29552
Added a test for embed function
Ryzhtus Sep 14, 2025
f7536f9
Added integration test and fixed some other tests
Ryzhtus Sep 14, 2025
69dfa63
Merge branch 'main' into feat/support_sparse_models_in_sentence_trans…
Ryzhtus Sep 14, 2025
90dd503
Add lint fixes
Ryzhtus Sep 14, 2025
555e897
Merge branch 'feat/support_sparse_models_in_sentence_transformers' of…
Ryzhtus Sep 14, 2025
21313ce
Fixed positional arguments
Ryzhtus Sep 14, 2025
832b76b
Merge branch 'main' into st-sparse
anakin87 Sep 18, 2025
60e2805
fix types, simplify and more
anakin87 Sep 18, 2025
3620d09
fix
anakin87 Sep 18, 2025
d95b0e9
token fixes
anakin87 Sep 19, 2025
527e24b
pydocs, small model in test, cache improvement
anakin87 Sep 19, 2025
06e8d8b
try 3.9 for docs
anakin87 Sep 19, 2025
044652d
better to pin click
anakin87 Sep 19, 2025
cfc7dda
release note
anakin87 Sep 19, 2025
4e7850c
small fix
anakin87 Sep 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Wrapped output into SparseEmbedding dataclass + fix tests
  • Loading branch information
Ryzhtus committed Aug 3, 2025
commit 74c222e8e55efe57cba568545a3031d1fdddd189
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from typing import Any, Dict, List, Literal, Optional

from haystack.dataclasses.sparse_embedding import SparseEmbedding
from haystack.lazy_imports import LazyImport
from haystack.utils.auth import Secret

Expand Down Expand Up @@ -162,6 +163,19 @@ def __init__( # pylint: disable=too-many-positional-arguments
backend=backend,
)

def embed(self, data: List[str], **kwargs) -> List[List[float]]:
embeddings = self.model.encode(data, **kwargs).tolist()
return embeddings
def embed(self, data: List[str], **kwargs) -> List[SparseEmbedding]:
    """Compute sparse embeddings for the given texts.

    :param data: the texts to embed.
    :param kwargs: extra keyword arguments forwarded to the underlying model's ``encode``.
    :returns: one ``SparseEmbedding`` per input text, with parallel ``indices``/``values`` lists.
    """
    encoded = self.model.encode(data, **kwargs)

    # encode() may hand back either a single result object or a list of them
    # (see the isinstance branch it replaces); normalize to a list so both
    # shapes share a single conversion path.
    items = encoded if isinstance(encoded, list) else [encoded]

    return [
        SparseEmbedding(indices=item.indices.tolist(), values=item.values.tolist())
        for item in items
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ def warm_up(self):
tokenizer_kwargs=self.tokenizer_kwargs,
config_kwargs=self.config_kwargs,
backend=self.backend,
sparse=True,
)
if self.tokenizer_kwargs and self.tokenizer_kwargs.get("model_max_length"):
self.embedding_backend.model.max_seq_length = self.tokenizer_kwargs["model_max_length"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def test_from_dict_none_device(self):
assert component.local_files_only is False

@patch(
"haystack.components.embedders.sentence_transformers_text_embedder._SentenceTransformersEmbeddingBackendFactory"
"haystack.components.embedders.sentence_transformers_sparse_text_embedder._SentenceTransformersSparseEmbeddingBackendFactory"
)
def test_warmup(self, mocked_factory):
embedder = SentenceTransformersSparseTextEmbedder(
Expand All @@ -214,7 +214,7 @@ def test_warmup(self, mocked_factory):
)

@patch(
"haystack.components.embedders.sentence_transformers_text_embedder._SentenceTransformersEmbeddingBackendFactory"
"haystack.components.embedders.sentence_transformers_sparse_text_embedder._SentenceTransformersSparseEmbeddingBackendFactory"
)
def test_warmup_doesnt_reload(self, mocked_factory):
embedder = SentenceTransformersSparseTextEmbedder(model="model")
Expand Down Expand Up @@ -248,7 +248,7 @@ def test_run_wrong_input_format(self):
embedder.run(text=list_integers_input)

@patch(
"haystack.components.embedders.sentence_transformers_text_embedder._SentenceTransformersEmbeddingBackendFactory"
"haystack.components.embedders.sentence_transformers_sparse_text_embedder._SentenceTransformersSparseEmbeddingBackendFactory"
)
def test_model_onnx_backend(self, mocked_factory):
onnx_embedder = SentenceTransformersSparseTextEmbedder(
Expand All @@ -275,7 +275,7 @@ def test_model_onnx_backend(self, mocked_factory):
)

@patch(
"haystack.components.embedders.sentence_transformers_text_embedder._SentenceTransformersEmbeddingBackendFactory"
"haystack.components.embedders.sentence_transformers_sparse_text_embedder._SentenceTransformersSparseEmbeddingBackendFactory"
)
def test_model_openvino_backend(self, mocked_factory):
openvino_embedder = SentenceTransformersSparseTextEmbedder(
Expand All @@ -302,7 +302,7 @@ def test_model_openvino_backend(self, mocked_factory):
)

@patch(
"haystack.components.embedders.sentence_transformers_text_embedder._SentenceTransformersEmbeddingBackendFactory"
"haystack.components.embedders.sentence_transformers_sparse_text_embedder._SentenceTransformersSparseEmbeddingBackendFactory"
)
@pytest.mark.parametrize("model_kwargs", [{"torch_dtype": "bfloat16"}, {"torch_dtype": "float16"}])
def test_dtype_on_gpu(self, mocked_factory, model_kwargs):
Expand Down
Loading