Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2ab5e84

Browse files
wachterjohannes authored and chr-hertel committed
[Store][Platform] Add platform-level reranking and hybrid retriever
1 parent d92c21f commit 2ab5e84

17 files changed

Lines changed: 910 additions & 8 deletions

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ CHANGELOG
55
---
66

77
* Add `RstLoader` and `RstToctreeLoader` for loading RST files and following toctree directives
8+
* Add platform-based `Reranker` for cross-encoder reranking via `PlatformInterface`
9+
* Add `CombinedStore` combining vector and text stores with Reciprocal Rank Fusion (RRF)
10+
* [BC BREAK] Add `?EventDispatcherInterface $eventDispatcher` as 3rd constructor parameter of `Retriever` (before `$logger`)
11+
* Add automatic text content preservation in `Vectorizer` metadata
812

913
0.6
1014
---

composer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,14 @@
4141
"require": {
4242
"php": ">=8.2",
4343
"ext-fileinfo": "*",
44+
"psr/event-dispatcher": "^1.0",
4445
"psr/log": "^3.0",
4546
"symfony/ai-platform": "^0.6",
4647
"symfony/clock": "^7.3|^8.0",
4748
"symfony/http-client": "^7.3|^8.0",
4849
"symfony/polyfill-php83": "^1.32",
4950
"symfony/service-contracts": "^2.5|^3",
51+
"symfony/event-dispatcher-contracts": "^3.0",
5052
"symfony/uid": "^7.3|^8.0"
5153
},
5254
"require-dev": {

src/CombinedStore.php

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store;
13+
14+
use Symfony\AI\Store\Document\VectorDocument;
15+
use Symfony\AI\Store\Exception\UnsupportedQueryTypeException;
16+
use Symfony\AI\Store\Query\HybridQuery;
17+
use Symfony\AI\Store\Query\QueryInterface;
18+
use Symfony\AI\Store\Query\TextQuery;
19+
use Symfony\AI\Store\Query\VectorQuery;
20+
21+
/**
22+
* Combines vector and text stores using Reciprocal Rank Fusion (RRF).
23+
*
24+
* Decomposes HybridQuery into VectorQuery and TextQuery, queries both
25+
* sub-stores independently, and merges results using RRF scoring.
26+
*
27+
* @author Johannes Wachter <[email protected]>
28+
*/
29+
final class CombinedStore implements StoreInterface
{
    /**
     * @param StoreInterface $vectorStore Store that receives VectorQuery instances
     * @param StoreInterface $textStore   Store that receives TextQuery instances; may be the same instance as $vectorStore
     * @param int            $rrfK        Constant added to every 1-based rank in the RRF denominator
     */
    public function __construct(
        private readonly StoreInterface $vectorStore,
        private readonly StoreInterface $textStore,
        private readonly int $rrfK = 60,
    ) {
    }

    /**
     * Adds the documents to the vector store and, when it is a different instance, to the text store as well.
     */
    public function add(VectorDocument|array $documents): void
    {
        $this->vectorStore->add($documents);

        // Avoid writing twice when one store instance serves both roles.
        if ($this->textStore !== $this->vectorStore) {
            $this->textStore->add($documents);
        }
    }

    /**
     * Removes the given ids from the vector store and, when it is a different instance, from the text store as well.
     */
    public function remove(string|array $ids, array $options = []): void
    {
        $this->vectorStore->remove($ids, $options);

        // Avoid removing twice when one store instance serves both roles.
        if ($this->textStore !== $this->vectorStore) {
            $this->textStore->remove($ids, $options);
        }
    }

    /**
     * Routes the query to the matching sub-store; a HybridQuery is split and fused.
     *
     * @throws UnsupportedQueryTypeException when no branch can handle the query
     */
    public function query(QueryInterface $query, array $options = []): iterable
    {
        if ($query instanceof HybridQuery) {
            return $this->hybridQuery($query, $options);
        }

        if ($query instanceof VectorQuery && $this->vectorStore->supports(VectorQuery::class)) {
            return $this->vectorStore->query($query, $options);
        }

        if ($query instanceof TextQuery && $this->textStore->supports(TextQuery::class)) {
            return $this->textStore->query($query, $options);
        }

        throw new UnsupportedQueryTypeException($query::class, $this);
    }

    /**
     * HybridQuery is supported only when both halves can be answered; any
     * other query class is supported when at least one sub-store reports support.
     */
    public function supports(string $queryClass): bool
    {
        if (HybridQuery::class === $queryClass) {
            return $this->vectorStore->supports(VectorQuery::class)
                && $this->textStore->supports(TextQuery::class);
        }

        return $this->vectorStore->supports($queryClass)
            || $this->textStore->supports($queryClass);
    }

    /**
     * Runs the vector and the text part of the hybrid query against their
     * respective stores and fuses both rankings.
     *
     * @param array<string, mixed> $options
     *
     * @return list<VectorDocument>
     */
    private function hybridQuery(HybridQuery $query, array $options): array
    {
        $vectorResults = $this->toList(
            $this->vectorStore->query(new VectorQuery($query->getVector()), $options),
        );

        $textResults = $this->toList(
            $this->textStore->query(new TextQuery($query->getText()), $options),
        );

        return $this->reciprocalRankFusion($vectorResults, $textResults);
    }

    /**
     * Materialises a store result as a 0-based list in iteration order.
     *
     * Keys are deliberately discarded: generators may emit duplicate keys
     * (which would overwrite documents when keys are preserved) and stores
     * may key results by something other than their rank.
     *
     * @param iterable<VectorDocument> $documents
     *
     * @return list<VectorDocument>
     */
    private function toList(iterable $documents): array
    {
        return \is_array($documents) ? array_values($documents) : iterator_to_array($documents, false);
    }

    /**
     * Fuses two rankings: every document gets 1 / (rrfK + rank) per list it
     * appears in (rank being its 1-based position), and the result is
     * ordered by the summed score, highest first.
     *
     * When a document id occurs in both lists, the instance from $list2 is
     * the one returned (with the fused score applied via withScore()).
     *
     * @param list<VectorDocument> $list1
     * @param list<VectorDocument> $list2
     *
     * @return list<VectorDocument>
     */
    private function reciprocalRankFusion(array $list1, array $list2): array
    {
        /** @var array<string, float> $scores */
        $scores = [];

        /** @var array<string, VectorDocument> $documentsById */
        $documentsById = [];

        foreach ([$list1, $list2] as $ranking) {
            // array_values() guarantees the key really is the 0-based position.
            foreach (array_values($ranking) as $position => $document) {
                $id = (string) $document->getId();
                $scores[$id] = ($scores[$id] ?? 0.0) + 1.0 / ($this->rrfK + $position + 1);
                $documentsById[$id] = $document;
            }
        }

        arsort($scores);

        $result = [];
        foreach ($scores as $id => $score) {
            $result[] = $documentsById[$id]->withScore($score);
        }

        return $result;
    }
}

src/Document/Vectorizer.php

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,14 @@ private function vectorizeEmbeddableDocument(EmbeddableDocumentInterface $docume
109109
throw new RuntimeException('No vector returned for vectorization.');
110110
}
111111

112-
return new VectorDocument($document->getId(), $vectors[0], $document->getMetadata());
112+
// Preserve the original text in metadata so downstream consumers
113+
// (e.g. text search, reranking) can access it via Metadata::getText().
114+
$metadata = $document->getMetadata();
115+
if (!$metadata->hasText()) {
116+
$metadata->setText($document->getContent());
117+
}
118+
119+
return new VectorDocument($document->getId(), $vectors[0], $metadata);
113120
}
114121

115122
/**
@@ -186,7 +193,14 @@ private function vectorizeEmbeddableDocuments(array $documents, array $options =
186193

187194
$vectorDocuments = [];
188195
foreach ($documents as $i => $document) {
189-
$vectorDocuments[] = new VectorDocument($document->getId(), $vectors[$i], $document->getMetadata());
196+
// Preserve the original text in metadata so downstream consumers
197+
// (e.g. text search, reranking) can access it via Metadata::getText().
198+
$metadata = $document->getMetadata();
199+
if (!$metadata->hasText()) {
200+
$metadata->setText($document->getContent());
201+
}
202+
203+
$vectorDocuments[] = new VectorDocument($document->getId(), $vectors[$i], $metadata);
190204
}
191205

192206
$this->logger->info('Vectorization process completed', [

src/Event/PostQueryEvent.php

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Event;
13+
14+
use Symfony\AI\Store\Document\VectorDocument;
15+
use Symfony\Contracts\EventDispatcher\Event;
16+
17+
/**
18+
* Event dispatched after documents are retrieved from the store.
19+
*
20+
* Listeners can modify the documents list, for example to rerank results.
21+
*
22+
* @author Johannes Wachter <[email protected]>
23+
*/
24+
final class PostQueryEvent extends Event
{
    private readonly string $query;

    /**
     * @var iterable<VectorDocument>
     */
    private iterable $documents;

    /**
     * @var array<string, mixed>
     */
    private readonly array $options;

    /**
     * @param string                   $query     Query string this event was created for
     * @param iterable<VectorDocument> $documents Documents carried by the event; replaceable by listeners
     * @param array<string, mixed>     $options   Options passed along with the event
     */
    public function __construct(string $query, iterable $documents, array $options = [])
    {
        $this->query = $query;
        $this->documents = $documents;
        $this->options = $options;
    }

    /**
     * Returns the query string given at construction.
     */
    public function getQuery(): string
    {
        return $this->query;
    }

    /**
     * Returns the options given at construction.
     *
     * @return array<string, mixed>
     */
    public function getOptions(): array
    {
        return $this->options;
    }

    /**
     * Returns the current document set (the initial one or the last one set).
     *
     * @return iterable<VectorDocument>
     */
    public function getDocuments(): iterable
    {
        return $this->documents;
    }

    /**
     * Replaces the document set carried by this event.
     *
     * @param iterable<VectorDocument> $documents
     */
    public function setDocuments(iterable $documents): void
    {
        $this->documents = $documents;
    }
}

src/EventListener/RerankerListener.php

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\EventListener;
13+
14+
use Symfony\AI\Store\Event\PostQueryEvent;
15+
use Symfony\AI\Store\Reranker\RerankerInterface;
16+
17+
/**
18+
* Reranks retrieved documents using a cross-encoder model.
19+
*
20+
* @author Johannes Wachter <[email protected]>
21+
*/
22+
final class RerankerListener
{
    /**
     * @param RerankerInterface $reranker Reranker the documents are handed to
     * @param int               $topK     Fallback result limit used when the event options carry no "topK" entry
     */
    public function __construct(
        private readonly RerankerInterface $reranker,
        private readonly int $topK = 5,
    ) {
    }

    /**
     * Replaces the event's documents with the reranker's output.
     *
     * The documents are first materialised as a 0-based list. Keys are
     * discarded on purpose: a generator may yield the same key more than once
     * (e.g. via "yield from"), and preserving keys would silently overwrite
     * earlier documents before they ever reach the reranker.
     */
    public function __invoke(PostQueryEvent $event): void
    {
        $documents = $event->getDocuments();
        $documents = \is_array($documents)
            ? array_values($documents)
            : iterator_to_array($documents, false);

        $reranked = $this->reranker->rerank(
            $event->getQuery(),
            $documents,
            $event->getOptions()['topK'] ?? $this->topK,
        );

        $event->setDocuments($reranked);
    }
}

src/Reranker/Reranker.php

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Reranker;
13+
14+
use Psr\Log\LoggerInterface;
15+
use Psr\Log\NullLogger;
16+
use Symfony\AI\Platform\PlatformInterface;
17+
use Symfony\AI\Store\Document\VectorDocument;
18+
19+
/**
20+
* Platform-based reranker that delegates to PlatformInterface for cross-encoder scoring.
21+
*
22+
* @author Johannes Wachter <[email protected]>
23+
*/
24+
final class Reranker implements RerankerInterface
{
    /**
     * @param PlatformInterface $platform Platform the reranking request is sent to
     * @param string            $model    Model name passed to the platform
     * @param LoggerInterface   $logger   Receives debug/warning output; defaults to a no-op logger
     */
    public function __construct(
        private readonly PlatformInterface $platform,
        private readonly string $model,
        private readonly LoggerInterface $logger = new NullLogger(),
    ) {
    }

    /**
     * Scores the documents against the query through the platform and returns
     * at most $topK of them, highest score first, each carrying its new score.
     *
     * The text sent for a document is its metadata text, falling back to the
     * metadata source and finally to an empty string.
     *
     * A non-positive $topK yields an empty result.
     */
    public function rerank(string $query, array $documents, int $topK = 5): array
    {
        if ([] === $documents) {
            return [];
        }

        // Entries refer to documents by position, so both the texts sent and
        // the lookup below must use a 0-based list regardless of input keys.
        $documents = array_values($documents);

        $texts = array_map(
            static fn (VectorDocument $doc): string => $doc->getMetadata()->getText()
                ?? $doc->getMetadata()->getSource() ?? '',
            $documents,
        );

        $this->logger->debug('Reranking {count} documents', ['count' => \count($documents)]);

        // NOTE(review): asReranking() is assumed to return an array of entries
        // exposing getIndex() and getScore() — confirm against the platform component.
        $entries = $this->platform
            ->invoke($this->model, ['query' => $query, 'texts' => $texts])
            ->asReranking();

        usort($entries, static fn ($a, $b): int => $b->getScore() <=> $a->getScore());

        $reranked = [];
        foreach ($entries as $entry) {
            if (\count($reranked) >= $topK) {
                break;
            }

            $index = $entry->getIndex();
            if (!isset($documents[$index])) {
                // An index outside the submitted list cannot be mapped back to a document.
                $this->logger->warning('Reranking returned unknown document index {index}, skipping', ['index' => $index]);

                continue;
            }

            $reranked[] = $documents[$index]->withScore($entry->getScore());
        }

        $this->logger->debug('Reranking completed, returning {topK} documents', ['topK' => \count($reranked)]);

        return $reranked;
    }
}

0 commit comments

Comments
 (0)