Add hybrid/semantic/keyword retrieval modes with reranker config (#23)

shawnthapa · web-flow · commit f6d4d709515f · 2025-12-04T11:43:57.000-08:00
# Changes
- Add `HybridRetrieval`, `SemanticRetrieval`, and `KeywordRetrieval`
messages
- Add `RerankerModel` and `ReciprocalRankFusion` reranker configs
- Extend `CollectionsSearch` tool with `instructions` and
`retrieval_mode`
- Deprecate `RankingMetric` in favor of the new retrieval modes
diff --git a/proto/xai/api/v1/chat.proto b/proto/xai/api/v1/chat.proto
@@ -4,6 +4,7 @@ package xai_api;
 
 import "google/protobuf/timestamp.proto";
 import "xai/api/v1/deferred.proto";
+import "xai/api/v1/documents.proto";
 import "xai/api/v1/image.proto";
 import "xai/api/v1/sample.proto";
 import "xai/api/v1/usage.proto";
@@ -665,6 +666,20 @@ message CollectionsSearch {
   // Optional number of chunks to be returned for each collections search.
   // Defaults to 10.
   optional int32 limit = 2;
+
+  // User-defined instructions to be included in the search query. Defaults to generic search
+  // instructions used by the collections search backend if unset.
+  optional string instructions = 3;
+
+  // How to perform the document search. Defaults to hybrid retrieval when unset.
+  oneof retrieval_mode {
+    // Perform hybrid retrieval combining keyword and semantic search.
+    HybridRetrieval hybrid_retrieval = 4;
+    // Perform pure semantic retrieval using dense embeddings.
+    SemanticRetrieval semantic_retrieval = 5;
+    // Perform keyword-based retrieval using sparse embeddings.
+    KeywordRetrieval keyword_retrieval = 6;
+  }
 }
 
 message AttachmentSearch {
diff --git a/proto/xai/api/v1/documents.proto b/proto/xai/api/v1/documents.proto
@@ -6,13 +6,68 @@ service Documents {
   rpc Search(SearchRequest) returns (SearchResponse) {}
 }
 
+// Document search that combines keyword (sparse) and semantic (dense) retrieval.
+message HybridRetrieval {
+  // Overfetch multiplier applied to the requested search limit.
+  // When set, fetches (limit * search_multiplier) results from each retrieval method
+  // before reranking, then returns the top `limit` results after reranking.
+  // Valid range is [1, 100]. Defaults to 1 (no overfetch) when unset.
+  optional int32 search_multiplier = 1;
+
+  // Reranker used to cut the fetched candidates down to the requested `limit`.
+  oneof reranker {
+    // Use a reranker model to perform the reranking.
+    RerankerModel reranker_model = 2;
+    // Use reciprocal rank fusion (RRF) to perform the reranking.
+    ReciprocalRankFusion reciprocal_rank_fusion = 3;
+  }
+}
+
+// Document search using keyword matching (sparse embeddings).
+message KeywordRetrieval {
+  // Reranker model settings. Optional, but a reranker is always used when searching multiple collections.
+  optional RerankerModel reranker = 1;
+}
+
+// Document search using semantic similarity (dense embeddings).
+message SemanticRetrieval {
+  // Reranker model settings. Optional, but a reranker is always used when searching multiple collections.
+  optional RerankerModel reranker = 1;
+}
+
+// Reciprocal rank fusion (RRF) reranking config. Fields are `optional` so an explicit 0 differs from unset.
+message ReciprocalRankFusion {
+  // The RRF constant k used in the reciprocal rank fusion formula. Defaults to 60 when unset.
+  optional int32 k = 1;
+
+  // Weight for embedding (dense) search results. Should be between 0 and 1. Defaults to 0.5 when unset.
+  optional float embedding_weight = 3;
+
+  // Weight for keyword (sparse) search results. Should be between 0 and 1. Defaults to 0.5 when unset.
+  optional float text_weight = 4;
+
+  // Field number 2 is retired and must not be reused. Use embedding_weight and text_weight instead.
+  reserved 2;
+}
+
+// Configuration for model-based reranking.
+message RerankerModel {
+  // The model to use for reranking. Defaults to the standard reranker model when unset.
+  optional string model = 1;
+
+  // Instructions for the reranking model. Defaults to generic reranking instructions when unset.
+  optional string instructions = 2;
+}
+
 // RankingMetric is the metric to use for the search.
+// Deprecated: Metric now comes from what is set during collection creation.
 enum RankingMetric {
   RANKING_METRIC_UNKNOWN = 0;
   RANKING_METRIC_L2_DISTANCE = 1;
   RANKING_METRIC_COSINE_SIMILARITY = 2;
 }
 
+// Message that contains settings needed to do a document search.
 message SearchRequest {
   // The query to search for which will be embedded using the
   // same embedding model as the one used for the source to query.
@@ -21,14 +76,27 @@ message SearchRequest {
   DocumentsSource source = 2;
   // The number of chunks to return.
   // Will always return the top matching chunks.
-  // Optional, defaults to 10
+  // Optional, defaults to 10.
   optional int32 limit = 3;
-
-  // The ranking metric to use for the search. Defaults to RANK_METRIC_L2_DISTANCE.
-  optional RankingMetric ranking_metric = 4;
-
   // User-defined instructions to be included in the search query. Defaults to generic search instructions.
   optional string instructions = 5;
+
+  // How to perform the document search. Defaults to HybridRetrieval when unset.
+  oneof retrieval_mode {
+    HybridRetrieval hybrid_retrieval = 11;
+    SemanticRetrieval semantic_retrieval = 12;
+    KeywordRetrieval keyword_retrieval = 13;
+  }
+
+  // Deprecated: Metric now comes from what is set during collection creation.
+  optional RankingMetric ranking_metric = 4 [deprecated = true];
+
+  // Deprecated: Use retrieval_mode instead.
+  reserved 6;
+  reserved 7;
+  reserved 8;
+  reserved 9;
+  reserved 10;
 }
 
 // SearchResponse message contains the results of a document search operation.