Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 2a837f7

Browse files
committed
move index_map to file
1 parent 20b2cb4 commit 2a837f7

File tree

2 files changed

+372
-373
lines changed

2 files changed

+372
-373
lines changed
Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
/// The map size to fall back on when the size of an index cannot be read from the index itself.
2+
const DEFAULT_MAP_SIZE: usize = 10 * (1 << 30); // 10 GiB, i.e. 10 * 2^30 bytes
3+
4+
use std::collections::BTreeMap;
5+
use std::path::Path;
6+
use std::time::Duration;
7+
8+
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
9+
use meilisearch_types::milli::Index;
10+
use time::OffsetDateTime;
11+
use uuid::Uuid;
12+
13+
use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
14+
use crate::lru::{InsertionOutcome, LruMap};
15+
use crate::{clamp_to_page_size, Result};
16+
17+
/// Keep an internally consistent view of the open indexes in memory.
18+
///
19+
/// This view is made of an LRU cache that will evict the least frequently used indexes when new indexes are opened.
20+
/// Indexes that are being closed (for resizing or due to cache eviction) or deleted cannot be evicted from the cache and
21+
/// are stored separately.
22+
///
23+
/// This view provides operations to change the state of the index as it is known in memory:
24+
/// open an index (making it available for queries), close an index (specifying the new size it should be opened with),
25+
/// delete an index.
26+
///
27+
/// External consistency with the other bits of data of an index is provided by the `IndexMapper` parent structure.
28+
pub struct IndexMap {
29+
/// A LRU map of indexes that are in the open state and available for queries.
30+
available: LruMap<Uuid, Index>,
31+
/// A map of indexes that are not available for queries, either because they are being deleted
32+
/// or because they are being closed.
33+
///
34+
/// If they are being deleted, the UUID points to `None`.
35+
unavailable: BTreeMap<Uuid, Option<ClosingIndex>>,
36+
37+
/// A monotonically increasing generation number, used to differentiate between multiple successive index closing requests.
38+
///
39+
/// Because multiple readers could be waiting on an index to close, the following could theoretically happen:
40+
///
41+
/// 1. Multiple readers wait for the index closing to occur.
42+
/// 2. One of them "wins the race", takes the lock and then removes the index that finished closing from the map.
43+
/// 3. The index is reopened, but must be closed again (such as being resized again).
44+
/// 4. One reader that "lost the race" in (2) wakes up and tries to take the lock and remove the index from the map.
45+
///
46+
/// In that situation, the index may or may not have finished closing. The `generation` field allows to remember which
47+
/// closing request was made, so the reader that "lost the race" has the old generation and will need to wait again for the index
48+
/// to close.
49+
generation: usize,
50+
}
51+
52+
#[derive(Clone)]
53+
pub struct ClosingIndex {
54+
uuid: Uuid,
55+
closing_event: EnvClosingEvent,
56+
map_size: usize,
57+
generation: usize,
58+
}
59+
60+
impl ClosingIndex {
61+
/// Waits for the index to be definitely closed.
62+
///
63+
/// To avoid blocking, users should relinquish their locks to the IndexMap before calling this function.
64+
///
65+
/// After the index is physically closed, the in memory map must still be updated to take this into account.
66+
/// To do so, a `ReopenableIndex` is returned, that can be used to either definitely close or definitely open
67+
/// the index without waiting anymore.
68+
pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
69+
self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex {
70+
uuid: self.uuid,
71+
map_size: self.map_size,
72+
generation: self.generation,
73+
})
74+
}
75+
}
76+
77+
pub struct ReopenableIndex {
78+
uuid: Uuid,
79+
map_size: usize,
80+
generation: usize,
81+
}
82+
83+
impl ReopenableIndex {
84+
/// Attempts to reopen the index, which can result in the index being reopened again or not
85+
/// (e.g. if another thread already opened and closed the index again).
86+
///
87+
/// Use get again on the IndexMap to get the updated status.
88+
///
89+
/// Fails if the underlying index creation fails.
90+
///
91+
/// # Status table
92+
///
93+
/// | Previous Status | New Status |
94+
/// |-----------------|----------------------------------------------|
95+
/// | Missing | Missing |
96+
/// | BeingDeleted | BeingDeleted |
97+
/// | Closing | Available or Closing depending on generation |
98+
/// | Available | Available |
99+
///
100+
pub fn reopen(self, map: &mut IndexMap, path: &Path) -> Result<()> {
101+
if let Closing(reopen) = map.get(&self.uuid) {
102+
if reopen.generation != self.generation {
103+
return Ok(());
104+
}
105+
map.unavailable.remove(&self.uuid);
106+
map.create(&self.uuid, path, None, self.map_size)?;
107+
}
108+
Ok(())
109+
}
110+
111+
/// Attempts to close the index, which may or may not result in the index being closed
112+
/// (e.g. if another thread already reopened the index again).
113+
///
114+
/// Use get again on the IndexMap to get the updated status.
115+
///
116+
/// # Status table
117+
///
118+
/// | Previous Status | New Status |
119+
/// |-----------------|--------------------------------------------|
120+
/// | Missing | Missing |
121+
/// | BeingDeleted | BeingDeleted |
122+
/// | Closing | Missing or Closing depending on generation |
123+
/// | Available | Available |
124+
pub fn close(self, map: &mut IndexMap) {
125+
if let Closing(reopen) = map.get(&self.uuid) {
126+
if reopen.generation != self.generation {
127+
return;
128+
}
129+
map.unavailable.remove(&self.uuid);
130+
}
131+
}
132+
}
133+
134+
impl IndexMap {
135+
pub fn new(cap: usize) -> IndexMap {
136+
Self { unavailable: Default::default(), available: LruMap::new(cap), generation: 0 }
137+
}
138+
139+
/// Gets the current status of an index in the map.
140+
///
141+
/// If the index is available it can be accessed from the returned status.
142+
pub fn get(&self, uuid: &Uuid) -> IndexStatus {
143+
self.available
144+
.get(uuid)
145+
.map(|index| Available(index.clone()))
146+
.unwrap_or_else(|| self.get_unavailable(uuid))
147+
}
148+
149+
fn get_unavailable(&self, uuid: &Uuid) -> IndexStatus {
150+
match self.unavailable.get(uuid) {
151+
Some(Some(reopen)) => Closing(reopen.clone()),
152+
Some(None) => BeingDeleted,
153+
None => Missing,
154+
}
155+
}
156+
157+
/// Attempts to create a new index that wasn't existing before.
158+
///
159+
/// # Status table
160+
///
161+
/// | Previous Status | New Status |
162+
/// |-----------------|------------|
163+
/// | Missing | Available |
164+
/// | BeingDeleted | panics |
165+
/// | Closing | panics |
166+
/// | Available | panics |
167+
///
168+
pub fn create(
169+
&mut self,
170+
uuid: &Uuid,
171+
path: &Path,
172+
date: Option<(OffsetDateTime, OffsetDateTime)>,
173+
map_size: usize,
174+
) -> Result<Index> {
175+
if !matches!(self.get_unavailable(uuid), Missing) {
176+
panic!("Attempt to open an index that was unavailable");
177+
}
178+
let index = create_or_open_index(path, date, map_size)?;
179+
match self.available.insert(*uuid, index.clone()) {
180+
InsertionOutcome::InsertedNew => (),
181+
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
182+
self.close(evicted_uuid, evicted_index, 0);
183+
}
184+
InsertionOutcome::Replaced(_) => {
185+
panic!("Attempt to open an index that was already opened")
186+
}
187+
}
188+
Ok(index)
189+
}
190+
191+
/// Increases the current generation. See documentation for this field.
192+
///
193+
/// In the unlikely event that the 2^64 generations would have been exhausted, we simply wrap-around.
194+
///
195+
/// For this to cause an issue, one should be able to stop a reader in time after it got a `ReopenableIndex` and before it takes the lock
196+
/// to remove it from the unavailable map, and keep the reader in this frozen state for 2^64 closing of other indexes.
197+
///
198+
/// This seems overwhelmingly impossible to achieve in practice.
199+
fn next_generation(&mut self) -> usize {
200+
self.generation = self.generation.wrapping_add(1);
201+
self.generation
202+
}
203+
204+
/// Attempts to close an index.
205+
///
206+
/// # Status table
207+
///
208+
/// | Previous Status | New Status |
209+
/// |-----------------|---------------|
210+
/// | Missing | Missing |
211+
/// | BeingDeleted | BeingDeleted |
212+
/// | Closing | Closing |
213+
/// | Available | Closing |
214+
///
215+
pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) {
216+
let Some(index) = self.available.remove(uuid) else { return; };
217+
self.close(*uuid, index, map_size_growth);
218+
}
219+
220+
fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) {
221+
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
222+
let closing_event = index.prepare_for_closing();
223+
let generation = self.next_generation();
224+
self.unavailable
225+
.insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation }));
226+
}
227+
228+
/// Attempts to delete and index.
229+
///
230+
/// `end_deletion` must be called just after.
231+
///
232+
/// # Status table
233+
///
234+
/// | Previous Status | New Status | Return value |
235+
/// |-----------------|--------------|-----------------------------|
236+
/// | Missing | BeingDeleted | Ok(None) |
237+
/// | BeingDeleted | BeingDeleted | Err(None) |
238+
/// | Closing | Closing | Err(Some(reopen)) |
239+
/// | Available | BeingDeleted | Ok(Some(env_closing_event)) |
240+
pub fn start_deletion(
241+
&mut self,
242+
uuid: &Uuid,
243+
) -> std::result::Result<Option<EnvClosingEvent>, Option<ClosingIndex>> {
244+
if let Some(index) = self.available.remove(uuid) {
245+
self.unavailable.insert(*uuid, None);
246+
return Ok(Some(index.prepare_for_closing()));
247+
}
248+
match self.unavailable.remove(uuid) {
249+
Some(Some(reopen)) => Err(Some(reopen)),
250+
Some(None) => Err(None),
251+
None => Ok(None),
252+
}
253+
}
254+
255+
/// Marks that an index deletion finished.
256+
///
257+
/// Must be used after calling `start_deletion`.
258+
///
259+
/// # Status table
260+
///
261+
/// | Previous Status | New Status |
262+
/// |-----------------|------------|
263+
/// | Missing | Missing |
264+
/// | BeingDeleted | Missing |
265+
/// | Closing | panics |
266+
/// | Available | panics |
267+
pub fn end_deletion(&mut self, uuid: &Uuid) {
268+
assert!(
269+
self.available.get(uuid).is_none(),
270+
"Attempt to finish deletion of an index that was not being deleted"
271+
);
272+
// Do not panic if the index was Missing or BeingDeleted
273+
assert!(
274+
!matches!(self.unavailable.remove(uuid), Some(Some(_))),
275+
"Attempt to finish deletion of an index that was being closed"
276+
);
277+
}
278+
}
279+
280+
/// Create or open an index in the specified path.
281+
/// The path *must* exist or an error will be thrown.
282+
fn create_or_open_index(
283+
path: &Path,
284+
date: Option<(OffsetDateTime, OffsetDateTime)>,
285+
map_size: usize,
286+
) -> Result<Index> {
287+
let mut options = EnvOpenOptions::new();
288+
options.map_size(clamp_to_page_size(map_size));
289+
options.max_readers(1024);
290+
291+
if let Some((created, updated)) = date {
292+
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
293+
} else {
294+
Ok(Index::new(options, path)?)
295+
}
296+
}
297+
298+
/// Putting the tests of the LRU down there so we have access to the cache's private members
#[cfg(test)]
mod tests {

    use meilisearch_types::heed::Env;
    use meilisearch_types::Index;
    use uuid::Uuid;

    use super::super::IndexMapper;
    use crate::tests::IndexSchedulerHandle;
    use crate::utils::clamp_to_page_size;
    use crate::IndexScheduler;

    impl IndexMapper {
        /// Builds a test `IndexScheduler` and hands out its mapper, its environment and its handle.
        fn test() -> (Self, Env, IndexSchedulerHandle) {
            let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
            (index_scheduler.index_mapper, index_scheduler.env, handle)
        }
    }

    /// Checks that the first entry of the `unavailable` map is `expected_uuid`, and that it is
    /// closing (`is_closing == true`) or being deleted (`is_closing == false`).
    fn check_first_unavailable(mapper: &IndexMapper, expected_uuid: Uuid, is_closing: bool) {
        let guard = mapper.index_map.read().unwrap();
        let (first_uuid, first_state) = guard.unavailable.first_key_value().unwrap();
        assert_eq!(first_uuid, &expected_uuid);
        assert_eq!(first_state.is_some(), is_closing);
    }

    #[test]
    fn evict_indexes() {
        let (mapper, env, _handle) = IndexMapper::test();
        let mut created_uuids = vec![];
        // LRU cap + 1
        for n in 0..=5 {
            let name = format!("index-{n}");
            let wtxn = env.write_txn().unwrap();
            mapper.create_index(wtxn, &name, None).unwrap();
            let rtxn = env.read_txn().unwrap();
            created_uuids.push(mapper.index_mapping.get(&rtxn, &name).unwrap().unwrap());
        }
        // index-0 was evicted
        check_first_unavailable(&mapper, created_uuids[0], true);

        // get back the evicted index
        let wtxn = env.write_txn().unwrap();
        mapper.create_index(wtxn, "index-0", None).unwrap();

        // Least recently used is now index-1
        check_first_unavailable(&mapper, created_uuids[1], true);
    }

    #[test]
    fn resize_index() {
        let (mapper, env, _handle) = IndexMapper::test();

        // The index starts at the base size, then grows by one growth amount per resize.
        for resizes in 0..3 {
            if resizes > 0 {
                mapper.resize_index(&env.read_txn().unwrap(), "index").unwrap();
            }

            let index = mapper.create_index(env.write_txn().unwrap(), "index", None).unwrap();
            assert_index_size(index, mapper.index_base_map_size + mapper.index_growth_amount * resizes);
        }
    }

    /// Asserts that the map size of `index` is `expected` clamped to the page size.
    fn assert_index_size(index: Index, expected: usize) {
        let clamped = clamp_to_page_size(expected);
        let actual = index.map_size().unwrap();
        assert_eq!(actual, clamped);
    }
}

0 commit comments

Comments
 (0)