Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a69701b

Browse files
authored
perf(gatsby): support fast filters for gte (gatsbyjs#23348)
* perf(gatsby): support fast filters for gte * Use separate binary search func for desc array
1 parent 874088c commit a69701b

File tree

2 files changed

+146
-28
lines changed

2 files changed

+146
-28
lines changed

packages/gatsby/src/redux/nodes.ts

Lines changed: 145 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { createPageDependency } from "./actions/add-page-dependency"
44
import { IDbQueryElemMatch } from "../db/common/query"
55

66
// Only list supported ops here. "CacheableFilterOp"
7-
type FilterOp = "$eq" | "$lte"
7+
type FilterOp = "$eq" | "$lte" | "$gte"
88
// Note: `undefined` is an encoding for a property that does not exist
99
type FilterValueNullable = string | number | boolean | null | undefined
1010
// This is filter value in most cases
@@ -15,12 +15,18 @@ export interface IFilterCache {
1515
// In this set, `undefined` values represent nodes that did not have the path
1616
byValue: Map<FilterValueNullable, Set<IGatsbyNode>>
1717
meta: {
18-
// Ordered set of all values found by this filter. No null / undefs.
18+
// Ordered set of all values (by `<`) found by this filter. No null / undefs
1919
valuesAsc?: Array<FilterValue>
2020
// Flat set of nodes, ordered by valueAsc, but not ordered per value group
2121
nodesByValueAsc?: Array<IGatsbyNode>
2222
// Ranges of nodes per value, maps to the nodesByValueAsc array
23-
valueRanges?: Map<FilterValue, [number, number]>
23+
valueRangesAsc?: Map<FilterValue, [number, number]>
24+
// Ordered set of all values (by `>`) found by this filter. No null / undefs
25+
valuesDesc?: Array<FilterValue>
26+
// Flat set of nodes, ordered by valueDesc, but not ordered per value group
27+
nodesByValueDesc?: Array<IGatsbyNode>
28+
// Ranges of nodes per value, maps to the nodesByValueDesc array
29+
valueRangesDesc?: Map<FilterValue, [number, number]>
2430
}
2531
}
2632
export type FiltersCache = Map<FilterCacheKey, IFilterCache>
@@ -170,7 +176,14 @@ export const addResolvedNodes = (
170176
return resolvedNodes
171177
}
172178

173-
export const postIndexingMetaSetup = (filterCache: IFilterCache): void => {
179+
export const postIndexingMetaSetup = (
180+
filterCache: IFilterCache,
181+
op: FilterOp
182+
): void => {
183+
if (op !== `$lte` && op !== `$gte`) {
184+
return
185+
}
186+
174187
// Create an ordered array of individual nodes, ordered (grouped) by the
175188
// value to which the filter resolves. Nodes are not ordered per value.
176189
// This way non-eq ops can simply slice the array to get a range.
@@ -189,7 +202,25 @@ export const postIndexingMetaSetup = (filterCache: IFilterCache): void => {
189202
>
190203

191204
// Sort all sets by its value, asc. Ignore/allow potential type casting.
192-
entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
205+
// Note: while `<` is the inverse of `>=`, the ordering might coerce values.
206+
// This coercion makes the op no longer idempotent (normally the result of
207+
// `a < b` is the opposite of `a >= b` for any a or b of the same type). The
208+
// exception is a number that is `NaN`, which we're ignoring here as it's most
209+
// likely a bug in the user code. However, when coercing the ops may end up
210+
// comparing against `NaN`, too. For example: `("abc" <= 12) !== (12 > "abc")`
211+
// which ends up doing `NaN <= 12` and `12 > NaN`, which will both yield
212+
// false.
213+
// So instead we potentially track two ordered lists; ascending and descending
214+
// and the only difference when comparing the inverse of one to the other
215+
// should be how these `NaN` cases end up getting ordered.
216+
// It's fine for `lt` and `lte` to use the same ordered set. Same for gt/gte.
217+
if (op === `$lte`) {
218+
// Order ascending; first value is lowest
219+
entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
220+
} else if (op === `$gte`) {
221+
// Order descending; first value is highest
222+
entries.sort(([a], [b]) => (a > b ? -1 : a < b ? 1 : 0))
223+
}
193224

194225
const orderedNodes: Array<IGatsbyNode> = []
195226
const orderedValues: Array<FilterValue> = []
@@ -205,12 +236,21 @@ export const postIndexingMetaSetup = (filterCache: IFilterCache): void => {
205236
orderedValues.push(v)
206237
})
207238

208-
filterCache.meta.valuesAsc = orderedValues
209-
filterCache.meta.nodesByValueAsc = orderedNodes
210-
// The nodesByValueAsc is ordered by value, but multiple nodes per value are
211-
// not ordered. To make lt as fast as lte, we must know the start and stop
212-
// index for each value. Similarly useful for for `ne`.
213-
filterCache.meta.valueRanges = offsets
239+
if (op === `$lte`) {
240+
filterCache.meta.valuesAsc = orderedValues
241+
filterCache.meta.nodesByValueAsc = orderedNodes
242+
// The nodesByValueAsc is ordered by value, but multiple nodes per value are
243+
// not ordered. To make lt as fast as lte, we must know the start and stop
244+
// index for each value. Similarly useful for `ne`.
245+
filterCache.meta.valueRangesAsc = offsets
246+
} else if (op === `$gte`) {
247+
filterCache.meta.valuesDesc = orderedValues
248+
filterCache.meta.nodesByValueDesc = orderedNodes
249+
// The nodesByValueDesc is ordered by value, but multiple nodes per value are
250+
// not ordered. To make gt as fast as gte, we must know the start and stop
251+
// index for each value. Similarly useful for `ne`.
252+
filterCache.meta.valueRangesDesc = offsets
253+
}
214254
}
215255

216256
/**
@@ -253,9 +293,7 @@ export const ensureIndexByQuery = (
253293
})
254294
}
255295

256-
if (op === `$lte`) {
257-
postIndexingMetaSetup(filterCache)
258-
}
296+
postIndexingMetaSetup(filterCache, op)
259297
}
260298

261299
function addNodeToFilterCache(
@@ -353,9 +391,7 @@ export const ensureIndexByElemMatch = (
353391
})
354392
}
355393

356-
if (op === `$lte`) {
357-
postIndexingMetaSetup(filterCache)
358-
}
394+
postIndexingMetaSetup(filterCache, op)
359395
}
360396

361397
function addNodeToBucketWithElemMatch(
@@ -416,8 +452,8 @@ function addNodeToBucketWithElemMatch(
416452
}
417453
}
418454

419-
const binarySearch = (
420-
values: Array<FilterValue>,
455+
const binarySearchAsc = (
456+
values: Array<FilterValue>, // Assume ordered asc
421457
needle: FilterValue
422458
): [number, number] | undefined => {
423459
let min = 0
@@ -451,6 +487,41 @@ const binarySearch = (
451487
// Shouldn't be reachable, but just in case, fall back to Sift if so.
452488
return undefined
453489
}
490+
/**
 * Binary search for `needle` in `values`, which must be ordered descending
 * (by `>`): the first element is the highest, the last is the lowest.
 *
 * @param values Values to search through. Assumed to be ordered descending.
 * @param needle The value to locate.
 * @returns
 *   - `[pivot, pivot]` when an element is found that is neither smaller nor
 *     greater than the needle (normally: equal to it),
 *   - `[min, max]` with `max - min <= 1` when the search window has been
 *     narrowed down to (at most) two neighbouring indexes without an exact
 *     hit. Either index may still be a bound, so the caller has to compare
 *     the needle against both,
 *   - `undefined` when `values` is empty.
 */
const binarySearchDesc = (
  values: Array<FilterValue>, // Assume ordered desc
  needle: FilterValue
): [number, number] | undefined => {
  let min = 0
  let max = values.length - 1
  let pivot = Math.floor(values.length / 2)
  while (min <= max) {
    const value = values[pivot]
    if (needle < value) {
      // The array is descending, so smaller values sit to the right of the
      // pivot. Drop everything left of the pivot from the search window.
      min = pivot
    } else if (needle > value) {
      // Greater values sit to the left of the pivot. Drop everything right of
      // the pivot from the search window.
      max = pivot
    } else {
      // This means needle === value
      // TODO: except for NaN ... and potentially certain type casting cases
      return [pivot, pivot]
    }

    if (max - min <= 1) {
      // End of search. Needle not found (as expected). The needle sits between
      // (or outside of) the values at these two indexes.
      return [min, max]
    }

    // Re-pivot in the middle of the remaining window. The `min +` offset is
    // essential: `(max - min) / 2` on its own is a width, not an index, and
    // using it as an index can move the pivot outside of [min, max], which
    // makes the window grow again and the loop never terminate.
    pivot = min + Math.floor((max - min) / 2)
  }

  // Only reachable for an empty array. Caller falls back to Sift in that case.
  return undefined
}
454525

455526
/**
456527
* Given the cache key for a filter and a target value return the set of nodes
@@ -486,17 +557,17 @@ export const getNodesFromCacheByValue = (
486557
return filterCache.byValue.get(filterValue)
487558
}
488559

560+
if (filterValue == null) {
561+
// This is an edge case and this value should be directly indexed
562+
// For `lte`/`gte` this should only return nodes for `null`, not a "range"
563+
return filterCache.byValue.get(filterValue)
564+
}
565+
489566
if (op === `$lte`) {
490567
// First try a direct approach. If a value is queried that also exists then
491568
// we can prevent a binary search through the whole set, O(1) vs O(log n)
492569

493-
if (filterValue == null) {
494-
// This is an edge case and this value should be directly indexed
495-
// For `lte` this should only return nodes for `null`, not a "range"
496-
return filterCache.byValue.get(filterValue)
497-
}
498-
499-
const ranges = filterCache.meta.valueRanges
570+
const ranges = filterCache.meta.valueRangesAsc
500571
const nodes = filterCache.meta.nodesByValueAsc
501572

502573
const range = ranges!.get(filterValue)
@@ -512,11 +583,12 @@ export const getNodesFromCacheByValue = (
512583
const values = filterCache.meta.valuesAsc as Array<FilterValue>
513584
// It shouldn't find the targetValue (but it might) and return the index of
514585
// the two values between which targetValue sits, or first/last element.
515-
const point = binarySearch(values, filterValue)
586+
const point = binarySearchAsc(values, filterValue)
516587
if (!point) {
517588
return undefined
518589
}
519590
const [pivotMin, pivotMax] = point
591+
520592
// Each pivot index must have a value and a range
521593
// The returned min/max index may include the lower/upper bound, so we still
522594
// have to do lte checks for both values.
@@ -537,6 +609,52 @@ export const getNodesFromCacheByValue = (
537609
return new Set(nodes!.slice(0, until))
538610
}
539611

612+
if (op === `$gte`) {
613+
// First try a direct approach. If a value is queried that also exists then
614+
// we can prevent a binary search through the whole set, O(1) vs O(log n)
615+
616+
const ranges = filterCache.meta.valueRangesDesc
617+
const nodes = filterCache.meta.nodesByValueDesc
618+
619+
const range = ranges!.get(filterValue)
620+
if (range) {
621+
return new Set(nodes!.slice(0, range[1]))
622+
}
623+
624+
// Query may ask for a value that doesn't appear in the set, like if the
625+
// set is [6, 5, 2, 1] and the query is >= 3. In that case we have to
626+
// apply a search (we'll do binary) to determine the offset to slice from.
627+
628+
// Note: for gte, the valuesDesc array must be set at this point
629+
const values = filterCache.meta.valuesDesc as Array<FilterValue>
630+
// It shouldn't find the targetValue (but it might) and return the index of
631+
// the two values between which targetValue sits, or first/last element.
632+
const point = binarySearchDesc(values, filterValue)
633+
if (!point) {
634+
return undefined
635+
}
636+
const [pivotMin, pivotMax] = point
637+
638+
// Each pivot index must have a value and a range
639+
// The returned min/max index may include the lower/upper bound, so we still
640+
// have to do gte checks for both values.
641+
let pivotValue = values[pivotMax]
642+
if (pivotValue < filterValue) {
643+
pivotValue = values[pivotMin]
644+
}
645+
646+
// Note: the pivot value _shouldnt_ match the filter value because that
647+
// means the value was actually found, but those should have been indexed
648+
// so should have yielded a result in the .get() above.
649+
650+
const [exclPivot, inclPivot] = ranges!.get(pivotValue) as [number, number]
651+
652+
// Note: technically, `5 >= "5" === true` but `5` would not be cached.
653+
// So we have to consider weak comparison and may have to include the pivot
654+
const until = pivotValue >= filterValue ? inclPivot : exclPivot
655+
return new Set(nodes!.slice(0, until))
656+
}
657+
540658
// Unreachable because we checked all values of FilterOp (which op is)
541659
return undefined
542660
}

packages/gatsby/src/redux/run-sift.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ const FAST_OPS = [
2626
// "$lt",
2727
`$lte`,
2828
// "$gt",
29-
// "$gte"
29+
`$gte`,
3030
]
3131

3232
/**

0 commit comments

Comments
 (0)