Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit dae761a

Browse files
committed
Add empty BRIN ranges during CREATE INDEX
When building BRIN indexes, the brinbuildCallback only advances to the next page range when seeing a tuple that doesn't belong to the current one. This means that the index may end up missing ranges at the end of the table, if those pages do not contain any indexable tuples. We tend not to have completely empty pages at the end of a relation, but this also applies to partial indexes, where the tuples may simply not match the index predicate. This results in inefficient scans using the affected BRIN index - without the summaries, the page ranges have to be read and processed, which consumes I/O and possibly also CPU time. The existing code already added empty ranges for earlier parts of the table, this commit makes sure we add them for the ranges at the end of the table too. Patch by Matthias van de Meent, with review/improvements by me. Author: Matthias van de Meent Reviewed-by: Tomas Vondra Discussion: https://postgr.es/m/CAEze2WiMsPZg%3DxkvSF_jt4%3D69k6K7gz5B8V2wY3gCGZ%2B1BzCbQ%40mail.gmail.com
1 parent 00edb20 commit dae761a

File tree

3 files changed

+152
-4
lines changed

3 files changed

+152
-4
lines changed

contrib/pageinspect/expected/brin.out

+18
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,22 @@ SELECT brin_revmap_data(decode(repeat('00', :block_size), 'hex'));
8989

9090
(1 row)
9191

92+
-- Test that partial indexes have all pages, including empty ones.
93+
CREATE TABLE test2 (a int);
94+
INSERT INTO test2 SELECT i FROM generate_series(1,1000) s(i);
95+
-- No rows match the index predicate, make sure the index has the right number
96+
-- of ranges (same as number of page ranges).
97+
CREATE INDEX ON test2 USING brin (a) WITH (pages_per_range=1) WHERE (a IS NULL);
98+
ANALYZE test2;
99+
-- Does the index have one summary of the relation?
100+
SELECT (COUNT(*) = (SELECT relpages FROM pg_class WHERE relname = 'test2')) AS ranges_do_match
101+
FROM generate_series((SELECT (lastrevmappage + 1) FROM brin_metapage_info(get_raw_page('test2_a_idx', 0))),
102+
(SELECT (relpages - 1) FROM pg_class WHERE relname = 'test2_a_idx')) AS pages(p),
103+
LATERAL brin_page_items(get_raw_page('test2_a_idx', p), 'test2_a_idx') AS items;
104+
ranges_do_match
105+
-----------------
106+
t
107+
(1 row)
108+
92109
DROP TABLE test1;
110+
DROP TABLE test2;

contrib/pageinspect/sql/brin.sql

+17
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,21 @@ SELECT brin_page_items(decode(repeat('00', :block_size), 'hex'), 'test1_a_idx');
3636
SELECT brin_metapage_info(decode(repeat('00', :block_size), 'hex'));
3737
SELECT brin_revmap_data(decode(repeat('00', :block_size), 'hex'));
3838

39+
-- Test that partial indexes have all pages, including empty ones.
40+
CREATE TABLE test2 (a int);
41+
INSERT INTO test2 SELECT i FROM generate_series(1,1000) s(i);
42+
43+
-- No rows match the index predicate, make sure the index has the right number
44+
-- of ranges (same as number of page ranges).
45+
CREATE INDEX ON test2 USING brin (a) WITH (pages_per_range=1) WHERE (a IS NULL);
46+
47+
ANALYZE test2;
48+
49+
-- Does the index have one summary of the relation?
50+
SELECT (COUNT(*) = (SELECT relpages FROM pg_class WHERE relname = 'test2')) AS ranges_do_match
51+
FROM generate_series((SELECT (lastrevmappage + 1) FROM brin_metapage_info(get_raw_page('test2_a_idx', 0))),
52+
(SELECT (relpages - 1) FROM pg_class WHERE relname = 'test2_a_idx')) AS pages(p),
53+
LATERAL brin_page_items(get_raw_page('test2_a_idx', p), 'test2_a_idx') AS items;
54+
3955
DROP TABLE test1;
56+
DROP TABLE test2;

src/backend/access/brin/brin.c

+117-4
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,13 @@ typedef struct BrinBuildState
5353
Buffer bs_currentInsertBuf;
5454
BlockNumber bs_pagesPerRange;
5555
BlockNumber bs_currRangeStart;
56+
BlockNumber bs_maxRangeStart;
5657
BrinRevmap *bs_rmAccess;
5758
BrinDesc *bs_bdesc;
5859
BrinMemTuple *bs_dtuple;
60+
BrinTuple *bs_emptyTuple;
61+
Size bs_emptyTupleLen;
62+
MemoryContext bs_context;
5963
} BrinBuildState;
6064

6165
/*
@@ -82,7 +86,9 @@ typedef struct BrinOpaque
8286
#define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
8387

8488
static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
85-
BrinRevmap *revmap, BlockNumber pagesPerRange);
89+
BrinRevmap *revmap,
90+
BlockNumber pagesPerRange,
91+
BlockNumber tablePages);
8692
static BrinInsertState *initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo);
8793
static void terminate_brin_buildstate(BrinBuildState *state);
8894
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
@@ -94,6 +100,8 @@ static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
94100
static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc,
95101
BrinMemTuple *dtup, const Datum *values, const bool *nulls);
96102
static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys);
103+
static void brin_fill_empty_ranges(BrinBuildState *state,
104+
BlockNumber prevRange, BlockNumber maxRange);
97105

98106
/*
99107
* BRIN handler function: return IndexAmRoutine with access method parameters
@@ -933,7 +941,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
933941
* Initialize our state, including the deformed tuple state.
934942
*/
935943
revmap = brinRevmapInitialize(index, &pagesPerRange);
936-
state = initialize_brin_buildstate(index, revmap, pagesPerRange);
944+
state = initialize_brin_buildstate(index, revmap, pagesPerRange,
945+
RelationGetNumberOfBlocks(heap));
937946

938947
/*
939948
* Now scan the relation. No syncscan allowed here because we want the
@@ -945,6 +954,17 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
945954
/* process the final batch */
946955
form_and_insert_tuple(state);
947956

957+
/*
958+
* Backfill the final ranges with empty data.
959+
*
960+
* This saves us from doing what amounts to full table scans when the
961+
* index has a predicate like WHERE (nonnull_column IS NULL), or other
962+
* very selective predicates.
963+
*/
964+
brin_fill_empty_ranges(state,
965+
state->bs_currRangeStart,
966+
state->bs_maxRangeStart);
967+
948968
/* release resources */
949969
idxtuples = state->bs_numtuples;
950970
brinRevmapTerminate(state->bs_rmAccess);
@@ -1358,9 +1378,10 @@ brinGetStats(Relation index, BrinStatsData *stats)
13581378
*/
13591379
static BrinBuildState *
13601380
initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
1361-
BlockNumber pagesPerRange)
1381+
BlockNumber pagesPerRange, BlockNumber tablePages)
13621382
{
13631383
BrinBuildState *state;
1384+
BlockNumber lastRange = 0;
13641385

13651386
state = palloc_object(BrinBuildState);
13661387

@@ -1373,6 +1394,22 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
13731394
state->bs_bdesc = brin_build_desc(idxRel);
13741395
state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
13751396

1397+
/* Remember the memory context to use for an empty tuple, if needed. */
1398+
state->bs_context = CurrentMemoryContext;
1399+
state->bs_emptyTuple = NULL;
1400+
state->bs_emptyTupleLen = 0;
1401+
1402+
/*
1403+
* Calculate the start of the last page range. Page numbers are 0-based,
1404+
* so to calculate the index we need to subtract one. The integer division
1405+
* gives us the index of the page range.
1406+
*/
1407+
if (tablePages > 0)
1408+
lastRange = ((tablePages - 1) / pagesPerRange) * pagesPerRange;
1409+
1410+
/* Now calculate the start of the next range. */
1411+
state->bs_maxRangeStart = lastRange + state->bs_pagesPerRange;
1412+
13761413
return state;
13771414
}
13781415

@@ -1612,7 +1649,8 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
16121649
/* first time through */
16131650
Assert(!indexInfo);
16141651
state = initialize_brin_buildstate(index, revmap,
1615-
pagesPerRange);
1652+
pagesPerRange,
1653+
InvalidBlockNumber);
16161654
indexInfo = BuildIndexInfo(index);
16171655
}
16181656
summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
@@ -1982,3 +2020,78 @@ check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
19822020

19832021
return true;
19842022
}
2023+
2024+
/*
2025+
* brin_build_empty_tuple
2026+
* Maybe initialize a BRIN tuple representing an empty range.
2027+
*
2028+
* Sets state->bs_emptyTuple to a BRIN tuple representing an empty page range starting at the
2029+
* specified block number. The empty tuple is initialized only once, when it's
2030+
* needed for the first time, stored in the memory context bs_context to ensure
2031+
* proper life span, and reused on following calls. All empty tuples are
2032+
* exactly the same except for the bt_blkno field, which is set to the value
2033+
* in blkno parameter.
2034+
*/
2035+
static void
2036+
brin_build_empty_tuple(BrinBuildState *state, BlockNumber blkno)
2037+
{
2038+
/* First time an empty tuple is requested? If yes, initialize it. */
2039+
if (state->bs_emptyTuple == NULL)
2040+
{
2041+
MemoryContext oldcxt;
2042+
BrinMemTuple *dtuple = brin_new_memtuple(state->bs_bdesc);
2043+
2044+
/* Allocate the tuple in context for the whole index build. */
2045+
oldcxt = MemoryContextSwitchTo(state->bs_context);
2046+
2047+
state->bs_emptyTuple = brin_form_tuple(state->bs_bdesc, blkno, dtuple,
2048+
&state->bs_emptyTupleLen);
2049+
2050+
MemoryContextSwitchTo(oldcxt);
2051+
}
2052+
else
2053+
{
2054+
/* If we already have an empty tuple, just update the block. */
2055+
state->bs_emptyTuple->bt_blkno = blkno;
2056+
}
2057+
}
2058+
2059+
/*
2060+
* brin_fill_empty_ranges
2061+
* Add BRIN index tuples representing empty page ranges.
2062+
*
2063+
* prevRange/nextRange determine for which page ranges to add empty summaries.
2064+
* Both boundaries are exclusive, i.e. only ranges starting at blkno for which
2065+
* (prevRange < blkno < nextRange) will be added to the index.
2066+
*
2067+
* If prevRange is InvalidBlockNumber, this means there was no previous page
2068+
* range (i.e. the first empty range to add is for blkno=0).
2069+
*
2070+
* The empty tuple is built only once, and then reused for all future calls.
2071+
*/
2072+
static void
2073+
brin_fill_empty_ranges(BrinBuildState *state,
2074+
BlockNumber prevRange, BlockNumber nextRange)
2075+
{
2076+
BlockNumber blkno;
2077+
2078+
/*
2079+
* If we already summarized some ranges, we need to start with the next
2080+
* one. Otherwise start from the first range of the table.
2081+
*/
2082+
blkno = (prevRange == InvalidBlockNumber) ? 0 : (prevRange + state->bs_pagesPerRange);
2083+
2084+
/* Generate empty ranges until we hit the next non-empty range. */
2085+
while (blkno < nextRange)
2086+
{
2087+
/* Did we already build the empty tuple? If not, do it now. */
2088+
brin_build_empty_tuple(state, blkno);
2089+
2090+
brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
2091+
&state->bs_currentInsertBuf,
2092+
blkno, state->bs_emptyTuple, state->bs_emptyTupleLen);
2093+
2094+
/* try next page range */
2095+
blkno += state->bs_pagesPerRange;
2096+
}
2097+
}

0 commit comments

Comments
 (0)