Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 66c0185

Browse files
committed
Allow planner to use Merge Append to efficiently implement UNION
Until now, UNION queries have often been suboptimal as the planner has only ever considered using an Append node and making the results unique by either using a Hash Aggregate, or by Sorting the entire Append result and running it through the Unique operator. Both of these methods always require reading all rows from the union subqueries. Here we adjust the union planner so that it can request that each subquery produce results in target list order so that these can be Merge Appended together and made unique with a Unique node. This can improve performance significantly as the union child can make use of the likes of btree indexes and/or Merge Joins to provide the top-level UNION with presorted input. This is especially good if the top-level UNION contains a LIMIT node that limits the output rows to a small subset of the unioned rows as cheap startup plans can be used. Author: David Rowley Reviewed-by: Richard Guo, Andy Fan Discussion: https://postgr.es/m/CAApHDvpb_63XQodmxKUF8vb9M7CxyUyT4sWvEgqeQU-GB7QFoQ@mail.gmail.com
1 parent 47f99a4 commit 66c0185

File tree

15 files changed

+707
-253
lines changed

15 files changed

+707
-253
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

+7
Original file line numberDiff line numberDiff line change
@@ -11495,6 +11495,10 @@ DROP INDEX base_tbl1_idx;
1149511495
DROP INDEX base_tbl2_idx;
1149611496
DROP INDEX async_p3_idx;
1149711497
-- UNION queries
11498+
SET enable_sort TO off;
11499+
SET enable_incremental_sort TO off;
11500+
-- Adjust fdw_startup_cost so that we get an unordered path in the Append.
11501+
ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
1149811502
EXPLAIN (VERBOSE, COSTS OFF)
1149911503
INSERT INTO result_tbl
1150011504
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -11576,6 +11580,9 @@ SELECT * FROM result_tbl ORDER BY a;
1157611580
(12 rows)
1157711581

1157811582
DELETE FROM result_tbl;
11583+
RESET enable_incremental_sort;
11584+
RESET enable_sort;
11585+
ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
1157911586
-- Disable async execution if we use gating Result nodes for pseudoconstant
1158011587
-- quals
1158111588
EXPLAIN (VERBOSE, COSTS OFF)

contrib/postgres_fdw/sql/postgres_fdw.sql

+9
Original file line numberDiff line numberDiff line change
@@ -3877,6 +3877,11 @@ DROP INDEX base_tbl2_idx;
38773877
DROP INDEX async_p3_idx;
38783878

38793879
-- UNION queries
3880+
SET enable_sort TO off;
3881+
SET enable_incremental_sort TO off;
3882+
-- Adjust fdw_startup_cost so that we get an unordered path in the Append.
3883+
ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
3884+
38803885
EXPLAIN (VERBOSE, COSTS OFF)
38813886
INSERT INTO result_tbl
38823887
(SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -3903,6 +3908,10 @@ UNION ALL
39033908
SELECT * FROM result_tbl ORDER BY a;
39043909
DELETE FROM result_tbl;
39053910

3911+
RESET enable_incremental_sort;
3912+
RESET enable_sort;
3913+
ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
3914+
39063915
-- Disable async execution if we use gating Result nodes for pseudoconstant
39073916
-- quals
39083917
EXPLAIN (VERBOSE, COSTS OFF)

src/backend/optimizer/path/equivclass.c

+61
Original file line numberDiff line numberDiff line change
@@ -2867,6 +2867,67 @@ add_child_join_rel_equivalences(PlannerInfo *root,
28672867
MemoryContextSwitchTo(oldcontext);
28682868
}
28692869

2870+
/*
2871+
* add_setop_child_rel_equivalences
2872+
* Add equivalence members for each non-resjunk target in 'child_tlist'
2873+
* to the EquivalenceClass in the corresponding setop_pathkey's pk_class.
2874+
*
2875+
* 'root' is the PlannerInfo belonging to the top-level set operation.
2876+
* 'child_rel' is the RelOptInfo of the child relation we're adding
2877+
* EquivalenceMembers for.
2878+
* 'child_tlist' is the target list for the setop child relation. The target
2879+
* list expressions are what we add as EquivalenceMembers.
2880+
* 'setop_pathkeys' is a list of PathKeys which must contain an entry for each
2881+
* non-resjunk target in 'child_tlist'.
2882+
*/
2883+
void
2884+
add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel,
2885+
List *child_tlist, List *setop_pathkeys)
2886+
{
2887+
ListCell *lc;
2888+
ListCell *lc2 = list_head(setop_pathkeys);
2889+
2890+
foreach(lc, child_tlist)
2891+
{
2892+
TargetEntry *tle = lfirst_node(TargetEntry, lc);
2893+
EquivalenceMember *parent_em;
2894+
PathKey *pk;
2895+
2896+
if (tle->resjunk)
2897+
continue;
2898+
2899+
if (lc2 == NULL)
2900+
elog(ERROR, "too few pathkeys for set operation");
2901+
2902+
pk = lfirst_node(PathKey, lc2);
2903+
parent_em = linitial(pk->pk_eclass->ec_members);
2904+
2905+
/*
2906+
* We can safely pass the parent member as the first member in the
2907+
* ec_members list as this is added first in generate_union_paths,
2908+
* likewise, the JoinDomain can be that of the initial member of the
2909+
* Pathkey's EquivalenceClass.
2910+
*/
2911+
add_eq_member(pk->pk_eclass,
2912+
tle->expr,
2913+
child_rel->relids,
2914+
parent_em->em_jdomain,
2915+
parent_em,
2916+
exprType((Node *) tle->expr));
2917+
2918+
lc2 = lnext(setop_pathkeys, lc2);
2919+
}
2920+
2921+
/*
2922+
* transformSetOperationStmt() ensures that the targetlist never contains
2923+
* any resjunk columns, so all eclasses that exist in 'root' must have
2924+
* received a new member in the loop above. Add them to the child_rel's
2925+
* eclass_indexes.
2926+
*/
2927+
child_rel->eclass_indexes = bms_add_range(child_rel->eclass_indexes, 0,
2928+
list_length(root->eq_classes) - 1);
2929+
}
2930+
28702931

28712932
/*
28722933
* generate_implied_equalities_for_column

src/backend/optimizer/path/pathkeys.c

+19
Original file line numberDiff line numberDiff line change
@@ -2191,6 +2191,22 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
21912191
return n;
21922192
}
21932193

2194+
/*
2195+
* pathkeys_useful_for_setop
2196+
* Count the number of leading common pathkeys root's 'setop_pathkeys' in
2197+
* 'pathkeys'.
2198+
*/
2199+
static int
2200+
pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
2201+
{
2202+
int n_common_pathkeys;
2203+
2204+
(void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
2205+
&n_common_pathkeys);
2206+
2207+
return n_common_pathkeys;
2208+
}
2209+
21942210
/*
21952211
* truncate_useless_pathkeys
21962212
* Shorten the given pathkey list to just the useful pathkeys.
@@ -2208,6 +2224,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
22082224
if (nuseful2 > nuseful)
22092225
nuseful = nuseful2;
22102226
nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
2227+
if (nuseful2 > nuseful)
2228+
nuseful = nuseful2;
2229+
nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
22112230
if (nuseful2 > nuseful)
22122231
nuseful = nuseful2;
22132232

src/backend/optimizer/plan/planner.c

+84-1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "optimizer/tlist.h"
5555
#include "parser/analyze.h"
5656
#include "parser/parse_agg.h"
57+
#include "parser/parse_clause.h"
5758
#include "parser/parse_relation.h"
5859
#include "parser/parsetree.h"
5960
#include "partitioning/partdesc.h"
@@ -119,6 +120,8 @@ typedef struct
119120
{
120121
List *activeWindows; /* active windows, if any */
121122
grouping_sets_data *gset_data; /* grouping sets data, if any */
123+
SetOperationStmt *setop; /* parent set operation or NULL if not a
124+
* subquery belonging to a set operation */
122125
} standard_qp_extra;
123126

124127
/* Local functions */
@@ -249,6 +252,8 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
249252
List *targetList,
250253
List *groupClause);
251254
static int common_prefix_cmp(const void *a, const void *b);
255+
static List *generate_setop_child_grouplist(SetOperationStmt *op,
256+
List *targetlist);
252257

253258

254259
/*****************************************************************************
@@ -1500,6 +1505,18 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
15001505
qp_extra.activeWindows = activeWindows;
15011506
qp_extra.gset_data = gset_data;
15021507

1508+
/*
1509+
* Check if we're a subquery for a set operation. If we are, store
1510+
* the SetOperationStmt in qp_extra.
1511+
*/
1512+
if (root->parent_root != NULL &&
1513+
root->parent_root->parse->setOperations != NULL &&
1514+
IsA(root->parent_root->parse->setOperations, SetOperationStmt))
1515+
qp_extra.setop =
1516+
(SetOperationStmt *) root->parent_root->parse->setOperations;
1517+
else
1518+
qp_extra.setop = NULL;
1519+
15031520
/*
15041521
* Generate the best unsorted and presorted paths for the scan/join
15051522
* portion of this Query, ie the processing represented by the
@@ -3433,6 +3450,27 @@ standard_qp_callback(PlannerInfo *root, void *extra)
34333450
parse->sortClause,
34343451
tlist);
34353452

3453+
/* setting setop_pathkeys might be useful to the union planner */
3454+
if (qp_extra->setop != NULL &&
3455+
set_operation_ordered_results_useful(qp_extra->setop))
3456+
{
3457+
List *groupClauses;
3458+
bool sortable;
3459+
3460+
groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
3461+
3462+
root->setop_pathkeys =
3463+
make_pathkeys_for_sortclauses_extended(root,
3464+
&groupClauses,
3465+
tlist,
3466+
false,
3467+
&sortable);
3468+
if (!sortable)
3469+
root->setop_pathkeys = NIL;
3470+
}
3471+
else
3472+
root->setop_pathkeys = NIL;
3473+
34363474
/*
34373475
* Figure out whether we want a sorted result from query_planner.
34383476
*
@@ -3442,7 +3480,9 @@ standard_qp_callback(PlannerInfo *root, void *extra)
34423480
* sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
34433481
* we try to produce output that's sufficiently well sorted for the
34443482
* DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3445-
* by the ORDER BY clause.
3483+
* by the ORDER BY clause. Otherwise, if we're a subquery being planned
3484+
* for a set operation which can benefit from presorted results and have a
3485+
* sortable targetlist, we want to sort by the target list.
34463486
*
34473487
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
34483488
* of GROUP BY, it would be tempting to request sort by ORDER BY --- but
@@ -3460,6 +3500,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
34603500
root->query_pathkeys = root->distinct_pathkeys;
34613501
else if (root->sort_pathkeys)
34623502
root->query_pathkeys = root->sort_pathkeys;
3503+
else if (root->setop_pathkeys != NIL)
3504+
root->query_pathkeys = root->setop_pathkeys;
34633505
else
34643506
root->query_pathkeys = NIL;
34653507
}
@@ -7866,3 +7908,44 @@ group_by_has_partkey(RelOptInfo *input_rel,
78667908

78677909
return true;
78687910
}
7911+
7912+
/*
7913+
* generate_setop_child_grouplist
7914+
* Build a SortGroupClause list defining the sort/grouping properties
7915+
* of the child of a set operation.
7916+
*
7917+
* This is similar to generate_setop_grouplist() but differs as the setop
7918+
* child query's targetlist entries may already have a tleSortGroupRef
7919+
* assigned for other purposes, such as GROUP BYs. Here we keep the
7920+
* SortGroupClause list in the same order as 'op' groupClauses and just adjust
7921+
* the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
7922+
*/
7923+
static List *
7924+
generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
7925+
{
7926+
List *grouplist = copyObject(op->groupClauses);
7927+
ListCell *lg;
7928+
ListCell *lt;
7929+
7930+
lg = list_head(grouplist);
7931+
foreach(lt, targetlist)
7932+
{
7933+
TargetEntry *tle = (TargetEntry *) lfirst(lt);
7934+
SortGroupClause *sgc;
7935+
7936+
/* resjunk columns could have sortgrouprefs. Leave these alone */
7937+
if (tle->resjunk)
7938+
continue;
7939+
7940+
/* we expect every non-resjunk target to have a SortGroupClause */
7941+
Assert(lg != NULL);
7942+
sgc = (SortGroupClause *) lfirst(lg);
7943+
lg = lnext(grouplist, lg);
7944+
7945+
/* assign a tleSortGroupRef, or reuse the existing one */
7946+
sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
7947+
tle->ressortgroupref = sgc->tleSortGroupRef;
7948+
}
7949+
Assert(lg == NULL);
7950+
return grouplist;
7951+
}

0 commit comments

Comments
 (0)