Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 97930cf

Browse files
committed
Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.
This patch fixes an oversight in my commit 7f3eba3 of 2008-10-23. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant.
1 parent 5cfe33f commit 97930cf

File tree

1 file changed

+73
-8
lines changed

1 file changed

+73
-8
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
142142
FmgrInfo *opproc, bool isgt,
143143
Datum constval, Oid consttype);
144144
static double eqjoinsel_inner(Oid operator,
145-
VariableStatData *vardata1, VariableStatData *vardata2);
145+
VariableStatData *vardata1, VariableStatData *vardata2,
146+
RelOptInfo *rel1, RelOptInfo *rel2);
146147
static double eqjoinsel_semi(Oid operator,
147-
VariableStatData *vardata1, VariableStatData *vardata2);
148+
VariableStatData *vardata1, VariableStatData *vardata2,
149+
RelOptInfo *rel1, RelOptInfo *rel2);
148150
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
149151
Datum lobound, Datum hibound, Oid boundstypid,
150152
double *scaledlobound, double *scaledhibound);
@@ -173,6 +175,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
173175
VariableStatData *vardata,
174176
Oid sortop,
175177
Datum *min, Datum *max);
178+
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
176179
static Selectivity prefix_selectivity(PlannerInfo *root,
177180
VariableStatData *vardata,
178181
Oid vartype, Oid opfamily, Const *prefixcon);
@@ -2008,24 +2011,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
20082011
VariableStatData vardata1;
20092012
VariableStatData vardata2;
20102013
bool join_is_reversed;
2014+
RelOptInfo *rel1;
2015+
RelOptInfo *rel2;
20112016

20122017
get_join_variables(root, args, sjinfo,
20132018
&vardata1, &vardata2, &join_is_reversed);
20142019

2020+
/*
2021+
* Identify the join's direct input relations. We use the min lefthand
2022+
* and min righthand as the inputs, even though the join might actually
2023+
* get done with larger input relations. The min inputs are guaranteed to
2024+
* have been formed by now, though, and always using them ensures
2025+
* consistency of estimates.
2026+
*/
2027+
if (!join_is_reversed)
2028+
{
2029+
rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
2030+
rel2 = find_join_input_rel(root, sjinfo->min_righthand);
2031+
}
2032+
else
2033+
{
2034+
rel1 = find_join_input_rel(root, sjinfo->min_righthand);
2035+
rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
2036+
}
2037+
20152038
switch (sjinfo->jointype)
20162039
{
20172040
case JOIN_INNER:
20182041
case JOIN_LEFT:
20192042
case JOIN_FULL:
2020-
selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2043+
selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
2044+
rel1, rel2);
20212045
break;
20222046
case JOIN_SEMI:
20232047
case JOIN_ANTI:
20242048
if (!join_is_reversed)
2025-
selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
2049+
selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2050+
rel1, rel2);
20262051
else
20272052
selec = eqjoinsel_semi(get_commutator(operator),
2028-
&vardata2, &vardata1);
2053+
&vardata2, &vardata1,
2054+
rel2, rel1);
20292055
break;
20302056
default:
20312057
/* other values not expected here */
@@ -2051,7 +2077,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
20512077
*/
20522078
static double
20532079
eqjoinsel_inner(Oid operator,
2054-
VariableStatData *vardata1, VariableStatData *vardata2)
2080+
VariableStatData *vardata1, VariableStatData *vardata2,
2081+
RelOptInfo *rel1, RelOptInfo *rel2)
20552082
{
20562083
double selec;
20572084
double nd1;
@@ -2252,15 +2279,19 @@ eqjoinsel_inner(Oid operator,
22522279
* be, providing a crude correction for the selectivity of restriction
22532280
* clauses on those relations. (We don't do that in the other path
22542281
* since there we are comparing the nd values to stats for the whole
2255-
* relations.)
2282+
* relations.) We can apply this clamp both with respect to the base
2283+
* relations from which the join variables come, and to the immediate
2284+
* input relations of the current join.
22562285
*/
22572286
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
22582287
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
22592288

22602289
if (vardata1->rel)
22612290
nd1 = Min(nd1, vardata1->rel->rows);
2291+
nd1 = Min(nd1, rel1->rows);
22622292
if (vardata2->rel)
22632293
nd2 = Min(nd2, vardata2->rel->rows);
2294+
nd2 = Min(nd2, rel2->rows);
22642295

22652296
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
22662297
if (nd1 > nd2)
@@ -2287,7 +2318,8 @@ eqjoinsel_inner(Oid operator,
22872318
*/
22882319
static double
22892320
eqjoinsel_semi(Oid operator,
2290-
VariableStatData *vardata1, VariableStatData *vardata2)
2321+
VariableStatData *vardata1, VariableStatData *vardata2,
2322+
RelOptInfo *rel1, RelOptInfo *rel2)
22912323
{
22922324
double selec;
22932325
double nd1;
@@ -2435,8 +2467,10 @@ eqjoinsel_semi(Oid operator,
24352467
{
24362468
if (vardata1->rel)
24372469
nd1 = Min(nd1, vardata1->rel->rows);
2470+
nd1 = Min(nd1, rel1->rows);
24382471
if (vardata2->rel)
24392472
nd2 = Min(nd2, vardata2->rel->rows);
2473+
nd2 = Min(nd2, rel2->rows);
24402474

24412475
if (nd1 <= nd2 || nd2 <= 0)
24422476
selec = 1.0 - nullfrac1;
@@ -4759,6 +4793,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47594793
return have_data;
47604794
}
47614795

4796+
/*
4797+
* find_join_input_rel
4798+
* Look up the input relation for a join.
4799+
*
4800+
* We assume that the input relation's RelOptInfo must have been constructed
4801+
* already.
4802+
*/
4803+
static RelOptInfo *
4804+
find_join_input_rel(PlannerInfo *root, Relids relids)
4805+
{
4806+
RelOptInfo *rel = NULL;
4807+
4808+
switch (bms_membership(relids))
4809+
{
4810+
case BMS_EMPTY_SET:
4811+
/* should not happen */
4812+
break;
4813+
case BMS_SINGLETON:
4814+
rel = find_base_rel(root, bms_singleton_member(relids));
4815+
break;
4816+
case BMS_MULTIPLE:
4817+
rel = find_join_rel(root, relids);
4818+
break;
4819+
}
4820+
4821+
if (rel == NULL)
4822+
elog(ERROR, "could not find RelOptInfo for given relids");
4823+
4824+
return rel;
4825+
}
4826+
47624827

47634828
/*-------------------------------------------------------------------------
47644829
*

0 commit comments

Comments
 (0)