Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ca4f70c

Browse files
committed
Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.
This patch fixes an oversight in my commit 7f3eba3 of 2008-10-23. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant.
1 parent edf4eda commit ca4f70c

File tree

1 file changed

+73
-8
lines changed

1 file changed

+73
-8
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
141141
FmgrInfo *opproc, bool isgt,
142142
Datum constval, Oid consttype);
143143
static double eqjoinsel_inner(Oid operator,
144-
VariableStatData *vardata1, VariableStatData *vardata2);
144+
VariableStatData *vardata1, VariableStatData *vardata2,
145+
RelOptInfo *rel1, RelOptInfo *rel2);
145146
static double eqjoinsel_semi(Oid operator,
146-
VariableStatData *vardata1, VariableStatData *vardata2);
147+
VariableStatData *vardata1, VariableStatData *vardata2,
148+
RelOptInfo *rel1, RelOptInfo *rel2);
147149
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
148150
Datum lobound, Datum hibound, Oid boundstypid,
149151
double *scaledlobound, double *scaledhibound);
@@ -172,6 +174,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
172174
VariableStatData *vardata,
173175
Oid sortop,
174176
Datum *min, Datum *max);
177+
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
175178
static Selectivity prefix_selectivity(PlannerInfo *root,
176179
VariableStatData *vardata,
177180
Oid vartype, Oid opfamily, Const *prefixcon);
@@ -2007,24 +2010,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
20072010
VariableStatData vardata1;
20082011
VariableStatData vardata2;
20092012
bool join_is_reversed;
2013+
RelOptInfo *rel1;
2014+
RelOptInfo *rel2;
20102015

20112016
get_join_variables(root, args, sjinfo,
20122017
&vardata1, &vardata2, &join_is_reversed);
20132018

2019+
/*
2020+
* Identify the join's direct input relations. We use the min lefthand
2021+
* and min righthand as the inputs, even though the join might actually
2022+
* get done with larger input relations. The min inputs are guaranteed to
2023+
* have been formed by now, though, and always using them ensures
2024+
* consistency of estimates.
2025+
*/
2026+
if (!join_is_reversed)
2027+
{
2028+
rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
2029+
rel2 = find_join_input_rel(root, sjinfo->min_righthand);
2030+
}
2031+
else
2032+
{
2033+
rel1 = find_join_input_rel(root, sjinfo->min_righthand);
2034+
rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
2035+
}
2036+
20142037
switch (sjinfo->jointype)
20152038
{
20162039
case JOIN_INNER:
20172040
case JOIN_LEFT:
20182041
case JOIN_FULL:
2019-
selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2042+
selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
2043+
rel1, rel2);
20202044
break;
20212045
case JOIN_SEMI:
20222046
case JOIN_ANTI:
20232047
if (!join_is_reversed)
2024-
selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
2048+
selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2049+
rel1, rel2);
20252050
else
20262051
selec = eqjoinsel_semi(get_commutator(operator),
2027-
&vardata2, &vardata1);
2052+
&vardata2, &vardata1,
2053+
rel2, rel1);
20282054
break;
20292055
default:
20302056
/* other values not expected here */
@@ -2050,7 +2076,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
20502076
*/
20512077
static double
20522078
eqjoinsel_inner(Oid operator,
2053-
VariableStatData *vardata1, VariableStatData *vardata2)
2079+
VariableStatData *vardata1, VariableStatData *vardata2,
2080+
RelOptInfo *rel1, RelOptInfo *rel2)
20542081
{
20552082
double selec;
20562083
double nd1;
@@ -2251,15 +2278,19 @@ eqjoinsel_inner(Oid operator,
22512278
* be, providing a crude correction for the selectivity of restriction
22522279
* clauses on those relations. (We don't do that in the other path
22532280
* since there we are comparing the nd values to stats for the whole
2254-
* relations.)
2281+
* relations.) We can apply this clamp both with respect to the base
2282+
* relations from which the join variables come, and to the immediate
2283+
* input relations of the current join.
22552284
*/
22562285
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
22572286
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
22582287

22592288
if (vardata1->rel)
22602289
nd1 = Min(nd1, vardata1->rel->rows);
2290+
nd1 = Min(nd1, rel1->rows);
22612291
if (vardata2->rel)
22622292
nd2 = Min(nd2, vardata2->rel->rows);
2293+
nd2 = Min(nd2, rel2->rows);
22632294

22642295
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
22652296
if (nd1 > nd2)
@@ -2286,7 +2317,8 @@ eqjoinsel_inner(Oid operator,
22862317
*/
22872318
static double
22882319
eqjoinsel_semi(Oid operator,
2289-
VariableStatData *vardata1, VariableStatData *vardata2)
2320+
VariableStatData *vardata1, VariableStatData *vardata2,
2321+
RelOptInfo *rel1, RelOptInfo *rel2)
22902322
{
22912323
double selec;
22922324
double nd1;
@@ -2434,8 +2466,10 @@ eqjoinsel_semi(Oid operator,
24342466
{
24352467
if (vardata1->rel)
24362468
nd1 = Min(nd1, vardata1->rel->rows);
2469+
nd1 = Min(nd1, rel1->rows);
24372470
if (vardata2->rel)
24382471
nd2 = Min(nd2, vardata2->rel->rows);
2472+
nd2 = Min(nd2, rel2->rows);
24392473

24402474
if (nd1 <= nd2 || nd2 <= 0)
24412475
selec = 1.0 - nullfrac1;
@@ -4758,6 +4792,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47584792
return have_data;
47594793
}
47604794

4795+
/*
4796+
* find_join_input_rel
4797+
* Look up the input relation for a join.
4798+
*
4799+
* We assume that the input relation's RelOptInfo must have been constructed
4800+
* already.
4801+
*/
4802+
static RelOptInfo *
4803+
find_join_input_rel(PlannerInfo *root, Relids relids)
4804+
{
4805+
RelOptInfo *rel = NULL;
4806+
4807+
switch (bms_membership(relids))
4808+
{
4809+
case BMS_EMPTY_SET:
4810+
/* should not happen */
4811+
break;
4812+
case BMS_SINGLETON:
4813+
rel = find_base_rel(root, bms_singleton_member(relids));
4814+
break;
4815+
case BMS_MULTIPLE:
4816+
rel = find_join_rel(root, relids);
4817+
break;
4818+
}
4819+
4820+
if (rel == NULL)
4821+
elog(ERROR, "could not find RelOptInfo for given relids");
4822+
4823+
return rel;
4824+
}
4825+
47614826

47624827
/*-------------------------------------------------------------------------
47634828
*

0 commit comments

Comments
 (0)