Thanks to visit codestin.com
Credit goes to doxygen.postgresql.org

PostgreSQL Source Code git master
selfuncs.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * selfuncs.c
4 * Selectivity functions and index cost estimation functions for
5 * standard operators and index access methods.
6 *
7 * Selectivity routines are registered in the pg_operator catalog
8 * in the "oprrest" and "oprjoin" attributes.
9 *
10 * Index cost functions are located via the index AM's API struct,
11 * which is obtained from the handler function registered in pg_am.
12 *
13 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
14 * Portions Copyright (c) 1994, Regents of the University of California
15 *
16 *
17 * IDENTIFICATION
18 * src/backend/utils/adt/selfuncs.c
19 *
20 *-------------------------------------------------------------------------
21 */
22
23/*----------
24 * Operator selectivity estimation functions are called to estimate the
25 * selectivity of WHERE clauses whose top-level operator is their operator.
26 * We divide the problem into two cases:
27 * Restriction clause estimation: the clause involves vars of just
28 * one relation.
29 * Join clause estimation: the clause involves vars of multiple rels.
30 * Join selectivity estimation is far more difficult and usually less accurate
31 * than restriction estimation.
32 *
33 * When dealing with the inner scan of a nestloop join, we consider the
34 * join's joinclauses as restriction clauses for the inner relation, and
35 * treat vars of the outer relation as parameters (a/k/a constants of unknown
36 * values). So, restriction estimators need to be able to accept an argument
37 * telling which relation is to be treated as the variable.
38 *
39 * The call convention for a restriction estimator (oprrest function) is
40 *
41 * Selectivity oprrest (PlannerInfo *root,
42 * Oid operator,
43 * List *args,
44 * int varRelid);
45 *
46 * root: general information about the query (rtable and RelOptInfo lists
47 * are particularly important for the estimator).
48 * operator: OID of the specific operator in question.
49 * args: argument list from the operator clause.
50 * varRelid: if not zero, the relid (rtable index) of the relation to
51 * be treated as the variable relation. May be zero if the args list
52 * is known to contain vars of only one relation.
53 *
54 * This is represented at the SQL level (in pg_proc) as
55 *
56 * float8 oprrest (internal, oid, internal, int4);
57 *
58 * The result is a selectivity, that is, a fraction (0 to 1) of the rows
59 * of the relation that are expected to produce a TRUE result for the
60 * given operator.
61 *
62 * The call convention for a join estimator (oprjoin function) is similar
63 * except that varRelid is not needed, and instead join information is
64 * supplied:
65 *
66 * Selectivity oprjoin (PlannerInfo *root,
67 * Oid operator,
68 * List *args,
69 * JoinType jointype,
70 * SpecialJoinInfo *sjinfo);
71 *
72 * float8 oprjoin (internal, oid, internal, int2, internal);
73 *
74 * (Before Postgres 8.4, join estimators had only the first four of these
75 * parameters. That signature is still allowed, but deprecated.) The
76 * relationship between jointype and sjinfo is explained in the comments for
77 * clause_selectivity() --- the short version is that jointype is usually
78 * best ignored in favor of examining sjinfo.
79 *
80 * Join selectivity for regular inner and outer joins is defined as the
81 * fraction (0 to 1) of the cross product of the relations that is expected
82 * to produce a TRUE result for the given operator. For both semi and anti
83 * joins, however, the selectivity is defined as the fraction of the left-hand
84 * side relation's rows that are expected to have a match (ie, at least one
85 * row with a TRUE result) in the right-hand side.
86 *
87 * For both oprrest and oprjoin functions, the operator's input collation OID
88 * (if any) is passed using the standard fmgr mechanism, so that the estimator
89 * function can fetch it with PG_GET_COLLATION(). Note, however, that all
90 * statistics in pg_statistic are currently built using the relevant column's
91 * collation.
92 *----------
93 */
94
95#include "postgres.h"
96
97#include <ctype.h>
98#include <math.h>
99
100#include "access/brin.h"
101#include "access/brin_page.h"
102#include "access/gin.h"
103#include "access/table.h"
104#include "access/tableam.h"
105#include "access/visibilitymap.h"
106#include "catalog/pg_collation.h"
107#include "catalog/pg_operator.h"
108#include "catalog/pg_statistic.h"
110#include "executor/nodeAgg.h"
111#include "miscadmin.h"
112#include "nodes/makefuncs.h"
113#include "nodes/nodeFuncs.h"
114#include "optimizer/clauses.h"
115#include "optimizer/cost.h"
116#include "optimizer/optimizer.h"
117#include "optimizer/pathnode.h"
118#include "optimizer/paths.h"
119#include "optimizer/plancat.h"
120#include "parser/parse_clause.h"
122#include "parser/parsetree.h"
123#include "rewrite/rewriteManip.h"
125#include "storage/bufmgr.h"
126#include "utils/acl.h"
127#include "utils/array.h"
128#include "utils/builtins.h"
129#include "utils/date.h"
130#include "utils/datum.h"
131#include "utils/fmgroids.h"
132#include "utils/index_selfuncs.h"
133#include "utils/lsyscache.h"
134#include "utils/memutils.h"
135#include "utils/pg_locale.h"
136#include "utils/rel.h"
137#include "utils/selfuncs.h"
138#include "utils/snapmgr.h"
139#include "utils/spccache.h"
140#include "utils/syscache.h"
141#include "utils/timestamp.h"
142#include "utils/typcache.h"
143
144#define DEFAULT_PAGE_CPU_MULTIPLIER 50.0
145
146/* Hooks for plugins to get control when we ask for stats */
149
150static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
151static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
152 VariableStatData *vardata1, VariableStatData *vardata2,
153 double nd1, double nd2,
154 bool isdefault1, bool isdefault2,
155 AttStatsSlot *sslot1, AttStatsSlot *sslot2,
157 bool have_mcvs1, bool have_mcvs2);
158static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
159 VariableStatData *vardata1, VariableStatData *vardata2,
160 double nd1, double nd2,
161 bool isdefault1, bool isdefault2,
162 AttStatsSlot *sslot1, AttStatsSlot *sslot2,
164 bool have_mcvs1, bool have_mcvs2,
165 RelOptInfo *inner_rel);
167 RelOptInfo *rel, List **varinfos, double *ndistinct);
168static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
169 double *scaledvalue,
170 Datum lobound, Datum hibound, Oid boundstypid,
171 double *scaledlobound, double *scaledhibound);
172static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
173static void convert_string_to_scalar(char *value,
174 double *scaledvalue,
175 char *lobound,
176 double *scaledlobound,
177 char *hibound,
178 double *scaledhibound);
180 double *scaledvalue,
181 Datum lobound,
182 double *scaledlobound,
183 Datum hibound,
184 double *scaledhibound);
185static double convert_one_string_to_scalar(char *value,
186 int rangelo, int rangehi);
187static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
188 int rangelo, int rangehi);
189static char *convert_string_datum(Datum value, Oid typid, Oid collid,
190 bool *failure);
191static double convert_timevalue_to_scalar(Datum value, Oid typid,
192 bool *failure);
194 VariableStatData *vardata);
196 int indexcol, VariableStatData *vardata);
198 Oid sortop, Oid collation,
199 Datum *min, Datum *max);
200static void get_stats_slot_range(AttStatsSlot *sslot,
201 Oid opfuncoid, FmgrInfo *opproc,
202 Oid collation, int16 typLen, bool typByVal,
203 Datum *min, Datum *max, bool *p_have_data);
205 VariableStatData *vardata,
206 Oid sortop, Oid collation,
207 Datum *min, Datum *max);
208static bool get_actual_variable_endpoint(Relation heapRel,
209 Relation indexRel,
210 ScanDirection indexscandir,
211 ScanKey scankeys,
212 int16 typLen,
213 bool typByVal,
214 TupleTableSlot *tableslot,
215 MemoryContext outercontext,
216 Datum *endpointDatum);
219 VariableStatData *vardata);
220
221
222/*
223 * eqsel - Selectivity of "=" for any data types.
224 *
225 * Note: this routine is also used to estimate selectivity for some
226 * operators that are not "=" but have comparable selectivity behavior,
227 * such as "~=" (geometric approximate-match). Even for "=", we must
228 * keep in mind that the left and right datatypes may differ.
229 */
230Datum
232{
233 PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
234}
235
236/*
237 * Common code for eqsel() and neqsel()
238 */
239static double
241{
243 Oid operator = PG_GETARG_OID(1);
245 int varRelid = PG_GETARG_INT32(3);
246 Oid collation = PG_GET_COLLATION();
247 VariableStatData vardata;
248 Node *other;
249 bool varonleft;
250 double selec;
251
252 /*
253 * When asked about <>, we do the estimation using the corresponding =
254 * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
255 */
256 if (negate)
257 {
258 operator = get_negator(operator);
259 if (!OidIsValid(operator))
260 {
261 /* Use default selectivity (should we raise an error instead?) */
262 return 1.0 - DEFAULT_EQ_SEL;
263 }
264 }
265
266 /*
267 * If expression is not variable = something or something = variable, then
268 * punt and return a default estimate.
269 */
270 if (!get_restriction_variable(root, args, varRelid,
271 &vardata, &other, &varonleft))
272 return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
273
274 /*
275 * We can do a lot better if the something is a constant. (Note: the
276 * Const might result from estimation rather than being a simple constant
277 * in the query.)
278 */
279 if (IsA(other, Const))
280 selec = var_eq_const(&vardata, operator, collation,
281 ((Const *) other)->constvalue,
282 ((Const *) other)->constisnull,
283 varonleft, negate);
284 else
285 selec = var_eq_non_const(&vardata, operator, collation, other,
286 varonleft, negate);
287
288 ReleaseVariableStats(vardata);
289
290 return selec;
291}
292
293/*
294 * var_eq_const --- eqsel for var = const case
295 *
296 * This is exported so that some other estimation functions can use it.
297 */
298double
299var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
300 Datum constval, bool constisnull,
301 bool varonleft, bool negate)
302{
303 double selec;
304 double nullfrac = 0.0;
305 bool isdefault;
306 Oid opfuncoid;
307
308 /*
309 * If the constant is NULL, assume operator is strict and return zero, ie,
310 * operator will never return TRUE. (It's zero even for a negator op.)
311 */
312 if (constisnull)
313 return 0.0;
314
315 /*
316 * Grab the nullfrac for use below. Note we allow use of nullfrac
317 * regardless of security check.
318 */
319 if (HeapTupleIsValid(vardata->statsTuple))
320 {
321 Form_pg_statistic stats;
322
323 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
324 nullfrac = stats->stanullfrac;
325 }
326
327 /*
328 * If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
329 * assume there is exactly one match regardless of anything else. (This
330 * is slightly bogus, since the index or clause's equality operator might
331 * be different from ours, but it's much more likely to be right than
332 * ignoring the information.)
333 */
334 if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
335 {
336 selec = 1.0 / vardata->rel->tuples;
337 }
338 else if (HeapTupleIsValid(vardata->statsTuple) &&
340 (opfuncoid = get_opcode(oproid))))
341 {
342 AttStatsSlot sslot;
343 bool match = false;
344 int i;
345
346 /*
347 * Is the constant "=" to any of the column's most common values?
348 * (Although the given operator may not really be "=", we will assume
349 * that seeing whether it returns TRUE is an appropriate test. If you
350 * don't like this, maybe you shouldn't be using eqsel for your
351 * operator...)
352 */
353 if (get_attstatsslot(&sslot, vardata->statsTuple,
354 STATISTIC_KIND_MCV, InvalidOid,
356 {
357 LOCAL_FCINFO(fcinfo, 2);
358 FmgrInfo eqproc;
359
360 fmgr_info(opfuncoid, &eqproc);
361
362 /*
363 * Save a few cycles by setting up the fcinfo struct just once.
364 * Using FunctionCallInvoke directly also avoids failure if the
365 * eqproc returns NULL, though really equality functions should
366 * never do that.
367 */
368 InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
369 NULL, NULL);
370 fcinfo->args[0].isnull = false;
371 fcinfo->args[1].isnull = false;
372 /* be careful to apply operator right way 'round */
373 if (varonleft)
374 fcinfo->args[1].value = constval;
375 else
376 fcinfo->args[0].value = constval;
377
378 for (i = 0; i < sslot.nvalues; i++)
379 {
380 Datum fresult;
381
382 if (varonleft)
383 fcinfo->args[0].value = sslot.values[i];
384 else
385 fcinfo->args[1].value = sslot.values[i];
386 fcinfo->isnull = false;
387 fresult = FunctionCallInvoke(fcinfo);
388 if (!fcinfo->isnull && DatumGetBool(fresult))
389 {
390 match = true;
391 break;
392 }
393 }
394 }
395 else
396 {
397 /* no most-common-value info available */
398 i = 0; /* keep compiler quiet */
399 }
400
401 if (match)
402 {
403 /*
404 * Constant is "=" to this common value. We know selectivity
405 * exactly (or as exactly as ANALYZE could calculate it, anyway).
406 */
407 selec = sslot.numbers[i];
408 }
409 else
410 {
411 /*
412 * Comparison is against a constant that is neither NULL nor any
413 * of the common values. Its selectivity cannot be more than
414 * this:
415 */
416 double sumcommon = 0.0;
417 double otherdistinct;
418
419 for (i = 0; i < sslot.nnumbers; i++)
420 sumcommon += sslot.numbers[i];
421 selec = 1.0 - sumcommon - nullfrac;
422 CLAMP_PROBABILITY(selec);
423
424 /*
425 * and in fact it's probably a good deal less. We approximate that
426 * all the not-common values share this remaining fraction
427 * equally, so we divide by the number of other distinct values.
428 */
429 otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
430 sslot.nnumbers;
431 if (otherdistinct > 1)
432 selec /= otherdistinct;
433
434 /*
435 * Another cross-check: selectivity shouldn't be estimated as more
436 * than the least common "most common value".
437 */
438 if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
439 selec = sslot.numbers[sslot.nnumbers - 1];
440 }
441
442 free_attstatsslot(&sslot);
443 }
444 else
445 {
446 /*
447 * No ANALYZE stats available, so make a guess using estimated number
448 * of distinct values and assuming they are equally common. (The guess
449 * is unlikely to be very good, but we do know a few special cases.)
450 */
451 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
452 }
453
454 /* now adjust if we wanted <> rather than = */
455 if (negate)
456 selec = 1.0 - selec - nullfrac;
457
458 /* result should be in range, but make sure... */
459 CLAMP_PROBABILITY(selec);
460
461 return selec;
462}
463
464/*
465 * var_eq_non_const --- eqsel for var = something-other-than-const case
466 *
467 * This is exported so that some other estimation functions can use it.
468 */
469double
470var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation,
471 Node *other,
472 bool varonleft, bool negate)
473{
474 double selec;
475 double nullfrac = 0.0;
476 bool isdefault;
477
478 /*
479 * Grab the nullfrac for use below.
480 */
481 if (HeapTupleIsValid(vardata->statsTuple))
482 {
483 Form_pg_statistic stats;
484
485 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
486 nullfrac = stats->stanullfrac;
487 }
488
489 /*
490 * If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
491 * assume there is exactly one match regardless of anything else. (This
492 * is slightly bogus, since the index or clause's equality operator might
493 * be different from ours, but it's much more likely to be right than
494 * ignoring the information.)
495 */
496 if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
497 {
498 selec = 1.0 / vardata->rel->tuples;
499 }
500 else if (HeapTupleIsValid(vardata->statsTuple))
501 {
502 double ndistinct;
503 AttStatsSlot sslot;
504
505 /*
506 * Search is for a value that we do not know a priori, but we will
507 * assume it is not NULL. Estimate the selectivity as non-null
508 * fraction divided by number of distinct values, so that we get a
509 * result averaged over all possible values whether common or
510 * uncommon. (Essentially, we are assuming that the not-yet-known
511 * comparison value is equally likely to be any of the possible
512 * values, regardless of their frequency in the table. Is that a good
513 * idea?)
514 */
515 selec = 1.0 - nullfrac;
516 ndistinct = get_variable_numdistinct(vardata, &isdefault);
517 if (ndistinct > 1)
518 selec /= ndistinct;
519
520 /*
521 * Cross-check: selectivity should never be estimated as more than the
522 * most common value's.
523 */
524 if (get_attstatsslot(&sslot, vardata->statsTuple,
525 STATISTIC_KIND_MCV, InvalidOid,
527 {
528 if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
529 selec = sslot.numbers[0];
530 free_attstatsslot(&sslot);
531 }
532 }
533 else
534 {
535 /*
536 * No ANALYZE stats available, so make a guess using estimated number
537 * of distinct values and assuming they are equally common. (The guess
538 * is unlikely to be very good, but we do know a few special cases.)
539 */
540 selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
541 }
542
543 /* now adjust if we wanted <> rather than = */
544 if (negate)
545 selec = 1.0 - selec - nullfrac;
546
547 /* result should be in range, but make sure... */
548 CLAMP_PROBABILITY(selec);
549
550 return selec;
551}
552
553/*
554 * neqsel - Selectivity of "!=" for any data types.
555 *
556 * This routine is also used for some operators that are not "!="
557 * but have comparable selectivity behavior. See above comments
558 * for eqsel().
559 */
560Datum
562{
563 PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
564}
565
566/*
567 * scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars.
568 *
569 * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
570 * The isgt and iseq flags distinguish which of the four cases apply.
571 *
572 * The caller has commuted the clause, if necessary, so that we can treat
573 * the variable as being on the left. The caller must also make sure that
574 * the other side of the clause is a non-null Const, and dissect that into
575 * a value and datatype. (This definition simplifies some callers that
576 * want to estimate against a computed value instead of a Const node.)
577 *
578 * This routine works for any datatype (or pair of datatypes) known to
579 * convert_to_scalar(). If it is applied to some other datatype,
580 * it will return an approximate estimate based on assuming that the constant
581 * value falls in the middle of the bin identified by binary search.
582 */
583static double
584scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
585 Oid collation,
586 VariableStatData *vardata, Datum constval, Oid consttype)
587{
588 Form_pg_statistic stats;
589 FmgrInfo opproc;
590 double mcv_selec,
591 hist_selec,
592 sumcommon;
593 double selec;
594
595 if (!HeapTupleIsValid(vardata->statsTuple))
596 {
597 /*
598 * No stats are available. Typically this means we have to fall back
599 * on the default estimate; but if the variable is CTID then we can
600 * make an estimate based on comparing the constant to the table size.
601 */
602 if (vardata->var && IsA(vardata->var, Var) &&
603 ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
604 {
605 ItemPointer itemptr;
606 double block;
607 double density;
608
609 /*
610 * If the relation's empty, we're going to include all of it.
611 * (This is mostly to avoid divide-by-zero below.)
612 */
613 if (vardata->rel->pages == 0)
614 return 1.0;
615
616 itemptr = (ItemPointer) DatumGetPointer(constval);
617 block = ItemPointerGetBlockNumberNoCheck(itemptr);
618
619 /*
620 * Determine the average number of tuples per page (density).
621 *
622 * Since the last page will, on average, be only half full, we can
623 * estimate it to have half as many tuples as earlier pages. So
624 * give it half the weight of a regular page.
625 */
626 density = vardata->rel->tuples / (vardata->rel->pages - 0.5);
627
628 /* If target is the last page, use half the density. */
629 if (block >= vardata->rel->pages - 1)
630 density *= 0.5;
631
632 /*
633 * Using the average tuples per page, calculate how far into the
634 * page the itemptr is likely to be and adjust block accordingly,
635 * by adding that fraction of a whole block (but never more than a
636 * whole block, no matter how high the itemptr's offset is). Here
637 * we are ignoring the possibility of dead-tuple line pointers,
638 * which is fairly bogus, but we lack the info to do better.
639 */
640 if (density > 0.0)
641 {
643
644 block += Min(offset / density, 1.0);
645 }
646
647 /*
648 * Convert relative block number to selectivity. Again, the last
649 * page has only half weight.
650 */
651 selec = block / (vardata->rel->pages - 0.5);
652
653 /*
654 * The calculation so far gave us a selectivity for the "<=" case.
655 * We'll have one fewer tuple for "<" and one additional tuple for
656 * ">=", the latter of which we'll reverse the selectivity for
657 * below, so we can simply subtract one tuple for both cases. The
658 * cases that need this adjustment can be identified by iseq being
659 * equal to isgt.
660 */
661 if (iseq == isgt && vardata->rel->tuples >= 1.0)
662 selec -= (1.0 / vardata->rel->tuples);
663
664 /* Finally, reverse the selectivity for the ">", ">=" cases. */
665 if (isgt)
666 selec = 1.0 - selec;
667
668 CLAMP_PROBABILITY(selec);
669 return selec;
670 }
671
672 /* no stats available, so default result */
673 return DEFAULT_INEQ_SEL;
674 }
675 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
676
677 fmgr_info(get_opcode(operator), &opproc);
678
679 /*
680 * If we have most-common-values info, add up the fractions of the MCV
681 * entries that satisfy MCV OP CONST. These fractions contribute directly
682 * to the result selectivity. Also add up the total fraction represented
683 * by MCV entries.
684 */
685 mcv_selec = mcv_selectivity(vardata, &opproc, collation, constval, true,
686 &sumcommon);
687
688 /*
689 * If there is a histogram, determine which bin the constant falls in, and
690 * compute the resulting contribution to selectivity.
691 */
692 hist_selec = ineq_histogram_selectivity(root, vardata,
693 operator, &opproc, isgt, iseq,
694 collation,
695 constval, consttype);
696
697 /*
698 * Now merge the results from the MCV and histogram calculations,
699 * realizing that the histogram covers only the non-null values that are
700 * not listed in MCV.
701 */
702 selec = 1.0 - stats->stanullfrac - sumcommon;
703
704 if (hist_selec >= 0.0)
705 selec *= hist_selec;
706 else
707 {
708 /*
709 * If no histogram but there are values not accounted for by MCV,
710 * arbitrarily assume half of them will match.
711 */
712 selec *= 0.5;
713 }
714
715 selec += mcv_selec;
716
717 /* result should be in range, but make sure... */
718 CLAMP_PROBABILITY(selec);
719
720 return selec;
721}
722
723/*
724 * mcv_selectivity - Examine the MCV list for selectivity estimates
725 *
726 * Determine the fraction of the variable's MCV population that satisfies
727 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. Also
728 * compute the fraction of the total column population represented by the MCV
729 * list. This code will work for any boolean-returning predicate operator.
730 *
731 * The function result is the MCV selectivity, and the fraction of the
732 * total population is returned into *sumcommonp. Zeroes are returned
733 * if there is no MCV list.
734 */
735double
736mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
737 Datum constval, bool varonleft,
738 double *sumcommonp)
739{
740 double mcv_selec,
741 sumcommon;
742 AttStatsSlot sslot;
743 int i;
744
745 mcv_selec = 0.0;
746 sumcommon = 0.0;
747
748 if (HeapTupleIsValid(vardata->statsTuple) &&
749 statistic_proc_security_check(vardata, opproc->fn_oid) &&
750 get_attstatsslot(&sslot, vardata->statsTuple,
751 STATISTIC_KIND_MCV, InvalidOid,
753 {
754 LOCAL_FCINFO(fcinfo, 2);
755
756 /*
757 * We invoke the opproc "by hand" so that we won't fail on NULL
758 * results. Such cases won't arise for normal comparison functions,
759 * but generic_restriction_selectivity could perhaps be used with
760 * operators that can return NULL. A small side benefit is to not
761 * need to re-initialize the fcinfo struct from scratch each time.
762 */
763 InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
764 NULL, NULL);
765 fcinfo->args[0].isnull = false;
766 fcinfo->args[1].isnull = false;
767 /* be careful to apply operator right way 'round */
768 if (varonleft)
769 fcinfo->args[1].value = constval;
770 else
771 fcinfo->args[0].value = constval;
772
773 for (i = 0; i < sslot.nvalues; i++)
774 {
775 Datum fresult;
776
777 if (varonleft)
778 fcinfo->args[0].value = sslot.values[i];
779 else
780 fcinfo->args[1].value = sslot.values[i];
781 fcinfo->isnull = false;
782 fresult = FunctionCallInvoke(fcinfo);
783 if (!fcinfo->isnull && DatumGetBool(fresult))
784 mcv_selec += sslot.numbers[i];
785 sumcommon += sslot.numbers[i];
786 }
787 free_attstatsslot(&sslot);
788 }
789
790 *sumcommonp = sumcommon;
791 return mcv_selec;
792}
793
794/*
795 * histogram_selectivity - Examine the histogram for selectivity estimates
796 *
797 * Determine the fraction of the variable's histogram entries that satisfy
798 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
799 *
800 * This code will work for any boolean-returning predicate operator, whether
801 * or not it has anything to do with the histogram sort operator. We are
802 * essentially using the histogram just as a representative sample. However,
803 * small histograms are unlikely to be all that representative, so the caller
804 * should be prepared to fall back on some other estimation approach when the
805 * histogram is missing or very small. It may also be prudent to combine this
806 * approach with another one when the histogram is small.
807 *
808 * If the actual histogram size is not at least min_hist_size, we won't bother
809 * to do the calculation at all. Also, if the n_skip parameter is > 0, we
810 * ignore the first and last n_skip histogram elements, on the grounds that
811 * they are outliers and hence not very representative. Typical values for
812 * these parameters are 10 and 1.
813 *
814 * The function result is the selectivity, or -1 if there is no histogram
815 * or it's smaller than min_hist_size.
816 *
817 * The output parameter *hist_size receives the actual histogram size,
818 * or zero if no histogram. Callers may use this number to decide how
819 * much faith to put in the function result.
820 *
821 * Note that the result disregards both the most-common-values (if any) and
822 * null entries. The caller is expected to combine this result with
823 * statistics for those portions of the column population. It may also be
824 * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
825 */
826double
828 FmgrInfo *opproc, Oid collation,
829 Datum constval, bool varonleft,
830 int min_hist_size, int n_skip,
831 int *hist_size)
832{
833 double result;
834 AttStatsSlot sslot;
835
836 /* check sanity of parameters */
837 Assert(n_skip >= 0);
838 Assert(min_hist_size > 2 * n_skip);
839
840 if (HeapTupleIsValid(vardata->statsTuple) &&
841 statistic_proc_security_check(vardata, opproc->fn_oid) &&
842 get_attstatsslot(&sslot, vardata->statsTuple,
843 STATISTIC_KIND_HISTOGRAM, InvalidOid,
845 {
846 *hist_size = sslot.nvalues;
847 if (sslot.nvalues >= min_hist_size)
848 {
849 LOCAL_FCINFO(fcinfo, 2);
850 int nmatch = 0;
851 int i;
852
853 /*
854 * We invoke the opproc "by hand" so that we won't fail on NULL
855 * results. Such cases won't arise for normal comparison
856 * functions, but generic_restriction_selectivity could perhaps be
857 * used with operators that can return NULL. A small side benefit
858 * is to not need to re-initialize the fcinfo struct from scratch
859 * each time.
860 */
861 InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
862 NULL, NULL);
863 fcinfo->args[0].isnull = false;
864 fcinfo->args[1].isnull = false;
865 /* be careful to apply operator right way 'round */
866 if (varonleft)
867 fcinfo->args[1].value = constval;
868 else
869 fcinfo->args[0].value = constval;
870
871 for (i = n_skip; i < sslot.nvalues - n_skip; i++)
872 {
873 Datum fresult;
874
875 if (varonleft)
876 fcinfo->args[0].value = sslot.values[i];
877 else
878 fcinfo->args[1].value = sslot.values[i];
879 fcinfo->isnull = false;
880 fresult = FunctionCallInvoke(fcinfo);
881 if (!fcinfo->isnull && DatumGetBool(fresult))
882 nmatch++;
883 }
884 result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
885 }
886 else
887 result = -1;
888 free_attstatsslot(&sslot);
889 }
890 else
891 {
892 *hist_size = 0;
893 result = -1;
894 }
895
896 return result;
897}
898
899/*
900 * generic_restriction_selectivity - Selectivity for almost anything
901 *
902 * This function estimates selectivity for operators that we don't have any
903 * special knowledge about, but are on data types that we collect standard
904 * MCV and/or histogram statistics for. (Additional assumptions are that
905 * the operator is strict and immutable, or at least stable.)
906 *
907 * If we have "VAR OP CONST" or "CONST OP VAR", selectivity is estimated by
908 * applying the operator to each element of the column's MCV and/or histogram
909 * stats, and merging the results using the assumption that the histogram is
910 * a reasonable random sample of the column's non-MCV population. Note that
911 * if the operator's semantics are related to the histogram ordering, this
912 * might not be such a great assumption; other functions such as
913 * scalarineqsel() are probably a better match in such cases.
914 *
915 * Otherwise, fall back to the default selectivity provided by the caller.
916 */
917double
919 List *args, int varRelid,
920 double default_selectivity)
921{
922 double selec;
923 VariableStatData vardata;
924 Node *other;
925 bool varonleft;
926
927 /*
928 * If expression is not variable OP something or something OP variable,
929 * then punt and return the default estimate.
930 */
931 if (!get_restriction_variable(root, args, varRelid,
932 &vardata, &other, &varonleft))
933 return default_selectivity;
934
935 /*
936 * If the something is a NULL constant, assume operator is strict and
937 * return zero, ie, operator will never return TRUE.
938 */
939 if (IsA(other, Const) &&
940 ((Const *) other)->constisnull)
941 {
942 ReleaseVariableStats(vardata);
943 return 0.0;
944 }
945
946 if (IsA(other, Const))
947 {
948 /* Variable is being compared to a known non-null constant */
949 Datum constval = ((Const *) other)->constvalue;
950 FmgrInfo opproc;
951 double mcvsum;
952 double mcvsel;
953 double nullfrac;
954 int hist_size;
955
956 fmgr_info(get_opcode(oproid), &opproc);
957
958 /*
959 * Calculate the selectivity for the column's most common values.
960 */
961 mcvsel = mcv_selectivity(&vardata, &opproc, collation,
962 constval, varonleft,
963 &mcvsum);
964
965 /*
966 * If the histogram is large enough, see what fraction of it matches
967 * the query, and assume that's representative of the non-MCV
968 * population. Otherwise use the default selectivity for the non-MCV
969 * population.
970 */
971 selec = histogram_selectivity(&vardata, &opproc, collation,
972 constval, varonleft,
973 10, 1, &hist_size);
974 if (selec < 0)
975 {
976 /* Nope, fall back on default */
977 selec = default_selectivity;
978 }
979 else if (hist_size < 100)
980 {
981 /*
982 * For histogram sizes from 10 to 100, we combine the histogram
983 * and default selectivities, putting increasingly more trust in
984 * the histogram for larger sizes.
985 */
986 double hist_weight = hist_size / 100.0;
987
988 selec = selec * hist_weight +
989 default_selectivity * (1.0 - hist_weight);
990 }
991
992 /* In any case, don't believe extremely small or large estimates. */
993 if (selec < 0.0001)
994 selec = 0.0001;
995 else if (selec > 0.9999)
996 selec = 0.9999;
997
998 /* Don't forget to account for nulls. */
999 if (HeapTupleIsValid(vardata.statsTuple))
1000 nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
1001 else
1002 nullfrac = 0.0;
1003
1004 /*
1005 * Now merge the results from the MCV and histogram calculations,
1006 * realizing that the histogram covers only the non-null values that
1007 * are not listed in MCV.
1008 */
1009 selec *= 1.0 - nullfrac - mcvsum;
1010 selec += mcvsel;
1011 }
1012 else
1013 {
1014 /* Comparison value is not constant, so we can't do anything */
1015 selec = default_selectivity;
1016 }
1017
1018 ReleaseVariableStats(vardata);
1019
1020 /* result should be in range, but make sure... */
1021 CLAMP_PROBABILITY(selec);
1022
1023 return selec;
1024}
1025
1026/*
1027 * ineq_histogram_selectivity - Examine the histogram for scalarineqsel
1028 *
1029 * Determine the fraction of the variable's histogram population that
1030 * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
1031 * The isgt and iseq flags distinguish which of the four cases apply.
1032 *
1033 * While opproc could be looked up from the operator OID, common callers
1034 * also need to call it separately, so we make the caller pass both.
1035 *
1036 * Returns -1 if there is no histogram (valid results will always be >= 0).
1037 *
1038 * Note that the result disregards both the most-common-values (if any) and
1039 * null entries. The caller is expected to combine this result with
1040 * statistics for those portions of the column population.
1041 *
1042 * This is exported so that some other estimation functions can use it.
1043 */
1044double
1046 VariableStatData *vardata,
1047 Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq,
1048 Oid collation,
1049 Datum constval, Oid consttype)
1050{
1051 double hist_selec;
1052 AttStatsSlot sslot;
1053
1054 hist_selec = -1.0;
1055
1056 /*
1057 * Someday, ANALYZE might store more than one histogram per rel/att,
1058 * corresponding to more than one possible sort ordering defined for the
1059 * column type. Right now, we know there is only one, so just grab it and
1060 * see if it matches the query.
1061 *
1062 * Note that we can't use opoid as search argument; the staop appearing in
1063 * pg_statistic will be for the relevant '<' operator, but what we have
1064 * might be some other inequality operator such as '>='. (Even if opoid
1065 * is a '<' operator, it could be cross-type.) Hence we must use
1066 * comparison_ops_are_compatible() to see if the operators match.
1067 */
1068 if (HeapTupleIsValid(vardata->statsTuple) &&
1069 statistic_proc_security_check(vardata, opproc->fn_oid) &&
1070 get_attstatsslot(&sslot, vardata->statsTuple,
1071 STATISTIC_KIND_HISTOGRAM, InvalidOid,
1073 {
1074 if (sslot.nvalues > 1 &&
1075 sslot.stacoll == collation &&
1077 {
1078 /*
1079 * Use binary search to find the desired location, namely the
1080 * right end of the histogram bin containing the comparison value,
1081 * which is the leftmost entry for which the comparison operator
1082 * succeeds (if isgt) or fails (if !isgt).
1083 *
1084 * In this loop, we pay no attention to whether the operator iseq
1085 * or not; that detail will be mopped up below. (We cannot tell,
1086 * anyway, whether the operator thinks the values are equal.)
1087 *
1088 * If the binary search accesses the first or last histogram
1089 * entry, we try to replace that endpoint with the true column min
1090 * or max as found by get_actual_variable_range(). This
1091 * ameliorates misestimates when the min or max is moving as a
1092 * result of changes since the last ANALYZE. Note that this could
1093 * result in effectively including MCVs into the histogram that
1094 * weren't there before, but we don't try to correct for that.
1095 */
1096 double histfrac;
1097 int lobound = 0; /* first possible slot to search */
1098 int hibound = sslot.nvalues; /* last+1 slot to search */
1099 bool have_end = false;
1100
1101 /*
1102 * If there are only two histogram entries, we'll want up-to-date
1103 * values for both. (If there are more than two, we need at most
1104 * one of them to be updated, so we deal with that within the
1105 * loop.)
1106 */
1107 if (sslot.nvalues == 2)
1109 vardata,
1110 sslot.staop,
1111 collation,
1112 &sslot.values[0],
1113 &sslot.values[1]);
1114
1115 while (lobound < hibound)
1116 {
1117 int probe = (lobound + hibound) / 2;
1118 bool ltcmp;
1119
1120 /*
1121 * If we find ourselves about to compare to the first or last
1122 * histogram entry, first try to replace it with the actual
1123 * current min or max (unless we already did so above).
1124 */
1125 if (probe == 0 && sslot.nvalues > 2)
1127 vardata,
1128 sslot.staop,
1129 collation,
1130 &sslot.values[0],
1131 NULL);
1132 else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
1134 vardata,
1135 sslot.staop,
1136 collation,
1137 NULL,
1138 &sslot.values[probe]);
1139
1140 ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
1141 collation,
1142 sslot.values[probe],
1143 constval));
1144 if (isgt)
1145 ltcmp = !ltcmp;
1146 if (ltcmp)
1147 lobound = probe + 1;
1148 else
1149 hibound = probe;
1150 }
1151
1152 if (lobound <= 0)
1153 {
1154 /*
1155 * Constant is below lower histogram boundary. More
1156 * precisely, we have found that no entry in the histogram
1157 * satisfies the inequality clause (if !isgt) or they all do
1158 * (if isgt). We estimate that that's true of the entire
1159 * table, so set histfrac to 0.0 (which we'll flip to 1.0
1160 * below, if isgt).
1161 */
1162 histfrac = 0.0;
1163 }
1164 else if (lobound >= sslot.nvalues)
1165 {
1166 /*
1167 * Inverse case: constant is above upper histogram boundary.
1168 */
1169 histfrac = 1.0;
1170 }
1171 else
1172 {
1173 /* We have values[i-1] <= constant <= values[i]. */
1174 int i = lobound;
1175 double eq_selec = 0;
1176 double val,
1177 high,
1178 low;
1179 double binfrac;
1180
1181 /*
1182 * In the cases where we'll need it below, obtain an estimate
1183 * of the selectivity of "x = constval". We use a calculation
1184 * similar to what var_eq_const() does for a non-MCV constant,
1185 * ie, estimate that all distinct non-MCV values occur equally
1186 * often. But multiplication by "1.0 - sumcommon - nullfrac"
1187 * will be done by our caller, so we shouldn't do that here.
1188 * Therefore we can't try to clamp the estimate by reference
1189 * to the least common MCV; the result would be too small.
1190 *
1191 * Note: since this is effectively assuming that constval
1192 * isn't an MCV, it's logically dubious if constval in fact is
1193 * one. But we have to apply *some* correction for equality,
1194 * and anyway we cannot tell if constval is an MCV, since we
1195 * don't have a suitable equality operator at hand.
1196 */
1197 if (i == 1 || isgt == iseq)
1198 {
1199 double otherdistinct;
1200 bool isdefault;
1201 AttStatsSlot mcvslot;
1202
1203 /* Get estimated number of distinct values */
1204 otherdistinct = get_variable_numdistinct(vardata,
1205 &isdefault);
1206
1207 /* Subtract off the number of known MCVs */
1208 if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1209 STATISTIC_KIND_MCV, InvalidOid,
1211 {
1212 otherdistinct -= mcvslot.nnumbers;
1213 free_attstatsslot(&mcvslot);
1214 }
1215
1216 /* If result doesn't seem sane, leave eq_selec at 0 */
1217 if (otherdistinct > 1)
1218 eq_selec = 1.0 / otherdistinct;
1219 }
1220
1221 /*
1222 * Convert the constant and the two nearest bin boundary
1223 * values to a uniform comparison scale, and do a linear
1224 * interpolation within this bin.
1225 */
1226 if (convert_to_scalar(constval, consttype, collation,
1227 &val,
1228 sslot.values[i - 1], sslot.values[i],
1229 vardata->vartype,
1230 &low, &high))
1231 {
1232 if (high <= low)
1233 {
1234 /* cope if bin boundaries appear identical */
1235 binfrac = 0.5;
1236 }
1237 else if (val <= low)
1238 binfrac = 0.0;
1239 else if (val >= high)
1240 binfrac = 1.0;
1241 else
1242 {
1243 binfrac = (val - low) / (high - low);
1244
1245 /*
1246 * Watch out for the possibility that we got a NaN or
1247 * Infinity from the division. This can happen
1248 * despite the previous checks, if for example "low"
1249 * is -Infinity.
1250 */
1251 if (isnan(binfrac) ||
1252 binfrac < 0.0 || binfrac > 1.0)
1253 binfrac = 0.5;
1254 }
1255 }
1256 else
1257 {
1258 /*
1259 * Ideally we'd produce an error here, on the grounds that
1260 * the given operator shouldn't have scalarXXsel
1261 * registered as its selectivity func unless we can deal
1262 * with its operand types. But currently, all manner of
1263 * stuff is invoking scalarXXsel, so give a default
1264 * estimate until that can be fixed.
1265 */
1266 binfrac = 0.5;
1267 }
1268
1269 /*
1270 * Now, compute the overall selectivity across the values
1271 * represented by the histogram. We have i-1 full bins and
1272 * binfrac partial bin below the constant.
1273 */
1274 histfrac = (double) (i - 1) + binfrac;
1275 histfrac /= (double) (sslot.nvalues - 1);
1276
1277 /*
1278 * At this point, histfrac is an estimate of the fraction of
1279 * the population represented by the histogram that satisfies
1280 * "x <= constval". Somewhat remarkably, this statement is
1281 * true regardless of which operator we were doing the probes
1282 * with, so long as convert_to_scalar() delivers reasonable
1283 * results. If the probe constant is equal to some histogram
1284 * entry, we would have considered the bin to the left of that
1285 * entry if probing with "<" or ">=", or the bin to the right
1286 * if probing with "<=" or ">"; but binfrac would have come
1287 * out as 1.0 in the first case and 0.0 in the second, leading
1288 * to the same histfrac in either case. For probe constants
1289 * between histogram entries, we find the same bin and get the
1290 * same estimate with any operator.
1291 *
1292 * The fact that the estimate corresponds to "x <= constval"
1293 * and not "x < constval" is because of the way that ANALYZE
1294 * constructs the histogram: each entry is, effectively, the
1295 * rightmost value in its sample bucket. So selectivity
1296 * values that are exact multiples of 1/(histogram_size-1)
1297 * should be understood as estimates including a histogram
1298 * entry plus everything to its left.
1299 *
1300 * However, that breaks down for the first histogram entry,
1301 * which necessarily is the leftmost value in its sample
1302 * bucket. That means the first histogram bin is slightly
1303 * narrower than the rest, by an amount equal to eq_selec.
1304 * Another way to say that is that we want "x <= leftmost" to
1305 * be estimated as eq_selec not zero. So, if we're dealing
1306 * with the first bin (i==1), rescale to make that true while
1307 * adjusting the rest of that bin linearly.
1308 */
1309 if (i == 1)
1310 histfrac += eq_selec * (1.0 - binfrac);
1311
1312 /*
1313 * "x <= constval" is good if we want an estimate for "<=" or
1314 * ">", but if we are estimating for "<" or ">=", we now need
1315 * to decrease the estimate by eq_selec.
1316 */
1317 if (isgt == iseq)
1318 histfrac -= eq_selec;
1319 }
1320
1321 /*
1322 * Now the estimate is finished for "<" and "<=" cases. If we are
1323 * estimating for ">" or ">=", flip it.
1324 */
1325 hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1326
1327 /*
1328 * The histogram boundaries are only approximate to begin with,
1329 * and may well be out of date anyway. Therefore, don't believe
1330 * extremely small or large selectivity estimates --- unless we
1331 * got actual current endpoint values from the table, in which
1332 * case just do the usual sanity clamp. Somewhat arbitrarily, we
1333 * set the cutoff for other cases at a hundredth of the histogram
1334 * resolution.
1335 */
1336 if (have_end)
1337 CLAMP_PROBABILITY(hist_selec);
1338 else
1339 {
1340 double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1341
1342 if (hist_selec < cutoff)
1343 hist_selec = cutoff;
1344 else if (hist_selec > 1.0 - cutoff)
1345 hist_selec = 1.0 - cutoff;
1346 }
1347 }
1348 else if (sslot.nvalues > 1)
1349 {
1350 /*
1351 * If we get here, we have a histogram but it's not sorted the way
1352 * we want. Do a brute-force search to see how many of the
1353 * entries satisfy the comparison condition, and take that
1354 * fraction as our estimate. (This is identical to the inner loop
1355 * of histogram_selectivity; maybe share code?)
1356 */
1357 LOCAL_FCINFO(fcinfo, 2);
1358 int nmatch = 0;
1359
1360 InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
1361 NULL, NULL);
1362 fcinfo->args[0].isnull = false;
1363 fcinfo->args[1].isnull = false;
1364 fcinfo->args[1].value = constval;
1365 for (int i = 0; i < sslot.nvalues; i++)
1366 {
1367 Datum fresult;
1368
1369 fcinfo->args[0].value = sslot.values[i];
1370 fcinfo->isnull = false;
1371 fresult = FunctionCallInvoke(fcinfo);
1372 if (!fcinfo->isnull && DatumGetBool(fresult))
1373 nmatch++;
1374 }
1375 hist_selec = ((double) nmatch) / ((double) sslot.nvalues);
1376
1377 /*
1378 * As above, clamp to a hundredth of the histogram resolution.
1379 * This case is surely even less trustworthy than the normal one,
1380 * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the
1381 * clamp should be more restrictive in this case?)
1382 */
1383 {
1384 double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1385
1386 if (hist_selec < cutoff)
1387 hist_selec = cutoff;
1388 else if (hist_selec > 1.0 - cutoff)
1389 hist_selec = 1.0 - cutoff;
1390 }
1391 }
1392
1393 free_attstatsslot(&sslot);
1394 }
1395
1396 return hist_selec;
1397}
1398
1399/*
1400 * Common wrapper function for the selectivity estimators that simply
1401 * invoke scalarineqsel().
1402 */
1403static Datum
1405{
1407 Oid operator = PG_GETARG_OID(1);
1408 List *args = (List *) PG_GETARG_POINTER(2);
1409 int varRelid = PG_GETARG_INT32(3);
1410 Oid collation = PG_GET_COLLATION();
1411 VariableStatData vardata;
1412 Node *other;
1413 bool varonleft;
1414 Datum constval;
1415 Oid consttype;
1416 double selec;
1417
1418 /*
1419 * If expression is not variable op something or something op variable,
1420 * then punt and return a default estimate.
1421 */
1422 if (!get_restriction_variable(root, args, varRelid,
1423 &vardata, &other, &varonleft))
1425
1426 /*
1427 * Can't do anything useful if the something is not a constant, either.
1428 */
1429 if (!IsA(other, Const))
1430 {
1431 ReleaseVariableStats(vardata);
1433 }
1434
1435 /*
1436 * If the constant is NULL, assume operator is strict and return zero, ie,
1437 * operator will never return TRUE.
1438 */
1439 if (((Const *) other)->constisnull)
1440 {
1441 ReleaseVariableStats(vardata);
1442 PG_RETURN_FLOAT8(0.0);
1443 }
1444 constval = ((Const *) other)->constvalue;
1445 consttype = ((Const *) other)->consttype;
1446
1447 /*
1448 * Force the var to be on the left to simplify logic in scalarineqsel.
1449 */
1450 if (!varonleft)
1451 {
1452 operator = get_commutator(operator);
1453 if (!operator)
1454 {
1455 /* Use default selectivity (should we raise an error instead?) */
1456 ReleaseVariableStats(vardata);
1458 }
1459 isgt = !isgt;
1460 }
1461
1462 /* The rest of the work is done by scalarineqsel(). */
1463 selec = scalarineqsel(root, operator, isgt, iseq, collation,
1464 &vardata, constval, consttype);
1465
1466 ReleaseVariableStats(vardata);
1467
1468 PG_RETURN_FLOAT8((float8) selec);
1469}
1470
1471/*
1472 * scalarltsel - Selectivity of "<" for scalars.
1473 */
1474Datum
1476{
1477 return scalarineqsel_wrapper(fcinfo, false, false);
1478}
1479
1480/*
1481 * scalarlesel - Selectivity of "<=" for scalars.
1482 */
1483Datum
1485{
1486 return scalarineqsel_wrapper(fcinfo, false, true);
1487}
1488
1489/*
1490 * scalargtsel - Selectivity of ">" for scalars.
1491 */
1492Datum
1494{
1495 return scalarineqsel_wrapper(fcinfo, true, false);
1496}
1497
1498/*
1499 * scalargesel - Selectivity of ">=" for scalars.
1500 */
1501Datum
1503{
1504 return scalarineqsel_wrapper(fcinfo, true, true);
1505}
1506
1507/*
1508 * boolvarsel - Selectivity of Boolean variable.
1509 *
1510 * This can actually be called on any boolean-valued expression. If it
1511 * involves only Vars of the specified relation, and if there are statistics
1512 * about the Var or expression (the latter is possible if it's indexed) then
1513 * we'll produce a real estimate; otherwise it's just a default.
1514 */
1517{
1518 VariableStatData vardata;
1519 double selec;
1520
1521 examine_variable(root, arg, varRelid, &vardata);
1522 if (HeapTupleIsValid(vardata.statsTuple))
1523 {
1524 /*
1525 * A boolean variable V is equivalent to the clause V = 't', so we
1526 * compute the selectivity as if that is what we have.
1527 */
1528 selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
1529 BoolGetDatum(true), false, true, false);
1530 }
1531 else if (is_funcclause(arg))
1532 {
1533 /*
1534 * If we have no stats and it's a function call, estimate 0.3333333.
1535 * This seems a pretty unprincipled choice, but Postgres has been
1536 * using that estimate for function calls since 1992. The hoariness
1537 * of this behavior suggests that we should not be in too much hurry
1538 * to use another value.
1539 */
1540 selec = 0.3333333;
1541 }
1542 else
1543 {
1544 /* Otherwise, the default estimate is 0.5 */
1545 selec = 0.5;
1546 }
1547 ReleaseVariableStats(vardata);
1548 return selec;
1549}
1550
1551/*
1552 * booltestsel - Selectivity of BooleanTest Node.
1553 */
1556 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1557{
1558 VariableStatData vardata;
1559 double selec;
1560
1561 examine_variable(root, arg, varRelid, &vardata);
1562
1563 if (HeapTupleIsValid(vardata.statsTuple))
1564 {
1565 Form_pg_statistic stats;
1566 double freq_null;
1567 AttStatsSlot sslot;
1568
1569 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1570 freq_null = stats->stanullfrac;
1571
1572 if (get_attstatsslot(&sslot, vardata.statsTuple,
1573 STATISTIC_KIND_MCV, InvalidOid,
1575 && sslot.nnumbers > 0)
1576 {
1577 double freq_true;
1578 double freq_false;
1579
1580 /*
1581 * Get first MCV frequency and derive frequency for true.
1582 */
1583 if (DatumGetBool(sslot.values[0]))
1584 freq_true = sslot.numbers[0];
1585 else
1586 freq_true = 1.0 - sslot.numbers[0] - freq_null;
1587
1588 /*
1589 * Next derive frequency for false. Then use these as appropriate
1590 * to derive frequency for each case.
1591 */
1592 freq_false = 1.0 - freq_true - freq_null;
1593
1594 switch (booltesttype)
1595 {
1596 case IS_UNKNOWN:
1597 /* select only NULL values */
1598 selec = freq_null;
1599 break;
1600 case IS_NOT_UNKNOWN:
1601 /* select non-NULL values */
1602 selec = 1.0 - freq_null;
1603 break;
1604 case IS_TRUE:
1605 /* select only TRUE values */
1606 selec = freq_true;
1607 break;
1608 case IS_NOT_TRUE:
1609 /* select non-TRUE values */
1610 selec = 1.0 - freq_true;
1611 break;
1612 case IS_FALSE:
1613 /* select only FALSE values */
1614 selec = freq_false;
1615 break;
1616 case IS_NOT_FALSE:
1617 /* select non-FALSE values */
1618 selec = 1.0 - freq_false;
1619 break;
1620 default:
1621 elog(ERROR, "unrecognized booltesttype: %d",
1622 (int) booltesttype);
1623 selec = 0.0; /* Keep compiler quiet */
1624 break;
1625 }
1626
1627 free_attstatsslot(&sslot);
1628 }
1629 else
1630 {
1631 /*
1632 * No most-common-value info available. Still have null fraction
1633 * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1634 * for null fraction and assume a 50-50 split of TRUE and FALSE.
1635 */
1636 switch (booltesttype)
1637 {
1638 case IS_UNKNOWN:
1639 /* select only NULL values */
1640 selec = freq_null;
1641 break;
1642 case IS_NOT_UNKNOWN:
1643 /* select non-NULL values */
1644 selec = 1.0 - freq_null;
1645 break;
1646 case IS_TRUE:
1647 case IS_FALSE:
1648 /* Assume we select half of the non-NULL values */
1649 selec = (1.0 - freq_null) / 2.0;
1650 break;
1651 case IS_NOT_TRUE:
1652 case IS_NOT_FALSE:
1653 /* Assume we select NULLs plus half of the non-NULLs */
1654 /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1655 selec = (freq_null + 1.0) / 2.0;
1656 break;
1657 default:
1658 elog(ERROR, "unrecognized booltesttype: %d",
1659 (int) booltesttype);
1660 selec = 0.0; /* Keep compiler quiet */
1661 break;
1662 }
1663 }
1664 }
1665 else
1666 {
1667 /*
1668 * If we can't get variable statistics for the argument, perhaps
1669 * clause_selectivity can do something with it. We ignore the
1670 * possibility of a NULL value when using clause_selectivity, and just
1671 * assume the value is either TRUE or FALSE.
1672 */
1673 switch (booltesttype)
1674 {
1675 case IS_UNKNOWN:
1676 selec = DEFAULT_UNK_SEL;
1677 break;
1678 case IS_NOT_UNKNOWN:
1679 selec = DEFAULT_NOT_UNK_SEL;
1680 break;
1681 case IS_TRUE:
1682 case IS_NOT_FALSE:
1683 selec = (double) clause_selectivity(root, arg,
1684 varRelid,
1685 jointype, sjinfo);
1686 break;
1687 case IS_FALSE:
1688 case IS_NOT_TRUE:
1689 selec = 1.0 - (double) clause_selectivity(root, arg,
1690 varRelid,
1691 jointype, sjinfo);
1692 break;
1693 default:
1694 elog(ERROR, "unrecognized booltesttype: %d",
1695 (int) booltesttype);
1696 selec = 0.0; /* Keep compiler quiet */
1697 break;
1698 }
1699 }
1700
1701 ReleaseVariableStats(vardata);
1702
1703 /* result should be in range, but make sure... */
1704 CLAMP_PROBABILITY(selec);
1705
1706 return (Selectivity) selec;
1707}
1708
1709/*
1710 * nulltestsel - Selectivity of NullTest Node.
1711 */
1714 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1715{
1716 VariableStatData vardata;
1717 double selec;
1718
1719 examine_variable(root, arg, varRelid, &vardata);
1720
1721 if (HeapTupleIsValid(vardata.statsTuple))
1722 {
1723 Form_pg_statistic stats;
1724 double freq_null;
1725
1726 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1727 freq_null = stats->stanullfrac;
1728
1729 switch (nulltesttype)
1730 {
1731 case IS_NULL:
1732
1733 /*
1734 * Use freq_null directly.
1735 */
1736 selec = freq_null;
1737 break;
1738 case IS_NOT_NULL:
1739
1740 /*
1741 * Select not unknown (not null) values. Calculate from
1742 * freq_null.
1743 */
1744 selec = 1.0 - freq_null;
1745 break;
1746 default:
1747 elog(ERROR, "unrecognized nulltesttype: %d",
1748 (int) nulltesttype);
1749 return (Selectivity) 0; /* keep compiler quiet */
1750 }
1751 }
1752 else if (vardata.var && IsA(vardata.var, Var) &&
1753 ((Var *) vardata.var)->varattno < 0)
1754 {
1755 /*
1756 * There are no stats for system columns, but we know they are never
1757 * NULL.
1758 */
1759 selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1760 }
1761 else
1762 {
1763 /*
1764 * No ANALYZE stats available, so make a guess
1765 */
1766 switch (nulltesttype)
1767 {
1768 case IS_NULL:
1769 selec = DEFAULT_UNK_SEL;
1770 break;
1771 case IS_NOT_NULL:
1772 selec = DEFAULT_NOT_UNK_SEL;
1773 break;
1774 default:
1775 elog(ERROR, "unrecognized nulltesttype: %d",
1776 (int) nulltesttype);
1777 return (Selectivity) 0; /* keep compiler quiet */
1778 }
1779 }
1780
1781 ReleaseVariableStats(vardata);
1782
1783 /* result should be in range, but make sure... */
1784 CLAMP_PROBABILITY(selec);
1785
1786 return (Selectivity) selec;
1787}
1788
1789/*
1790 * strip_array_coercion - strip binary-compatible relabeling from an array expr
1791 *
1792 * For array values, the parser normally generates ArrayCoerceExpr conversions,
1793 * but it seems possible that RelabelType might show up. Also, the planner
1794 * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1795 * so we need to be ready to deal with more than one level.
1796 */
1797static Node *
1799{
1800 for (;;)
1801 {
1802 if (node && IsA(node, ArrayCoerceExpr))
1803 {
1804 ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
1805
1806 /*
1807 * If the per-element expression is just a RelabelType on top of
1808 * CaseTestExpr, then we know it's a binary-compatible relabeling.
1809 */
1810 if (IsA(acoerce->elemexpr, RelabelType) &&
1811 IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr))
1812 node = (Node *) acoerce->arg;
1813 else
1814 break;
1815 }
1816 else if (node && IsA(node, RelabelType))
1817 {
1818 /* We don't really expect this case, but may as well cope */
1819 node = (Node *) ((RelabelType *) node)->arg;
1820 }
1821 else
1822 break;
1823 }
1824 return node;
1825}
1826
1827/*
1828 * scalararraysel - Selectivity of ScalarArrayOpExpr Node.
1829 */
1832 ScalarArrayOpExpr *clause,
1833 bool is_join_clause,
1834 int varRelid,
1835 JoinType jointype,
1836 SpecialJoinInfo *sjinfo)
1837{
1838 Oid operator = clause->opno;
1839 bool useOr = clause->useOr;
1840 bool isEquality = false;
1841 bool isInequality = false;
1842 Node *leftop;
1843 Node *rightop;
1844 Oid nominal_element_type;
1845 Oid nominal_element_collation;
1846 TypeCacheEntry *typentry;
1847 RegProcedure oprsel;
1848 FmgrInfo oprselproc;
1850 Selectivity s1disjoint;
1851
1852 /* First, deconstruct the expression */
1853 Assert(list_length(clause->args) == 2);
1854 leftop = (Node *) linitial(clause->args);
1855 rightop = (Node *) lsecond(clause->args);
1856
1857 /* aggressively reduce both sides to constants */
1858 leftop = estimate_expression_value(root, leftop);
1859 rightop = estimate_expression_value(root, rightop);
1860
1861 /* get nominal (after relabeling) element type of rightop */
1862 nominal_element_type = get_base_element_type(exprType(rightop));
1863 if (!OidIsValid(nominal_element_type))
1864 return (Selectivity) 0.5; /* probably shouldn't happen */
1865 /* get nominal collation, too, for generating constants */
1866 nominal_element_collation = exprCollation(rightop);
1867
1868 /* look through any binary-compatible relabeling of rightop */
1869 rightop = strip_array_coercion(rightop);
1870
1871 /*
1872 * Detect whether the operator is the default equality or inequality
1873 * operator of the array element type.
1874 */
1875 typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1876 if (OidIsValid(typentry->eq_opr))
1877 {
1878 if (operator == typentry->eq_opr)
1879 isEquality = true;
1880 else if (get_negator(operator) == typentry->eq_opr)
1881 isInequality = true;
1882 }
1883
1884 /*
1885 * If it is equality or inequality, we might be able to estimate this as a
1886 * form of array containment; for instance "const = ANY(column)" can be
1887 * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1888 * that, and returns the selectivity estimate if successful, or -1 if not.
1889 */
1890 if ((isEquality || isInequality) && !is_join_clause)
1891 {
1892 s1 = scalararraysel_containment(root, leftop, rightop,
1893 nominal_element_type,
1894 isEquality, useOr, varRelid);
1895 if (s1 >= 0.0)
1896 return s1;
1897 }
1898
1899 /*
1900 * Look up the underlying operator's selectivity estimator. Punt if it
1901 * hasn't got one.
1902 */
1903 if (is_join_clause)
1904 oprsel = get_oprjoin(operator);
1905 else
1906 oprsel = get_oprrest(operator);
1907 if (!oprsel)
1908 return (Selectivity) 0.5;
1909 fmgr_info(oprsel, &oprselproc);
1910
1911 /*
1912 * In the array-containment check above, we must only believe that an
1913 * operator is equality or inequality if it is the default btree equality
1914 * operator (or its negator) for the element type, since those are the
1915 * operators that array containment will use. But in what follows, we can
1916 * be a little laxer, and also believe that any operators using eqsel() or
1917 * neqsel() as selectivity estimator act like equality or inequality.
1918 */
1919 if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1920 isEquality = true;
1921 else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1922 isInequality = true;
1923
1924 /*
1925 * We consider three cases:
1926 *
1927 * 1. rightop is an Array constant: deconstruct the array, apply the
1928 * operator's selectivity function for each array element, and merge the
1929 * results in the same way that clausesel.c does for AND/OR combinations.
1930 *
1931 * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1932 * function for each element of the ARRAY[] construct, and merge.
1933 *
1934 * 3. otherwise, make a guess ...
1935 */
1936 if (rightop && IsA(rightop, Const))
1937 {
1938 Datum arraydatum = ((Const *) rightop)->constvalue;
1939 bool arrayisnull = ((Const *) rightop)->constisnull;
1940 ArrayType *arrayval;
1941 int16 elmlen;
1942 bool elmbyval;
1943 char elmalign;
1944 int num_elems;
1945 Datum *elem_values;
1946 bool *elem_nulls;
1947 int i;
1948
1949 if (arrayisnull) /* qual can't succeed if null array */
1950 return (Selectivity) 0.0;
1951 arrayval = DatumGetArrayTypeP(arraydatum);
1953 &elmlen, &elmbyval, &elmalign);
1954 deconstruct_array(arrayval,
1955 ARR_ELEMTYPE(arrayval),
1956 elmlen, elmbyval, elmalign,
1957 &elem_values, &elem_nulls, &num_elems);
1958
1959 /*
1960 * For generic operators, we assume the probability of success is
1961 * independent for each array element. But for "= ANY" or "<> ALL",
1962 * if the array elements are distinct (which'd typically be the case)
1963 * then the probabilities are disjoint, and we should just sum them.
1964 *
1965 * If we were being really tense we would try to confirm that the
1966 * elements are all distinct, but that would be expensive and it
1967 * doesn't seem to be worth the cycles; it would amount to penalizing
1968 * well-written queries in favor of poorly-written ones. However, we
1969 * do protect ourselves a little bit by checking whether the
1970 * disjointness assumption leads to an impossible (out of range)
1971 * probability; if so, we fall back to the normal calculation.
1972 */
1973 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1974
1975 for (i = 0; i < num_elems; i++)
1976 {
1977 List *args;
1979
1980 args = list_make2(leftop,
1981 makeConst(nominal_element_type,
1982 -1,
1983 nominal_element_collation,
1984 elmlen,
1985 elem_values[i],
1986 elem_nulls[i],
1987 elmbyval));
1988 if (is_join_clause)
1989 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1990 clause->inputcollid,
1992 ObjectIdGetDatum(operator),
1994 Int16GetDatum(jointype),
1995 PointerGetDatum(sjinfo)));
1996 else
1997 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1998 clause->inputcollid,
2000 ObjectIdGetDatum(operator),
2002 Int32GetDatum(varRelid)));
2003
2004 if (useOr)
2005 {
2006 s1 = s1 + s2 - s1 * s2;
2007 if (isEquality)
2008 s1disjoint += s2;
2009 }
2010 else
2011 {
2012 s1 = s1 * s2;
2013 if (isInequality)
2014 s1disjoint += s2 - 1.0;
2015 }
2016 }
2017
2018 /* accept disjoint-probability estimate if in range */
2019 if ((useOr ? isEquality : isInequality) &&
2020 s1disjoint >= 0.0 && s1disjoint <= 1.0)
2021 s1 = s1disjoint;
2022 }
2023 else if (rightop && IsA(rightop, ArrayExpr) &&
2024 !((ArrayExpr *) rightop)->multidims)
2025 {
2026 ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
2027 int16 elmlen;
2028 bool elmbyval;
2029 ListCell *l;
2030
2031 get_typlenbyval(arrayexpr->element_typeid,
2032 &elmlen, &elmbyval);
2033
2034 /*
2035 * We use the assumption of disjoint probabilities here too, although
2036 * the odds of equal array elements are rather higher if the elements
2037 * are not all constants (which they won't be, else constant folding
2038 * would have reduced the ArrayExpr to a Const). In this path it's
2039 * critical to have the sanity check on the s1disjoint estimate.
2040 */
2041 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
2042
2043 foreach(l, arrayexpr->elements)
2044 {
2045 Node *elem = (Node *) lfirst(l);
2046 List *args;
2048
2049 /*
2050 * Theoretically, if elem isn't of nominal_element_type we should
2051 * insert a RelabelType, but it seems unlikely that any operator
2052 * estimation function would really care ...
2053 */
2054 args = list_make2(leftop, elem);
2055 if (is_join_clause)
2056 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2057 clause->inputcollid,
2059 ObjectIdGetDatum(operator),
2061 Int16GetDatum(jointype),
2062 PointerGetDatum(sjinfo)));
2063 else
2064 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2065 clause->inputcollid,
2067 ObjectIdGetDatum(operator),
2069 Int32GetDatum(varRelid)));
2070
2071 if (useOr)
2072 {
2073 s1 = s1 + s2 - s1 * s2;
2074 if (isEquality)
2075 s1disjoint += s2;
2076 }
2077 else
2078 {
2079 s1 = s1 * s2;
2080 if (isInequality)
2081 s1disjoint += s2 - 1.0;
2082 }
2083 }
2084
2085 /* accept disjoint-probability estimate if in range */
2086 if ((useOr ? isEquality : isInequality) &&
2087 s1disjoint >= 0.0 && s1disjoint <= 1.0)
2088 s1 = s1disjoint;
2089 }
2090 else
2091 {
2092 CaseTestExpr *dummyexpr;
2093 List *args;
2095 int i;
2096
2097 /*
2098 * We need a dummy rightop to pass to the operator selectivity
2099 * routine. It can be pretty much anything that doesn't look like a
2100 * constant; CaseTestExpr is a convenient choice.
2101 */
2102 dummyexpr = makeNode(CaseTestExpr);
2103 dummyexpr->typeId = nominal_element_type;
2104 dummyexpr->typeMod = -1;
2105 dummyexpr->collation = clause->inputcollid;
2106 args = list_make2(leftop, dummyexpr);
2107 if (is_join_clause)
2108 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2109 clause->inputcollid,
2111 ObjectIdGetDatum(operator),
2113 Int16GetDatum(jointype),
2114 PointerGetDatum(sjinfo)));
2115 else
2116 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2117 clause->inputcollid,
2119 ObjectIdGetDatum(operator),
2121 Int32GetDatum(varRelid)));
2122 s1 = useOr ? 0.0 : 1.0;
2123
2124 /*
2125 * Arbitrarily assume 10 elements in the eventual array value (see
2126 * also estimate_array_length). We don't risk an assumption of
2127 * disjoint probabilities here.
2128 */
2129 for (i = 0; i < 10; i++)
2130 {
2131 if (useOr)
2132 s1 = s1 + s2 - s1 * s2;
2133 else
2134 s1 = s1 * s2;
2135 }
2136 }
2137
2138 /* result should be in range, but make sure... */
2140
2141 return s1;
2142}
2143
2144/*
2145 * Estimate number of elements in the array yielded by an expression.
2146 *
2147 * Note: the result is integral, but we use "double" to avoid overflow
2148 * concerns. Most callers will use it in double-type expressions anyway.
2149 *
2150 * Note: in some code paths root can be passed as NULL, resulting in
2151 * slightly worse estimates.
2152 */
2153double
2155{
2156 /* look through any binary-compatible relabeling of arrayexpr */
2157 arrayexpr = strip_array_coercion(arrayexpr);
2158
2159 if (arrayexpr && IsA(arrayexpr, Const))
2160 {
2161 Datum arraydatum = ((Const *) arrayexpr)->constvalue;
2162 bool arrayisnull = ((Const *) arrayexpr)->constisnull;
2163 ArrayType *arrayval;
2164
2165 if (arrayisnull)
2166 return 0;
2167 arrayval = DatumGetArrayTypeP(arraydatum);
2168 return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2169 }
2170 else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2171 !((ArrayExpr *) arrayexpr)->multidims)
2172 {
2173 return list_length(((ArrayExpr *) arrayexpr)->elements);
2174 }
2175 else if (arrayexpr && root)
2176 {
2177 /* See if we can find any statistics about it */
2178 VariableStatData vardata;
2179 AttStatsSlot sslot;
2180 double nelem = 0;
2181
2182 examine_variable(root, arrayexpr, 0, &vardata);
2183 if (HeapTupleIsValid(vardata.statsTuple))
2184 {
2185 /*
2186 * Found stats, so use the average element count, which is stored
2187 * in the last stanumbers element of the DECHIST statistics.
2188 * Actually that is the average count of *distinct* elements;
2189 * perhaps we should scale it up somewhat?
2190 */
2191 if (get_attstatsslot(&sslot, vardata.statsTuple,
2192 STATISTIC_KIND_DECHIST, InvalidOid,
2194 {
2195 if (sslot.nnumbers > 0)
2196 nelem = clamp_row_est(sslot.numbers[sslot.nnumbers - 1]);
2197 free_attstatsslot(&sslot);
2198 }
2199 }
2200 ReleaseVariableStats(vardata);
2201
2202 if (nelem > 0)
2203 return nelem;
2204 }
2205
2206 /* Else use a default guess --- this should match scalararraysel */
2207 return 10;
2208}
2209
2210/*
2211 * rowcomparesel - Selectivity of RowCompareExpr Node.
2212 *
2213 * We estimate RowCompare selectivity by considering just the first (high
2214 * order) columns, which makes it equivalent to an ordinary OpExpr. While
2215 * this estimate could be refined by considering additional columns, it
2216 * seems unlikely that we could do a lot better without multi-column
2217 * statistics.
2218 */
2221 RowCompareExpr *clause,
2222 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
2223{
2225 Oid opno = linitial_oid(clause->opnos);
2226 Oid inputcollid = linitial_oid(clause->inputcollids);
2227 List *opargs;
2228 bool is_join_clause;
2229
2230 /* Build equivalent arg list for single operator */
2231 opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2232
2233 /*
2234 * Decide if it's a join clause. This should match clausesel.c's
2235 * treat_as_join_clause(), except that we intentionally consider only the
2236 * leading columns and not the rest of the clause.
2237 */
2238 if (varRelid != 0)
2239 {
2240 /*
2241 * Caller is forcing restriction mode (eg, because we are examining an
2242 * inner indexscan qual).
2243 */
2244 is_join_clause = false;
2245 }
2246 else if (sjinfo == NULL)
2247 {
2248 /*
2249 * It must be a restriction clause, since it's being evaluated at a
2250 * scan node.
2251 */
2252 is_join_clause = false;
2253 }
2254 else
2255 {
2256 /*
2257 * Otherwise, it's a join if there's more than one base relation used.
2258 */
2259 is_join_clause = (NumRelids(root, (Node *) opargs) > 1);
2260 }
2261
2262 if (is_join_clause)
2263 {
2264 /* Estimate selectivity for a join clause. */
2265 s1 = join_selectivity(root, opno,
2266 opargs,
2267 inputcollid,
2268 jointype,
2269 sjinfo);
2270 }
2271 else
2272 {
2273 /* Estimate selectivity for a restriction clause. */
2275 opargs,
2276 inputcollid,
2277 varRelid);
2278 }
2279
2280 return s1;
2281}
2282
2283/*
2284 * eqjoinsel - Join selectivity of "="
2285 */
2286Datum
2288{
2290 Oid operator = PG_GETARG_OID(1);
2291 List *args = (List *) PG_GETARG_POINTER(2);
2292
2293#ifdef NOT_USED
2294 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2295#endif
2297 Oid collation = PG_GET_COLLATION();
2298 double selec;
2299 double selec_inner;
2300 VariableStatData vardata1;
2301 VariableStatData vardata2;
2302 double nd1;
2303 double nd2;
2304 bool isdefault1;
2305 bool isdefault2;
2306 Oid opfuncoid;
2307 AttStatsSlot sslot1;
2308 AttStatsSlot sslot2;
2309 Form_pg_statistic stats1 = NULL;
2310 Form_pg_statistic stats2 = NULL;
2311 bool have_mcvs1 = false;
2312 bool have_mcvs2 = false;
2313 bool get_mcv_stats;
2314 bool join_is_reversed;
2315 RelOptInfo *inner_rel;
2316
2317 get_join_variables(root, args, sjinfo,
2318 &vardata1, &vardata2, &join_is_reversed);
2319
2320 nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
2321 nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
2322
2323 opfuncoid = get_opcode(operator);
2324
2325 memset(&sslot1, 0, sizeof(sslot1));
2326 memset(&sslot2, 0, sizeof(sslot2));
2327
2328 /*
2329 * There is no use in fetching one side's MCVs if we lack MCVs for the
2330 * other side, so do a quick check to verify that both stats exist.
2331 */
2332 get_mcv_stats = (HeapTupleIsValid(vardata1.statsTuple) &&
2333 HeapTupleIsValid(vardata2.statsTuple) &&
2334 get_attstatsslot(&sslot1, vardata1.statsTuple,
2335 STATISTIC_KIND_MCV, InvalidOid,
2336 0) &&
2337 get_attstatsslot(&sslot2, vardata2.statsTuple,
2338 STATISTIC_KIND_MCV, InvalidOid,
2339 0));
2340
2341 if (HeapTupleIsValid(vardata1.statsTuple))
2342 {
2343 /* note we allow use of nullfrac regardless of security check */
2344 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
2345 if (get_mcv_stats &&
2346 statistic_proc_security_check(&vardata1, opfuncoid))
2347 have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
2348 STATISTIC_KIND_MCV, InvalidOid,
2350 }
2351
2352 if (HeapTupleIsValid(vardata2.statsTuple))
2353 {
2354 /* note we allow use of nullfrac regardless of security check */
2355 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
2356 if (get_mcv_stats &&
2357 statistic_proc_security_check(&vardata2, opfuncoid))
2358 have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
2359 STATISTIC_KIND_MCV, InvalidOid,
2361 }
2362
2363 /* We need to compute the inner-join selectivity in all cases */
2364 selec_inner = eqjoinsel_inner(opfuncoid, collation,
2365 &vardata1, &vardata2,
2366 nd1, nd2,
2367 isdefault1, isdefault2,
2368 &sslot1, &sslot2,
2369 stats1, stats2,
2370 have_mcvs1, have_mcvs2);
2371
2372 switch (sjinfo->jointype)
2373 {
2374 case JOIN_INNER:
2375 case JOIN_LEFT:
2376 case JOIN_FULL:
2377 selec = selec_inner;
2378 break;
2379 case JOIN_SEMI:
2380 case JOIN_ANTI:
2381
2382 /*
2383 * Look up the join's inner relation. min_righthand is sufficient
2384 * information because neither SEMI nor ANTI joins permit any
2385 * reassociation into or out of their RHS, so the righthand will
2386 * always be exactly that set of rels.
2387 */
2388 inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2389
2390 if (!join_is_reversed)
2391 selec = eqjoinsel_semi(opfuncoid, collation,
2392 &vardata1, &vardata2,
2393 nd1, nd2,
2394 isdefault1, isdefault2,
2395 &sslot1, &sslot2,
2396 stats1, stats2,
2397 have_mcvs1, have_mcvs2,
2398 inner_rel);
2399 else
2400 {
2401 Oid commop = get_commutator(operator);
2402 Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
2403
2404 selec = eqjoinsel_semi(commopfuncoid, collation,
2405 &vardata2, &vardata1,
2406 nd2, nd1,
2407 isdefault2, isdefault1,
2408 &sslot2, &sslot1,
2409 stats2, stats1,
2410 have_mcvs2, have_mcvs1,
2411 inner_rel);
2412 }
2413
2414 /*
2415 * We should never estimate the output of a semijoin to be more
2416 * rows than we estimate for an inner join with the same input
2417 * rels and join condition; it's obviously impossible for that to
2418 * happen. The former estimate is N1 * Ssemi while the latter is
2419 * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
2420 * this is worthwhile because of the shakier estimation rules we
2421 * use in eqjoinsel_semi, particularly in cases where it has to
2422 * punt entirely.
2423 */
2424 selec = Min(selec, inner_rel->rows * selec_inner);
2425 break;
2426 default:
2427 /* other values not expected here */
2428 elog(ERROR, "unrecognized join type: %d",
2429 (int) sjinfo->jointype);
2430 selec = 0; /* keep compiler quiet */
2431 break;
2432 }
2433
2434 free_attstatsslot(&sslot1);
2435 free_attstatsslot(&sslot2);
2436
2437 ReleaseVariableStats(vardata1);
2438 ReleaseVariableStats(vardata2);
2439
2440 CLAMP_PROBABILITY(selec);
2441
2442 PG_RETURN_FLOAT8((float8) selec);
2443}
2444
2445/*
2446 * eqjoinsel_inner --- eqjoinsel for normal inner join
2447 *
2448 * We also use this for LEFT/FULL outer joins; it's not presently clear
2449 * that it's worth trying to distinguish them here.
2450 */
2451static double
2452eqjoinsel_inner(Oid opfuncoid, Oid collation,
2453 VariableStatData *vardata1, VariableStatData *vardata2,
2454 double nd1, double nd2,
2455 bool isdefault1, bool isdefault2,
2456 AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2457 Form_pg_statistic stats1, Form_pg_statistic stats2,
2458 bool have_mcvs1, bool have_mcvs2)
2459{
2460 double selec;
2461
2462 if (have_mcvs1 && have_mcvs2)
2463 {
2464 /*
2465 * We have most-common-value lists for both relations. Run through
2466 * the lists to see which MCVs actually join to each other with the
2467 * given operator. This allows us to determine the exact join
2468 * selectivity for the portion of the relations represented by the MCV
2469 * lists. We still have to estimate for the remaining population, but
2470 * in a skewed distribution this gives us a big leg up in accuracy.
2471 * For motivation see the analysis in Y. Ioannidis and S.
2472 * Christodoulakis, "On the propagation of errors in the size of join
2473 * results", Technical Report 1018, Computer Science Dept., University
2474 * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2475 */
2476 LOCAL_FCINFO(fcinfo, 2);
2477 FmgrInfo eqproc;
2478 bool *hasmatch1;
2479 bool *hasmatch2;
2480 double nullfrac1 = stats1->stanullfrac;
2481 double nullfrac2 = stats2->stanullfrac;
2482 double matchprodfreq,
2483 matchfreq1,
2484 matchfreq2,
2485 unmatchfreq1,
2486 unmatchfreq2,
2487 otherfreq1,
2488 otherfreq2,
2489 totalsel1,
2490 totalsel2;
2491 int i,
2492 nmatches;
2493
2494 fmgr_info(opfuncoid, &eqproc);
2495
2496 /*
2497 * Save a few cycles by setting up the fcinfo struct just once. Using
2498 * FunctionCallInvoke directly also avoids failure if the eqproc
2499 * returns NULL, though really equality functions should never do
2500 * that.
2501 */
2502 InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2503 NULL, NULL);
2504 fcinfo->args[0].isnull = false;
2505 fcinfo->args[1].isnull = false;
2506
2507 hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2508 hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
2509
2510 /*
2511 * Note we assume that each MCV will match at most one member of the
2512 * other MCV list. If the operator isn't really equality, there could
2513 * be multiple matches --- but we don't look for them, both for speed
2514 * and because the math wouldn't add up...
2515 */
2516 matchprodfreq = 0.0;
2517 nmatches = 0;
2518 for (i = 0; i < sslot1->nvalues; i++)
2519 {
2520 int j;
2521
2522 fcinfo->args[0].value = sslot1->values[i];
2523
2524 for (j = 0; j < sslot2->nvalues; j++)
2525 {
2526 Datum fresult;
2527
2528 if (hasmatch2[j])
2529 continue;
2530 fcinfo->args[1].value = sslot2->values[j];
2531 fcinfo->isnull = false;
2532 fresult = FunctionCallInvoke(fcinfo);
2533 if (!fcinfo->isnull && DatumGetBool(fresult))
2534 {
2535 hasmatch1[i] = hasmatch2[j] = true;
2536 matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
2537 nmatches++;
2538 break;
2539 }
2540 }
2541 }
2542 CLAMP_PROBABILITY(matchprodfreq);
2543 /* Sum up frequencies of matched and unmatched MCVs */
2544 matchfreq1 = unmatchfreq1 = 0.0;
2545 for (i = 0; i < sslot1->nvalues; i++)
2546 {
2547 if (hasmatch1[i])
2548 matchfreq1 += sslot1->numbers[i];
2549 else
2550 unmatchfreq1 += sslot1->numbers[i];
2551 }
2552 CLAMP_PROBABILITY(matchfreq1);
2553 CLAMP_PROBABILITY(unmatchfreq1);
2554 matchfreq2 = unmatchfreq2 = 0.0;
2555 for (i = 0; i < sslot2->nvalues; i++)
2556 {
2557 if (hasmatch2[i])
2558 matchfreq2 += sslot2->numbers[i];
2559 else
2560 unmatchfreq2 += sslot2->numbers[i];
2561 }
2562 CLAMP_PROBABILITY(matchfreq2);
2563 CLAMP_PROBABILITY(unmatchfreq2);
2564 pfree(hasmatch1);
2565 pfree(hasmatch2);
2566
2567 /*
2568 * Compute total frequency of non-null values that are not in the MCV
2569 * lists.
2570 */
2571 otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2572 otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2573 CLAMP_PROBABILITY(otherfreq1);
2574 CLAMP_PROBABILITY(otherfreq2);
2575
2576 /*
2577 * We can estimate the total selectivity from the point of view of
2578 * relation 1 as: the known selectivity for matched MCVs, plus
2579 * unmatched MCVs that are assumed to match against random members of
2580 * relation 2's non-MCV population, plus non-MCV values that are
2581 * assumed to match against random members of relation 2's unmatched
2582 * MCVs plus non-MCV values.
2583 */
2584 totalsel1 = matchprodfreq;
2585 if (nd2 > sslot2->nvalues)
2586 totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
2587 if (nd2 > nmatches)
2588 totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2589 (nd2 - nmatches);
2590 /* Same estimate from the point of view of relation 2. */
2591 totalsel2 = matchprodfreq;
2592 if (nd1 > sslot1->nvalues)
2593 totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
2594 if (nd1 > nmatches)
2595 totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2596 (nd1 - nmatches);
2597
2598 /*
2599 * Use the smaller of the two estimates. This can be justified in
2600 * essentially the same terms as given below for the no-stats case: to
2601 * a first approximation, we are estimating from the point of view of
2602 * the relation with smaller nd.
2603 */
2604 selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2605 }
2606 else
2607 {
2608 /*
2609 * We do not have MCV lists for both sides. Estimate the join
2610 * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2611 * is plausible if we assume that the join operator is strict and the
2612 * non-null values are about equally distributed: a given non-null
2613 * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2614 * of rel2, so total join rows are at most
2615 * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2616 * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2617 * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2618 * with MIN() is an upper bound. Using the MIN() means we estimate
2619 * from the point of view of the relation with smaller nd (since the
2620 * larger nd is determining the MIN). It is reasonable to assume that
2621 * most tuples in this rel will have join partners, so the bound is
2622 * probably reasonably tight and should be taken as-is.
2623 *
2624 * XXX Can we be smarter if we have an MCV list for just one side? It
2625 * seems that if we assume equal distribution for the other side, we
2626 * end up with the same answer anyway.
2627 */
2628 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2629 double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2630
2631 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2632 if (nd1 > nd2)
2633 selec /= nd1;
2634 else
2635 selec /= nd2;
2636 }
2637
2638 return selec;
2639}
2640
2641/*
2642 * eqjoinsel_semi --- eqjoinsel for semi join
2643 *
2644 * (Also used for anti join, which we are supposed to estimate the same way.)
2645 * Caller has ensured that vardata1 is the LHS variable.
2646 * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
2647 */
2648static double
2649eqjoinsel_semi(Oid opfuncoid, Oid collation,
2650 VariableStatData *vardata1, VariableStatData *vardata2,
2651 double nd1, double nd2,
2652 bool isdefault1, bool isdefault2,
2653 AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2654 Form_pg_statistic stats1, Form_pg_statistic stats2,
2655 bool have_mcvs1, bool have_mcvs2,
2656 RelOptInfo *inner_rel)
2657{
2658 double selec;
2659
2660 /*
2661 * We clamp nd2 to be not more than what we estimate the inner relation's
2662 * size to be. This is intuitively somewhat reasonable since obviously
2663 * there can't be more than that many distinct values coming from the
2664 * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
2665 * likewise) is that this is the only pathway by which restriction clauses
2666 * applied to the inner rel will affect the join result size estimate,
2667 * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2668 * only the outer rel's size. If we clamped nd1 we'd be double-counting
2669 * the selectivity of outer-rel restrictions.
2670 *
2671 * We can apply this clamping both with respect to the base relation from
2672 * which the join variable comes (if there is just one), and to the
2673 * immediate inner input relation of the current join.
2674 *
2675 * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2676 * great, maybe, but it didn't come out of nowhere either. This is most
2677 * helpful when the inner relation is empty and consequently has no stats.
2678 */
2679 if (vardata2->rel)
2680 {
2681 if (nd2 >= vardata2->rel->rows)
2682 {
2683 nd2 = vardata2->rel->rows;
2684 isdefault2 = false;
2685 }
2686 }
2687 if (nd2 >= inner_rel->rows)
2688 {
2689 nd2 = inner_rel->rows;
2690 isdefault2 = false;
2691 }
2692
2693 if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
2694 {
2695 /*
2696 * We have most-common-value lists for both relations. Run through
2697 * the lists to see which MCVs actually join to each other with the
2698 * given operator. This allows us to determine the exact join
2699 * selectivity for the portion of the relations represented by the MCV
2700 * lists. We still have to estimate for the remaining population, but
2701 * in a skewed distribution this gives us a big leg up in accuracy.
2702 */
2703 LOCAL_FCINFO(fcinfo, 2);
2704 FmgrInfo eqproc;
2705 bool *hasmatch1;
2706 bool *hasmatch2;
2707 double nullfrac1 = stats1->stanullfrac;
2708 double matchfreq1,
2709 uncertainfrac,
2710 uncertain;
2711 int i,
2712 nmatches,
2713 clamped_nvalues2;
2714
2715 /*
2716 * The clamping above could have resulted in nd2 being less than
2717 * sslot2->nvalues; in which case, we assume that precisely the nd2
2718 * most common values in the relation will appear in the join input,
2719 * and so compare to only the first nd2 members of the MCV list. Of
2720 * course this is frequently wrong, but it's the best bet we can make.
2721 */
2722 clamped_nvalues2 = Min(sslot2->nvalues, nd2);
2723
2724 fmgr_info(opfuncoid, &eqproc);
2725
2726 /*
2727 * Save a few cycles by setting up the fcinfo struct just once. Using
2728 * FunctionCallInvoke directly also avoids failure if the eqproc
2729 * returns NULL, though really equality functions should never do
2730 * that.
2731 */
2732 InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2733 NULL, NULL);
2734 fcinfo->args[0].isnull = false;
2735 fcinfo->args[1].isnull = false;
2736
2737 hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2738 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2739
2740 /*
2741 * Note we assume that each MCV will match at most one member of the
2742 * other MCV list. If the operator isn't really equality, there could
2743 * be multiple matches --- but we don't look for them, both for speed
2744 * and because the math wouldn't add up...
2745 */
2746 nmatches = 0;
2747 for (i = 0; i < sslot1->nvalues; i++)
2748 {
2749 int j;
2750
2751 fcinfo->args[0].value = sslot1->values[i];
2752
2753 for (j = 0; j < clamped_nvalues2; j++)
2754 {
2755 Datum fresult;
2756
2757 if (hasmatch2[j])
2758 continue;
2759 fcinfo->args[1].value = sslot2->values[j];
2760 fcinfo->isnull = false;
2761 fresult = FunctionCallInvoke(fcinfo);
2762 if (!fcinfo->isnull && DatumGetBool(fresult))
2763 {
2764 hasmatch1[i] = hasmatch2[j] = true;
2765 nmatches++;
2766 break;
2767 }
2768 }
2769 }
2770 /* Sum up frequencies of matched MCVs */
2771 matchfreq1 = 0.0;
2772 for (i = 0; i < sslot1->nvalues; i++)
2773 {
2774 if (hasmatch1[i])
2775 matchfreq1 += sslot1->numbers[i];
2776 }
2777 CLAMP_PROBABILITY(matchfreq1);
2778 pfree(hasmatch1);
2779 pfree(hasmatch2);
2780
2781 /*
2782 * Now we need to estimate the fraction of relation 1 that has at
2783 * least one join partner. We know for certain that the matched MCVs
2784 * do, so that gives us a lower bound, but we're really in the dark
2785 * about everything else. Our crude approach is: if nd1 <= nd2 then
2786 * assume all non-null rel1 rows have join partners, else assume for
2787 * the uncertain rows that a fraction nd2/nd1 have join partners. We
2788 * can discount the known-matched MCVs from the distinct-values counts
2789 * before doing the division.
2790 *
2791 * Crude as the above is, it's completely useless if we don't have
2792 * reliable ndistinct values for both sides. Hence, if either nd1 or
2793 * nd2 is default, punt and assume half of the uncertain rows have
2794 * join partners.
2795 */
2796 if (!isdefault1 && !isdefault2)
2797 {
2798 nd1 -= nmatches;
2799 nd2 -= nmatches;
2800 if (nd1 <= nd2 || nd2 < 0)
2801 uncertainfrac = 1.0;
2802 else
2803 uncertainfrac = nd2 / nd1;
2804 }
2805 else
2806 uncertainfrac = 0.5;
2807 uncertain = 1.0 - matchfreq1 - nullfrac1;
2808 CLAMP_PROBABILITY(uncertain);
2809 selec = matchfreq1 + uncertainfrac * uncertain;
2810 }
2811 else
2812 {
2813 /*
2814 * Without MCV lists for both sides, we can only use the heuristic
2815 * about nd1 vs nd2.
2816 */
2817 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2818
2819 if (!isdefault1 && !isdefault2)
2820 {
2821 if (nd1 <= nd2 || nd2 < 0)
2822 selec = 1.0 - nullfrac1;
2823 else
2824 selec = (nd2 / nd1) * (1.0 - nullfrac1);
2825 }
2826 else
2827 selec = 0.5 * (1.0 - nullfrac1);
2828 }
2829
2830 return selec;
2831}
2832
2833/*
2834 * neqjoinsel - Join selectivity of "!="
2835 */
2836Datum
2838{
2840 Oid operator = PG_GETARG_OID(1);
2841 List *args = (List *) PG_GETARG_POINTER(2);
2842 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2844 Oid collation = PG_GET_COLLATION();
2845 float8 result;
2846
2847 if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
2848 {
2849 /*
2850 * For semi-joins, if there is more than one distinct value in the RHS
2851 * relation then every non-null LHS row must find a row to join since
2852 * it can only be equal to one of them. We'll assume that there is
2853 * always more than one distinct RHS value for the sake of stability,
2854 * though in theory we could have special cases for empty RHS
2855 * (selectivity = 0) and single-distinct-value RHS (selectivity =
2856 * fraction of LHS that has the same value as the single RHS value).
2857 *
2858 * For anti-joins, if we use the same assumption that there is more
2859 * than one distinct key in the RHS relation, then every non-null LHS
2860 * row must be suppressed by the anti-join.
2861 *
2862 * So either way, the selectivity estimate should be 1 - nullfrac.
2863 */
2864 VariableStatData leftvar;
2865 VariableStatData rightvar;
2866 bool reversed;
2867 HeapTuple statsTuple;
2868 double nullfrac;
2869
2870 get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
2871 statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
2872 if (HeapTupleIsValid(statsTuple))
2873 nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
2874 else
2875 nullfrac = 0.0;
2876 ReleaseVariableStats(leftvar);
2877 ReleaseVariableStats(rightvar);
2878
2879 result = 1.0 - nullfrac;
2880 }
2881 else
2882 {
2883 /*
2884 * We want 1 - eqjoinsel() where the equality operator is the one
2885 * associated with this != operator, that is, its negator.
2886 */
2887 Oid eqop = get_negator(operator);
2888
2889 if (eqop)
2890 {
2891 result =
2893 collation,
2895 ObjectIdGetDatum(eqop),
2897 Int16GetDatum(jointype),
2898 PointerGetDatum(sjinfo)));
2899 }
2900 else
2901 {
2902 /* Use default selectivity (should we raise an error instead?) */
2903 result = DEFAULT_EQ_SEL;
2904 }
2905 result = 1.0 - result;
2906 }
2907
2908 PG_RETURN_FLOAT8(result);
2909}
2910
2911/*
2912 * scalarltjoinsel - Join selectivity of "<" for scalars
2913 */
2914Datum
2916{
2918}
2919
2920/*
2921 * scalarlejoinsel - Join selectivity of "<=" for scalars
2922 */
2923Datum
2925{
2927}
2928
2929/*
2930 * scalargtjoinsel - Join selectivity of ">" for scalars
2931 */
2932Datum
2934{
2936}
2937
2938/*
2939 * scalargejoinsel - Join selectivity of ">=" for scalars
2940 */
2941Datum
2943{
2945}
2946
2947
2948/*
2949 * mergejoinscansel - Scan selectivity of merge join.
2950 *
2951 * A merge join will stop as soon as it exhausts either input stream.
2952 * Therefore, if we can estimate the ranges of both input variables,
2953 * we can estimate how much of the input will actually be read. This
2954 * can have a considerable impact on the cost when using indexscans.
2955 *
2956 * Also, we can estimate how much of each input has to be read before the
2957 * first join pair is found, which will affect the join's startup time.
2958 *
2959 * clause should be a clause already known to be mergejoinable. opfamily,
2960 * cmptype, and nulls_first specify the sort ordering being used.
2961 *
2962 * The outputs are:
2963 * *leftstart is set to the fraction of the left-hand variable expected
2964 * to be scanned before the first join pair is found (0 to 1).
2965 * *leftend is set to the fraction of the left-hand variable expected
2966 * to be scanned before the join terminates (0 to 1).
2967 * *rightstart, *rightend similarly for the right-hand variable.
2968 */
2969void
2971 Oid opfamily, CompareType cmptype, bool nulls_first,
2972 Selectivity *leftstart, Selectivity *leftend,
2973 Selectivity *rightstart, Selectivity *rightend)
2974{
2975 Node *left,
2976 *right;
2977 VariableStatData leftvar,
2978 rightvar;
2979 Oid opmethod;
2980 int op_strategy;
2981 Oid op_lefttype;
2982 Oid op_righttype;
2983 Oid opno,
2984 collation,
2985 lsortop,
2986 rsortop,
2987 lstatop,
2988 rstatop,
2989 ltop,
2990 leop,
2991 revltop,
2992 revleop;
2993 StrategyNumber ltstrat,
2994 lestrat,
2995 gtstrat,
2996 gestrat;
2997 bool isgt;
2998 Datum leftmin,
2999 leftmax,
3000 rightmin,
3001 rightmax;
3002 double selec;
3003
3004 /* Set default results if we can't figure anything out. */
3005 /* XXX should default "start" fraction be a bit more than 0? */
3006 *leftstart = *rightstart = 0.0;
3007 *leftend = *rightend = 1.0;
3008
3009 /* Deconstruct the merge clause */
3010 if (!is_opclause(clause))
3011 return; /* shouldn't happen */
3012 opno = ((OpExpr *) clause)->opno;
3013 collation = ((OpExpr *) clause)->inputcollid;
3014 left = get_leftop((Expr *) clause);
3015 right = get_rightop((Expr *) clause);
3016 if (!right)
3017 return; /* shouldn't happen */
3018
3019 /* Look for stats for the inputs */
3020 examine_variable(root, left, 0, &leftvar);
3021 examine_variable(root, right, 0, &rightvar);
3022
3023 opmethod = get_opfamily_method(opfamily);
3024
3025 /* Extract the operator's declared left/right datatypes */
3026 get_op_opfamily_properties(opno, opfamily, false,
3027 &op_strategy,
3028 &op_lefttype,
3029 &op_righttype);
3030 Assert(IndexAmTranslateStrategy(op_strategy, opmethod, opfamily, true) == COMPARE_EQ);
3031
3032 /*
3033 * Look up the various operators we need. If we don't find them all, it
3034 * probably means the opfamily is broken, but we just fail silently.
3035 *
3036 * Note: we expect that pg_statistic histograms will be sorted by the '<'
3037 * operator, regardless of which sort direction we are considering.
3038 */
3039 switch (cmptype)
3040 {
3041 case COMPARE_LT:
3042 isgt = false;
3043 ltstrat = IndexAmTranslateCompareType(COMPARE_LT, opmethod, opfamily, true);
3044 lestrat = IndexAmTranslateCompareType(COMPARE_LE, opmethod, opfamily, true);
3045 if (op_lefttype == op_righttype)
3046 {
3047 /* easy case */
3048 ltop = get_opfamily_member(opfamily,
3049 op_lefttype, op_righttype,
3050 ltstrat);
3051 leop = get_opfamily_member(opfamily,
3052 op_lefttype, op_righttype,
3053 lestrat);
3054 lsortop = ltop;
3055 rsortop = ltop;
3056 lstatop = lsortop;
3057 rstatop = rsortop;
3058 revltop = ltop;
3059 revleop = leop;
3060 }
3061 else
3062 {
3063 ltop = get_opfamily_member(opfamily,
3064 op_lefttype, op_righttype,
3065 ltstrat);
3066 leop = get_opfamily_member(opfamily,
3067 op_lefttype, op_righttype,
3068 lestrat);
3069 lsortop = get_opfamily_member(opfamily,
3070 op_lefttype, op_lefttype,
3071 ltstrat);
3072 rsortop = get_opfamily_member(opfamily,
3073 op_righttype, op_righttype,
3074 ltstrat);
3075 lstatop = lsortop;
3076 rstatop = rsortop;
3077 revltop = get_opfamily_member(opfamily,
3078 op_righttype, op_lefttype,
3079 ltstrat);
3080 revleop = get_opfamily_member(opfamily,
3081 op_righttype, op_lefttype,
3082 lestrat);
3083 }
3084 break;
3085 case COMPARE_GT:
3086 /* descending-order case */
3087 isgt = true;
3088 ltstrat = IndexAmTranslateCompareType(COMPARE_LT, opmethod, opfamily, true);
3089 gtstrat = IndexAmTranslateCompareType(COMPARE_GT, opmethod, opfamily, true);
3090 gestrat = IndexAmTranslateCompareType(COMPARE_GE, opmethod, opfamily, true);
3091 if (op_lefttype == op_righttype)
3092 {
3093 /* easy case */
3094 ltop = get_opfamily_member(opfamily,
3095 op_lefttype, op_righttype,
3096 gtstrat);
3097 leop = get_opfamily_member(opfamily,
3098 op_lefttype, op_righttype,
3099 gestrat);
3100 lsortop = ltop;
3101 rsortop = ltop;
3102 lstatop = get_opfamily_member(opfamily,
3103 op_lefttype, op_lefttype,
3104 ltstrat);
3105 rstatop = lstatop;
3106 revltop = ltop;
3107 revleop = leop;
3108 }
3109 else
3110 {
3111 ltop = get_opfamily_member(opfamily,
3112 op_lefttype, op_righttype,
3113 gtstrat);
3114 leop = get_opfamily_member(opfamily,
3115 op_lefttype, op_righttype,
3116 gestrat);
3117 lsortop = get_opfamily_member(opfamily,
3118 op_lefttype, op_lefttype,
3119 gtstrat);
3120 rsortop = get_opfamily_member(opfamily,
3121 op_righttype, op_righttype,
3122 gtstrat);
3123 lstatop = get_opfamily_member(opfamily,
3124 op_lefttype, op_lefttype,
3125 ltstrat);
3126 rstatop = get_opfamily_member(opfamily,
3127 op_righttype, op_righttype,
3128 ltstrat);
3129 revltop = get_opfamily_member(opfamily,
3130 op_righttype, op_lefttype,
3131 gtstrat);
3132 revleop = get_opfamily_member(opfamily,
3133 op_righttype, op_lefttype,
3134 gestrat);
3135 }
3136 break;
3137 default:
3138 goto fail; /* shouldn't get here */
3139 }
3140
3141 if (!OidIsValid(lsortop) ||
3142 !OidIsValid(rsortop) ||
3143 !OidIsValid(lstatop) ||
3144 !OidIsValid(rstatop) ||
3145 !OidIsValid(ltop) ||
3146 !OidIsValid(leop) ||
3147 !OidIsValid(revltop) ||
3148 !OidIsValid(revleop))
3149 goto fail; /* insufficient info in catalogs */
3150
3151 /* Try to get ranges of both inputs */
3152 if (!isgt)
3153 {
3154 if (!get_variable_range(root, &leftvar, lstatop, collation,
3155 &leftmin, &leftmax))
3156 goto fail; /* no range available from stats */
3157 if (!get_variable_range(root, &rightvar, rstatop, collation,
3158 &rightmin, &rightmax))
3159 goto fail; /* no range available from stats */
3160 }
3161 else
3162 {
3163 /* need to swap the max and min */
3164 if (!get_variable_range(root, &leftvar, lstatop, collation,
3165 &leftmax, &leftmin))
3166 goto fail; /* no range available from stats */
3167 if (!get_variable_range(root, &rightvar, rstatop, collation,
3168 &rightmax, &rightmin))
3169 goto fail; /* no range available from stats */
3170 }
3171
3172 /*
3173 * Now, the fraction of the left variable that will be scanned is the
3174 * fraction that's <= the right-side maximum value. But only believe
3175 * non-default estimates, else stick with our 1.0.
3176 */
3177 selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
3178 rightmax, op_righttype);
3179 if (selec != DEFAULT_INEQ_SEL)
3180 *leftend = selec;
3181
3182 /* And similarly for the right variable. */
3183 selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
3184 leftmax, op_lefttype);
3185 if (selec != DEFAULT_INEQ_SEL)
3186 *rightend = selec;
3187
3188 /*
3189 * Only one of the two "end" fractions can really be less than 1.0;
3190 * believe the smaller estimate and reset the other one to exactly 1.0. If
3191 * we get exactly equal estimates (as can easily happen with self-joins),
3192 * believe neither.
3193 */
3194 if (*leftend > *rightend)
3195 *leftend = 1.0;
3196 else if (*leftend < *rightend)
3197 *rightend = 1.0;
3198 else
3199 *leftend = *rightend = 1.0;
3200
3201 /*
3202 * Also, the fraction of the left variable that will be scanned before the
3203 * first join pair is found is the fraction that's < the right-side
3204 * minimum value. But only believe non-default estimates, else stick with
3205 * our own default.
3206 */
3207 selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
3208 rightmin, op_righttype);
3209 if (selec != DEFAULT_INEQ_SEL)
3210 *leftstart = selec;
3211
3212 /* And similarly for the right variable. */
3213 selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
3214 leftmin, op_lefttype);
3215 if (selec != DEFAULT_INEQ_SEL)
3216 *rightstart = selec;
3217
3218 /*
3219 * Only one of the two "start" fractions can really be more than zero;
3220 * believe the larger estimate and reset the other one to exactly 0.0. If
3221 * we get exactly equal estimates (as can easily happen with self-joins),
3222 * believe neither.
3223 */
3224 if (*leftstart < *rightstart)
3225 *leftstart = 0.0;
3226 else if (*leftstart > *rightstart)
3227 *rightstart = 0.0;
3228 else
3229 *leftstart = *rightstart = 0.0;
3230
3231 /*
3232 * If the sort order is nulls-first, we're going to have to skip over any
3233 * nulls too. These would not have been counted by scalarineqsel, and we
3234 * can safely add in this fraction regardless of whether we believe
3235 * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
3236 */
3237 if (nulls_first)
3238 {
3239 Form_pg_statistic stats;
3240
3241 if (HeapTupleIsValid(leftvar.statsTuple))
3242 {
3243 stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3244 *leftstart += stats->stanullfrac;
3245 CLAMP_PROBABILITY(*leftstart);
3246 *leftend += stats->stanullfrac;
3247 CLAMP_PROBABILITY(*leftend);
3248 }
3249 if (HeapTupleIsValid(rightvar.statsTuple))
3250 {
3251 stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3252 *rightstart += stats->stanullfrac;
3253 CLAMP_PROBABILITY(*rightstart);
3254 *rightend += stats->stanullfrac;
3255 CLAMP_PROBABILITY(*rightend);
3256 }
3257 }
3258
3259 /* Disbelieve start >= end, just in case that can happen */
3260 if (*leftstart >= *leftend)
3261 {
3262 *leftstart = 0.0;
3263 *leftend = 1.0;
3264 }
3265 if (*rightstart >= *rightend)
3266 {
3267 *rightstart = 0.0;
3268 *rightend = 1.0;
3269 }
3270
3271fail:
3272 ReleaseVariableStats(leftvar);
3273 ReleaseVariableStats(rightvar);
3274}
3275
3276
3277/*
3278 * matchingsel -- generic matching-operator selectivity support
3279 *
3280 * Use these for any operators that (a) are on data types for which we collect
3281 * standard statistics, and (b) have behavior for which the default estimate
3282 * (twice DEFAULT_EQ_SEL) is sane. Typically that is good for match-like
3283 * operators.
3284 */
3285
3286Datum
3288{
3290 Oid operator = PG_GETARG_OID(1);
3291 List *args = (List *) PG_GETARG_POINTER(2);
3292 int varRelid = PG_GETARG_INT32(3);
3293 Oid collation = PG_GET_COLLATION();
3294 double selec;
3295
3296 /* Use generic restriction selectivity logic. */
3297 selec = generic_restriction_selectivity(root, operator, collation,
3298 args, varRelid,
3300
3301 PG_RETURN_FLOAT8((float8) selec);
3302}
3303
3304Datum
3306{
3307 /* Just punt, for the moment. */
3309}
3310
3311
3312/*
3313 * Helper routine for estimate_num_groups: add an item to a list of
3314 * GroupVarInfos, but only if it's not known equal to any of the existing
3315 * entries.
3316 */
3317typedef struct
3318{
3319 Node *var; /* might be an expression, not just a Var */
3320 RelOptInfo *rel; /* relation it belongs to */
3321 double ndistinct; /* # distinct values */
3322 bool isdefault; /* true if DEFAULT_NUM_DISTINCT was used */
3323} GroupVarInfo;
3324
3325static List *
3327 Node *var, VariableStatData *vardata)
3328{
3329 GroupVarInfo *varinfo;
3330 double ndistinct;
3331 bool isdefault;
3332 ListCell *lc;
3333
3334 ndistinct = get_variable_numdistinct(vardata, &isdefault);
3335
3336 /*
3337 * The nullingrels bits within the var could cause the same var to be
3338 * counted multiple times if it's marked with different nullingrels. They
3339 * could also prevent us from matching the var to the expressions in
3340 * extended statistics (see estimate_multivariate_ndistinct). So strip
3341 * them out first.
3342 */
3343 var = remove_nulling_relids(var, root->outer_join_rels, NULL);
3344
3345 foreach(lc, varinfos)
3346 {
3347 varinfo = (GroupVarInfo *) lfirst(lc);
3348
3349 /* Drop exact duplicates */
3350 if (equal(var, varinfo->var))
3351 return varinfos;
3352
3353 /*
3354 * Drop known-equal vars, but only if they belong to different
3355 * relations (see comments for estimate_num_groups). We aren't too
3356 * fussy about the semantics of "equal" here.
3357 */
3358 if (vardata->rel != varinfo->rel &&
3359 exprs_known_equal(root, var, varinfo->var, InvalidOid))
3360 {
3361 if (varinfo->ndistinct <= ndistinct)
3362 {
3363 /* Keep older item, forget new one */
3364 return varinfos;
3365 }
3366 else
3367 {
3368 /* Delete the older item */
3369 varinfos = foreach_delete_current(varinfos, lc);
3370 }
3371 }
3372 }
3373
3374 varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3375
3376 varinfo->var = var;
3377 varinfo->rel = vardata->rel;
3378 varinfo->ndistinct = ndistinct;
3379 varinfo->isdefault = isdefault;
3380 varinfos = lappend(varinfos, varinfo);
3381 return varinfos;
3382}
3383
3384/*
3385 * estimate_num_groups - Estimate number of groups in a grouped query
3386 *
3387 * Given a query having a GROUP BY clause, estimate how many groups there
3388 * will be --- ie, the number of distinct combinations of the GROUP BY
3389 * expressions.
3390 *
3391 * This routine is also used to estimate the number of rows emitted by
3392 * a DISTINCT filtering step; that is an isomorphic problem. (Note:
3393 * actually, we only use it for DISTINCT when there's no grouping or
3394 * aggregation ahead of the DISTINCT.)
3395 *
3396 * Inputs:
3397 * root - the query
3398 * groupExprs - list of expressions being grouped by
3399 * input_rows - number of rows estimated to arrive at the group/unique
3400 * filter step
3401 * pgset - NULL, or a List** pointing to a grouping set to filter the
3402 * groupExprs against
3403 *
3404 * Outputs:
3405 * estinfo - When passed as non-NULL, the function will set bits in the
3406 * "flags" field in order to provide callers with additional information
3407 * about the estimation. Currently, we only set the SELFLAG_USED_DEFAULT
3408 * bit if we used any default values in the estimation.
3409 *
3410 * Given the lack of any cross-correlation statistics in the system, it's
3411 * impossible to do anything really trustworthy with GROUP BY conditions
3412 * involving multiple Vars. We should however avoid assuming the worst
3413 * case (all possible cross-product terms actually appear as groups) since
3414 * very often the grouped-by Vars are highly correlated. Our current approach
3415 * is as follows:
3416 * 1. Expressions yielding boolean are assumed to contribute two groups,
3417 * independently of their content, and are ignored in the subsequent
3418 * steps. This is mainly because tests like "col IS NULL" break the
3419 * heuristic used in step 2 especially badly.
3420 * 2. Reduce the given expressions to a list of unique Vars used. For
3421 * example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3422 * It is clearly correct not to count the same Var more than once.
3423 * It is also reasonable to treat f(x) the same as x: f() cannot
3424 * increase the number of distinct values (unless it is volatile,
3425 * which we consider unlikely for grouping), but it probably won't
3426 * reduce the number of distinct values much either.
3427 * As a special case, if a GROUP BY expression can be matched to an
3428 * expressional index for which we have statistics, then we treat the
3429 * whole expression as though it were just a Var.
3430 * 3. If the list contains Vars of different relations that are known equal
3431 * due to equivalence classes, then drop all but one of the Vars from each
3432 * known-equal set, keeping the one with smallest estimated # of values
3433 * (since the extra values of the others can't appear in joined rows).
3434 * Note the reason we only consider Vars of different relations is that
3435 * if we considered ones of the same rel, we'd be double-counting the
3436 * restriction selectivity of the equality in the next step.
3437 * 4. For Vars within a single source rel, we multiply together the numbers
3438 * of values, clamp to the number of rows in the rel (divided by 10 if
3439 * more than one Var), and then multiply by a factor based on the
3440 * selectivity of the restriction clauses for that rel. When there's
3441 * more than one Var, the initial product is probably too high (it's the
3442 * worst case) but clamping to a fraction of the rel's rows seems to be a
3443 * helpful heuristic for not letting the estimate get out of hand. (The
3444 * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
3445 * we multiply by to adjust for the restriction selectivity assumes that
3446 * the restriction clauses are independent of the grouping, which may not
3447 * be a valid assumption, but it's hard to do better.
3448 * 5. If there are Vars from multiple rels, we repeat step 4 for each such
3449 * rel, and multiply the results together.
3450 * Note that rels not containing grouped Vars are ignored completely, as are
3451 * join clauses. Such rels cannot increase the number of groups, and we
3452 * assume such clauses do not reduce the number either (somewhat bogus,
3453 * but we don't have the info to do better).
3454 */
3455double
3456estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3457 List **pgset, EstimationInfo *estinfo)
3458{
3459 List *varinfos = NIL;
3460 double srf_multiplier = 1.0;
3461 double numdistinct;
3462 ListCell *l;
3463 int i;
3464
3465 /* Zero the estinfo output parameter, if non-NULL */
3466 if (estinfo != NULL)
3467 memset(estinfo, 0, sizeof(EstimationInfo));
3468
3469 /*
3470 * We don't ever want to return an estimate of zero groups, as that tends
3471 * to lead to division-by-zero and other unpleasantness. The input_rows
3472 * estimate is usually already at least 1, but clamp it just in case it
3473 * isn't.
3474 */
3475 input_rows = clamp_row_est(input_rows);
3476
3477 /*
3478 * If no grouping columns, there's exactly one group. (This can't happen
3479 * for normal cases with GROUP BY or DISTINCT, but it is possible for
3480 * corner cases with set operations.)
3481 */
3482 if (groupExprs == NIL || (pgset && *pgset == NIL))
3483 return 1.0;
3484
3485 /*
3486 * Count groups derived from boolean grouping expressions. For other
3487 * expressions, find the unique Vars used, treating an expression as a Var
3488 * if we can find stats for it. For each one, record the statistical
3489 * estimate of number of distinct values (total in its table, without
3490 * regard for filtering).
3491 */
3492 numdistinct = 1.0;
3493
3494 i = 0;
3495 foreach(l, groupExprs)
3496 {
3497 Node *groupexpr = (Node *) lfirst(l);
3498 double this_srf_multiplier;
3499 VariableStatData vardata;
3500 List *varshere;
3501 ListCell *l2;
3502
3503 /* is expression in this grouping set? */
3504 if (pgset && !list_member_int(*pgset, i++))
3505 continue;
3506
3507 /*
3508 * Set-returning functions in grouping columns are a bit problematic.
3509 * The code below will effectively ignore their SRF nature and come up
3510 * with a numdistinct estimate as though they were scalar functions.
3511 * We compensate by scaling up the end result by the largest SRF
3512 * rowcount estimate. (This will be an overestimate if the SRF
3513 * produces multiple copies of any output value, but it seems best to
3514 * assume the SRF's outputs are distinct. In any case, it's probably
3515 * pointless to worry too much about this without much better
3516 * estimates for SRF output rowcounts than we have today.)
3517 */
3518 this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3519 if (srf_multiplier < this_srf_multiplier)
3520 srf_multiplier = this_srf_multiplier;
3521
3522 /* Short-circuit for expressions returning boolean */
3523 if (exprType(groupexpr) == BOOLOID)
3524 {
3525 numdistinct *= 2.0;
3526 continue;
3527 }
3528
3529 /*
3530 * If examine_variable is able to deduce anything about the GROUP BY
3531 * expression, treat it as a single variable even if it's really more
3532 * complicated.
3533 *
3534 * XXX This has the consequence that if there's a statistics object on
3535 * the expression, we don't split it into individual Vars. This
3536 * affects our selection of statistics in
3537 * estimate_multivariate_ndistinct, because it's probably better to
3538 * use more accurate estimate for each expression and treat them as
3539 * independent, than to combine estimates for the extracted variables
3540 * when we don't know how that relates to the expressions.
3541 */
3542 examine_variable(root, groupexpr, 0, &vardata);
3543 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3544 {
3545 varinfos = add_unique_group_var(root, varinfos,
3546 groupexpr, &vardata);
3547 ReleaseVariableStats(vardata);
3548 continue;
3549 }
3550 ReleaseVariableStats(vardata);
3551
3552 /*
3553 * Else pull out the component Vars. Handle PlaceHolderVars by
3554 * recursing into their arguments (effectively assuming that the
3555 * PlaceHolderVar doesn't change the number of groups, which boils
3556 * down to ignoring the possible addition of nulls to the result set).
3557 */
3558 varshere = pull_var_clause(groupexpr,
3562
3563 /*
3564 * If we find any variable-free GROUP BY item, then either it is a
3565 * constant (and we can ignore it) or it contains a volatile function;
3566 * in the latter case we punt and assume that each input row will
3567 * yield a distinct group.
3568 */
3569 if (varshere == NIL)
3570 {
3571 if (contain_volatile_functions(groupexpr))
3572 return input_rows;
3573 continue;
3574 }
3575
3576 /*
3577 * Else add variables to varinfos list
3578 */
3579 foreach(l2, varshere)
3580 {
3581 Node *var = (Node *) lfirst(l2);
3582
3583 examine_variable(root, var, 0, &vardata);
3584 varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3585 ReleaseVariableStats(vardata);
3586 }
3587 }
3588
3589 /*
3590 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3591 * list.
3592 */
3593 if (varinfos == NIL)
3594 {
3595 /* Apply SRF multiplier as we would do in the long path */
3596 numdistinct *= srf_multiplier;
3597 /* Round off */
3598 numdistinct = ceil(numdistinct);
3599 /* Guard against out-of-range answers */
3600 if (numdistinct > input_rows)
3601 numdistinct = input_rows;
3602 if (numdistinct < 1.0)
3603 numdistinct = 1.0;
3604 return numdistinct;
3605 }
3606
3607 /*
3608 * Group Vars by relation and estimate total numdistinct.
3609 *
3610 * For each iteration of the outer loop, we process the frontmost Var in
3611 * varinfos, plus all other Vars in the same relation. We remove these
3612 * Vars from the newvarinfos list for the next iteration. This is the
3613 * easiest way to group Vars of same rel together.
3614 */
3615 do
3616 {
3617 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3618 RelOptInfo *rel = varinfo1->rel;
3619 double reldistinct = 1;
3620 double relmaxndistinct = reldistinct;
3621 int relvarcount = 0;
3622 List *newvarinfos = NIL;
3623 List *relvarinfos = NIL;
3624
3625 /*
3626 * Split the list of varinfos in two - one for the current rel, one
3627 * for remaining Vars on other rels.
3628 */
3629 relvarinfos = lappend(relvarinfos, varinfo1);
3630 for_each_from(l, varinfos, 1)
3631 {
3632 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3633
3634 if (varinfo2->rel == varinfo1->rel)
3635 {
3636 /* varinfos on current rel */
3637 relvarinfos = lappend(relvarinfos, varinfo2);
3638 }
3639 else
3640 {
3641 /* not time to process varinfo2 yet */
3642 newvarinfos = lappend(newvarinfos, varinfo2);
3643 }
3644 }
3645
3646 /*
3647 * Get the numdistinct estimate for the Vars of this rel. We
3648 * iteratively search for multivariate n-distinct with maximum number
3649 * of vars; assuming that each var group is independent of the others,
3650 * we multiply them together. Any remaining relvarinfos after no more
3651 * multivariate matches are found are assumed independent too, so
3652 * their individual ndistinct estimates are multiplied also.
3653 *
3654 * While iterating, count how many separate numdistinct values we
3655 * apply. We apply a fudge factor below, but only if we multiplied
3656 * more than one such values.
3657 */
3658 while (relvarinfos)
3659 {
3660 double mvndistinct;
3661
3662 if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3663 &mvndistinct))
3664 {
3665 reldistinct *= mvndistinct;
3666 if (relmaxndistinct < mvndistinct)
3667 relmaxndistinct = mvndistinct;
3668 relvarcount++;
3669 }
3670 else
3671 {
3672 foreach(l, relvarinfos)
3673 {
3674 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3675
3676 reldistinct *= varinfo2->ndistinct;
3677 if (relmaxndistinct < varinfo2->ndistinct)
3678 relmaxndistinct = varinfo2->ndistinct;
3679 relvarcount++;
3680
3681 /*
3682 * When varinfo2's isdefault is set then we'd better set
3683 * the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
3684 */
3685 if (estinfo != NULL && varinfo2->isdefault)
3686 estinfo->flags |= SELFLAG_USED_DEFAULT;
3687 }
3688
3689 /* we're done with this relation */
3690 relvarinfos = NIL;
3691 }
3692 }
3693
3694 /*
3695 * Sanity check --- don't divide by zero if empty relation.
3696 */
3697 Assert(IS_SIMPLE_REL(rel));
3698 if (rel->tuples > 0)
3699 {
3700 /*
3701 * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3702 * fudge factor is because the Vars are probably correlated but we
3703 * don't know by how much. We should never clamp to less than the
3704 * largest ndistinct value for any of the Vars, though, since
3705 * there will surely be at least that many groups.
3706 */
3707 double clamp = rel->tuples;
3708
3709 if (relvarcount > 1)
3710 {
3711 clamp *= 0.1;
3712 if (clamp < relmaxndistinct)
3713 {
3714 clamp = relmaxndistinct;
3715 /* for sanity in case some ndistinct is too large: */
3716 if (clamp > rel->tuples)
3717 clamp = rel->tuples;
3718 }
3719 }
3720 if (reldistinct > clamp)
3721 reldistinct = clamp;
3722
3723 /*
3724 * Update the estimate based on the restriction selectivity,
3725 * guarding against division by zero when reldistinct is zero.
3726 * Also skip this if we know that we are returning all rows.
3727 */
3728 if (reldistinct > 0 && rel->rows < rel->tuples)
3729 {
3730 /*
3731 * Given a table containing N rows with n distinct values in a
3732 * uniform distribution, if we select p rows at random then
3733 * the expected number of distinct values selected is
3734 *
3735 * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3736 *
3737 * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3738 *
3739 * See "Approximating block accesses in database
3740 * organizations", S. B. Yao, Communications of the ACM,
3741 * Volume 20 Issue 4, April 1977 Pages 260-261.
3742 *
3743 * Alternatively, re-arranging the terms from the factorials,
3744 * this may be written as
3745 *
3746 * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3747 *
3748 * This form of the formula is more efficient to compute in
3749 * the common case where p is larger than N/n. Additionally,
3750 * as pointed out by Dell'Era, if i << N for all terms in the
3751 * product, it can be approximated by
3752 *
3753 * n * (1 - ((N-p)/N)^(N/n))
3754 *
3755 * See "Expected distinct values when selecting from a bag
3756 * without replacement", Alberto Dell'Era,
3757 * http://www.adellera.it/investigations/distinct_balls/.
3758 *
3759 * The condition i << N is equivalent to n >> 1, so this is a
3760 * good approximation when the number of distinct values in
3761 * the table is large. It turns out that this formula also
3762 * works well even when n is small.
3763 */
3764 reldistinct *=
3765 (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3766 rel->tuples / reldistinct));
3767 }
3768 reldistinct = clamp_row_est(reldistinct);
3769
3770 /*
3771 * Update estimate of total distinct groups.
3772 */
3773 numdistinct *= reldistinct;
3774 }
3775
3776 varinfos = newvarinfos;
3777 } while (varinfos != NIL);
3778
3779 /* Now we can account for the effects of any SRFs */
3780 numdistinct *= srf_multiplier;
3781
3782 /* Round off */
3783 numdistinct = ceil(numdistinct);
3784
3785 /* Guard against out-of-range answers */
3786 if (numdistinct > input_rows)
3787 numdistinct = input_rows;
3788 if (numdistinct < 1.0)
3789 numdistinct = 1.0;
3790
3791 return numdistinct;
3792}
3793
3794/*
3795 * Try to estimate the bucket size of the hash join inner side when the join
3796 * condition contains two or more clauses by employing extended statistics.
3797 *
3798 * The main idea of this approach is that the distinct value generated by
3799 * multivariate estimation on two or more columns would provide less bucket size
3800 * than estimation on one separate column.
3801 *
3802 * IMPORTANT: It is crucial to synchronize the approach of combining different
3803 * estimations with the caller's method.
3804 *
3805 * Return a list of clauses that didn't fetch any extended statistics.
3806 */
3807List *
3809 List *hashclauses,
3810 Selectivity *innerbucketsize)
3811{
3812 List *clauses;
3813 List *otherclauses;
3814 double ndistinct;
3815
3816 if (list_length(hashclauses) <= 1)
3817 {
3818 /*
3819 * Nothing to do for a single clause. Could we employ univariate
3820 * extended stat here?
3821 */
3822 return hashclauses;
3823 }
3824
3825 /* "clauses" is the list of hashclauses we've not dealt with yet */
3826 clauses = list_copy(hashclauses);
3827 /* "otherclauses" holds clauses we are going to return to caller */
3828 otherclauses = NIL;
3829 /* current estimate of ndistinct */
3830 ndistinct = 1.0;
3831 while (clauses != NIL)
3832 {
3833 ListCell *lc;
3834 int relid = -1;
3835 List *varinfos = NIL;
3836 List *origin_rinfos = NIL;
3837 double mvndistinct;
3838 List *origin_varinfos;
3839 int group_relid = -1;
3840 RelOptInfo *group_rel = NULL;
3841 ListCell *lc1,
3842 *lc2;
3843
3844 /*
3845 * Find clauses, referencing the same single base relation and try to
3846 * estimate such a group with extended statistics. Create varinfo for
3847 * an approved clause, push it to otherclauses, if it can't be
3848 * estimated here or ignore to process at the next iteration.
3849 */
3850 foreach(lc, clauses)
3851 {
3853 Node *expr;
3854 Relids relids;
3855 GroupVarInfo *varinfo;
3856
3857 /*
3858 * Find the inner side of the join, which we need to estimate the
3859 * number of buckets. Use outer_is_left because the
3860 * clause_sides_match_join routine has called on hash clauses.
3861 */
3862 relids = rinfo->outer_is_left ?
3863 rinfo->right_relids : rinfo->left_relids;
3864 expr = rinfo->outer_is_left ?
3865 get_rightop(rinfo->clause) : get_leftop(rinfo->clause);
3866
3867 if (bms_get_singleton_member(relids, &relid) &&
3868 root->simple_rel_array[relid]->statlist != NIL)
3869 {
3870 bool is_duplicate = false;
3871
3872 /*
3873 * This inner-side expression references only one relation.
3874 * Extended statistics on this clause can exist.
3875 */
3876 if (group_relid < 0)
3877 {
3878 RangeTblEntry *rte = root->simple_rte_array[relid];
3879
3880 if (!rte || (rte->relkind != RELKIND_RELATION &&
3881 rte->relkind != RELKIND_MATVIEW &&
3882 rte->relkind != RELKIND_FOREIGN_TABLE &&
3883 rte->relkind != RELKIND_PARTITIONED_TABLE))
3884 {
3885 /* Extended statistics can't exist in principle */
3886 otherclauses = lappend(otherclauses, rinfo);
3887 clauses = foreach_delete_current(clauses, lc);
3888 continue;
3889 }
3890
3891 group_relid = relid;
3892 group_rel = root->simple_rel_array[relid];
3893 }
3894 else if (group_relid != relid)
3895 {
3896 /*
3897 * Being in the group forming state we don't need other
3898 * clauses.
3899 */
3900 continue;
3901 }
3902
3903 /*
3904 * We're going to add the new clause to the varinfos list. We
3905 * might re-use add_unique_group_var(), but we don't do so for
3906 * two reasons.
3907 *
3908 * 1) We must keep the origin_rinfos list ordered exactly the
3909 * same way as varinfos.
3910 *
3911 * 2) add_unique_group_var() is designed for
3912 * estimate_num_groups(), where a larger number of groups is
3913 * worse. While estimating the number of hash buckets, we
3914 * have the opposite: a lesser number of groups is worse.
3915 * Therefore, we don't have to remove "known equal" vars: the
3916 * removed var may valuably contribute to the multivariate
3917 * statistics to grow the number of groups.
3918 */
3919
3920 /*
3921 * Clear nullingrels to correctly match hash keys. See
3922 * add_unique_group_var()'s comment for details.
3923 */
3924 expr = remove_nulling_relids(expr, root->outer_join_rels, NULL);
3925
3926 /*
3927 * Detect and exclude exact duplicates from the list of hash
3928 * keys (like add_unique_group_var does).
3929 */
3930 foreach(lc1, varinfos)
3931 {
3932 varinfo = (GroupVarInfo *) lfirst(lc1);
3933
3934 if (!equal(expr, varinfo->var))
3935 continue;
3936
3937 is_duplicate = true;
3938 break;
3939 }
3940
3941 if (is_duplicate)
3942 {
3943 /*
3944 * Skip exact duplicates. Adding them to the otherclauses
3945 * list also doesn't make sense.
3946 */
3947 continue;
3948 }
3949
3950 /*
3951 * Initialize GroupVarInfo. We only use it to call
3952 * estimate_multivariate_ndistinct(), which doesn't care about
3953 * ndistinct and isdefault fields. Thus, skip these fields.
3954 */
3955 varinfo = (GroupVarInfo *) palloc0(sizeof(GroupVarInfo));
3956 varinfo->var = expr;
3957 varinfo->rel = root->simple_rel_array[relid];
3958 varinfos = lappend(varinfos, varinfo);
3959
3960 /*
3961 * Remember the link to RestrictInfo for the case the clause
3962 * is failed to be estimated.
3963 */
3964 origin_rinfos = lappend(origin_rinfos, rinfo);
3965 }
3966 else
3967 {
3968 /* This clause can't be estimated with extended statistics */
3969 otherclauses = lappend(otherclauses, rinfo);
3970 }
3971
3972 clauses = foreach_delete_current(clauses, lc);
3973 }
3974
3975 if (list_length(varinfos) < 2)
3976 {
3977 /*
3978 * Multivariate statistics doesn't apply to single columns except
3979 * for expressions, but it has not been implemented yet.
3980 */
3981 otherclauses = list_concat(otherclauses, origin_rinfos);
3982 list_free_deep(varinfos);
3983 list_free(origin_rinfos);
3984 continue;
3985 }
3986
3987 Assert(group_rel != NULL);
3988
3989 /* Employ the extended statistics. */
3990 origin_varinfos = varinfos;
3991 for (;;)
3992 {
3993 bool estimated = estimate_multivariate_ndistinct(root,
3994 group_rel,
3995 &varinfos,
3996 &mvndistinct);
3997
3998 if (!estimated)
3999 break;
4000
4001 /*
4002 * We've got an estimation. Use ndistinct value in a consistent
4003 * way - according to the caller's logic (see
4004 * final_cost_hashjoin).
4005 */
4006 if (ndistinct < mvndistinct)
4007 ndistinct = mvndistinct;
4008 Assert(ndistinct >= 1.0);
4009 }
4010
4011 Assert(list_length(origin_varinfos) == list_length(origin_rinfos));
4012
4013 /* Collect unmatched clauses as otherclauses. */
4014 forboth(lc1, origin_varinfos, lc2, origin_rinfos)
4015 {
4016 GroupVarInfo *vinfo = lfirst(lc1);
4017
4018 if (!list_member_ptr(varinfos, vinfo))
4019 /* Already estimated */
4020 continue;
4021
4022 /* Can't be estimated here - push to the returning list */
4023 otherclauses = lappend(otherclauses, lfirst(lc2));
4024 }
4025 }
4026
4027 *innerbucketsize = 1.0 / ndistinct;
4028 return otherclauses;
4029}
4030
4031/*
4032 * Estimate hash bucket statistics when the specified expression is used
4033 * as a hash key for the given number of buckets.
4034 *
4035 * This attempts to determine two values:
4036 *
4037 * 1. The frequency of the most common value of the expression (returns
4038 * zero into *mcv_freq if we can't get that).
4039 *
4040 * 2. The "bucketsize fraction", ie, average number of entries in a bucket
4041 * divided by total tuples in relation.
4042 *
4043 * XXX This is really pretty bogus since we're effectively assuming that the
4044 * distribution of hash keys will be the same after applying restriction
4045 * clauses as it was in the underlying relation. However, we are not nearly
4046 * smart enough to figure out how the restrict clauses might change the
4047 * distribution, so this will have to do for now.
4048 *
4049 * We are passed the number of buckets the executor will use for the given
4050 * input relation. If the data were perfectly distributed, with the same
4051 * number of tuples going into each available bucket, then the bucketsize
4052 * fraction would be 1/nbuckets. But this happy state of affairs will occur
4053 * only if (a) there are at least nbuckets distinct data values, and (b)
4054 * we have a not-too-skewed data distribution. Otherwise the buckets will
4055 * be nonuniformly occupied. If the other relation in the join has a key
4056 * distribution similar to this one's, then the most-loaded buckets are
4057 * exactly those that will be probed most often. Therefore, the "average"
4058 * bucket size for costing purposes should really be taken as something close
4059 * to the "worst case" bucket size. We try to estimate this by adjusting the
4060 * fraction if there are too few distinct data values, and then scaling up
4061 * by the ratio of the most common value's frequency to the average frequency.
4062 *
4063 * If no statistics are available, use a default estimate of 0.1. This will
4064 * discourage use of a hash rather strongly if the inner relation is large,
4065 * which is what we want. We do not want to hash unless we know that the
4066 * inner rel is well-dispersed (or the alternatives seem much worse).
4067 *
4068 * The caller should also check that the mcv_freq is not so large that the
4069 * most common value would by itself require an impractically large bucket.
4070 * In a hash join, the executor can split buckets if they get too big, but
4071 * obviously that doesn't help for a bucket that contains many duplicates of
4072 * the same value.
4073 */
4074void
4076 Selectivity *mcv_freq,
4077 Selectivity *bucketsize_frac)
4078{
4079 VariableStatData vardata;
4080 double estfract,
4081 ndistinct,
4082 stanullfrac,
4083 avgfreq;
4084 bool isdefault;
4085 AttStatsSlot sslot;
4086
4087 examine_variable(root, hashkey, 0, &vardata);
4088
4089 /* Look up the frequency of the most common value, if available */
4090 *mcv_freq = 0.0;
4091
4092 if (HeapTupleIsValid(vardata.statsTuple))
4093 {
4094 if (get_attstatsslot(&sslot, vardata.statsTuple,
4095 STATISTIC_KIND_MCV, InvalidOid,
4097 {
4098 /*
4099 * The first MCV stat is for the most common value.
4100 */
4101 if (sslot.nnumbers > 0)
4102 *mcv_freq = sslot.numbers[0];
4103 free_attstatsslot(&sslot);
4104 }
4105 }
4106
4107 /* Get number of distinct values */
4108 ndistinct = get_variable_numdistinct(&vardata, &isdefault);
4109
4110 /*
4111 * If ndistinct isn't real, punt. We normally return 0.1, but if the
4112 * mcv_freq is known to be even higher than that, use it instead.
4113 */
4114 if (isdefault)
4115 {
4116 *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
4117 ReleaseVariableStats(vardata);
4118 return;
4119 }
4120
4121 /* Get fraction that are null */
4122 if (HeapTupleIsValid(vardata.statsTuple))
4123 {
4124 Form_pg_statistic stats;
4125
4126 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
4127 stanullfrac = stats->stanullfrac;
4128 }
4129 else
4130 stanullfrac = 0.0;
4131
4132 /* Compute avg freq of all distinct data values in raw relation */
4133 avgfreq = (1.0 - stanullfrac) / ndistinct;
4134
4135 /*
4136 * Adjust ndistinct to account for restriction clauses. Observe we are
4137 * assuming that the data distribution is affected uniformly by the
4138 * restriction clauses!
4139 *
4140 * XXX Possibly better way, but much more expensive: multiply by
4141 * selectivity of rel's restriction clauses that mention the target Var.
4142 */
4143 if (vardata.rel && vardata.rel->tuples > 0)
4144 {
4145 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
4146 ndistinct = clamp_row_est(ndistinct);
4147 }
4148
4149 /*
4150 * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
4151 * number of buckets is less than the expected number of distinct values;
4152 * otherwise it is 1/ndistinct.
4153 */
4154 if (ndistinct > nbuckets)
4155 estfract = 1.0 / nbuckets;
4156 else
4157 estfract = 1.0 / ndistinct;
4158
4159 /*
4160 * Adjust estimated bucketsize upward to account for skewed distribution.
4161 */
4162 if (avgfreq > 0.0 && *mcv_freq > avgfreq)
4163 estfract *= *mcv_freq / avgfreq;
4164
4165 /*
4166 * Clamp bucketsize to sane range (the above adjustment could easily
4167 * produce an out-of-range result). We set the lower bound a little above
4168 * zero, since zero isn't a very sane result.
4169 */
4170 if (estfract < 1.0e-6)
4171 estfract = 1.0e-6;
4172 else if (estfract > 1.0)
4173 estfract = 1.0;
4174
4175 *bucketsize_frac = (Selectivity) estfract;
4176
4177 ReleaseVariableStats(vardata);
4178}
4179
4180/*
4181 * estimate_hashagg_tablesize
4182 * estimate the number of bytes that a hash aggregate hashtable will
4183 * require based on the agg_costs, path width and number of groups.
4184 *
4185 * We return the result as "double" to forestall any possible overflow
4186 * problem in the multiplication by dNumGroups.
4187 *
4188 * XXX this may be over-estimating the size now that hashagg knows to omit
4189 * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
4190 * grouping columns not in the hashed set are counted here even though hashagg
4191 * won't store them. Is this a problem?
4192 */
4193double
4195 const AggClauseCosts *agg_costs, double dNumGroups)
4196{
4197 Size hashentrysize;
4198
4199 hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
4200 path->pathtarget->width,
4201 agg_costs->transitionSpace);
4202
4203 /*
4204 * Note that this disregards the effect of fill-factor and growth policy
4205 * of the hash table. That's probably ok, given that the default
4206 * fill-factor is relatively high. It'd be hard to meaningfully factor in
4207 * "double-in-size" growth policies here.
4208 */
4209 return hashentrysize * dNumGroups;
4210}
4211
4212
4213/*-------------------------------------------------------------------------
4214 *
4215 * Support routines
4216 *
4217 *-------------------------------------------------------------------------
4218 */
4219
4220/*
4221 * Find the best matching ndistinct extended statistics for the given list of
4222 * GroupVarInfos.
4223 *
4224 * Callers must ensure that the given GroupVarInfos all belong to 'rel' and
4225 * the GroupVarInfos list does not contain any duplicate Vars or expressions.
4226 *
4227 * When statistics are found that match > 1 of the given GroupVarInfo, the
4228 * *ndistinct parameter is set according to the ndistinct estimate and a new
4229 * list is built with the matching GroupVarInfos removed, which is output via
4230 * the *varinfos parameter before returning true. When no matching stats are
4231 * found, false is returned and the *varinfos and *ndistinct parameters are
4232 * left untouched.
4233 */
4234static bool
4236 List **varinfos, double *ndistinct)
4237{
4238 ListCell *lc;
4239 int nmatches_vars;
4240 int nmatches_exprs;
4241 Oid statOid = InvalidOid;
4242 MVNDistinct *stats;
4243 StatisticExtInfo *matched_info = NULL;
4245
4246 /* bail out immediately if the table has no extended statistics */
4247 if (!rel->statlist)
4248 return false;
4249
4250 /* look for the ndistinct statistics object matching the most vars */
4251 nmatches_vars = 0; /* we require at least two matches */
4252 nmatches_exprs = 0;
4253 foreach(lc, rel->statlist)
4254 {
4255 ListCell *lc2;
4257 int nshared_vars = 0;
4258 int nshared_exprs = 0;
4259
4260 /* skip statistics of other kinds */
4261 if (info->kind != STATS_EXT_NDISTINCT)
4262 continue;
4263
4264 /* skip statistics with mismatching stxdinherit value */
4265 if (info->inherit != rte->inh)
4266 continue;
4267
4268 /*
4269 * Determine how many expressions (and variables in non-matched
4270 * expressions) match. We'll then use these numbers to pick the
4271 * statistics object that best matches the clauses.
4272 */
4273 foreach(lc2, *varinfos)
4274 {
4275 ListCell *lc3;
4276 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
4278
4279 Assert(varinfo->rel == rel);
4280
4281 /* simple Var, search in statistics keys directly */
4282 if (IsA(varinfo->var, Var))
4283 {
4284 attnum = ((Var *) varinfo->var)->varattno;
4285
4286 /*
4287 * Ignore system attributes - we don't support statistics on
4288 * them, so can't match them (and it'd fail as the values are
4289 * negative).
4290 */
4292 continue;
4293
4294 if (bms_is_member(attnum, info->keys))
4295 nshared_vars++;
4296
4297 continue;
4298 }
4299
4300 /* expression - see if it's in the statistics object */
4301 foreach(lc3, info->exprs)
4302 {
4303 Node *expr = (Node *) lfirst(lc3);
4304
4305 if (equal(varinfo->var, expr))
4306 {
4307 nshared_exprs++;
4308 break;
4309 }
4310 }
4311 }
4312
4313 /*
4314 * The ndistinct extended statistics contain estimates for a minimum
4315 * of pairs of columns which the statistics are defined on and
4316 * certainly not single columns. Here we skip unless we managed to
4317 * match to at least two columns.
4318 */
4319 if (nshared_vars + nshared_exprs < 2)
4320 continue;
4321
4322 /*
4323 * Check if these statistics are a better match than the previous best
4324 * match and if so, take note of the StatisticExtInfo.
4325 *
4326 * The statslist is sorted by statOid, so the StatisticExtInfo we
4327 * select as the best match is deterministic even when multiple sets
4328 * of statistics match equally as well.
4329 */
4330 if ((nshared_exprs > nmatches_exprs) ||
4331 (((nshared_exprs == nmatches_exprs)) && (nshared_vars > nmatches_vars)))
4332 {
4333 statOid = info->statOid;
4334 nmatches_vars = nshared_vars;
4335 nmatches_exprs = nshared_exprs;
4336 matched_info = info;
4337 }
4338 }
4339
4340 /* No match? */
4341 if (statOid == InvalidOid)
4342 return false;
4343
4344 Assert(nmatches_vars + nmatches_exprs > 1);
4345
4346 stats = statext_ndistinct_load(statOid, rte->inh);
4347
4348 /*
4349 * If we have a match, search it for the specific item that matches (there
4350 * must be one), and construct the output values.
4351 */
4352 if (stats)
4353 {
4354 int i;
4355 List *newlist = NIL;
4356 MVNDistinctItem *item = NULL;
4357 ListCell *lc2;
4358 Bitmapset *matched = NULL;
4359 AttrNumber attnum_offset;
4360
4361 /*
4362 * How much we need to offset the attnums? If there are no
4363 * expressions, no offset is needed. Otherwise offset enough to move
4364 * the lowest one (which is equal to number of expressions) to 1.
4365 */
4366 if (matched_info->exprs)
4367 attnum_offset = (list_length(matched_info->exprs) + 1);
4368 else
4369 attnum_offset = 0;
4370
4371 /* see what actually matched */
4372 foreach(lc2, *varinfos)
4373 {
4374 ListCell *lc3;
4375 int idx;
4376 bool found = false;
4377
4378 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
4379
4380 /*
4381 * Process a simple Var expression, by matching it to keys
4382 * directly. If there's a matching expression, we'll try matching
4383 * it later.
4384 */
4385 if (IsA(varinfo->var, Var))
4386 {
4387 AttrNumber attnum = ((Var *) varinfo->var)->varattno;
4388
4389 /*
4390 * Ignore expressions on system attributes. Can't rely on the
4391 * bms check for negative values.
4392 */
4394 continue;
4395
4396 /* Is the variable covered by the statistics object? */
4397 if (!bms_is_member(attnum, matched_info->keys))
4398 continue;
4399
4400 attnum = attnum + attnum_offset;
4401
4402 /* ensure sufficient offset */
4404
4405 matched = bms_add_member(matched, attnum);
4406
4407 found = true;
4408 }
4409
4410 /*
4411 * XXX Maybe we should allow searching the expressions even if we
4412 * found an attribute matching the expression? That would handle
4413 * trivial expressions like "(a)" but it seems fairly useless.
4414 */
4415 if (found)
4416 continue;
4417
4418 /* expression - see if it's in the statistics object */
4419 idx = 0;
4420 foreach(lc3, matched_info->exprs)
4421 {
4422 Node *expr = (Node *) lfirst(lc3);
4423
4424 if (equal(varinfo->var, expr))
4425 {
4426 AttrNumber attnum = -(idx + 1);
4427
4428 attnum = attnum + attnum_offset;
4429
4430 /* ensure sufficient offset */
4432
4433 matched = bms_add_member(matched, attnum);
4434
4435 /* there should be just one matching expression */
4436 break;
4437 }
4438
4439 idx++;
4440 }
4441 }
4442
4443 /* Find the specific item that exactly matches the combination */
4444 for (i = 0; i < stats->nitems; i++)
4445 {
4446 int j;
4447 MVNDistinctItem *tmpitem = &stats->items[i];
4448
4449 if (tmpitem->nattributes != bms_num_members(matched))
4450 continue;
4451
4452 /* assume it's the right item */
4453 item = tmpitem;
4454
4455 /* check that all item attributes/expressions fit the match */
4456 for (j = 0; j < tmpitem->nattributes; j++)
4457 {
4458 AttrNumber attnum = tmpitem->attributes[j];
4459
4460 /*
4461 * Thanks to how we constructed the matched bitmap above, we
4462 * can just offset all attnums the same way.
4463 */
4464 attnum = attnum + attnum_offset;
4465
4466 if (!bms_is_member(attnum, matched))
4467 {
4468 /* nah, it's not this item */
4469 item = NULL;
4470 break;
4471 }
4472 }
4473
4474 /*
4475 * If the item has all the matched attributes, we know it's the
4476 * right one - there can't be a better one. matching more.
4477 */
4478 if (item)
4479 break;
4480 }
4481
4482 /*
4483 * Make sure we found an item. There has to be one, because ndistinct
4484 * statistics includes all combinations of attributes.
4485 */
4486 if (!item)
4487 elog(ERROR, "corrupt MVNDistinct entry");
4488
4489 /* Form the output varinfo list, keeping only unmatched ones */
4490 foreach(lc, *varinfos)
4491 {
4492 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
4493 ListCell *lc3;
4494 bool found = false;
4495
4496 /*
4497 * Let's look at plain variables first, because it's the most
4498 * common case and the check is quite cheap. We can simply get the
4499 * attnum and check (with an offset) matched bitmap.
4500 */
4501 if (IsA(varinfo->var, Var))
4502 {
4503 AttrNumber attnum = ((Var *) varinfo->var)->varattno;
4504
4505 /*
4506 * If it's a system attribute, we're done. We don't support
4507 * extended statistics on system attributes, so it's clearly
4508 * not matched. Just keep the expression and continue.
4509 */
4511 {
4512 newlist = lappend(newlist, varinfo);
4513 continue;
4514 }
4515
4516 /* apply the same offset as above */
4517 attnum += attnum_offset;
4518
4519 /* if it's not matched, keep the varinfo */
4520 if (!bms_is_member(attnum, matched))
4521 newlist = lappend(newlist, varinfo);
4522
4523 /* The rest of the loop deals with complex expressions. */
4524 continue;
4525 }
4526
4527 /*
4528 * Process complex expressions, not just simple Vars.
4529 *
4530 * First, we search for an exact match of an expression. If we
4531 * find one, we can just discard the whole GroupVarInfo, with all
4532 * the variables we extracted from it.
4533 *
4534 * Otherwise we inspect the individual vars, and try matching it
4535 * to variables in the item.
4536 */
4537 foreach(lc3, matched_info->exprs)
4538 {
4539 Node *expr = (Node *) lfirst(lc3);
4540
4541 if (equal(varinfo->var, expr))
4542 {
4543 found = true;
4544 break;
4545 }
4546 }
4547
4548 /* found exact match, skip */
4549 if (found)
4550 continue;
4551
4552 newlist = lappend(newlist, varinfo);
4553 }
4554
4555 *varinfos = newlist;
4556 *ndistinct = item->ndistinct;
4557 return true;
4558 }
4559
4560 return false;
4561}
4562
4563/*
4564 * convert_to_scalar
4565 * Convert non-NULL values of the indicated types to the comparison
4566 * scale needed by scalarineqsel().
4567 * Returns "true" if successful.
4568 *
4569 * XXX this routine is a hack: ideally we should look up the conversion
4570 * subroutines in pg_type.
4571 *
4572 * All numeric datatypes are simply converted to their equivalent
4573 * "double" values. (NUMERIC values that are outside the range of "double"
4574 * are clamped to +/- HUGE_VAL.)
4575 *
4576 * String datatypes are converted by convert_string_to_scalar(),
4577 * which is explained below. The reason why this routine deals with
4578 * three values at a time, not just one, is that we need it for strings.
4579 *
4580 * The bytea datatype is just enough different from strings that it has
4581 * to be treated separately.
4582 *
4583 * The several datatypes representing absolute times are all converted
4584 * to Timestamp, which is actually an int64, and then we promote that to
4585 * a double. Note this will give correct results even for the "special"
4586 * values of Timestamp, since those are chosen to compare correctly;
4587 * see timestamp_cmp.
4588 *
4589 * The several datatypes representing relative times (intervals) are all
4590 * converted to measurements expressed in seconds.
4591 */
4592static bool
4593convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
4594 Datum lobound, Datum hibound, Oid boundstypid,
4595 double *scaledlobound, double *scaledhibound)
4596{
4597 bool failure = false;
4598
4599 /*
4600 * Both the valuetypid and the boundstypid should exactly match the
4601 * declared input type(s) of the operator we are invoked for. However,
4602 * extensions might try to use scalarineqsel as estimator for operators
4603 * with input type(s) we don't handle here; in such cases, we want to
4604 * return false, not fail. In any case, we mustn't assume that valuetypid
4605 * and boundstypid are identical.
4606 *
4607 * XXX The histogram we are interpolating between points of could belong
4608 * to a column that's only binary-compatible with the declared type. In
4609 * essence we are assuming that the semantics of binary-compatible types
4610 * are enough alike that we can use a histogram generated with one type's
4611 * operators to estimate selectivity for the other's. This is outright
4612 * wrong in some cases --- in particular signed versus unsigned
4613 * interpretation could trip us up. But it's useful enough in the
4614 * majority of cases that we do it anyway. Should think about more
4615 * rigorous ways to do it.
4616 */
4617 switch (valuetypid)
4618 {
4619 /*
4620 * Built-in numeric types
4621 */
4622 case BOOLOID:
4623 case INT2OID:
4624 case INT4OID:
4625 case INT8OID:
4626 case FLOAT4OID:
4627 case FLOAT8OID:
4628 case NUMERICOID:
4629 case OIDOID:
4630 case REGPROCOID:
4631 case REGPROCEDUREOID:
4632 case REGOPEROID:
4633 case REGOPERATOROID:
4634 case REGCLASSOID:
4635 case REGTYPEOID:
4636 case REGCOLLATIONOID:
4637 case REGCONFIGOID:
4638 case REGDICTIONARYOID:
4639 case REGROLEOID:
4640 case REGNAMESPACEOID:
4641 case REGDATABASEOID:
4642 *scaledvalue = convert_numeric_to_scalar(value, valuetypid,
4643 &failure);
4644 *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
4645 &failure);
4646 *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
4647 &failure);
4648 return !failure;
4649
4650 /*
4651 * Built-in string types
4652 */
4653 case CHAROID:
4654 case BPCHAROID:
4655 case VARCHAROID:
4656 case TEXTOID:
4657 case NAMEOID:
4658 {
4659 char *valstr = convert_string_datum(value, valuetypid,
4660 collid, &failure);
4661 char *lostr = convert_string_datum(lobound, boundstypid,
4662 collid, &failure);
4663 char *histr = convert_string_datum(hibound, boundstypid,
4664 collid, &failure);
4665
4666 /*
4667 * Bail out if any of the values is not of string type. We
4668 * might leak converted strings for the other value(s), but
4669 * that's not worth troubling over.
4670 */
4671 if (failure)
4672 return false;
4673
4674 convert_string_to_scalar(valstr, scaledvalue,
4675 lostr, scaledlobound,
4676 histr, scaledhibound);
4677 pfree(valstr);
4678 pfree(lostr);
4679 pfree(histr);
4680 return true;
4681 }
4682
4683 /*
4684 * Built-in bytea type
4685 */
4686 case BYTEAOID:
4687 {
4688 /* We only support bytea vs bytea comparison */
4689 if (boundstypid != BYTEAOID)
4690 return false;
4691 convert_bytea_to_scalar(value, scaledvalue,
4692 lobound, scaledlobound,
4693 hibound, scaledhibound);
4694 return true;
4695 }
4696
4697 /*
4698 * Built-in time types
4699 */
4700 case TIMESTAMPOID:
4701 case TIMESTAMPTZOID:
4702 case DATEOID:
4703 case INTERVALOID:
4704 case TIMEOID:
4705 case TIMETZOID:
4706 *scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
4707 &failure);
4708 *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
4709 &failure);
4710 *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
4711 &failure);
4712 return !failure;
4713
4714 /*
4715 * Built-in network types
4716 */
4717 case INETOID:
4718 case CIDROID:
4719 case MACADDROID:
4720 case MACADDR8OID:
4721 *scaledvalue = convert_network_to_scalar(value, valuetypid,
4722 &failure);
4723 *scaledlobound = convert_network_to_scalar(lobound, boundstypid,
4724 &failure);
4725 *scaledhibound = convert_network_to_scalar(hibound, boundstypid,
4726 &failure);
4727 return !failure;
4728 }
4729 /* Don't know how to convert */
4730 *scaledvalue = *scaledlobound = *scaledhibound = 0;
4731 return false;
4732}
4733
4734/*
4735 * Do convert_to_scalar()'s work for any numeric data type.
4736 *
4737 * On failure (e.g., unsupported typid), set *failure to true;
4738 * otherwise, that variable is not changed.
4739 */
4740static double
4742{
4743 switch (typid)
4744 {
4745 case BOOLOID:
4746 return (double) DatumGetBool(value);
4747 case INT2OID:
4748 return (double) DatumGetInt16(value);
4749 case INT4OID:
4750 return (double) DatumGetInt32(value);
4751 case INT8OID:
4752 return (double) DatumGetInt64(value);
4753 case FLOAT4OID:
4754 return (double) DatumGetFloat4(value);
4755 case FLOAT8OID:
4756 return (double) DatumGetFloat8(value);
4757 case NUMERICOID:
4758 /* Note: out-of-range values will be clamped to +-HUGE_VAL */
4759 return (double)
4761 value));
4762 case OIDOID:
4763 case REGPROCOID:
4764 case REGPROCEDUREOID:
4765 case REGOPEROID:
4766 case REGOPERATOROID:
4767 case REGCLASSOID:
4768 case REGTYPEOID:
4769 case REGCOLLATIONOID:
4770 case REGCONFIGOID:
4771 case REGDICTIONARYOID:
4772 case REGROLEOID:
4773 case REGNAMESPACEOID:
4774 case REGDATABASEOID:
4775 /* we can treat OIDs as integers... */
4776 return (double) DatumGetObjectId(value);
4777 }
4778
4779 *failure = true;
4780 return 0;
4781}
4782
4783/*
4784 * Do convert_to_scalar()'s work for any character-string data type.
4785 *
4786 * String datatypes are converted to a scale that ranges from 0 to 1,
4787 * where we visualize the bytes of the string as fractional digits.
4788 *
4789 * We do not want the base to be 256, however, since that tends to
4790 * generate inflated selectivity estimates; few databases will have
4791 * occurrences of all 256 possible byte values at each position.
4792 * Instead, use the smallest and largest byte values seen in the bounds
4793 * as the estimated range for each byte, after some fudging to deal with
4794 * the fact that we probably aren't going to see the full range that way.
4795 *
4796 * An additional refinement is that we discard any common prefix of the
4797 * three strings before computing the scaled values. This allows us to
4798 * "zoom in" when we encounter a narrow data range. An example is a phone
4799 * number database where all the values begin with the same area code.
4800 * (Actually, the bounds will be adjacent histogram-bin-boundary values,
4801 * so this is more likely to happen than you might think.)
4802 */
4803static void
4805 double *scaledvalue,
4806 char *lobound,
4807 double *scaledlobound,
4808 char *hibound,
4809 double *scaledhibound)
4810{
4811 int rangelo,
4812 rangehi;
4813 char *sptr;
4814
4815 rangelo = rangehi = (unsigned char) hibound[0];
4816 for (sptr = lobound; *sptr; sptr++)
4817 {
4818 if (rangelo > (unsigned char) *sptr)
4819 rangelo = (unsigned char) *sptr;
4820 if (rangehi < (unsigned char) *sptr)
4821 rangehi = (unsigned char) *sptr;
4822 }
4823 for (sptr = hibound; *sptr; sptr++)
4824 {
4825 if (rangelo > (unsigned char) *sptr)
4826 rangelo = (unsigned char) *sptr;
4827 if (rangehi < (unsigned char) *sptr)
4828 rangehi = (unsigned char) *sptr;
4829 }
4830 /* If range includes any upper-case ASCII chars, make it include all */
4831 if (rangelo <= 'Z' && rangehi >= 'A')
4832 {
4833 if (rangelo > 'A')
4834 rangelo = 'A';
4835 if (rangehi < 'Z')
4836 rangehi = 'Z';
4837 }
4838 /* Ditto lower-case */
4839 if (rangelo <= 'z' && rangehi >= 'a')
4840 {
4841 if (rangelo > 'a')
4842 rangelo = 'a';
4843 if (rangehi < 'z')
4844 rangehi = 'z';
4845 }
4846 /* Ditto digits */
4847 if (rangelo <= '9' && rangehi >= '0')
4848 {
4849 if (rangelo > '0')
4850 rangelo = '0';
4851 if (rangehi < '9')
4852 rangehi = '9';
4853 }
4854
4855 /*
4856 * If range includes less than 10 chars, assume we have not got enough
4857 * data, and make it include regular ASCII set.
4858 */
4859 if (rangehi - rangelo < 9)
4860 {
4861 rangelo = ' ';
4862 rangehi = 127;
4863 }
4864
4865 /*
4866 * Now strip any common prefix of the three strings.
4867 */
4868 while (*lobound)
4869 {
4870 if (*lobound != *hibound || *lobound != *value)
4871 break;
4872 lobound++, hibound++, value++;
4873 }
4874
4875 /*
4876 * Now we can do the conversions.
4877 */
4878 *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
4879 *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
4880 *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
4881}
4882
4883static double
4884convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
4885{
4886 int slen = strlen(value);
4887 double num,
4888 denom,
4889 base;
4890
4891 if (slen <= 0)
4892 return 0.0; /* empty string has scalar value 0 */
4893
4894 /*
4895 * There seems little point in considering more than a dozen bytes from
4896 * the string. Since base is at least 10, that will give us nominal
4897 * resolution of at least 12 decimal digits, which is surely far more
4898 * precision than this estimation technique has got anyway (especially in
4899 * non-C locales). Also, even with the maximum possible base of 256, this
4900 * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
4901 * overflow on any known machine.
4902 */
4903 if (slen > 12)
4904 slen = 12;
4905
4906 /* Convert initial characters to fraction */
4907 base = rangehi - rangelo + 1;
4908 num = 0.0;
4909 denom = base;
4910 while (slen-- > 0)
4911 {
4912 int ch = (unsigned char) *value++;
4913
4914 if (ch < rangelo)
4915 ch = rangelo - 1;
4916 else if (ch > rangehi)
4917 ch = rangehi + 1;
4918 num += ((double) (ch - rangelo)) / denom;
4919 denom *= base;
4920 }
4921
4922 return num;
4923}
4924
4925/*
4926 * Convert a string-type Datum into a palloc'd, null-terminated string.
4927 *
4928 * On failure (e.g., unsupported typid), set *failure to true;
4929 * otherwise, that variable is not changed. (We'll return NULL on failure.)
4930 *
4931 * When using a non-C locale, we must pass the string through pg_strxfrm()
4932 * before continuing, so as to generate correct locale-specific results.
4933 */
4934static char *
4936{
4937 char *val;
4938 pg_locale_t mylocale;
4939
4940 switch (typid)
4941 {
4942 case CHAROID:
4943 val = (char *) palloc(2);
4944 val[0] = DatumGetChar(value);
4945 val[1] = '\0';
4946 break;
4947 case BPCHAROID:
4948 case VARCHAROID:
4949 case TEXTOID:
4951 break;
4952 case NAMEOID:
4953 {
4955
4956 val = pstrdup(NameStr(*nm));
4957 break;
4958 }
4959 default:
4960 *failure = true;
4961 return NULL;
4962 }
4963
4965
4966 if (!mylocale->collate_is_c)
4967 {
4968 char *xfrmstr;
4969 size_t xfrmlen;
4970 size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4971
4972 /*
4973 * XXX: We could guess at a suitable output buffer size and only call
4974 * pg_strxfrm() twice if our guess is too small.
4975 *
4976 * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
4977 * bogus data or set an error. This is not really a problem unless it
4978 * crashes since it will only give an estimation error and nothing
4979 * fatal.
4980 *
4981 * XXX: we do not check pg_strxfrm_enabled(). On some platforms and in
4982 * some cases, libc strxfrm() may return the wrong results, but that
4983 * will only lead to an estimation error.
4984 */
4985 xfrmlen = pg_strxfrm(NULL, val, 0, mylocale);
4986#ifdef WIN32
4987
4988 /*
4989 * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4990 * of trying to allocate this much memory (and fail), just return the
4991 * original string unmodified as if we were in the C locale.
4992 */
4993 if (xfrmlen == INT_MAX)
4994 return val;
4995#endif
4996 xfrmstr = (char *) palloc(xfrmlen + 1);
4997 xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale);
4998
4999 /*
5000 * Some systems (e.g., glibc) can return a smaller value from the
5001 * second call than the first; thus the Assert must be <= not ==.
5002 */
5003 Assert(xfrmlen2 <= xfrmlen);
5004 pfree(val);
5005 val = xfrmstr;
5006 }
5007
5008 return val;
5009}
5010
5011/*
5012 * Do convert_to_scalar()'s work for any bytea data type.
5013 *
5014 * Very similar to convert_string_to_scalar except we can't assume
5015 * null-termination and therefore pass explicit lengths around.
5016 *
5017 * Also, assumptions about likely "normal" ranges of characters have been
5018 * removed - a data range of 0..255 is always used, for now. (Perhaps
5019 * someday we will add information about actual byte data range to
5020 * pg_statistic.)
5021 */
5022static void
5024 double *scaledvalue,
5025 Datum lobound,
5026 double *scaledlobound,
5027 Datum hibound,
5028 double *scaledhibound)
5029{
5030 bytea *valuep = DatumGetByteaPP(value);
5031 bytea *loboundp = DatumGetByteaPP(lobound);
5032 bytea *hiboundp = DatumGetByteaPP(hibound);
5033 int rangelo,
5034 rangehi,
5035 valuelen = VARSIZE_ANY_EXHDR(valuep),
5036 loboundlen = VARSIZE_ANY_EXHDR(loboundp),
5037 hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
5038 i,
5039 minlen;
5040 unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
5041 unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
5042 unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
5043
5044 /*
5045 * Assume bytea data is uniformly distributed across all byte values.
5046 */
5047 rangelo = 0;
5048 rangehi = 255;
5049
5050 /*
5051 * Now strip any common prefix of the three strings.
5052 */
5053 minlen = Min(Min(valuelen, loboundlen), hiboundlen);
5054 for (i = 0; i < minlen; i++)
5055 {
5056 if (*lostr != *histr || *lostr != *valstr)
5057 break;
5058 lostr++, histr++, valstr++;
5059 loboundlen--, hiboundlen--, valuelen--;
5060 }
5061
5062 /*
5063 * Now we can do the conversions.
5064 */
5065 *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
5066 *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
5067 *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
5068}
5069
5070static double
5071convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
5072 int rangelo, int rangehi)
5073{
5074 double num,
5075 denom,
5076 base;
5077
5078 if (valuelen <= 0)
5079 return 0.0; /* empty string has scalar value 0 */
5080
5081 /*
5082 * Since base is 256, need not consider more than about 10 chars (even
5083 * this many seems like overkill)
5084 */
5085 if (valuelen > 10)
5086 valuelen = 10;
5087
5088 /* Convert initial characters to fraction */
5089 base = rangehi - rangelo + 1;
5090 num = 0.0;
5091 denom = base;
5092 while (valuelen-- > 0)
5093 {
5094 int ch = *value++;
5095
5096 if (ch < rangelo)
5097 ch = rangelo - 1;
5098 else if (ch > rangehi)
5099 ch = rangehi + 1;
5100 num += ((double) (ch - rangelo)) / denom;
5101 denom *= base;
5102 }
5103
5104 return num;
5105}
5106
5107/*
5108 * Do convert_to_scalar()'s work for any timevalue data type.
5109 *
5110 * On failure (e.g., unsupported typid), set *failure to true;
5111 * otherwise, that variable is not changed.
5112 */
5113static double
5115{
5116 switch (typid)
5117 {
5118 case TIMESTAMPOID:
5119 return DatumGetTimestamp(value);
5120 case TIMESTAMPTZOID:
5121 return DatumGetTimestampTz(value);
5122 case DATEOID:
5124 case INTERVALOID:
5125 {
5127
5128 /*
5129 * Convert the month part of Interval to days using assumed
5130 * average month length of 365.25/12.0 days. Not too
5131 * accurate, but plenty good enough for our purposes.
5132 *
5133 * This also works for infinite intervals, which just have all
5134 * fields set to INT_MIN/INT_MAX, and so will produce a result
5135 * smaller/larger than any finite interval.
5136 */
5137 return interval->time + interval->day * (double) USECS_PER_DAY +
5139 }
5140 case TIMEOID:
5141 return DatumGetTimeADT(value);
5142 case TIMETZOID:
5143 {
5145
5146 /* use GMT-equivalent time */
5147 return (double) (timetz->time + (timetz->zone * 1000000.0));
5148 }
5149 }
5150
5151 *failure = true;
5152 return 0;
5153}
5154
5155
5156/*
5157 * get_restriction_variable
5158 * Examine the args of a restriction clause to see if it's of the
5159 * form (variable op pseudoconstant) or (pseudoconstant op variable),
5160 * where "variable" could be either a Var or an expression in vars of a
5161 * single relation. If so, extract information about the variable,
5162 * and also indicate which side it was on and the other argument.
5163 *
5164 * Inputs:
5165 * root: the planner info
5166 * args: clause argument list
5167 * varRelid: see specs for restriction selectivity functions
5168 *
5169 * Outputs: (these are valid only if true is returned)
5170 * *vardata: gets information about variable (see examine_variable)
5171 * *other: gets other clause argument, aggressively reduced to a constant
5172 * *varonleft: set true if variable is on the left, false if on the right
5173 *
5174 * Returns true if a variable is identified, otherwise false.
5175 *
5176 * Note: if there are Vars on both sides of the clause, we must fail, because
5177 * callers are expecting that the other side will act like a pseudoconstant.
5178 */
5179bool
5181 VariableStatData *vardata, Node **other,
5182 bool *varonleft)
5183{
5184 Node *left,
5185 *right;
5186 VariableStatData rdata;
5187
5188 /* Fail if not a binary opclause (probably shouldn't happen) */
5189 if (list_length(args) != 2)
5190 return false;
5191
5192 left = (Node *) linitial(args);
5193 right = (Node *) lsecond(args);
5194
5195 /*
5196 * Examine both sides. Note that when varRelid is nonzero, Vars of other
5197 * relations will be treated as pseudoconstants.
5198 */
5199 examine_variable(root, left, varRelid, vardata);
5200 examine_variable(root, right, varRelid, &rdata);
5201
5202 /*
5203 * If one side is a variable and the other not, we win.
5204 */
5205 if (vardata->rel && rdata.rel == NULL)
5206 {
5207 *varonleft = true;
5208 *other = estimate_expression_value(root, rdata.var);
5209 /* Assume we need no ReleaseVariableStats(rdata) here */
5210 return true;
5211 }
5212
5213 if (vardata->rel == NULL && rdata.rel)
5214 {
5215 *varonleft = false;
5216 *other = estimate_expression_value(root, vardata->var);
5217 /* Assume we need no ReleaseVariableStats(*vardata) here */
5218 *vardata = rdata;
5219 return true;
5220 }
5221
5222 /* Oops, clause has wrong structure (probably var op var) */
5223 ReleaseVariableStats(*vardata);
5224 ReleaseVariableStats(rdata);
5225
5226 return false;
5227}
5228
5229/*
5230 * get_join_variables
5231 * Apply examine_variable() to each side of a join clause.
5232 * Also, attempt to identify whether the join clause has the same
5233 * or reversed sense compared to the SpecialJoinInfo.
5234 *
5235 * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
5236 * or "reversed" if it is "rhs_var OP lhs_var". In complicated cases
5237 * where we can't tell for sure, we default to assuming it's normal.
5238 */
5239void
5241 VariableStatData *vardata1, VariableStatData *vardata2,
5242 bool *join_is_reversed)
5243{
5244 Node *left,
5245 *right;
5246
5247 if (list_length(args) != 2)
5248 elog(ERROR, "join operator should take two arguments");
5249
5250 left = (Node *) linitial(args);
5251 right = (Node *) lsecond(args);
5252
5253 examine_variable(root, left, 0, vardata1);
5254 examine_variable(root, right, 0, vardata2);
5255
5256 if (vardata1->rel &&
5257 bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
5258 *join_is_reversed = true; /* var1 is on RHS */
5259 else if (vardata2->rel &&
5260 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
5261 *join_is_reversed = true; /* var2 is on LHS */
5262 else
5263 *join_is_reversed = false;
5264}
5265
5266/* statext_expressions_load copies the tuple, so just pfree it. */
5267static void
5269{
5270 pfree(tuple);
5271}
5272
5273/*
5274 * examine_variable
5275 * Try to look up statistical data about an expression.
5276 * Fill in a VariableStatData struct to describe the expression.
5277 *
5278 * Inputs:
5279 * root: the planner info
5280 * node: the expression tree to examine
5281 * varRelid: see specs for restriction selectivity functions
5282 *
5283 * Outputs: *vardata is filled as follows:
5284 * var: the input expression (with any binary relabeling stripped, if
5285 * it is or contains a variable; but otherwise the type is preserved)
5286 * rel: RelOptInfo for relation containing variable; NULL if expression
5287 * contains no Vars (NOTE this could point to a RelOptInfo of a
5288 * subquery, not one in the current query).
5289 * statsTuple: the pg_statistic entry for the variable, if one exists;
5290 * otherwise NULL.
5291 * freefunc: pointer to a function to release statsTuple with.
5292 * vartype: exposed type of the expression; this should always match
5293 * the declared input type of the operator we are estimating for.
5294 * atttype, atttypmod: actual type/typmod of the "var" expression. This is
5295 * commonly the same as the exposed type of the variable argument,
5296 * but can be different in binary-compatible-type cases.
5297 * isunique: true if we were able to match the var to a unique index, a
5298 * single-column DISTINCT or GROUP-BY clause, implying its values are
5299 * unique for this query. (Caution: this should be trusted for
5300 * statistical purposes only, since we do not check indimmediate nor
5301 * verify that the exact same definition of equality applies.)
5302 * acl_ok: true if current user has permission to read all table rows from
5303 * the column(s) underlying the pg_statistic entry. This is consulted by
5304 * statistic_proc_security_check().
5305 *
5306 * Caller is responsible for doing ReleaseVariableStats() before exiting.
5307 */
5308void
5310 VariableStatData *vardata)
5311{
5312 Node *basenode;
5313 Relids varnos;
5314 Relids basevarnos;
5315 RelOptInfo *onerel;
5316
5317 /* Make sure we don't return dangling pointers in vardata */
5318 MemSet(vardata, 0, sizeof(VariableStatData));
5319
5320 /* Save the exposed type of the expression */
5321 vardata->vartype = exprType(node);
5322
5323 /* Look inside any binary-compatible relabeling */
5324
5325 if (IsA(node, RelabelType))
5326 basenode = (Node *) ((RelabelType *) node)->arg;
5327 else
5328 basenode = node;
5329
5330 /* Fast path for a simple Var */
5331
5332 if (IsA(basenode, Var) &&
5333 (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
5334 {
5335 Var *var = (Var *) basenode;
5336
5337 /* Set up result fields other than the stats tuple */
5338 vardata->var = basenode; /* return Var without relabeling */
5339 vardata->rel = find_base_rel(root, var->varno);
5340 vardata->atttype = var->vartype;
5341 vardata->atttypmod = var->vartypmod;
5342 vardata->isunique = has_unique_index(vardata->rel, var->varattno);
5343
5344 /* Try to locate some stats */
5345 examine_simple_variable(root, var, vardata);
5346
5347 return;
5348 }
5349
5350 /*
5351 * Okay, it's a more complicated expression. Determine variable
5352 * membership. Note that when varRelid isn't zero, only vars of that
5353 * relation are considered "real" vars.
5354 */
5355 varnos = pull_varnos(root, basenode);
5356 basevarnos = bms_difference(varnos, root->outer_join_rels);
5357
5358 onerel = NULL;
5359
5360 if (bms_is_empty(basevarnos))
5361 {
5362 /* No Vars at all ... must be pseudo-constant clause */
5363 }
5364 else
5365 {
5366 int relid;
5367
5368 /* Check if the expression is in vars of a single base relation */
5369 if (bms_get_singleton_member(basevarnos, &relid))
5370 {
5371 if (varRelid == 0 || varRelid == relid)
5372 {
5373 onerel = find_base_rel(root, relid);
5374 vardata->rel = onerel;
5375 node = basenode; /* strip any relabeling */
5376 }
5377 /* else treat it as a constant */
5378 }
5379 else
5380 {
5381 /* varnos has multiple relids */
5382 if (varRelid == 0)
5383 {
5384 /* treat it as a variable of a join relation */
5385 vardata->rel = find_join_rel(root, varnos);
5386 node = basenode; /* strip any relabeling */
5387 }
5388 else if (bms_is_member(varRelid, varnos))
5389 {
5390 /* ignore the vars belonging to other relations */
5391 vardata->rel = find_base_rel(root, varRelid);
5392 node = basenode; /* strip any relabeling */
5393 /* note: no point in expressional-index search here */
5394 }
5395 /* else treat it as a constant */
5396 }
5397 }
5398
5399 bms_free(basevarnos);
5400
5401 vardata->var = node;
5402 vardata->atttype = exprType(node);
5403 vardata->atttypmod = exprTypmod(node);
5404
5405 if (onerel)
5406 {
5407 /*
5408 * We have an expression in vars of a single relation. Try to match
5409 * it to expressional index columns, in hopes of finding some
5410 * statistics.
5411 *
5412 * Note that we consider all index columns including INCLUDE columns,
5413 * since there could be stats for such columns. But the test for
5414 * uniqueness needs to be warier.
5415 *
5416 * XXX it's conceivable that there are multiple matches with different
5417 * index opfamilies; if so, we need to pick one that matches the
5418 * operator we are estimating for. FIXME later.
5419 */
5420 ListCell *ilist;
5421 ListCell *slist;
5422
5423 /*
5424 * The nullingrels bits within the expression could prevent us from
5425 * matching it to expressional index columns or to the expressions in
5426 * extended statistics. So strip them out first.
5427 */
5428 if (bms_overlap(varnos, root->outer_join_rels))
5429 node = remove_nulling_relids(node, root->outer_join_rels, NULL);
5430
5431 foreach(ilist, onerel->indexlist)
5432 {
5433 IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
5434 ListCell *indexpr_item;
5435 int pos;
5436
5437 indexpr_item = list_head(index->indexprs);
5438 if (indexpr_item == NULL)
5439 continue; /* no expressions here... */
5440
5441 for (pos = 0; pos < index->ncolumns; pos++)
5442 {
5443 if (index->indexkeys[pos] == 0)
5444 {
5445 Node *indexkey;
5446
5447 if (indexpr_item == NULL)
5448 elog(ERROR, "too few entries in indexprs list");
5449 indexkey = (Node *) lfirst(indexpr_item);
5450 if (indexkey && IsA(indexkey, RelabelType))
5451 indexkey = (Node *) ((RelabelType *) indexkey)->arg;
5452 if (equal(node, indexkey))
5453 {
5454 /*
5455 * Found a match ... is it a unique index? Tests here
5456 * should match has_unique_index().
5457 */
5458 if (index->unique &&
5459 index->nkeycolumns == 1 &&
5460 pos == 0 &&
5461 (index->indpred == NIL || index->predOK))
5462 vardata->isunique = true;
5463
5464 /*
5465 * Has it got stats? We only consider stats for
5466 * non-partial indexes, since partial indexes probably
5467 * don't reflect whole-relation statistics; the above
5468 * check for uniqueness is the only info we take from
5469 * a partial index.
5470 *
5471 * An index stats hook, however, must make its own
5472 * decisions about what to do with partial indexes.
5473 */
5475 (*get_index_stats_hook) (root, index->indexoid,
5476 pos + 1, vardata))
5477 {
5478 /*
5479 * The hook took control of acquiring a stats
5480 * tuple. If it did supply a tuple, it'd better
5481 * have supplied a freefunc.
5482 */
5483 if (HeapTupleIsValid(vardata->statsTuple) &&
5484 !vardata->freefunc)
5485 elog(ERROR, "no function provided to release variable stats with");
5486 }
5487 else if (index->indpred == NIL)
5488 {
5489 vardata->statsTuple =
5490 SearchSysCache3(STATRELATTINH,
5491 ObjectIdGetDatum(index->indexoid),
5492 Int16GetDatum(pos + 1),
5493 BoolGetDatum(false));
5494 vardata->freefunc = ReleaseSysCache;
5495
5496 if (HeapTupleIsValid(vardata->statsTuple))
5497 {
5498 /*
5499 * Test if user has permission to access all
5500 * rows from the index's table.
5501 *
5502 * For simplicity, we insist on the whole
5503 * table being selectable, rather than trying
5504 * to identify which column(s) the index
5505 * depends on.
5506 *
5507 * Note that for an inheritance child,
5508 * permissions are checked on the inheritance
5509 * root parent, and whole-table select
5510 * privilege on the parent doesn't quite
5511 * guarantee that the user could read all
5512 * columns of the child. But in practice it's
5513 * unlikely that any interesting security
5514 * violation could result from allowing access
5515 * to the expression index's stats, so we
5516 * allow it anyway. See similar code in
5517 * examine_simple_variable() for additional
5518 * comments.
5519 */
5520 vardata->acl_ok =
5522 index->rel->relid,
5523 NULL);
5524 }
5525 else
5526 {
5527 /* suppress leakproofness checks later */
5528 vardata->acl_ok = true;
5529 }
5530 }
5531 if (vardata->statsTuple)
5532 break;
5533 }
5534 indexpr_item = lnext(index->indexprs, indexpr_item);
5535 }
5536 }
5537 if (vardata->statsTuple)
5538 break;
5539 }
5540
5541 /*
5542 * Search extended statistics for one with a matching expression.
5543 * There might be multiple ones, so just grab the first one. In the
5544 * future, we might consider the statistics target (and pick the most
5545 * accurate statistics) and maybe some other parameters.
5546 */
5547 foreach(slist, onerel->statlist)
5548 {
5549 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
5550 RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root);
5551 ListCell *expr_item;
5552 int pos;
5553
5554 /*
5555 * Stop once we've found statistics for the expression (either
5556 * from extended stats, or for an index in the preceding loop).
5557 */
5558 if (vardata->statsTuple)
5559 break;
5560
5561 /* skip stats without per-expression stats */
5562 if (info->kind != STATS_EXT_EXPRESSIONS)
5563 continue;
5564
5565 /* skip stats with mismatching stxdinherit value */
5566 if (info->inherit != rte->inh)
5567 continue;
5568
5569 pos = 0;
5570 foreach(expr_item, info->exprs)
5571 {
5572 Node *expr = (Node *) lfirst(expr_item);
5573
5574 Assert(expr);
5575
5576 /* strip RelabelType before comparing it */
5577 if (expr && IsA(expr, RelabelType))
5578 expr = (Node *) ((RelabelType *) expr)->arg;
5579
5580 /* found a match, see if we can extract pg_statistic row */
5581 if (equal(node, expr))
5582 {
5583 /*
5584 * XXX Not sure if we should cache the tuple somewhere.
5585 * Now we just create a new copy every time.
5586 */
5587 vardata->statsTuple =
5588 statext_expressions_load(info->statOid, rte->inh, pos);
5589
5590 vardata->freefunc = ReleaseDummy;
5591
5592 /*
5593 * Test if user has permission to access all rows from the
5594 * table.
5595 *
5596 * For simplicity, we insist on the whole table being
5597 * selectable, rather than trying to identify which
5598 * column(s) the statistics object depends on.
5599 *
5600 * Note that for an inheritance child, permissions are
5601 * checked on the inheritance root parent, and whole-table
5602 * select privilege on the parent doesn't quite guarantee
5603 * that the user could read all columns of the child. But
5604 * in practice it's unlikely that any interesting security
5605 * violation could result from allowing access to the
5606 * expression stats, so we allow it anyway. See similar
5607 * code in examine_simple_variable() for additional
5608 * comments.
5609 */
5610 vardata->acl_ok = all_rows_selectable(root,
5611 onerel->relid,
5612 NULL);
5613
5614 break;
5615 }
5616
5617 pos++;
5618 }
5619 }
5620 }
5621
5622 bms_free(varnos);
5623}
5624
5625/*
5626 * examine_simple_variable
5627 * Handle a simple Var for examine_variable
5628 *
5629 * This is split out as a subroutine so that we can recurse to deal with
5630 * Vars referencing subqueries (either sub-SELECT-in-FROM or CTE style).
5631 *
5632 * We already filled in all the fields of *vardata except for the stats tuple.
5633 */
5634static void
5636 VariableStatData *vardata)
5637{
5638 RangeTblEntry *rte = root->simple_rte_array[var->varno];
5639
5640 Assert(IsA(rte, RangeTblEntry));
5641
5643 (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
5644 {
5645 /*
5646 * The hook took control of acquiring a stats tuple. If it did supply
5647 * a tuple, it'd better have supplied a freefunc.
5648 */
5649 if (HeapTupleIsValid(vardata->statsTuple) &&
5650 !vardata->freefunc)
5651 elog(ERROR, "no function provided to release variable stats with");
5652 }
5653 else if (rte->rtekind == RTE_RELATION)
5654 {
5655 /*
5656 * Plain table or parent of an inheritance appendrel, so look up the
5657 * column in pg_statistic
5658 */
5659 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
5660 ObjectIdGetDatum(rte->relid),
5661 Int16GetDatum(var->varattno),
5662 BoolGetDatum(rte->inh));
5663 vardata->freefunc = ReleaseSysCache;
5664
5665 if (HeapTupleIsValid(vardata->statsTuple))
5666 {
5667 /*
5668 * Test if user has permission to read all rows from this column.
5669 *
5670 * This requires that the user has the appropriate SELECT
5671 * privileges and that there are no securityQuals from security
5672 * barrier views or RLS policies. If that's not the case, then we
5673 * only permit leakproof functions to be passed pg_statistic data
5674 * in vardata, otherwise the functions might reveal data that the
5675 * user doesn't have permission to see --- see
5676 * statistic_proc_security_check().
5677 */
5678 vardata->acl_ok =
5681 }
5682 else
5683 {
5684 /* suppress any possible leakproofness checks later */
5685 vardata->acl_ok = true;
5686 }
5687 }
5688 else if ((rte->rtekind == RTE_SUBQUERY && !rte->inh) ||
5689 (rte->rtekind == RTE_CTE && !rte->self_reference))
5690 {
5691 /*
5692 * Plain subquery (not one that was converted to an appendrel) or
5693 * non-recursive CTE. In either case, we can try to find out what the
5694 * Var refers to within the subquery. We skip this for appendrel and
5695 * recursive-CTE cases because any column stats we did find would
5696 * likely not be very relevant.
5697 */
5698 PlannerInfo *subroot;
5699 Query *subquery;
5700 List *subtlist;
5701 TargetEntry *ste;
5702
5703 /*
5704 * Punt if it's a whole-row var rather than a plain column reference.
5705 */
5706 if (var->varattno == InvalidAttrNumber)
5707 return;
5708
5709 /*
5710 * Otherwise, find the subquery's planner subroot.
5711 */
5712 if (rte->rtekind == RTE_SUBQUERY)
5713 {
5714 RelOptInfo *rel;
5715
5716 /*
5717 * Fetch RelOptInfo for subquery. Note that we don't change the
5718 * rel returned in vardata, since caller expects it to be a rel of
5719 * the caller's query level. Because we might already be
5720 * recursing, we can't use that rel pointer either, but have to
5721 * look up the Var's rel afresh.
5722 */
5723 rel = find_base_rel(root, var->varno);
5724
5725 subroot = rel->subroot;
5726 }
5727 else
5728 {
5729 /* CTE case is more difficult */
5730 PlannerInfo *cteroot;
5731 Index levelsup;
5732 int ndx;
5733 int plan_id;
5734 ListCell *lc;
5735
5736 /*
5737 * Find the referenced CTE, and locate the subroot previously made
5738 * for it.
5739 */
5740 levelsup = rte->ctelevelsup;
5741 cteroot = root;
5742 while (levelsup-- > 0)
5743 {
5744 cteroot = cteroot->parent_root;
5745 if (!cteroot) /* shouldn't happen */
5746 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
5747 }
5748
5749 /*
5750 * Note: cte_plan_ids can be shorter than cteList, if we are still
5751 * working on planning the CTEs (ie, this is a side-reference from
5752 * another CTE). So we mustn't use forboth here.
5753 */
5754 ndx = 0;
5755 foreach(lc, cteroot->parse->cteList)
5756 {
5757 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
5758
5759 if (strcmp(cte->ctename, rte->ctename) == 0)
5760 break;
5761 ndx++;
5762 }
5763 if (lc == NULL) /* shouldn't happen */
5764 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
5765 if (ndx >= list_length(cteroot->cte_plan_ids))
5766 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
5767 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
5768 if (plan_id <= 0)
5769 elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename);
5770 subroot = list_nth(root->glob->subroots, plan_id - 1);
5771 }
5772
5773 /* If the subquery hasn't been planned yet, we have to punt */
5774 if (subroot == NULL)
5775 return;
5776 Assert(IsA(subroot, PlannerInfo));
5777
5778 /*
5779 * We must use the subquery parsetree as mangled by the planner, not
5780 * the raw version from the RTE, because we need a Var that will refer
5781 * to the subroot's live RelOptInfos. For instance, if any subquery
5782 * pullup happened during planning, Vars in the targetlist might have
5783 * gotten replaced, and we need to see the replacement expressions.
5784 */
5785 subquery = subroot->parse;
5786 Assert(IsA(subquery, Query));
5787
5788 /*
5789 * Punt if subquery uses set operations or grouping sets, as these
5790 * will mash underlying columns' stats beyond recognition. (Set ops
5791 * are particularly nasty; if we forged ahead, we would return stats
5792 * relevant to only the leftmost subselect...) DISTINCT is also
5793 * problematic, but we check that later because there is a possibility
5794 * of learning something even with it.
5795 */
5796 if (subquery->setOperations ||
5797 subquery->groupingSets)
5798 return;
5799
5800 /* Get the subquery output expression referenced by the upper Var */
5801 if (subquery->returningList)
5802 subtlist = subquery->returningList;
5803 else
5804 subtlist = subquery->targetList;
5805 ste = get_tle_by_resno(subtlist, var->varattno);
5806 if (ste == NULL || ste->resjunk)
5807 elog(ERROR, "subquery %s does not have attribute %d",
5808 rte->eref->aliasname, var->varattno);
5809 var = (Var *) ste->expr;
5810
5811 /*
5812 * If subquery uses DISTINCT, we can't make use of any stats for the
5813 * variable ... but, if it's the only DISTINCT column, we are entitled
5814 * to consider it unique. We do the test this way so that it works
5815 * for cases involving DISTINCT ON.
5816 */
5817 if (subquery->distinctClause)
5818 {
5819 if (list_length(subquery->distinctClause) == 1 &&
5821 vardata->isunique = true;
5822 /* cannot go further */
5823 return;
5824 }
5825
5826 /* The same idea as with DISTINCT clause works for a GROUP-BY too */
5827 if (subquery->groupClause)
5828 {
5829 if (list_length(subquery->groupClause) == 1 &&
5830 targetIsInSortList(ste, InvalidOid, subquery->groupClause))
5831 vardata->isunique = true;
5832 /* cannot go further */
5833 return;
5834 }
5835
5836 /*
5837 * If the sub-query originated from a view with the security_barrier
5838 * attribute, we must not look at the variable's statistics, though it
5839 * seems all right to notice the existence of a DISTINCT clause. So
5840 * stop here.
5841 *
5842 * This is probably a harsher restriction than necessary; it's
5843 * certainly OK for the selectivity estimator (which is a C function,
5844 * and therefore omnipotent anyway) to look at the statistics. But
5845 * many selectivity estimators will happily *invoke the operator
5846 * function* to try to work out a good estimate - and that's not OK.
5847 * So for now, don't dig down for stats.
5848 */
5849 if (rte->security_barrier)
5850 return;
5851
5852 /* Can only handle a simple Var of subquery's query level */
5853 if (var && IsA(var, Var) &&
5854 var->varlevelsup == 0)
5855 {
5856 /*
5857 * OK, recurse into the subquery. Note that the original setting
5858 * of vardata->isunique (which will surely be false) is left
5859 * unchanged in this situation. That's what we want, since even
5860 * if the underlying column is unique, the subquery may have
5861 * joined to other tables in a way that creates duplicates.
5862 */
5863 examine_simple_variable(subroot, var, vardata);
5864 }
5865 }
5866 else
5867 {
5868 /*
5869 * Otherwise, the Var comes from a FUNCTION or VALUES RTE. (We won't
5870 * see RTE_JOIN here because join alias Vars have already been
5871 * flattened.) There's not much we can do with function outputs, but
5872 * maybe someday try to be smarter about VALUES.
5873 */
5874 }
5875}
5876
5877/*
5878 * all_rows_selectable
5879 * Test whether the user has permission to select all rows from a given
5880 * relation.
5881 *
5882 * Inputs:
5883 * root: the planner info
5884 * varno: the index of the relation (assumed to be an RTE_RELATION)
5885 * varattnos: the attributes for which permission is required, or NULL if
5886 * whole-table access is required
5887 *
5888 * Returns true if the user has the required select permissions, and there are
5889 * no securityQuals from security barrier views or RLS policies.
5890 *
5891 * Note that if the relation is an inheritance child relation, securityQuals
5892 * and access permissions are checked against the inheritance root parent (the
5893 * relation actually mentioned in the query) --- see the comments in
5894 * expand_single_inheritance_child() for an explanation of why it has to be
5895 * done this way.
5896 *
5897 * If varattnos is non-NULL, its attribute numbers should be offset by
5898 * FirstLowInvalidHeapAttributeNumber so that system attributes can be
5899 * checked. If varattnos is NULL, only table-level SELECT privileges are
5900 * checked, not any column-level privileges.
5901 *
5902 * Note: if the relation is accessed via a view, this function actually tests
5903 * whether the view owner has permission to select from the relation. To
5904 * ensure that the current user has permission, it is also necessary to check
5905 * that the current user has permission to select from the view, which we do
5906 * at planner-startup --- see subquery_planner().
5907 *
5908 * This is exported so that other estimation functions can use it.
5909 */
5910bool
5912{
5913 RelOptInfo *rel = find_base_rel_noerr(root, varno);
5914 RangeTblEntry *rte = planner_rt_fetch(varno, root);
5915 Oid userid;
5916 int varattno;
5917
5918 Assert(rte->rtekind == RTE_RELATION);
5919
5920 /*
5921 * Determine the user ID to use for privilege checks (either the current
5922 * user or the view owner, if we're accessing the table via a view).
5923 *
5924 * Normally the relation will have an associated RelOptInfo from which we
5925 * can find the userid, but it might not if it's a RETURNING Var for an
5926 * INSERT target relation. In that case use the RTEPermissionInfo
5927 * associated with the RTE.
5928 *
5929 * If we navigate up to a parent relation, we keep using the same userid,
5930 * since it's the same in all relations of a given inheritance tree.
5931 */
5932 if (rel)
5933 userid = rel->userid;
5934 else
5935 {
5936 RTEPermissionInfo *perminfo;
5937
5938 perminfo = getRTEPermissionInfo(root->parse->rteperminfos, rte);
5939 userid = perminfo->checkAsUser;
5940 }
5941 if (!OidIsValid(userid))
5942 userid = GetUserId();
5943
5944 /*
5945 * Permissions and securityQuals must be checked on the table actually
5946 * mentioned in the query, so if this is an inheritance child, navigate up
5947 * to the inheritance root parent. If the user can read the whole table
5948 * or the required columns there, then they can read from the child table
5949 * too. For per-column checks, we must find out which of the root
5950 * parent's attributes the child relation's attributes correspond to.
5951 */
5952 if (root->append_rel_array != NULL)
5953 {
5954 AppendRelInfo *appinfo;
5955
5956 appinfo = root->append_rel_array[varno];
5957
5958 /*
5959 * Partitions are mapped to their immediate parent, not the root
5960 * parent, so must be ready to walk up multiple AppendRelInfos. But
5961 * stop if we hit a parent that is not RTE_RELATION --- that's a
5962 * flattened UNION ALL subquery, not an inheritance parent.
5963 */
5964 while (appinfo &&
5966 root)->rtekind == RTE_RELATION)
5967 {
5968 Bitmapset *parent_varattnos = NULL;
5969
5970 /*
5971 * For each child attribute, find the corresponding parent
5972 * attribute. In rare cases, the attribute may be local to the
5973 * child table, in which case, we've got to live with having no
5974 * access to this column.
5975 */
5976 varattno = -1;
5977 while ((varattno = bms_next_member(varattnos, varattno)) >= 0)
5978 {
5979 AttrNumber attno;
5980 AttrNumber parent_attno;
5981
5982 attno = varattno + FirstLowInvalidHeapAttributeNumber;
5983
5984 if (attno == InvalidAttrNumber)
5985 {
5986 /*
5987 * Whole-row reference, so must map each column of the
5988 * child to the parent table.
5989 */
5990 for (attno = 1; attno <= appinfo->num_child_cols; attno++)
5991 {
5992 parent_attno = appinfo->parent_colnos[attno - 1];
5993 if (parent_attno == 0)
5994 return false; /* attr is local to child */
5995 parent_varattnos =
5996 bms_add_member(parent_varattnos,
5997 parent_attno - FirstLowInvalidHeapAttributeNumber);
5998 }
5999 }
6000 else
6001 {
6002 if (attno < 0)
6003 {
6004 /* System attnos are the same in all tables */
6005 parent_attno = attno;
6006 }
6007 else
6008 {
6009 if (attno > appinfo->num_child_cols)
6010 return false; /* safety check */
6011 parent_attno = appinfo->parent_colnos[attno - 1];
6012 if (parent_attno == 0)
6013 return false; /* attr is local to child */
6014 }
6015 parent_varattnos =
6016 bms_add_member(parent_varattnos,
6017 parent_attno - FirstLowInvalidHeapAttributeNumber);
6018 }
6019 }
6020
6021 /* If the parent is itself a child, continue up */
6022 varno = appinfo->parent_relid;
6023 varattnos = parent_varattnos;
6024 appinfo = root->append_rel_array[varno];
6025 }
6026
6027 /* Perform the access check on this parent rel */
6028 rte = planner_rt_fetch(varno, root);
6029 Assert(rte->rtekind == RTE_RELATION);
6030 }
6031
6032 /*
6033 * For all rows to be accessible, there must be no securityQuals from
6034 * security barrier views or RLS policies.
6035 */
6036 if (rte->securityQuals != NIL)
6037 return false;
6038
6039 /*
6040 * Test for table-level SELECT privilege.
6041 *
6042 * If varattnos is non-NULL, this is sufficient to give access to all
6043 * requested attributes, even for a child table, since we have verified
6044 * that all required child columns have matching parent columns.
6045 *
6046 * If varattnos is NULL (whole-table access requested), this doesn't
6047 * necessarily guarantee that the user can read all columns of a child
6048 * table, but we allow it anyway (see comments in examine_variable()) and
6049 * don't bother checking any column privileges.
6050 */
6051 if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) == ACLCHECK_OK)
6052 return true;
6053
6054 if (varattnos == NULL)
6055 return false; /* whole-table access requested */
6056
6057 /*
6058 * Don't have table-level SELECT privilege, so check per-column
6059 * privileges.
6060 */
6061 varattno = -1;
6062 while ((varattno = bms_next_member(varattnos, varattno)) >= 0)
6063 {
6065
6066 if (attno == InvalidAttrNumber)
6067 {
6068 /* Whole-row reference, so must have access to all columns */
6069 if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
6071 return false;
6072 }
6073 else
6074 {
6075 if (pg_attribute_aclcheck(rte->relid, attno, userid,
6077 return false;
6078 }
6079 }
6080
6081 /* If we reach here, have all required column privileges */
6082 return true;
6083}
6084
6085/*
6086 * examine_indexcol_variable
6087 * Try to look up statistical data about an index column/expression.
6088 * Fill in a VariableStatData struct to describe the column.
6089 *
6090 * Inputs:
6091 * root: the planner info
6092 * index: the index whose column we're interested in
6093 * indexcol: 0-based index column number (subscripts index->indexkeys[])
6094 *
6095 * Outputs: *vardata is filled as follows:
6096 * var: the input expression (with any binary relabeling stripped, if
6097 * it is or contains a variable; but otherwise the type is preserved)
6098 * rel: RelOptInfo for table relation containing variable.
6099 * statsTuple: the pg_statistic entry for the variable, if one exists;
6100 * otherwise NULL.
6101 * freefunc: pointer to a function to release statsTuple with.
6102 *
6103 * Caller is responsible for doing ReleaseVariableStats() before exiting.
6104 */
6105static void
6107 int indexcol, VariableStatData *vardata)
6108{
6109 AttrNumber colnum;
6110 Oid relid;
6111
6112 if (index->indexkeys[indexcol] != 0)
6113 {
6114 /* Simple variable --- look to stats for the underlying table */
6115 RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
6116
6117 Assert(rte->rtekind == RTE_RELATION);
6118 relid = rte->relid;
6119 Assert(relid != InvalidOid);
6120 colnum = index->indexkeys[indexcol];
6121 vardata->rel = index->rel;
6122
6124 (*get_relation_stats_hook) (root, rte, colnum, vardata))
6125 {
6126 /*
6127 * The hook took control of acquiring a stats tuple. If it did
6128 * supply a tuple, it'd better have supplied a freefunc.
6129 */
6130 if (HeapTupleIsValid(vardata->statsTuple) &&
6131 !vardata->freefunc)
6132 elog(ERROR, "no function provided to release variable stats with");
6133 }
6134 else
6135 {
6136 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
6137 ObjectIdGetDatum(relid),
6138 Int16GetDatum(colnum),
6139 BoolGetDatum(rte->inh));
6140 vardata->freefunc = ReleaseSysCache;
6141 }
6142 }
6143 else
6144 {
6145 /* Expression --- maybe there are stats for the index itself */
6146 relid = index->indexoid;
6147 colnum = indexcol + 1;
6148
6150 (*get_index_stats_hook) (root, relid, colnum, vardata))
6151 {
6152 /*
6153 * The hook took control of acquiring a stats tuple. If it did
6154 * supply a tuple, it'd better have supplied a freefunc.
6155 */
6156 if (HeapTupleIsValid(vardata->statsTuple) &&
6157 !vardata->freefunc)
6158 elog(ERROR, "no function provided to release variable stats with");
6159 }
6160 else
6161 {
6162 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
6163 ObjectIdGetDatum(relid),
6164 Int16GetDatum(colnum),
6165 BoolGetDatum(false));
6166 vardata->freefunc = ReleaseSysCache;
6167 }
6168 }
6169}
6170
6171/*
6172 * Check whether it is permitted to call func_oid passing some of the
6173 * pg_statistic data in vardata. We allow this if either of the following
6174 * conditions is met: (1) the user has SELECT privileges on the table or
6175 * column underlying the pg_statistic data and there are no securityQuals from
6176 * security barrier views or RLS policies, or (2) the function is marked
6177 * leakproof.
6178 */
6179bool
6181{
6182 if (vardata->acl_ok)
6183 return true; /* have SELECT privs and no securityQuals */
6184
6185 if (!OidIsValid(func_oid))
6186 return false;
6187
6188 if (get_func_leakproof(func_oid))
6189 return true;
6190
6192 (errmsg_internal("not using statistics because function \"%s\" is not leakproof",
6193 get_func_name(func_oid))));
6194 return false;
6195}
6196
6197/*
6198 * get_variable_numdistinct
6199 * Estimate the number of distinct values of a variable.
6200 *
6201 * vardata: results of examine_variable
6202 * *isdefault: set to true if the result is a default rather than based on
6203 * anything meaningful.
6204 *
6205 * NB: be careful to produce a positive integral result, since callers may
6206 * compare the result to exact integer counts, or might divide by it.
6207 */
6208double
6210{
6211 double stadistinct;
6212 double stanullfrac = 0.0;
6213 double ntuples;
6214
6215 *isdefault = false;
6216
6217 /*
6218 * Determine the stadistinct value to use. There are cases where we can
6219 * get an estimate even without a pg_statistic entry, or can get a better
6220 * value than is in pg_statistic. Grab stanullfrac too if we can find it
6221 * (otherwise, assume no nulls, for lack of any better idea).
6222 */
6223 if (HeapTupleIsValid(vardata->statsTuple))
6224 {
6225 /* Use the pg_statistic entry */
6226 Form_pg_statistic stats;
6227
6228 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
6229 stadistinct = stats->stadistinct;
6230 stanullfrac = stats->stanullfrac;
6231 }
6232 else if (vardata->vartype == BOOLOID)
6233 {
6234 /*
6235 * Special-case boolean columns: presumably, two distinct values.
6236 *
6237 * Are there any other datatypes we should wire in special estimates
6238 * for?
6239 */
6240 stadistinct = 2.0;
6241 }
6242 else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
6243 {
6244 /*
6245 * If the Var represents a column of a VALUES RTE, assume it's unique.
6246 * This could of course be very wrong, but it should tend to be true
6247 * in well-written queries. We could consider examining the VALUES'
6248 * contents to get some real statistics; but that only works if the
6249 * entries are all constants, and it would be pretty expensive anyway.
6250 */
6251 stadistinct = -1.0; /* unique (and all non null) */
6252 }
6253 else
6254 {
6255 /*
6256 * We don't keep statistics for system columns, but in some cases we
6257 * can infer distinctness anyway.
6258 */
6259 if (vardata->var && IsA(vardata->var, Var))
6260 {
6261 switch (((Var *) vardata->var)->varattno)
6262 {
6264 stadistinct = -1.0; /* unique (and all non null) */
6265 break;
6267 stadistinct = 1.0; /* only 1 value */
6268 break;
6269 default:
6270 stadistinct = 0.0; /* means "unknown" */
6271 break;
6272 }
6273 }
6274 else
6275 stadistinct = 0.0; /* means "unknown" */
6276
6277 /*
6278 * XXX consider using estimate_num_groups on expressions?
6279 */
6280 }
6281
6282 /*
6283 * If there is a unique index, DISTINCT or GROUP-BY clause for the
6284 * variable, assume it is unique no matter what pg_statistic says; the
6285 * statistics could be out of date, or we might have found a partial
6286 * unique index that proves the var is unique for this query. However,
6287 * we'd better still believe the null-fraction statistic.
6288 */
6289 if (vardata->isunique)
6290 stadistinct = -1.0 * (1.0 - stanullfrac);
6291
6292 /*
6293 * If we had an absolute estimate, use that.
6294 */
6295 if (stadistinct > 0.0)
6296 return clamp_row_est(stadistinct);
6297
6298 /*
6299 * Otherwise we need to get the relation size; punt if not available.
6300 */
6301 if (vardata->rel == NULL)
6302 {
6303 *isdefault = true;
6304 return DEFAULT_NUM_DISTINCT;
6305 }
6306 ntuples = vardata->rel->tuples;
6307 if (ntuples <= 0.0)
6308 {
6309 *isdefault = true;
6310 return DEFAULT_NUM_DISTINCT;
6311 }
6312
6313 /*
6314 * If we had a relative estimate, use that.
6315 */
6316 if (stadistinct < 0.0)
6317 return clamp_row_est(-stadistinct * ntuples);
6318
6319 /*
6320 * With no data, estimate ndistinct = ntuples if the table is small, else
6321 * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
6322 * that the behavior isn't discontinuous.
6323 */
6324 if (ntuples < DEFAULT_NUM_DISTINCT)
6325 return clamp_row_est(ntuples);
6326
6327 *isdefault = true;
6328 return DEFAULT_NUM_DISTINCT;
6329}
6330
6331/*
6332 * get_variable_range
6333 * Estimate the minimum and maximum value of the specified variable.
6334 * If successful, store values in *min and *max, and return true.
6335 * If no data available, return false.
6336 *
6337 * sortop is the "<" comparison operator to use. This should generally
6338 * be "<" not ">", as only the former is likely to be found in pg_statistic.
6339 * The collation must be specified too.
6340 */
6341static bool
6343 Oid sortop, Oid collation,
6344 Datum *min, Datum *max)
6345{
6346 Datum tmin = 0;
6347 Datum tmax = 0;
6348 bool have_data = false;
6349 int16 typLen;
6350 bool typByVal;
6351 Oid opfuncoid;
6352 FmgrInfo opproc;
6353 AttStatsSlot sslot;
6354
6355 /*
6356 * XXX It's very tempting to try to use the actual column min and max, if
6357 * we can get them relatively-cheaply with an index probe. However, since
6358 * this function is called many times during join planning, that could
6359 * have unpleasant effects on planning speed. Need more investigation
6360 * before enabling this.
6361 */
6362#ifdef NOT_USED
6363 if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
6364 return true;
6365#endif
6366
6367 if (!HeapTupleIsValid(vardata->statsTuple))
6368 {
6369 /* no stats available, so default result */
6370 return false;
6371 }
6372
6373 /*
6374 * If we can't apply the sortop to the stats data, just fail. In
6375 * principle, if there's a histogram and no MCVs, we could return the
6376 * histogram endpoints without ever applying the sortop ... but it's
6377 * probably not worth trying, because whatever the caller wants to do with
6378 * the endpoints would likely fail the security check too.
6379 */
6380 if (!statistic_proc_security_check(vardata,
6381 (opfuncoid = get_opcode(sortop))))
6382 return false;
6383
6384 opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */
6385
6386 get_typlenbyval(vardata->atttype, &typLen, &typByVal);
6387
6388 /*
6389 * If there is a histogram with the ordering we want, grab the first and
6390 * last values.
6391 */
6392 if (get_attstatsslot(&sslot, vardata->statsTuple,
6393 STATISTIC_KIND_HISTOGRAM, sortop,
6395 {
6396 if (sslot.stacoll == collation && sslot.nvalues > 0)
6397 {
6398 tmin = datumCopy(sslot.values[0], typByVal, typLen);
6399 tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
6400 have_data = true;
6401 }
6402 free_attstatsslot(&sslot);
6403 }
6404
6405 /*
6406 * Otherwise, if there is a histogram with some other ordering, scan it
6407 * and get the min and max values according to the ordering we want. This
6408 * of course may not find values that are really extremal according to our
6409 * ordering, but it beats ignoring available data.
6410 */
6411 if (!have_data &&
6412 get_attstatsslot(&sslot, vardata->statsTuple,
6413 STATISTIC_KIND_HISTOGRAM, InvalidOid,
6415 {
6416 get_stats_slot_range(&sslot, opfuncoid, &opproc,
6417 collation, typLen, typByVal,
6418 &tmin, &tmax, &have_data);
6419 free_attstatsslot(&sslot);
6420 }
6421
6422 /*
6423 * If we have most-common-values info, look for extreme MCVs. This is
6424 * needed even if we also have a histogram, since the histogram excludes
6425 * the MCVs. However, if we *only* have MCVs and no histogram, we should
6426 * be pretty wary of deciding that that is a full representation of the
6427 * data. Proceed only if the MCVs represent the whole table (to within
6428 * roundoff error).
6429 */
6430 if (get_attstatsslot(&sslot, vardata->statsTuple,
6431 STATISTIC_KIND_MCV, InvalidOid,
6432 have_data ? ATTSTATSSLOT_VALUES :
6434 {
6435 bool use_mcvs = have_data;
6436
6437 if (!have_data)
6438 {
6439 double sumcommon = 0.0;
6440 double nullfrac;
6441 int i;
6442
6443 for (i = 0; i < sslot.nnumbers; i++)
6444 sumcommon += sslot.numbers[i];
6445 nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac;
6446 if (sumcommon + nullfrac > 0.99999)
6447 use_mcvs = true;
6448 }
6449
6450 if (use_mcvs)
6451 get_stats_slot_range(&sslot, opfuncoid, &opproc,
6452 collation, typLen, typByVal,
6453 &tmin, &tmax, &have_data);
6454 free_attstatsslot(&sslot);
6455 }
6456
6457 *min = tmin;
6458 *max = tmax;
6459 return have_data;
6460}
6461
6462/*
6463 * get_stats_slot_range: scan sslot for min/max values
6464 *
6465 * Subroutine for get_variable_range: update min/max/have_data according
6466 * to what we find in the statistics array.
6467 */
6468static void
6470 Oid collation, int16 typLen, bool typByVal,
6471 Datum *min, Datum *max, bool *p_have_data)
6472{
6473 Datum tmin = *min;
6474 Datum tmax = *max;
6475 bool have_data = *p_have_data;
6476 bool found_tmin = false;
6477 bool found_tmax = false;
6478
6479 /* Look up the comparison function, if we didn't already do so */
6480 if (opproc->fn_oid != opfuncoid)
6481 fmgr_info(opfuncoid, opproc);
6482
6483 /* Scan all the slot's values */
6484 for (int i = 0; i < sslot->nvalues; i++)
6485 {
6486 if (!have_data)
6487 {
6488 tmin = tmax = sslot->values[i];
6489 found_tmin = found_tmax = true;
6490 *p_have_data = have_data = true;
6491 continue;
6492 }
6494 collation,
6495 sslot->values[i], tmin)))
6496 {
6497 tmin = sslot->values[i];
6498 found_tmin = true;
6499 }
6501 collation,
6502 tmax, sslot->values[i])))
6503 {
6504 tmax = sslot->values[i];
6505 found_tmax = true;
6506 }
6507 }
6508
6509 /*
6510 * Copy the slot's values, if we found new extreme values.
6511 */
6512 if (found_tmin)
6513 *min = datumCopy(tmin, typByVal, typLen);
6514 if (found_tmax)
6515 *max = datumCopy(tmax, typByVal, typLen);
6516}
6517
6518
6519/*
6520 * get_actual_variable_range
6521 * Attempt to identify the current *actual* minimum and/or maximum
6522 * of the specified variable, by looking for a suitable btree index
6523 * and fetching its low and/or high values.
6524 * If successful, store values in *min and *max, and return true.
6525 * (Either pointer can be NULL if that endpoint isn't needed.)
6526 * If unsuccessful, return false.
6527 *
6528 * sortop is the "<" comparison operator to use.
6529 * collation is the required collation.
6530 */
6531static bool
6533 Oid sortop, Oid collation,
6534 Datum *min, Datum *max)
6535{
6536 bool have_data = false;
6537 RelOptInfo *rel = vardata->rel;
6538 RangeTblEntry *rte;
6539 ListCell *lc;
6540
6541 /* No hope if no relation or it doesn't have indexes */
6542 if (rel == NULL || rel->indexlist == NIL)
6543 return false;
6544 /* If it has indexes it must be a plain relation */
6545 rte = root->simple_rte_array[rel->relid];
6546 Assert(rte->rtekind == RTE_RELATION);
6547
6548 /* ignore partitioned tables. Any indexes here are not real indexes */
6549 if (rte->relkind == RELKIND_PARTITIONED_TABLE)
6550 return false;
6551
6552 /* Search through the indexes to see if any match our problem */
6553 foreach(lc, rel->indexlist)
6554 {
6556 ScanDirection indexscandir;
6557 StrategyNumber strategy;
6558
6559 /* Ignore non-ordering indexes */
6560 if (index->sortopfamily == NULL)
6561 continue;
6562
6563 /*
6564 * Ignore partial indexes --- we only want stats that cover the entire
6565 * relation.
6566 */
6567 if (index->indpred != NIL)
6568 continue;
6569
6570 /*
6571 * The index list might include hypothetical indexes inserted by a
6572 * get_relation_info hook --- don't try to access them.
6573 */
6574 if (index->hypothetical)
6575 continue;
6576
6577 /*
6578 * The first index column must match the desired variable, sortop, and
6579 * collation --- but we can use a descending-order index.
6580 */
6581 if (collation != index->indexcollations[0])
6582 continue; /* test first 'cause it's cheapest */
6583 if (!match_index_to_operand(vardata->var, 0, index))
6584 continue;
6585 strategy = get_op_opfamily_strategy(sortop, index->sortopfamily[0]);
6586 switch (IndexAmTranslateStrategy(strategy, index->relam, index->sortopfamily[0], true))
6587 {
6588 case COMPARE_LT:
6589 if (index->reverse_sort[0])
6590 indexscandir = BackwardScanDirection;
6591 else
6592 indexscandir = ForwardScanDirection;
6593 break;
6594 case COMPARE_GT:
6595 if (index->reverse_sort[0])
6596 indexscandir = ForwardScanDirection;
6597 else
6598 indexscandir = BackwardScanDirection;
6599 break;
6600 default:
6601 /* index doesn't match the sortop */
6602 continue;
6603 }
6604
6605 /*
6606 * Found a suitable index to extract data from. Set up some data that
6607 * can be used by both invocations of get_actual_variable_endpoint.
6608 */
6609 {
6610 MemoryContext tmpcontext;
6611 MemoryContext oldcontext;
6612 Relation heapRel;
6613 Relation indexRel;
6614 TupleTableSlot *slot;
6615 int16 typLen;
6616 bool typByVal;
6617 ScanKeyData scankeys[1];
6618
6619 /* Make sure any cruft gets recycled when we're done */
6621 "get_actual_variable_range workspace",
6623 oldcontext = MemoryContextSwitchTo(tmpcontext);
6624
6625 /*
6626 * Open the table and index so we can read from them. We should
6627 * already have some type of lock on each.
6628 */
6629 heapRel = table_open(rte->relid, NoLock);
6630 indexRel = index_open(index->indexoid, NoLock);
6631
6632 /* build some stuff needed for indexscan execution */
6633 slot = table_slot_create(heapRel, NULL);
6634 get_typlenbyval(vardata->atttype, &typLen, &typByVal);
6635
6636 /* set up an IS NOT NULL scan key so that we ignore nulls */
6637 ScanKeyEntryInitialize(&scankeys[0],
6639 1, /* index col to scan */
6640 InvalidStrategy, /* no strategy */
6641 InvalidOid, /* no strategy subtype */
6642 InvalidOid, /* no collation */
6643 InvalidOid, /* no reg proc for this */
6644 (Datum) 0); /* constant */
6645
6646 /* If min is requested ... */
6647 if (min)
6648 {
6649 have_data = get_actual_variable_endpoint(heapRel,
6650 indexRel,
6651 indexscandir,
6652 scankeys,
6653 typLen,
6654 typByVal,
6655 slot,
6656 oldcontext,
6657 min);
6658 }
6659 else
6660 {
6661 /* If min not requested, still want to fetch max */
6662 have_data = true;
6663 }
6664
6665 /* If max is requested, and we didn't already fail ... */
6666 if (max && have_data)
6667 {
6668 /* scan in the opposite direction; all else is the same */
6669 have_data = get_actual_variable_endpoint(heapRel,
6670 indexRel,
6671 -indexscandir,
6672 scankeys,
6673 typLen,
6674 typByVal,
6675 slot,
6676 oldcontext,
6677 max);
6678 }
6679
6680 /* Clean everything up */
6682
6683 index_close(indexRel, NoLock);
6684 table_close(heapRel, NoLock);
6685
6686 MemoryContextSwitchTo(oldcontext);
6687 MemoryContextDelete(tmpcontext);
6688
6689 /* And we're done */
6690 break;
6691 }
6692 }
6693
6694 return have_data;
6695}
6696
6697/*
6698 * Get one endpoint datum (min or max depending on indexscandir) from the
6699 * specified index. Return true if successful, false if not.
6700 * On success, endpoint value is stored to *endpointDatum (and copied into
6701 * outercontext).
6702 *
6703 * scankeys is a 1-element scankey array set up to reject nulls.
6704 * typLen/typByVal describe the datatype of the index's first column.
6705 * tableslot is a slot suitable to hold table tuples, in case we need
6706 * to probe the heap.
6707 * (We could compute these values locally, but that would mean computing them
6708 * twice when get_actual_variable_range needs both the min and the max.)
6709 *
6710 * Failure occurs either when the index is empty, or we decide that it's
6711 * taking too long to find a suitable tuple.
6712 */
6713static bool
6715 Relation indexRel,
6716 ScanDirection indexscandir,
6717 ScanKey scankeys,
6718 int16 typLen,
6719 bool typByVal,
6720 TupleTableSlot *tableslot,
6721 MemoryContext outercontext,
6722 Datum *endpointDatum)
6723{
6724 bool have_data = false;
6725 SnapshotData SnapshotNonVacuumable;
6726 IndexScanDesc index_scan;
6727 Buffer vmbuffer = InvalidBuffer;
6728 BlockNumber last_heap_block = InvalidBlockNumber;
6729 int n_visited_heap_pages = 0;
6730 ItemPointer tid;
6732 bool isnull[INDEX_MAX_KEYS];
6733 MemoryContext oldcontext;
6734
6735 /*
6736 * We use the index-only-scan machinery for this. With mostly-static
6737 * tables that's a win because it avoids a heap visit. It's also a win
6738 * for dynamic data, but the reason is less obvious; read on for details.
6739 *
6740 * In principle, we should scan the index with our current active
6741 * snapshot, which is the best approximation we've got to what the query
6742 * will see when executed. But that won't be exact if a new snap is taken
6743 * before running the query, and it can be very expensive if a lot of
6744 * recently-dead or uncommitted rows exist at the beginning or end of the
6745 * index (because we'll laboriously fetch each one and reject it).
6746 * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
6747 * and uncommitted rows as well as normal visible rows. On the other
6748 * hand, it will reject known-dead rows, and thus not give a bogus answer
6749 * when the extreme value has been deleted (unless the deletion was quite
6750 * recent); that case motivates not using SnapshotAny here.
6751 *
6752 * A crucial point here is that SnapshotNonVacuumable, with
6753 * GlobalVisTestFor(heapRel) as horizon, yields the inverse of the
6754 * condition that the indexscan will use to decide that index entries are
6755 * killable (see heap_hot_search_buffer()). Therefore, if the snapshot
6756 * rejects a tuple (or more precisely, all tuples of a HOT chain) and we
6757 * have to continue scanning past it, we know that the indexscan will mark
6758 * that index entry killed. That means that the next
6759 * get_actual_variable_endpoint() call will not have to re-consider that
6760 * index entry. In this way we avoid repetitive work when this function
6761 * is used a lot during planning.
6762 *
6763 * But using SnapshotNonVacuumable creates a hazard of its own. In a
6764 * recently-created index, some index entries may point at "broken" HOT
6765 * chains in which not all the tuple versions contain data matching the
6766 * index entry. The live tuple version(s) certainly do match the index,
6767 * but SnapshotNonVacuumable can accept recently-dead tuple versions that
6768 * don't match. Hence, if we took data from the selected heap tuple, we
6769 * might get a bogus answer that's not close to the index extremal value,
6770 * or could even be NULL. We avoid this hazard because we take the data
6771 * from the index entry not the heap.
6772 *
6773 * Despite all this care, there are situations where we might find many
6774 * non-visible tuples near the end of the index. We don't want to expend
6775 * a huge amount of time here, so we give up once we've read too many heap
6776 * pages. When we fail for that reason, the caller will end up using
6777 * whatever extremal value is recorded in pg_statistic.
6778 */
6779 InitNonVacuumableSnapshot(SnapshotNonVacuumable,
6780 GlobalVisTestFor(heapRel));
6781
6782 index_scan = index_beginscan(heapRel, indexRel,
6783 &SnapshotNonVacuumable, NULL,
6784 1, 0);
6785 /* Set it up for index-only scan */
6786 index_scan->xs_want_itup = true;
6787 index_rescan(index_scan, scankeys, 1, NULL, 0);
6788
6789 /* Fetch first/next tuple in specified direction */
6790 while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
6791 {
6793
6794 if (!VM_ALL_VISIBLE(heapRel,
6795 block,
6796 &vmbuffer))
6797 {
6798 /* Rats, we have to visit the heap to check visibility */
6799 if (!index_fetch_heap(index_scan, tableslot))
6800 {
6801 /*
6802 * No visible tuple for this index entry, so we need to
6803 * advance to the next entry. Before doing so, count heap
6804 * page fetches and give up if we've done too many.
6805 *
6806 * We don't charge a page fetch if this is the same heap page
6807 * as the previous tuple. This is on the conservative side,
6808 * since other recently-accessed pages are probably still in
6809 * buffers too; but it's good enough for this heuristic.
6810 */
6811#define VISITED_PAGES_LIMIT 100
6812
6813 if (block != last_heap_block)
6814 {
6815 last_heap_block = block;
6816 n_visited_heap_pages++;
6817 if (n_visited_heap_pages > VISITED_PAGES_LIMIT)
6818 break;
6819 }
6820
6821 continue; /* no visible tuple, try next index entry */
6822 }
6823
6824 /* We don't actually need the heap tuple for anything */
6825 ExecClearTuple(tableslot);
6826
6827 /*
6828 * We don't care whether there's more than one visible tuple in
6829 * the HOT chain; if any are visible, that's good enough.
6830 */
6831 }
6832
6833 /*
6834 * We expect that the index will return data in IndexTuple not
6835 * HeapTuple format.
6836 */
6837 if (!index_scan->xs_itup)
6838 elog(ERROR, "no data returned for index-only scan");
6839
6840 /*
6841 * We do not yet support recheck here.
6842 */
6843 if (index_scan->xs_recheck)
6844 break;
6845
6846 /* OK to deconstruct the index tuple */
6847 index_deform_tuple(index_scan->xs_itup,
6848 index_scan->xs_itupdesc,
6849 values, isnull);
6850
6851 /* Shouldn't have got a null, but be careful */
6852 if (isnull[0])
6853 elog(ERROR, "found unexpected null value in index \"%s\"",
6854 RelationGetRelationName(indexRel));
6855
6856 /* Copy the index column value out to caller's context */
6857 oldcontext = MemoryContextSwitchTo(outercontext);
6858 *endpointDatum = datumCopy(values[0], typByVal, typLen);
6859 MemoryContextSwitchTo(oldcontext);
6860 have_data = true;
6861 break;
6862 }
6863
6864 if (vmbuffer != InvalidBuffer)
6865 ReleaseBuffer(vmbuffer);
6866 index_endscan(index_scan);
6867
6868 return have_data;
6869}
6870
6871/*
6872 * find_join_input_rel
6873 * Look up the input relation for a join.
6874 *
6875 * We assume that the input relation's RelOptInfo must have been constructed
6876 * already.
6877 */
6878static RelOptInfo *
6880{
6881 RelOptInfo *rel = NULL;
6882
6883 if (!bms_is_empty(relids))
6884 {
6885 int relid;
6886
6887 if (bms_get_singleton_member(relids, &relid))
6888 rel = find_base_rel(root, relid);
6889 else
6890 rel = find_join_rel(root, relids);
6891 }
6892
6893 if (rel == NULL)
6894 elog(ERROR, "could not find RelOptInfo for given relids");
6895
6896 return rel;
6897}
6898
6899
6900/*-------------------------------------------------------------------------
6901 *
6902 * Index cost estimation functions
6903 *
6904 *-------------------------------------------------------------------------
6905 */
6906
6907/*
6908 * Extract the actual indexquals (as RestrictInfos) from an IndexClause list
6909 */
6910List *
6912{
6913 List *result = NIL;
6914 ListCell *lc;
6915
6916 foreach(lc, indexclauses)
6917 {
6918 IndexClause *iclause = lfirst_node(IndexClause, lc);
6919 ListCell *lc2;
6920
6921 foreach(lc2, iclause->indexquals)
6922 {
6923 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6924
6925 result = lappend(result, rinfo);
6926 }
6927 }
6928 return result;
6929}
6930
6931/*
6932 * Compute the total evaluation cost of the comparison operands in a list
6933 * of index qual expressions. Since we know these will be evaluated just
6934 * once per scan, there's no need to distinguish startup from per-row cost.
6935 *
6936 * This can be used either on the result of get_quals_from_indexclauses(),
6937 * or directly on an indexorderbys list. In both cases, we expect that the
6938 * index key expression is on the left side of binary clauses.
6939 */
6940Cost
6942{
6943 Cost qual_arg_cost = 0;
6944 ListCell *lc;
6945
6946 foreach(lc, indexquals)
6947 {
6948 Expr *clause = (Expr *) lfirst(lc);
6949 Node *other_operand;
6950 QualCost index_qual_cost;
6951
6952 /*
6953 * Index quals will have RestrictInfos, indexorderbys won't. Look
6954 * through RestrictInfo if present.
6955 */
6956 if (IsA(clause, RestrictInfo))
6957 clause = ((RestrictInfo *) clause)->clause;
6958
6959 if (IsA(clause, OpExpr))
6960 {
6961 OpExpr *op = (OpExpr *) clause;
6962
6963 other_operand = (Node *) lsecond(op->args);
6964 }
6965 else if (IsA(clause, RowCompareExpr))
6966 {
6967 RowCompareExpr *rc = (RowCompareExpr *) clause;
6968
6969 other_operand = (Node *) rc->rargs;
6970 }
6971 else if (IsA(clause, ScalarArrayOpExpr))
6972 {
6973 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6974
6975 other_operand = (Node *) lsecond(saop->args);
6976 }
6977 else if (IsA(clause, NullTest))
6978 {
6979 other_operand = NULL;
6980 }
6981 else
6982 {
6983 elog(ERROR, "unsupported indexqual type: %d",
6984 (int) nodeTag(clause));
6985 other_operand = NULL; /* keep compiler quiet */
6986 }
6987
6988 cost_qual_eval_node(&index_qual_cost, other_operand, root);
6989 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6990 }
6991 return qual_arg_cost;
6992}
6993
6994void
6996 IndexPath *path,
6997 double loop_count,
6998 GenericCosts *costs)
6999{
7000 IndexOptInfo *index = path->indexinfo;
7001 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
7002 List *indexOrderBys = path->indexorderbys;
7003 Cost indexStartupCost;
7004 Cost indexTotalCost;
7005 Selectivity indexSelectivity;
7006 double indexCorrelation;
7007 double numIndexPages;
7008 double numIndexTuples;
7009 double spc_random_page_cost;
7010 double num_sa_scans;
7011 double num_outer_scans;
7012 double num_scans;
7013 double qual_op_cost;
7014 double qual_arg_cost;
7015 List *selectivityQuals;
7016 ListCell *l;
7017
7018 /*
7019 * If the index is partial, AND the index predicate with the explicitly
7020 * given indexquals to produce a more accurate idea of the index
7021 * selectivity.
7022 */
7023 selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
7024
7025 /*
7026 * If caller didn't give us an estimate for ScalarArrayOpExpr index scans,
7027 * just assume that the number of index descents is the number of distinct
7028 * combinations of array elements from all of the scan's SAOP clauses.
7029 */
7030 num_sa_scans = costs->num_sa_scans;
7031 if (num_sa_scans < 1)
7032 {
7033 num_sa_scans = 1;
7034 foreach(l, indexQuals)
7035 {
7036 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
7037
7038 if (IsA(rinfo->clause, ScalarArrayOpExpr))
7039 {
7040 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
7041 double alength = estimate_array_length(root, lsecond(saop->args));
7042
7043 if (alength > 1)
7044 num_sa_scans *= alength;
7045 }
7046 }
7047 }
7048
7049 /* Estimate the fraction of main-table tuples that will be visited */
7050 indexSelectivity = clauselist_selectivity(root, selectivityQuals,
7051 index->rel->relid,
7052 JOIN_INNER,
7053 NULL);
7054
7055 /*
7056 * If caller didn't give us an estimate, estimate the number of index
7057 * tuples that will be visited. We do it in this rather peculiar-looking
7058 * way in order to get the right answer for partial indexes.
7059 */
7060 numIndexTuples = costs->numIndexTuples;
7061 if (numIndexTuples <= 0.0)
7062 {
7063 numIndexTuples = indexSelectivity * index->rel->tuples;
7064
7065 /*
7066 * The above calculation counts all the tuples visited across all
7067 * scans induced by ScalarArrayOpExpr nodes. We want to consider the
7068 * average per-indexscan number, so adjust. This is a handy place to
7069 * round to integer, too. (If caller supplied tuple estimate, it's
7070 * responsible for handling these considerations.)
7071 */
7072 numIndexTuples = rint(numIndexTuples / num_sa_scans);
7073 }
7074
7075 /*
7076 * We can bound the number of tuples by the index size in any case. Also,
7077 * always estimate at least one tuple is touched, even when
7078 * indexSelectivity estimate is tiny.
7079 */
7080 if (numIndexTuples > index->tuples)
7081 numIndexTuples = index->tuples;
7082 if (numIndexTuples < 1.0)
7083 numIndexTuples = 1.0;
7084
7085 /*
7086 * Estimate the number of index pages that will be retrieved.
7087 *
7088 * We use the simplistic method of taking a pro-rata fraction of the total
7089 * number of index pages. In effect, this counts only leaf pages and not
7090 * any overhead such as index metapage or upper tree levels.
7091 *
7092 * In practice access to upper index levels is often nearly free because
7093 * those tend to stay in cache under load; moreover, the cost involved is
7094 * highly dependent on index type. We therefore ignore such costs here
7095 * and leave it to the caller to add a suitable charge if needed.
7096 */
7097 if (index->pages > 1 && index->tuples > 1)
7098 numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
7099 else
7100 numIndexPages = 1.0;
7101
7102 /* fetch estimated page cost for tablespace containing index */
7103 get_tablespace_page_costs(index->reltablespace,
7104 &spc_random_page_cost,
7105 NULL);
7106
7107 /*
7108 * Now compute the disk access costs.
7109 *
7110 * The above calculations are all per-index-scan. However, if we are in a
7111 * nestloop inner scan, we can expect the scan to be repeated (with
7112 * different search keys) for each row of the outer relation. Likewise,
7113 * ScalarArrayOpExpr quals result in multiple index scans. This creates
7114 * the potential for cache effects to reduce the number of disk page
7115 * fetches needed. We want to estimate the average per-scan I/O cost in
7116 * the presence of caching.
7117 *
7118 * We use the Mackert-Lohman formula (see costsize.c for details) to
7119 * estimate the total number of page fetches that occur. While this
7120 * wasn't what it was designed for, it seems a reasonable model anyway.
7121 * Note that we are counting pages not tuples anymore, so we take N = T =
7122 * index size, as if there were one "tuple" per page.
7123 */
7124 num_outer_scans = loop_count;
7125 num_scans = num_sa_scans * num_outer_scans;
7126
7127 if (num_scans > 1)
7128 {
7129 double pages_fetched;
7130
7131 /* total page fetches ignoring cache effects */
7132 pages_fetched = numIndexPages * num_scans;
7133
7134 /* use Mackert and Lohman formula to adjust for cache effects */
7135 pages_fetched = index_pages_fetched(pages_fetched,
7136 index->pages,
7137 (double) index->pages,
7138 root);
7139
7140 /*
7141 * Now compute the total disk access cost, and then report a pro-rated
7142 * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
7143 * since that's internal to the indexscan.)
7144 */
7145 indexTotalCost = (pages_fetched * spc_random_page_cost)
7146 / num_outer_scans;
7147 }
7148 else
7149 {
7150 /*
7151 * For a single index scan, we just charge spc_random_page_cost per
7152 * page touched.
7153 */
7154 indexTotalCost = numIndexPages * spc_random_page_cost;
7155 }
7156
7157 /*
7158 * CPU cost: any complex expressions in the indexquals will need to be
7159 * evaluated once at the start of the scan to reduce them to runtime keys
7160 * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
7161 * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
7162 * indexqual operator. Because we have numIndexTuples as a per-scan
7163 * number, we have to multiply by num_sa_scans to get the correct result
7164 * for ScalarArrayOpExpr cases. Similarly add in costs for any index
7165 * ORDER BY expressions.
7166 *
7167 * Note: this neglects the possible costs of rechecking lossy operators.
7168 * Detecting that that might be needed seems more expensive than it's
7169 * worth, though, considering all the other inaccuracies here ...
7170 */
7171 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
7172 index_other_operands_eval_cost(root, indexOrderBys);
7173 qual_op_cost = cpu_operator_cost *
7174 (list_length(indexQuals) + list_length(indexOrderBys));
7175
7176 indexStartupCost = qual_arg_cost;
7177 indexTotalCost += qual_arg_cost;
7178 indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
7179
7180 /*
7181 * Generic assumption about index correlation: there isn't any.
7182 */
7183 indexCorrelation = 0.0;
7184
7185 /*
7186 * Return everything to caller.
7187 */
7188 costs->indexStartupCost = indexStartupCost;
7189 costs->indexTotalCost = indexTotalCost;
7190 costs->indexSelectivity = indexSelectivity;
7191 costs->indexCorrelation = indexCorrelation;
7192 costs->numIndexPages = numIndexPages;
7193 costs->numIndexTuples = numIndexTuples;
7194 costs->spc_random_page_cost = spc_random_page_cost;
7195 costs->num_sa_scans = num_sa_scans;
7196}
7197
7198/*
7199 * If the index is partial, add its predicate to the given qual list.
7200 *
7201 * ANDing the index predicate with the explicitly given indexquals produces
7202 * a more accurate idea of the index's selectivity. However, we need to be
7203 * careful not to insert redundant clauses, because clauselist_selectivity()
7204 * is easily fooled into computing a too-low selectivity estimate. Our
7205 * approach is to add only the predicate clause(s) that cannot be proven to
7206 * be implied by the given indexquals. This successfully handles cases such
7207 * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
7208 * There are many other cases where we won't detect redundancy, leading to a
7209 * too-low selectivity estimate, which will bias the system in favor of using
7210 * partial indexes where possible. That is not necessarily bad though.
7211 *
7212 * Note that indexQuals contains RestrictInfo nodes while the indpred
7213 * does not, so the output list will be mixed. This is OK for both
7214 * predicate_implied_by() and clauselist_selectivity(), but might be
7215 * problematic if the result were passed to other things.
7216 */
7217List *
7219{
7220 List *predExtraQuals = NIL;
7221 ListCell *lc;
7222
7223 if (index->indpred == NIL)
7224 return indexQuals;
7225
7226 foreach(lc, index->indpred)
7227 {
7228 Node *predQual = (Node *) lfirst(lc);
7229 List *oneQual = list_make1(predQual);
7230
7231 if (!predicate_implied_by(oneQual, indexQuals, false))
7232 predExtraQuals = list_concat(predExtraQuals, oneQual);
7233 }
7234 return list_concat(predExtraQuals, indexQuals);
7235}
7236
7237/*
7238 * Estimate correlation of btree index's first column.
7239 *
7240 * If we can get an estimate of the first column's ordering correlation C
7241 * from pg_statistic, estimate the index correlation as C for a single-column
7242 * index, or C * 0.75 for multiple columns. The idea here is that multiple
7243 * columns dilute the importance of the first column's ordering, but don't
7244 * negate it entirely.
7245 *
7246 * We already filled in the stats tuple for *vardata when called.
7247 */
7248static double
7250{
7251 Oid sortop;
7252 AttStatsSlot sslot;
7253 double indexCorrelation = 0;
7254
7256
7257 sortop = get_opfamily_member(index->opfamily[0],
7258 index->opcintype[0],
7259 index->opcintype[0],
7261 if (OidIsValid(sortop) &&
7262 get_attstatsslot(&sslot, vardata->statsTuple,
7263 STATISTIC_KIND_CORRELATION, sortop,
7265 {
7266 double varCorrelation;
7267
7268 Assert(sslot.nnumbers == 1);
7269 varCorrelation = sslot.numbers[0];
7270
7271 if (index->reverse_sort[0])
7272 varCorrelation = -varCorrelation;
7273
7274 if (index->nkeycolumns > 1)
7275 indexCorrelation = varCorrelation * 0.75;
7276 else
7277 indexCorrelation = varCorrelation;
7278
7279 free_attstatsslot(&sslot);
7280 }
7281
7282 return indexCorrelation;
7283}
7284
7285void
7286btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7287 Cost *indexStartupCost, Cost *indexTotalCost,
7288 Selectivity *indexSelectivity, double *indexCorrelation,
7289 double *indexPages)
7290{
7291 IndexOptInfo *index = path->indexinfo;
7292 GenericCosts costs = {0};
7293 VariableStatData vardata = {0};
7294 double numIndexTuples;
7295 Cost descentCost;
7296 List *indexBoundQuals;
7297 List *indexSkipQuals;
7298 int indexcol;
7299 bool eqQualHere;
7300 bool found_row_compare;
7301 bool found_array;
7302 bool found_is_null_op;
7303 bool have_correlation = false;
7304 double num_sa_scans;
7305 double correlation = 0.0;
7306 ListCell *lc;
7307
7308 /*
7309 * For a btree scan, only leading '=' quals plus inequality quals for the
7310 * immediately next attribute contribute to index selectivity (these are
7311 * the "boundary quals" that determine the starting and stopping points of
7312 * the index scan). Additional quals can suppress visits to the heap, so
7313 * it's OK to count them in indexSelectivity, but they should not count
7314 * for estimating numIndexTuples. So we must examine the given indexquals
7315 * to find out which ones count as boundary quals. We rely on the
7316 * knowledge that they are given in index column order. Note that nbtree
7317 * preprocessing can add skip arrays that act as leading '=' quals in the
7318 * absence of ordinary input '=' quals, so in practice _most_ input quals
7319 * are able to act as index bound quals (which we take into account here).
7320 *
7321 * For a RowCompareExpr, we consider only the first column, just as
7322 * rowcomparesel() does.
7323 *
7324 * If there's a SAOP or skip array in the quals, we'll actually perform up
7325 * to N index descents (not just one), but the underlying array key's
7326 * operator can be considered to act the same as it normally does.
7327 */
7328 indexBoundQuals = NIL;
7329 indexSkipQuals = NIL;
7330 indexcol = 0;
7331 eqQualHere = false;
7332 found_row_compare = false;
7333 found_array = false;
7334 found_is_null_op = false;
7335 num_sa_scans = 1;
7336 foreach(lc, path->indexclauses)
7337 {
7338 IndexClause *iclause = lfirst_node(IndexClause, lc);
7339 ListCell *lc2;
7340
7341 if (indexcol < iclause->indexcol)
7342 {
7343 double num_sa_scans_prev_cols = num_sa_scans;
7344
7345 /*
7346 * Beginning of a new column's quals.
7347 *
7348 * Skip scans use skip arrays, which are ScalarArrayOp style
7349 * arrays that generate their elements procedurally and on demand.
7350 * Given a multi-column index on "(a, b)", and an SQL WHERE clause
7351 * "WHERE b = 42", a skip scan will effectively use an indexqual
7352 * "WHERE a = ANY('{every col a value}') AND b = 42". (Obviously,
7353 * the array on "a" must also return "IS NULL" matches, since our
7354 * WHERE clause used no strict operator on "a").
7355 *
7356 * Here we consider how nbtree will backfill skip arrays for any
7357 * index columns that lacked an '=' qual. This maintains our
7358 * num_sa_scans estimate, and determines if this new column (the
7359 * "iclause->indexcol" column, not the prior "indexcol" column)
7360 * can have its RestrictInfos/quals added to indexBoundQuals.
7361 *
7362 * We'll need to handle columns that have inequality quals, where
7363 * the skip array generates values from a range constrained by the
7364 * quals (not every possible value). We've been maintaining
7365 * indexSkipQuals to help with this; it will now contain all of
7366 * the prior column's quals (that is, indexcol's quals) when they
7367 * might be used for this.
7368 */
7369 if (found_row_compare)
7370 {
7371 /*
7372 * Skip arrays can't be added after a RowCompare input qual
7373 * due to limitations in nbtree
7374 */
7375 break;
7376 }
7377 if (eqQualHere)
7378 {
7379 /*
7380 * Don't need to add a skip array for an indexcol that already
7381 * has an '=' qual/equality constraint
7382 */
7383 indexcol++;
7384 indexSkipQuals = NIL;
7385 }
7386 eqQualHere = false;
7387
7388 while (indexcol < iclause->indexcol)
7389 {
7390 double ndistinct;
7391 bool isdefault = true;
7392
7393 found_array = true;
7394
7395 /*
7396 * A skipped attribute's ndistinct forms the basis of our
7397 * estimate of the total number of "array elements" used by
7398 * its skip array at runtime. Look that up first.
7399 */
7400 examine_indexcol_variable(root, index, indexcol, &vardata);
7401 ndistinct = get_variable_numdistinct(&vardata, &isdefault);
7402
7403 if (indexcol == 0)
7404 {
7405 /*
7406 * Get an estimate of the leading column's correlation in
7407 * passing (avoids rereading variable stats below)
7408 */
7409 if (HeapTupleIsValid(vardata.statsTuple))
7410 correlation = btcost_correlation(index, &vardata);
7411 have_correlation = true;
7412 }
7413
7414 ReleaseVariableStats(vardata);
7415
7416 /*
7417 * If ndistinct is a default estimate, conservatively assume
7418 * that no skipping will happen at runtime
7419 */
7420 if (isdefault)
7421 {
7422 num_sa_scans = num_sa_scans_prev_cols;
7423 break; /* done building indexBoundQuals */
7424 }
7425
7426 /*
7427 * Apply indexcol's indexSkipQuals selectivity to ndistinct
7428 */
7429 if (indexSkipQuals != NIL)
7430 {
7431 List *partialSkipQuals;
7432 Selectivity ndistinctfrac;
7433
7434 /*
7435 * If the index is partial, AND the index predicate with
7436 * the index-bound quals to produce a more accurate idea
7437 * of the number of distinct values for prior indexcol
7438 */
7439 partialSkipQuals = add_predicate_to_index_quals(index,
7440 indexSkipQuals);
7441
7442 ndistinctfrac = clauselist_selectivity(root, partialSkipQuals,
7443 index->rel->relid,
7444 JOIN_INNER,
7445 NULL);
7446
7447 /*
7448 * If ndistinctfrac is selective (on its own), the scan is
7449 * unlikely to benefit from repositioning itself using
7450 * later quals. Do not allow iclause->indexcol's quals to
7451 * be added to indexBoundQuals (it would increase descent
7452 * costs, without lowering numIndexTuples costs by much).
7453 */
7454 if (ndistinctfrac < DEFAULT_RANGE_INEQ_SEL)
7455 {
7456 num_sa_scans = num_sa_scans_prev_cols;
7457 break; /* done building indexBoundQuals */
7458 }
7459
7460 /* Adjust ndistinct downward */
7461 ndistinct = rint(ndistinct * ndistinctfrac);
7462 ndistinct = Max(ndistinct, 1);
7463 }
7464
7465 /*
7466 * When there's no inequality quals, account for the need to
7467 * find an initial value by counting -inf/+inf as a value.
7468 *
7469 * We don't charge anything extra for possible next/prior key
7470 * index probes, which are sometimes used to find the next
7471 * valid skip array element (ahead of using the located
7472 * element value to relocate the scan to the next position
7473 * that might contain matching tuples). It seems hard to do
7474 * better here. Use of the skip support infrastructure often
7475 * avoids most next/prior key probes. But even when it can't,
7476 * there's a decent chance that most individual next/prior key
7477 * probes will locate a leaf page whose key space overlaps all
7478 * of the scan's keys (even the lower-order keys) -- which
7479 * also avoids the need for a separate, extra index descent.
7480 * Note also that these probes are much cheaper than non-probe
7481 * primitive index scans: they're reliably very selective.
7482 */
7483 if (indexSkipQuals == NIL)
7484 ndistinct += 1;
7485
7486 /*
7487 * Update num_sa_scans estimate by multiplying by ndistinct.
7488 *
7489 * We make the pessimistic assumption that there is no
7490 * naturally occurring cross-column correlation. This is
7491 * often wrong, but it seems best to err on the side of not
7492 * expecting skipping to be helpful...
7493 */
7494 num_sa_scans *= ndistinct;
7495
7496 /*
7497 * ...but back out of adding this latest group of 1 or more
7498 * skip arrays when num_sa_scans exceeds the total number of
7499 * index pages (revert to num_sa_scans from before indexcol).
7500 * This causes a sharp discontinuity in cost (as a function of
7501 * the indexcol's ndistinct), but that is representative of
7502 * actual runtime costs.
7503 *
7504 * Note that skipping is helpful when each primitive index
7505 * scan only manages to skip over 1 or 2 irrelevant leaf pages
7506 * on average. Skip arrays bring savings in CPU costs due to
7507 * the scan not needing to evaluate indexquals against every
7508 * tuple, which can greatly exceed any savings in I/O costs.
7509 * This test is a test of whether num_sa_scans implies that
7510 * we're past the point where the ability to skip ceases to
7511 * lower the scan's costs (even qual evaluation CPU costs).
7512 */
7513 if (index->pages < num_sa_scans)
7514 {
7515 num_sa_scans = num_sa_scans_prev_cols;
7516 break; /* done building indexBoundQuals */
7517 }
7518
7519 indexcol++;
7520 indexSkipQuals = NIL;
7521 }
7522
7523 /*
7524 * Finished considering the need to add skip arrays to bridge an
7525 * initial eqQualHere gap between the old and new index columns
7526 * (or there was no initial eqQualHere gap in the first place).
7527 *
7528 * If an initial gap could not be bridged, then new column's quals
7529 * (i.e. iclause->indexcol's quals) won't go into indexBoundQuals,
7530 * and so won't affect our final numIndexTuples estimate.
7531 */
7532 if (indexcol != iclause->indexcol)
7533 break; /* done building indexBoundQuals */
7534 }
7535
7536 Assert(indexcol == iclause->indexcol);
7537
7538 /* Examine each indexqual associated with this index clause */
7539 foreach(lc2, iclause->indexquals)
7540 {
7541 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
7542 Expr *clause = rinfo->clause;
7543 Oid clause_op = InvalidOid;
7544 int op_strategy;
7545
7546 if (IsA(clause, OpExpr))
7547 {
7548 OpExpr *op = (OpExpr *) clause;
7549
7550 clause_op = op->opno;
7551 }
7552 else if (IsA(clause, RowCompareExpr))
7553 {
7554 RowCompareExpr *rc = (RowCompareExpr *) clause;
7555
7556 clause_op = linitial_oid(rc->opnos);
7557 found_row_compare = true;
7558 }
7559 else if (IsA(clause, ScalarArrayOpExpr))
7560 {
7561 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
7562 Node *other_operand = (Node *) lsecond(saop->args);
7563 double alength = estimate_array_length(root, other_operand);
7564
7565 clause_op = saop->opno;
7566 found_array = true;
7567 /* estimate SA descents by indexBoundQuals only */
7568 if (alength > 1)
7569 num_sa_scans *= alength;
7570 }
7571 else if (IsA(clause, NullTest))
7572 {
7573 NullTest *nt = (NullTest *) clause;
7574
7575 if (nt->nulltesttype == IS_NULL)
7576 {
7577 found_is_null_op = true;
7578 /* IS NULL is like = for selectivity/skip scan purposes */
7579 eqQualHere = true;
7580 }
7581 }
7582 else
7583 elog(ERROR, "unsupported indexqual type: %d",
7584 (int) nodeTag(clause));
7585
7586 /* check for equality operator */
7587 if (OidIsValid(clause_op))
7588 {
7589 op_strategy = get_op_opfamily_strategy(clause_op,
7590 index->opfamily[indexcol]);
7591 Assert(op_strategy != 0); /* not a member of opfamily?? */
7592 if (op_strategy == BTEqualStrategyNumber)
7593 eqQualHere = true;
7594 }
7595
7596 indexBoundQuals = lappend(indexBoundQuals, rinfo);
7597
7598 /*
7599 * We apply inequality selectivities to estimate index descent
7600 * costs with scans that use skip arrays. Save this indexcol's
7601 * RestrictInfos if it looks like they'll be needed for that.
7602 */
7603 if (!eqQualHere && !found_row_compare &&
7604 indexcol < index->nkeycolumns - 1)
7605 indexSkipQuals = lappend(indexSkipQuals, rinfo);
7606 }
7607 }
7608
7609 /*
7610 * If index is unique and we found an '=' clause for each column, we can
7611 * just assume numIndexTuples = 1 and skip the expensive
7612 * clauselist_selectivity calculations. However, an array or NullTest
7613 * always invalidates that theory (even when eqQualHere has been set).
7614 */
7615 if (index->unique &&
7616 indexcol == index->nkeycolumns - 1 &&
7617 eqQualHere &&
7618 !found_array &&
7619 !found_is_null_op)
7620 numIndexTuples = 1.0;
7621 else
7622 {
7623 List *selectivityQuals;
7624 Selectivity btreeSelectivity;
7625
7626 /*
7627 * If the index is partial, AND the index predicate with the
7628 * index-bound quals to produce a more accurate idea of the number of
7629 * rows covered by the bound conditions.
7630 */
7631 selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
7632
7633 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
7634 index->rel->relid,
7635 JOIN_INNER,
7636 NULL);
7637 numIndexTuples = btreeSelectivity * index->rel->tuples;
7638
7639 /*
7640 * btree automatically combines individual array element primitive
7641 * index scans whenever the tuples covered by the next set of array
7642 * keys are close to tuples covered by the current set. That puts a
7643 * natural ceiling on the worst case number of descents -- there
7644 * cannot possibly be more than one descent per leaf page scanned.
7645 *
7646 * Clamp the number of descents to at most 1/3 the number of index
7647 * pages. This avoids implausibly high estimates with low selectivity
7648 * paths, where scans usually require only one or two descents. This
7649 * is most likely to help when there are several SAOP clauses, where
7650 * naively accepting the total number of distinct combinations of
7651 * array elements as the number of descents would frequently lead to
7652 * wild overestimates.
7653 *
7654 * We somewhat arbitrarily don't just make the cutoff the total number
7655 * of leaf pages (we make it 1/3 the total number of pages instead) to
7656 * give the btree code credit for its ability to continue on the leaf
7657 * level with low selectivity scans.
7658 *
7659 * Note: num_sa_scans includes both ScalarArrayOp array elements and
7660 * skip array elements whose qual affects our numIndexTuples estimate.
7661 */
7662 num_sa_scans = Min(num_sa_scans, ceil(index->pages * 0.3333333));
7663 num_sa_scans = Max(num_sa_scans, 1);
7664
7665 /*
7666 * As in genericcostestimate(), we have to adjust for any array quals
7667 * included in indexBoundQuals, and then round to integer.
7668 *
7669 * It is tempting to make genericcostestimate behave as if array
7670 * clauses work in almost the same way as scalar operators during
7671 * btree scans, making the top-level scan look like a continuous scan
7672 * (as opposed to num_sa_scans-many primitive index scans). After
7673 * all, btree scans mostly work like that at runtime. However, such a
7674 * scheme would badly bias genericcostestimate's simplistic approach
7675 * to calculating numIndexPages through prorating.
7676 *
7677 * Stick with the approach taken by non-native SAOP scans for now.
7678 * genericcostestimate will use the Mackert-Lohman formula to
7679 * compensate for repeat page fetches, even though that definitely
7680 * won't happen during btree scans (not for leaf pages, at least).
7681 * We're usually very pessimistic about the number of primitive index
7682 * scans that will be required, but it's not clear how to do better.
7683 */
7684 numIndexTuples = rint(numIndexTuples / num_sa_scans);
7685 }
7686
7687 /*
7688 * Now do generic index cost estimation.
7689 */
7690 costs.numIndexTuples = numIndexTuples;
7691 costs.num_sa_scans = num_sa_scans;
7692
7693 genericcostestimate(root, path, loop_count, &costs);
7694
7695 /*
7696 * Add a CPU-cost component to represent the costs of initial btree
7697 * descent. We don't charge any I/O cost for touching upper btree levels,
7698 * since they tend to stay in cache, but we still have to do about log2(N)
7699 * comparisons to descend a btree of N leaf tuples. We charge one
7700 * cpu_operator_cost per comparison.
7701 *
7702 * If there are SAOP or skip array keys, charge this once per estimated
7703 * index descent. The ones after the first one are not startup cost so
7704 * far as the overall plan goes, so just add them to "total" cost.
7705 */
7706 if (index->tuples > 1) /* avoid computing log(0) */
7707 {
7708 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
7709 costs.indexStartupCost += descentCost;
7710 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7711 }
7712
7713 /*
7714 * Even though we're not charging I/O cost for touching upper btree pages,
7715 * it's still reasonable to charge some CPU cost per page descended
7716 * through. Moreover, if we had no such charge at all, bloated indexes
7717 * would appear to have the same search cost as unbloated ones, at least
7718 * in cases where only a single leaf page is expected to be visited. This
7719 * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
7720 * touched. The number of such pages is btree tree height plus one (ie,
7721 * we charge for the leaf page too). As above, charge once per estimated
7722 * SAOP/skip array descent.
7723 */
7724 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
7725 costs.indexStartupCost += descentCost;
7726 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7727
7728 if (!have_correlation)
7729 {
7730 examine_indexcol_variable(root, index, 0, &vardata);
7731 if (HeapTupleIsValid(vardata.statsTuple))
7732 costs.indexCorrelation = btcost_correlation(index, &vardata);
7733 ReleaseVariableStats(vardata);
7734 }
7735 else
7736 {
7737 /* btcost_correlation already called earlier on */
7738 costs.indexCorrelation = correlation;
7739 }
7740
7741 *indexStartupCost = costs.indexStartupCost;
7742 *indexTotalCost = costs.indexTotalCost;
7743 *indexSelectivity = costs.indexSelectivity;
7744 *indexCorrelation = costs.indexCorrelation;
7745 *indexPages = costs.numIndexPages;
7746}
7747
7748void
7749hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7750 Cost *indexStartupCost, Cost *indexTotalCost,
7751 Selectivity *indexSelectivity, double *indexCorrelation,
7752 double *indexPages)
7753{
7754 GenericCosts costs = {0};
7755
7756 genericcostestimate(root, path, loop_count, &costs);
7757
7758 /*
7759 * A hash index has no descent costs as such, since the index AM can go
7760 * directly to the target bucket after computing the hash value. There
7761 * are a couple of other hash-specific costs that we could conceivably add
7762 * here, though:
7763 *
7764 * Ideally we'd charge spc_random_page_cost for each page in the target
7765 * bucket, not just the numIndexPages pages that genericcostestimate
7766 * thought we'd visit. However in most cases we don't know which bucket
7767 * that will be. There's no point in considering the average bucket size
7768 * because the hash AM makes sure that's always one page.
7769 *
7770 * Likewise, we could consider charging some CPU for each index tuple in
7771 * the bucket, if we knew how many there were. But the per-tuple cost is
7772 * just a hash value comparison, not a general datatype-dependent
7773 * comparison, so any such charge ought to be quite a bit less than
7774 * cpu_operator_cost; which makes it probably not worth worrying about.
7775 *
7776 * A bigger issue is that chance hash-value collisions will result in
7777 * wasted probes into the heap. We don't currently attempt to model this
7778 * cost on the grounds that it's rare, but maybe it's not rare enough.
7779 * (Any fix for this ought to consider the generic lossy-operator problem,
7780 * though; it's not entirely hash-specific.)
7781 */
7782
7783 *indexStartupCost = costs.indexStartupCost;
7784 *indexTotalCost = costs.indexTotalCost;
7785 *indexSelectivity = costs.indexSelectivity;
7786 *indexCorrelation = costs.indexCorrelation;
7787 *indexPages = costs.numIndexPages;
7788}
7789
7790void
7791gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7792 Cost *indexStartupCost, Cost *indexTotalCost,
7793 Selectivity *indexSelectivity, double *indexCorrelation,
7794 double *indexPages)
7795{
7796 IndexOptInfo *index = path->indexinfo;
7797 GenericCosts costs = {0};
7798 Cost descentCost;
7799
7800 genericcostestimate(root, path, loop_count, &costs);
7801
7802 /*
7803 * We model index descent costs similarly to those for btree, but to do
7804 * that we first need an idea of the tree height. We somewhat arbitrarily
7805 * assume that the fanout is 100, meaning the tree height is at most
7806 * log100(index->pages).
7807 *
7808 * Although this computation isn't really expensive enough to require
7809 * caching, we might as well use index->tree_height to cache it.
7810 */
7811 if (index->tree_height < 0) /* unknown? */
7812 {
7813 if (index->pages > 1) /* avoid computing log(0) */
7814 index->tree_height = (int) (log(index->pages) / log(100.0));
7815 else
7816 index->tree_height = 0;
7817 }
7818
7819 /*
7820 * Add a CPU-cost component to represent the costs of initial descent. We
7821 * just use log(N) here not log2(N) since the branching factor isn't
7822 * necessarily two anyway. As for btree, charge once per SA scan.
7823 */
7824 if (index->tuples > 1) /* avoid computing log(0) */
7825 {
7826 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7827 costs.indexStartupCost += descentCost;
7828 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7829 }
7830
7831 /*
7832 * Likewise add a per-page charge, calculated the same as for btrees.
7833 */
7834 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
7835 costs.indexStartupCost += descentCost;
7836 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7837
7838 *indexStartupCost = costs.indexStartupCost;
7839 *indexTotalCost = costs.indexTotalCost;
7840 *indexSelectivity = costs.indexSelectivity;
7841 *indexCorrelation = costs.indexCorrelation;
7842 *indexPages = costs.numIndexPages;
7843}
7844
7845void
7846spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7847 Cost *indexStartupCost, Cost *indexTotalCost,
7848 Selectivity *indexSelectivity, double *indexCorrelation,
7849 double *indexPages)
7850{
7851 IndexOptInfo *index = path->indexinfo;
7852 GenericCosts costs = {0};
7853 Cost descentCost;
7854
7855 genericcostestimate(root, path, loop_count, &costs);
7856
7857 /*
7858 * We model index descent costs similarly to those for btree, but to do
7859 * that we first need an idea of the tree height. We somewhat arbitrarily
7860 * assume that the fanout is 100, meaning the tree height is at most
7861 * log100(index->pages).
7862 *
7863 * Although this computation isn't really expensive enough to require
7864 * caching, we might as well use index->tree_height to cache it.
7865 */
7866 if (index->tree_height < 0) /* unknown? */
7867 {
7868 if (index->pages > 1) /* avoid computing log(0) */
7869 index->tree_height = (int) (log(index->pages) / log(100.0));
7870 else
7871 index->tree_height = 0;
7872 }
7873
7874 /*
7875 * Add a CPU-cost component to represent the costs of initial descent. We
7876 * just use log(N) here not log2(N) since the branching factor isn't
7877 * necessarily two anyway. As for btree, charge once per SA scan.
7878 */
7879 if (index->tuples > 1) /* avoid computing log(0) */
7880 {
7881 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7882 costs.indexStartupCost += descentCost;
7883 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7884 }
7885
7886 /*
7887 * Likewise add a per-page charge, calculated the same as for btrees.
7888 */
7889 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
7890 costs.indexStartupCost += descentCost;
7891 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7892
7893 *indexStartupCost = costs.indexStartupCost;
7894 *indexTotalCost = costs.indexTotalCost;
7895 *indexSelectivity = costs.indexSelectivity;
7896 *indexCorrelation = costs.indexCorrelation;
7897 *indexPages = costs.numIndexPages;
7898}
7899
7900
7901/*
7902 * Support routines for gincostestimate
7903 */
7904
7905typedef struct
7906{
7907 bool attHasFullScan[INDEX_MAX_KEYS];
7908 bool attHasNormalScan[INDEX_MAX_KEYS];
7914
7915/*
7916 * Estimate the number of index terms that need to be searched for while
7917 * testing the given GIN query, and increment the counts in *counts
7918 * appropriately. If the query is unsatisfiable, return false.
7919 */
7920static bool
7922 Oid clause_op, Datum query,
7923 GinQualCounts *counts)
7924{
7925 FmgrInfo flinfo;
7926 Oid extractProcOid;
7927 Oid collation;
7928 int strategy_op;
7929 Oid lefttype,
7930 righttype;
7931 int32 nentries = 0;
7932 bool *partial_matches = NULL;
7933 Pointer *extra_data = NULL;
7934 bool *nullFlags = NULL;
7935 int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
7936 int32 i;
7937
7938 Assert(indexcol < index->nkeycolumns);
7939
7940 /*
7941 * Get the operator's strategy number and declared input data types within
7942 * the index opfamily. (We don't need the latter, but we use
7943 * get_op_opfamily_properties because it will throw error if it fails to
7944 * find a matching pg_amop entry.)
7945 */
7946 get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
7947 &strategy_op, &lefttype, &righttype);
7948
7949 /*
7950 * GIN always uses the "default" support functions, which are those with
7951 * lefttype == righttype == the opclass' opcintype (see
7952 * IndexSupportInitialize in relcache.c).
7953 */
7954 extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
7955 index->opcintype[indexcol],
7956 index->opcintype[indexcol],
7958
7959 if (!OidIsValid(extractProcOid))
7960 {
7961 /* should not happen; throw same error as index_getprocinfo */
7962 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
7963 GIN_EXTRACTQUERY_PROC, indexcol + 1,
7964 get_rel_name(index->indexoid));
7965 }
7966
7967 /*
7968 * Choose collation to pass to extractProc (should match initGinState).
7969 */
7970 if (OidIsValid(index->indexcollations[indexcol]))
7971 collation = index->indexcollations[indexcol];
7972 else
7973 collation = DEFAULT_COLLATION_OID;
7974
7975 fmgr_info(extractProcOid, &flinfo);
7976
7977 set_fn_opclass_options(&flinfo, index->opclassoptions[indexcol]);
7978
7979 FunctionCall7Coll(&flinfo,
7980 collation,
7981 query,
7982 PointerGetDatum(&nentries),
7983 UInt16GetDatum(strategy_op),
7984 PointerGetDatum(&partial_matches),
7985 PointerGetDatum(&extra_data),
7986 PointerGetDatum(&nullFlags),
7987 PointerGetDatum(&searchMode));
7988
7989 if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
7990 {
7991 /* No match is possible */
7992 return false;
7993 }
7994
7995 for (i = 0; i < nentries; i++)
7996 {
7997 /*
7998 * For partial match we haven't any information to estimate number of
7999 * matched entries in index, so, we just estimate it as 100
8000 */
8001 if (partial_matches && partial_matches[i])
8002 counts->partialEntries += 100;
8003 else
8004 counts->exactEntries++;
8005
8006 counts->searchEntries++;
8007 }
8008
8009 if (searchMode == GIN_SEARCH_MODE_DEFAULT)
8010 {
8011 counts->attHasNormalScan[indexcol] = true;
8012 }
8013 else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
8014 {
8015 /* Treat "include empty" like an exact-match item */
8016 counts->attHasNormalScan[indexcol] = true;
8017 counts->exactEntries++;
8018 counts->searchEntries++;
8019 }
8020 else
8021 {
8022 /* It's GIN_SEARCH_MODE_ALL */
8023 counts->attHasFullScan[indexcol] = true;
8024 }
8025
8026 return true;
8027}
8028
8029/*
8030 * Estimate the number of index terms that need to be searched for while
8031 * testing the given GIN index clause, and increment the counts in *counts
8032 * appropriately. If the query is unsatisfiable, return false.
8033 */
8034static bool
8037 int indexcol,
8038 OpExpr *clause,
8039 GinQualCounts *counts)
8040{
8041 Oid clause_op = clause->opno;
8042 Node *operand = (Node *) lsecond(clause->args);
8043
8044 /* aggressively reduce to a constant, and look through relabeling */
8045 operand = estimate_expression_value(root, operand);
8046
8047 if (IsA(operand, RelabelType))
8048 operand = (Node *) ((RelabelType *) operand)->arg;
8049
8050 /*
8051 * It's impossible to call extractQuery method for unknown operand. So
8052 * unless operand is a Const we can't do much; just assume there will be
8053 * one ordinary search entry from the operand at runtime.
8054 */
8055 if (!IsA(operand, Const))
8056 {
8057 counts->exactEntries++;
8058 counts->searchEntries++;
8059 return true;
8060 }
8061
8062 /* If Const is null, there can be no matches */
8063 if (((Const *) operand)->constisnull)
8064 return false;
8065
8066 /* Otherwise, apply extractQuery and get the actual term counts */
8067 return gincost_pattern(index, indexcol, clause_op,
8068 ((Const *) operand)->constvalue,
8069 counts);
8070}
8071
8072/*
8073 * Estimate the number of index terms that need to be searched for while
8074 * testing the given GIN index clause, and increment the counts in *counts
8075 * appropriately. If the query is unsatisfiable, return false.
8076 *
8077 * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
8078 * each of which involves one value from the RHS array, plus all the
8079 * non-array quals (if any). To model this, we average the counts across
8080 * the RHS elements, and add the averages to the counts in *counts (which
8081 * correspond to per-indexscan costs). We also multiply counts->arrayScans
8082 * by N, causing gincostestimate to scale up its estimates accordingly.
8083 */
8084static bool
8087 int indexcol,
8088 ScalarArrayOpExpr *clause,
8089 double numIndexEntries,
8090 GinQualCounts *counts)
8091{
8092 Oid clause_op = clause->opno;
8093 Node *rightop = (Node *) lsecond(clause->args);
8094 ArrayType *arrayval;
8095 int16 elmlen;
8096 bool elmbyval;
8097 char elmalign;
8098 int numElems;
8099 Datum *elemValues;
8100 bool *elemNulls;
8101 GinQualCounts arraycounts;
8102 int numPossible = 0;
8103 int i;
8104
8105 Assert(clause->useOr);
8106
8107 /* aggressively reduce to a constant, and look through relabeling */
8108 rightop = estimate_expression_value(root, rightop);
8109
8110 if (IsA(rightop, RelabelType))
8111 rightop = (Node *) ((RelabelType *) rightop)->arg;
8112
8113 /*
8114 * It's impossible to call extractQuery method for unknown operand. So
8115 * unless operand is a Const we can't do much; just assume there will be
8116 * one ordinary search entry from each array entry at runtime, and fall
8117 * back on a probably-bad estimate of the number of array entries.
8118 */
8119 if (!IsA(rightop, Const))
8120 {
8121 counts->exactEntries++;
8122 counts->searchEntries++;
8123 counts->arrayScans *= estimate_array_length(root, rightop);
8124 return true;
8125 }
8126
8127 /* If Const is null, there can be no matches */
8128 if (((Const *) rightop)->constisnull)
8129 return false;
8130
8131 /* Otherwise, extract the array elements and iterate over them */
8132 arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
8134 &elmlen, &elmbyval, &elmalign);
8135 deconstruct_array(arrayval,
8136 ARR_ELEMTYPE(arrayval),
8137 elmlen, elmbyval, elmalign,
8138 &elemValues, &elemNulls, &numElems);
8139
8140 memset(&arraycounts, 0, sizeof(arraycounts));
8141
8142 for (i = 0; i < numElems; i++)
8143 {
8144 GinQualCounts elemcounts;
8145
8146 /* NULL can't match anything, so ignore, as the executor will */
8147 if (elemNulls[i])
8148 continue;
8149
8150 /* Otherwise, apply extractQuery and get the actual term counts */
8151 memset(&elemcounts, 0, sizeof(elemcounts));
8152
8153 if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
8154 &elemcounts))
8155 {
8156 /* We ignore array elements that are unsatisfiable patterns */
8157 numPossible++;
8158
8159 if (elemcounts.attHasFullScan[indexcol] &&
8160 !elemcounts.attHasNormalScan[indexcol])
8161 {
8162 /*
8163 * Full index scan will be required. We treat this as if
8164 * every key in the index had been listed in the query; is
8165 * that reasonable?
8166 */
8167 elemcounts.partialEntries = 0;
8168 elemcounts.exactEntries = numIndexEntries;
8169 elemcounts.searchEntries = numIndexEntries;
8170 }
8171 arraycounts.partialEntries += elemcounts.partialEntries;
8172 arraycounts.exactEntries += elemcounts.exactEntries;
8173 arraycounts.searchEntries += elemcounts.searchEntries;
8174 }
8175 }
8176
8177 if (numPossible == 0)
8178 {
8179 /* No satisfiable patterns in the array */
8180 return false;
8181 }
8182
8183 /*
8184 * Now add the averages to the global counts. This will give us an
8185 * estimate of the average number of terms searched for in each indexscan,
8186 * including contributions from both array and non-array quals.
8187 */
8188 counts->partialEntries += arraycounts.partialEntries / numPossible;
8189 counts->exactEntries += arraycounts.exactEntries / numPossible;
8190 counts->searchEntries += arraycounts.searchEntries / numPossible;
8191
8192 counts->arrayScans *= numPossible;
8193
8194 return true;
8195}
8196
8197/*
8198 * GIN has search behavior completely different from other index types
8199 */
8200void
8201gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
8202 Cost *indexStartupCost, Cost *indexTotalCost,
8203 Selectivity *indexSelectivity, double *indexCorrelation,
8204 double *indexPages)
8205{
8206 IndexOptInfo *index = path->indexinfo;
8207 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
8208 List *selectivityQuals;
8209 double numPages = index->pages,
8210 numTuples = index->tuples;
8211 double numEntryPages,
8212 numDataPages,
8213 numPendingPages,
8214 numEntries;
8215 GinQualCounts counts;
8216 bool matchPossible;
8217 bool fullIndexScan;
8218 double partialScale;
8219 double entryPagesFetched,
8220 dataPagesFetched,
8221 dataPagesFetchedBySel;
8222 double qual_op_cost,
8223 qual_arg_cost,
8224 spc_random_page_cost,
8225 outer_scans;
8226 Cost descentCost;
8227 Relation indexRel;
8228 GinStatsData ginStats;
8229 ListCell *lc;
8230 int i;
8231
8232 /*
8233 * Obtain statistical information from the meta page, if possible. Else
8234 * set ginStats to zeroes, and we'll cope below.
8235 */
8236 if (!index->hypothetical)
8237 {
8238 /* Lock should have already been obtained in plancat.c */
8239 indexRel = index_open(index->indexoid, NoLock);
8240 ginGetStats(indexRel, &ginStats);
8241 index_close(indexRel, NoLock);
8242 }
8243 else
8244 {
8245 memset(&ginStats, 0, sizeof(ginStats));
8246 }
8247
8248 /*
8249 * Assuming we got valid (nonzero) stats at all, nPendingPages can be
8250 * trusted, but the other fields are data as of the last VACUUM. We can
8251 * scale them up to account for growth since then, but that method only
8252 * goes so far; in the worst case, the stats might be for a completely
8253 * empty index, and scaling them will produce pretty bogus numbers.
8254 * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
8255 * it's grown more than that, fall back to estimating things only from the
8256 * assumed-accurate index size. But we'll trust nPendingPages in any case
8257 * so long as it's not clearly insane, ie, more than the index size.
8258 */
8259 if (ginStats.nPendingPages < numPages)
8260 numPendingPages = ginStats.nPendingPages;
8261 else
8262 numPendingPages = 0;
8263
8264 if (numPages > 0 && ginStats.nTotalPages <= numPages &&
8265 ginStats.nTotalPages > numPages / 4 &&
8266 ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
8267 {
8268 /*
8269 * OK, the stats seem close enough to sane to be trusted. But we
8270 * still need to scale them by the ratio numPages / nTotalPages to
8271 * account for growth since the last VACUUM.
8272 */
8273 double scale = numPages / ginStats.nTotalPages;
8274
8275 numEntryPages = ceil(ginStats.nEntryPages * scale);
8276 numDataPages = ceil(ginStats.nDataPages * scale);
8277 numEntries = ceil(ginStats.nEntries * scale);
8278 /* ensure we didn't round up too much */
8279 numEntryPages = Min(numEntryPages, numPages - numPendingPages);
8280 numDataPages = Min(numDataPages,
8281 numPages - numPendingPages - numEntryPages);
8282 }
8283 else
8284 {
8285 /*
8286 * We might get here because it's a hypothetical index, or an index
8287 * created pre-9.1 and never vacuumed since upgrading (in which case
8288 * its stats would read as zeroes), or just because it's grown too
8289 * much since the last VACUUM for us to put our faith in scaling.
8290 *
8291 * Invent some plausible internal statistics based on the index page
8292 * count (and clamp that to at least 10 pages, just in case). We
8293 * estimate that 90% of the index is entry pages, and the rest is data
8294 * pages. Estimate 100 entries per entry page; this is rather bogus
8295 * since it'll depend on the size of the keys, but it's more robust
8296 * than trying to predict the number of entries per heap tuple.
8297 */
8298 numPages = Max(numPages, 10);
8299 numEntryPages = floor((numPages - numPendingPages) * 0.90);
8300 numDataPages = numPages - numPendingPages - numEntryPages;
8301 numEntries = floor(numEntryPages * 100);
8302 }
8303
8304 /* In an empty index, numEntries could be zero. Avoid divide-by-zero */
8305 if (numEntries < 1)
8306 numEntries = 1;
8307
8308 /*
8309 * If the index is partial, AND the index predicate with the index-bound
8310 * quals to produce a more accurate idea of the number of rows covered by
8311 * the bound conditions.
8312 */
8313 selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
8314
8315 /* Estimate the fraction of main-table tuples that will be visited */
8316 *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
8317 index->rel->relid,
8318 JOIN_INNER,
8319 NULL);
8320
8321 /* fetch estimated page cost for tablespace containing index */
8322 get_tablespace_page_costs(index->reltablespace,
8323 &spc_random_page_cost,
8324 NULL);
8325
8326 /*
8327 * Generic assumption about index correlation: there isn't any.
8328 */
8329 *indexCorrelation = 0.0;
8330
8331 /*
8332 * Examine quals to estimate number of search entries & partial matches
8333 */
8334 memset(&counts, 0, sizeof(counts));
8335 counts.arrayScans = 1;
8336 matchPossible = true;
8337
8338 foreach(lc, path->indexclauses)
8339 {
8340 IndexClause *iclause = lfirst_node(IndexClause, lc);
8341 ListCell *lc2;
8342
8343 foreach(lc2, iclause->indexquals)
8344 {
8345 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
8346 Expr *clause = rinfo->clause;
8347
8348 if (IsA(clause, OpExpr))
8349 {
8350 matchPossible = gincost_opexpr(root,
8351 index,
8352 iclause->indexcol,
8353 (OpExpr *) clause,
8354 &counts);
8355 if (!matchPossible)
8356 break;
8357 }
8358 else if (IsA(clause, ScalarArrayOpExpr))
8359 {
8360 matchPossible = gincost_scalararrayopexpr(root,
8361 index,
8362 iclause->indexcol,
8363 (ScalarArrayOpExpr *) clause,
8364 numEntries,
8365 &counts);
8366 if (!matchPossible)
8367 break;
8368 }
8369 else
8370 {
8371 /* shouldn't be anything else for a GIN index */
8372 elog(ERROR, "unsupported GIN indexqual type: %d",
8373 (int) nodeTag(clause));
8374 }
8375 }
8376 }
8377
8378 /* Fall out if there were any provably-unsatisfiable quals */
8379 if (!matchPossible)
8380 {
8381 *indexStartupCost = 0;
8382 *indexTotalCost = 0;
8383 *indexSelectivity = 0;
8384 return;
8385 }
8386
8387 /*
8388 * If attribute has a full scan and at the same time doesn't have normal
8389 * scan, then we'll have to scan all non-null entries of that attribute.
8390 * Currently, we don't have per-attribute statistics for GIN. Thus, we
8391 * must assume the whole GIN index has to be scanned in this case.
8392 */
8393 fullIndexScan = false;
8394 for (i = 0; i < index->nkeycolumns; i++)
8395 {
8396 if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i])
8397 {
8398 fullIndexScan = true;
8399 break;
8400 }
8401 }
8402
8403 if (fullIndexScan || indexQuals == NIL)
8404 {
8405 /*
8406 * Full index scan will be required. We treat this as if every key in
8407 * the index had been listed in the query; is that reasonable?
8408 */
8409 counts.partialEntries = 0;
8410 counts.exactEntries = numEntries;
8411 counts.searchEntries = numEntries;
8412 }
8413
8414 /* Will we have more than one iteration of a nestloop scan? */
8415 outer_scans = loop_count;
8416
8417 /*
8418 * Compute cost to begin scan, first of all, pay attention to pending
8419 * list.
8420 */
8421 entryPagesFetched = numPendingPages;
8422
8423 /*
8424 * Estimate number of entry pages read. We need to do
8425 * counts.searchEntries searches. Use a power function as it should be,
8426 * but tuples on leaf pages usually is much greater. Here we include all
8427 * searches in entry tree, including search of first entry in partial
8428 * match algorithm
8429 */
8430 entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
8431
8432 /*
8433 * Add an estimate of entry pages read by partial match algorithm. It's a
8434 * scan over leaf pages in entry tree. We haven't any useful stats here,
8435 * so estimate it as proportion. Because counts.partialEntries is really
8436 * pretty bogus (see code above), it's possible that it is more than
8437 * numEntries; clamp the proportion to ensure sanity.
8438 */
8439 partialScale = counts.partialEntries / numEntries;
8440 partialScale = Min(partialScale, 1.0);
8441
8442 entryPagesFetched += ceil(numEntryPages * partialScale);
8443
8444 /*
8445 * Partial match algorithm reads all data pages before doing actual scan,
8446 * so it's a startup cost. Again, we haven't any useful stats here, so
8447 * estimate it as proportion.
8448 */
8449 dataPagesFetched = ceil(numDataPages * partialScale);
8450
8451 *indexStartupCost = 0;
8452 *indexTotalCost = 0;
8453
8454 /*
8455 * Add a CPU-cost component to represent the costs of initial entry btree
8456 * descent. We don't charge any I/O cost for touching upper btree levels,
8457 * since they tend to stay in cache, but we still have to do about log2(N)
8458 * comparisons to descend a btree of N leaf tuples. We charge one
8459 * cpu_operator_cost per comparison.
8460 *
8461 * If there are ScalarArrayOpExprs, charge this once per SA scan. The
8462 * ones after the first one are not startup cost so far as the overall
8463 * plan is concerned, so add them only to "total" cost.
8464 */
8465 if (numEntries > 1) /* avoid computing log(0) */
8466 {
8467 descentCost = ceil(log(numEntries) / log(2.0)) * cpu_operator_cost;
8468 *indexStartupCost += descentCost * counts.searchEntries;
8469 *indexTotalCost += counts.arrayScans * descentCost * counts.searchEntries;
8470 }
8471
8472 /*
8473 * Add a cpu cost per entry-page fetched. This is not amortized over a
8474 * loop.
8475 */
8476 *indexStartupCost += entryPagesFetched * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8477 *indexTotalCost += entryPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8478
8479 /*
8480 * Add a cpu cost per data-page fetched. This is also not amortized over a
8481 * loop. Since those are the data pages from the partial match algorithm,
8482 * charge them as startup cost.
8483 */
8484 *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * dataPagesFetched;
8485
8486 /*
8487 * Since we add the startup cost to the total cost later on, remove the
8488 * initial arrayscan from the total.
8489 */
8490 *indexTotalCost += dataPagesFetched * (counts.arrayScans - 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8491
8492 /*
8493 * Calculate cache effects if more than one scan due to nestloops or array
8494 * quals. The result is pro-rated per nestloop scan, but the array qual
8495 * factor shouldn't be pro-rated (compare genericcostestimate).
8496 */
8497 if (outer_scans > 1 || counts.arrayScans > 1)
8498 {
8499 entryPagesFetched *= outer_scans * counts.arrayScans;
8500 entryPagesFetched = index_pages_fetched(entryPagesFetched,
8501 (BlockNumber) numEntryPages,
8502 numEntryPages, root);
8503 entryPagesFetched /= outer_scans;
8504 dataPagesFetched *= outer_scans * counts.arrayScans;
8505 dataPagesFetched = index_pages_fetched(dataPagesFetched,
8506 (BlockNumber) numDataPages,
8507 numDataPages, root);
8508 dataPagesFetched /= outer_scans;
8509 }
8510
8511 /*
8512 * Here we use random page cost because logically-close pages could be far
8513 * apart on disk.
8514 */
8515 *indexStartupCost += (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
8516
8517 /*
8518 * Now compute the number of data pages fetched during the scan.
8519 *
8520 * We assume every entry to have the same number of items, and that there
8521 * is no overlap between them. (XXX: tsvector and array opclasses collect
8522 * statistics on the frequency of individual keys; it would be nice to use
8523 * those here.)
8524 */
8525 dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
8526
8527 /*
8528 * If there is a lot of overlap among the entries, in particular if one of
8529 * the entries is very frequent, the above calculation can grossly
8530 * under-estimate. As a simple cross-check, calculate a lower bound based
8531 * on the overall selectivity of the quals. At a minimum, we must read
8532 * one item pointer for each matching entry.
8533 *
8534 * The width of each item pointer varies, based on the level of
8535 * compression. We don't have statistics on that, but an average of
8536 * around 3 bytes per item is fairly typical.
8537 */
8538 dataPagesFetchedBySel = ceil(*indexSelectivity *
8539 (numTuples / (BLCKSZ / 3)));
8540 if (dataPagesFetchedBySel > dataPagesFetched)
8541 dataPagesFetched = dataPagesFetchedBySel;
8542
8543 /* Add one page cpu-cost to the startup cost */
8544 *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * counts.searchEntries;
8545
8546 /*
8547 * Add once again a CPU-cost for those data pages, before amortizing for
8548 * cache.
8549 */
8550 *indexTotalCost += dataPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8551
8552 /* Account for cache effects, the same as above */
8553 if (outer_scans > 1 || counts.arrayScans > 1)
8554 {
8555 dataPagesFetched *= outer_scans * counts.arrayScans;
8556 dataPagesFetched = index_pages_fetched(dataPagesFetched,
8557 (BlockNumber) numDataPages,
8558 numDataPages, root);
8559 dataPagesFetched /= outer_scans;
8560 }
8561
8562 /* And apply random_page_cost as the cost per page */
8563 *indexTotalCost += *indexStartupCost +
8564 dataPagesFetched * spc_random_page_cost;
8565
8566 /*
8567 * Add on index qual eval costs, much as in genericcostestimate. We charge
8568 * cpu but we can disregard indexorderbys, since GIN doesn't support
8569 * those.
8570 */
8571 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
8572 qual_op_cost = cpu_operator_cost * list_length(indexQuals);
8573
8574 *indexStartupCost += qual_arg_cost;
8575 *indexTotalCost += qual_arg_cost;
8576
8577 /*
8578 * Add a cpu cost per search entry, corresponding to the actual visited
8579 * entries.
8580 */
8581 *indexTotalCost += (counts.searchEntries * counts.arrayScans) * (qual_op_cost);
8582 /* Now add a cpu cost per tuple in the posting lists / trees */
8583 *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost);
8584 *indexPages = dataPagesFetched;
8585}
8586
8587/*
8588 * BRIN has search behavior completely different from other index types
8589 */
8590void
8591brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
8592 Cost *indexStartupCost, Cost *indexTotalCost,
8593 Selectivity *indexSelectivity, double *indexCorrelation,
8594 double *indexPages)
8595{
8596 IndexOptInfo *index = path->indexinfo;
8597 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
8598 double numPages = index->pages;
8599 RelOptInfo *baserel = index->rel;
8600 RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
8601 Cost spc_seq_page_cost;
8602 Cost spc_random_page_cost;
8603 double qual_arg_cost;
8604 double qualSelectivity;
8605 BrinStatsData statsData;
8606 double indexRanges;
8607 double minimalRanges;
8608 double estimatedRanges;
8609 double selec;
8610 Relation indexRel;
8611 ListCell *l;
8612 VariableStatData vardata;
8613
8614 Assert(rte->rtekind == RTE_RELATION);
8615
8616 /* fetch estimated page cost for the tablespace containing the index */
8617 get_tablespace_page_costs(index->reltablespace,
8618 &spc_random_page_cost,
8619 &spc_seq_page_cost);
8620
8621 /*
8622 * Obtain some data from the index itself, if possible. Otherwise invent
8623 * some plausible internal statistics based on the relation page count.
8624 */
8625 if (!index->hypothetical)
8626 {
8627 /*
8628 * A lock should have already been obtained on the index in plancat.c.
8629 */
8630 indexRel = index_open(index->indexoid, NoLock);
8631 brinGetStats(indexRel, &statsData);
8632 index_close(indexRel, NoLock);
8633
8634 /* work out the actual number of ranges in the index */
8635 indexRanges = Max(ceil((double) baserel->pages /
8636 statsData.pagesPerRange), 1.0);
8637 }
8638 else
8639 {
8640 /*
8641 * Assume default number of pages per range, and estimate the number
8642 * of ranges based on that.
8643 */
8644 indexRanges = Max(ceil((double) baserel->pages /
8646
8648 statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
8649 }
8650
8651 /*
8652 * Compute index correlation
8653 *
8654 * Because we can use all index quals equally when scanning, we can use
8655 * the largest correlation (in absolute value) among columns used by the
8656 * query. Start at zero, the worst possible case. If we cannot find any
8657 * correlation statistics, we will keep it as 0.
8658 */
8659 *indexCorrelation = 0;
8660
8661 foreach(l, path->indexclauses)
8662 {
8663 IndexClause *iclause = lfirst_node(IndexClause, l);
8664 AttrNumber attnum = index->indexkeys[iclause->indexcol];
8665
8666 /* attempt to lookup stats in relation for this index column */
8667 if (attnum != 0)
8668 {
8669 /* Simple variable -- look to stats for the underlying table */
8671 (*get_relation_stats_hook) (root, rte, attnum, &vardata))
8672 {
8673 /*
8674 * The hook took control of acquiring a stats tuple. If it
8675 * did supply a tuple, it'd better have supplied a freefunc.
8676 */
8677 if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
8678 elog(ERROR,
8679 "no function provided to release variable stats with");
8680 }
8681 else
8682 {
8683 vardata.statsTuple =
8684 SearchSysCache3(STATRELATTINH,
8685 ObjectIdGetDatum(rte->relid),
8687 BoolGetDatum(false));
8688 vardata.freefunc = ReleaseSysCache;
8689 }
8690 }
8691 else
8692 {
8693 /*
8694 * Looks like we've found an expression column in the index. Let's
8695 * see if there's any stats for it.
8696 */
8697
8698 /* get the attnum from the 0-based index. */
8699 attnum = iclause->indexcol + 1;
8700
8702 (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
8703 {
8704 /*
8705 * The hook took control of acquiring a stats tuple. If it
8706 * did supply a tuple, it'd better have supplied a freefunc.
8707 */
8708 if (HeapTupleIsValid(vardata.statsTuple) &&
8709 !vardata.freefunc)
8710 elog(ERROR, "no function provided to release variable stats with");
8711 }
8712 else
8713 {
8714 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
8715 ObjectIdGetDatum(index->indexoid),
8717 BoolGetDatum(false));
8718 vardata.freefunc = ReleaseSysCache;
8719 }
8720 }
8721
8722 if (HeapTupleIsValid(vardata.statsTuple))
8723 {
8724 AttStatsSlot sslot;
8725
8726 if (get_attstatsslot(&sslot, vardata.statsTuple,
8727 STATISTIC_KIND_CORRELATION, InvalidOid,
8729 {
8730 double varCorrelation = 0.0;
8731
8732 if (sslot.nnumbers > 0)
8733 varCorrelation = fabs(sslot.numbers[0]);
8734
8735 if (varCorrelation > *indexCorrelation)
8736 *indexCorrelation = varCorrelation;
8737
8738 free_attstatsslot(&sslot);
8739 }
8740 }
8741
8742 ReleaseVariableStats(vardata);
8743 }
8744
8745 qualSelectivity = clauselist_selectivity(root, indexQuals,
8746 baserel->relid,
8747 JOIN_INNER, NULL);
8748
8749 /*
8750 * Now calculate the minimum possible ranges we could match with if all of
8751 * the rows were in the perfect order in the table's heap.
8752 */
8753 minimalRanges = ceil(indexRanges * qualSelectivity);
8754
8755 /*
8756 * Now estimate the number of ranges that we'll touch by using the
8757 * indexCorrelation from the stats. Careful not to divide by zero (note
8758 * we're using the absolute value of the correlation).
8759 */
8760 if (*indexCorrelation < 1.0e-10)
8761 estimatedRanges = indexRanges;
8762 else
8763 estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
8764
8765 /* we expect to visit this portion of the table */
8766 selec = estimatedRanges / indexRanges;
8767
8768 CLAMP_PROBABILITY(selec);
8769
8770 *indexSelectivity = selec;
8771
8772 /*
8773 * Compute the index qual costs, much as in genericcostestimate, to add to
8774 * the index costs. We can disregard indexorderbys, since BRIN doesn't
8775 * support those.
8776 */
8777 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
8778
8779 /*
8780 * Compute the startup cost as the cost to read the whole revmap
8781 * sequentially, including the cost to execute the index quals.
8782 */
8783 *indexStartupCost =
8784 spc_seq_page_cost * statsData.revmapNumPages * loop_count;
8785 *indexStartupCost += qual_arg_cost;
8786
8787 /*
8788 * To read a BRIN index there might be a bit of back and forth over
8789 * regular pages, as revmap might point to them out of sequential order;
8790 * calculate the total cost as reading the whole index in random order.
8791 */
8792 *indexTotalCost = *indexStartupCost +
8793 spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
8794
8795 /*
8796 * Charge a small amount per range tuple which we expect to match to. This
8797 * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
8798 * will set a bit for each page in the range when we find a matching
8799 * range, so we must multiply the charge by the number of pages in the
8800 * range.
8801 */
8802 *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
8803 statsData.pagesPerRange;
8804
8805 *indexPages = index->pages;
8806}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
@ ACLCHECK_OK
Definition: acl.h:183
@ ACLMASK_ALL
Definition: acl.h:176
AclResult pg_attribute_aclcheck_all(Oid table_oid, Oid roleid, AclMode mode, AclMaskHow how)
Definition: aclchk.c:3908
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition: aclchk.c:3866
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4037
StrategyNumber IndexAmTranslateCompareType(CompareType cmptype, Oid amoid, Oid opfamily, bool missing_ok)
Definition: amapi.c:161
CompareType IndexAmTranslateStrategy(StrategyNumber strategy, Oid amoid, Oid opfamily, bool missing_ok)
Definition: amapi.c:131
#define ARR_NDIM(a)
Definition: array.h:290
#define DatumGetArrayTypeP(X)
Definition: array.h:261
#define ARR_ELEMTYPE(a)
Definition: array.h:292
#define ARR_DIMS(a)
Definition: array.h:294
Selectivity scalararraysel_containment(PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3631
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
int16 AttrNumber
Definition: attnum.h:21
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
#define InvalidAttrNumber
Definition: attnum.h:23
Datum numeric_float8_no_overflow(PG_FUNCTION_ARGS)
Definition: numeric.c:4590
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:346
Bitmapset * bms_make_singleton(int x)
Definition: bitmapset.c:216
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1306
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:412
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:815
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:715
#define bms_is_empty(a)
Definition: bitmapset.h:118
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static Datum values[MAXATTR]
Definition: bootstrap.c:153
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1648
#define BRIN_DEFAULT_PAGES_PER_RANGE
Definition: brin.h:39
#define REVMAP_PAGE_MAXITEMS
Definition: brin_page.h:93
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5338
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:752
#define Min(x, y)
Definition: c.h:1004
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:223
#define Max(x, y)
Definition: c.h:998
char * Pointer
Definition: c.h:530
double float8
Definition: c.h:636
int16_t int16
Definition: c.h:534
regproc RegProcedure
Definition: c.h:656
int32_t int32
Definition: c.h:535
unsigned int Index
Definition: c.h:620
#define MemSet(start, val, len)
Definition: c.h:1020
#define OidIsValid(objectId)
Definition: c.h:775
size_t Size
Definition: c.h:611
int NumRelids(PlannerInfo *root, Node *clause)
Definition: clauses.c:2137
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2403
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:542
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:293
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:100
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:667
CompareType
Definition: cmptype.h:32
@ COMPARE_LE
Definition: cmptype.h:35
@ COMPARE_GT
Definition: cmptype.h:38
@ COMPARE_EQ
Definition: cmptype.h:36
@ COMPARE_GE
Definition: cmptype.h:37
@ COMPARE_LT
Definition: cmptype.h:34
Oid collid
double cpu_operator_cost
Definition: costsize.c:134
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:908
void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
Definition: costsize.c:4817
double clamp_row_est(double nrows)
Definition: costsize.c:213
double cpu_index_tuple_cost
Definition: costsize.c:133
#define MONTHS_PER_YEAR
Definition: timestamp.h:108
#define USECS_PER_DAY
Definition: timestamp.h:131
#define DAYS_PER_YEAR
Definition: timestamp.h:107
double date2timestamp_no_overflow(DateADT dateVal)
Definition: date.c:785
static TimeTzADT * DatumGetTimeTzADTP(Datum X)
Definition: date.h:66
static DateADT DatumGetDateADT(Datum X)
Definition: date.h:54
static TimeADT DatumGetTimeADT(Datum X)
Definition: date.h:60
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1161
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2, Oid opfamily)
Definition: equivclass.c:2648
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1443
HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx)
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1196
void set_fn_opclass_options(FmgrInfo *flinfo, bytea *options)
Definition: fmgr.c:2034
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
Datum FunctionCall5Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5)
Definition: fmgr.c:1223
Datum DirectFunctionCall5Coll(PGFunction func, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5)
Definition: fmgr.c:886
Datum FunctionCall7Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5, Datum arg6, Datum arg7)
Definition: fmgr.c:1284
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:682
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
#define GIN_EXTRACTQUERY_PROC
Definition: gin.h:26
#define GIN_SEARCH_MODE_DEFAULT
Definition: gin.h:36
#define GIN_SEARCH_MODE_INCLUDE_EMPTY
Definition: gin.h:37
void ginGetStats(Relation index, GinStatsData *stats)
Definition: ginutil.c:628
Assert(PointerIsAligned(start, uint64))
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, int nkeys, int norderbys)
Definition: indexam.c:256
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177
ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
Definition: indexam.c:631
bool index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
Definition: indexam.c:689
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:392
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:366
void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:456
bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index)
Definition: indxpath.c:4369
long val
Definition: informix.c:689
static struct @166 value
int j
Definition: isn.c:78
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
static OffsetNumber ItemPointerGetOffsetNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:114
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
ItemPointerData * ItemPointer
Definition: itemptr.h:49
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
List * list_copy(const List *oldlist)
Definition: list.c:1573
bool list_member_ptr(const List *list, const void *datum)
Definition: list.c:682
void list_free(List *list)
Definition: list.c:1546
bool list_member_int(const List *list, int datum)
Definition: list.c:702
void list_free_deep(List *list)
Definition: list.c:1560
#define NoLock
Definition: lockdefs.h:34
char * get_rel_name(Oid relid)
Definition: lsyscache.c:2095
void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:138
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1724
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3511
bool comparison_ops_are_compatible(Oid opno1, Oid opno2)
Definition: lsyscache.c:836
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2438
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:889
RegProcedure get_oprjoin(Oid opno)
Definition: lsyscache.c:1748
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2418
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1452
int get_op_opfamily_strategy(Oid opno, Oid opfamily)
Definition: lsyscache.c:85
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:168
bool get_func_leakproof(Oid funcid)
Definition: lsyscache.c:2004
char * get_func_name(Oid funcid)
Definition: lsyscache.c:1775
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2999
Oid get_opfamily_method(Oid opfid)
Definition: lsyscache.c:1403
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3401
Oid get_negator(Oid opno)
Definition: lsyscache.c:1700
Oid get_commutator(Oid opno)
Definition: lsyscache.c:1676
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:44
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:43
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:350
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:469
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
Oid GetUserId(void)
Definition: miscinit.c:469
MVNDistinct * statext_ndistinct_load(Oid mvoid, bool inh)
Definition: mvdistinct.c:148
double convert_network_to_scalar(Datum value, Oid typid, bool *failure)
Definition: network.c:1435
Size hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
Definition: nodeAgg.c:1700
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:301
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:821
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:95
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:83
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
double Cost
Definition: nodes.h:261
#define nodeTag(nodeptr)
Definition: nodes.h:139
double Selectivity
Definition: nodes.h:260
#define makeNode(_type_)
Definition: nodes.h:161
JoinType
Definition: nodes.h:298
@ JOIN_SEMI
Definition: nodes.h:317
@ JOIN_FULL
Definition: nodes.h:305
@ JOIN_INNER
Definition: nodes.h:303
@ JOIN_LEFT
Definition: nodes.h:304
@ JOIN_ANTI
Definition: nodes.h:318
uint16 OffsetNumber
Definition: off.h:24
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:184
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:188
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:186
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList)
RTEPermissionInfo * getRTEPermissionInfo(List *rteperminfos, RangeTblEntry *rte)
TargetEntry * get_tle_by_resno(List *tlist, AttrNumber resno)
@ RTE_CTE
Definition: parsenodes.h:1047
@ RTE_VALUES
Definition: parsenodes.h:1046
@ RTE_SUBQUERY
Definition: parsenodes.h:1042
@ RTE_RELATION
Definition: parsenodes.h:1041
#define ACL_SELECT
Definition: parsenodes.h:77
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:876
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:591
int16 attnum
Definition: pg_attribute.h:74
void * arg
#define INDEX_MAX_KEYS
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:518
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
#define list_make1(x1)
Definition: pg_list.h:212
#define for_each_from(cell, lst, N)
Definition: pg_list.h:414
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define linitial_oid(l)
Definition: pg_list.h:180
#define list_make2(x1, x2)
Definition: pg_list.h:214
static int list_nth_int(const List *list, int n)
Definition: pg_list.h:310
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1166
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1320
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
static int scale
Definition: pgbench.c:182
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:2073
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2325
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:2112
static bool DatumGetBool(Datum X)
Definition: postgres.h:100
static int64 DatumGetInt64(Datum X)
Definition: postgres.h:393
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:332
static float4 DatumGetFloat4(Datum X)
Definition: postgres.h:441
static Oid DatumGetObjectId(Datum X)
Definition: postgres.h:252
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:182
static Datum UInt16GetDatum(uint16 X)
Definition: postgres.h:202
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:475
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:322
static char DatumGetChar(Datum X)
Definition: postgres.h:122
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:222
static int16 DatumGetInt16(Datum X)
Definition: postgres.h:172
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:152
char * s1
char * s2
BoolTestType
Definition: primnodes.h:1986
@ IS_NOT_TRUE
Definition: primnodes.h:1987
@ IS_NOT_FALSE
Definition: primnodes.h:1987
@ IS_NOT_UNKNOWN
Definition: primnodes.h:1987
@ IS_TRUE
Definition: primnodes.h:1987
@ IS_UNKNOWN
Definition: primnodes.h:1987
@ IS_FALSE
Definition: primnodes.h:1987
NullTestType
Definition: primnodes.h:1962
@ IS_NULL
Definition: primnodes.h:1963
@ IS_NOT_NULL
Definition: primnodes.h:1963
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4069
tree ctl root
Definition: radixtree.h:1857
#define RelationGetRelationName(relation)
Definition: rel.h:548
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:416
RelOptInfo * find_base_rel_noerr(PlannerInfo *root, int relid)
Definition: relnode.c:438
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:529
Node * remove_nulling_relids(Node *node, const Bitmapset *removable_relids, const Bitmapset *except_relids)
void ScanKeyEntryInitialize(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, RegProcedure procedure, Datum argument)
Definition: scankey.c:32
ScanDirection
Definition: sdir.h:25
@ BackwardScanDirection
Definition: sdir.h:26
@ ForwardScanDirection
Definition: sdir.h:28
static bool get_actual_variable_endpoint(Relation heapRel, Relation indexRel, ScanDirection indexscandir, ScanKey scankeys, int16 typLen, bool typByVal, TupleTableSlot *tableslot, MemoryContext outercontext, Datum *endpointDatum)
Definition: selfuncs.c:6714
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:5180
Datum neqsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:561
static RelOptInfo * find_join_input_rel(PlannerInfo *root, Relids relids)
Definition: selfuncs.c:6879
void mergejoinscansel(PlannerInfo *root, Node *clause, Oid opfamily, CompareType cmptype, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
Definition: selfuncs.c:2970
bool all_rows_selectable(PlannerInfo *root, Index varno, Bitmapset *varattnos)
Definition: selfuncs.c:5911
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
Definition: selfuncs.c:6342
void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7286
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:6911
static void convert_string_to_scalar(char *value, double *scaledvalue, char *lobound, double *scaledlobound, char *hibound, double *scaledhibound)
Definition: selfuncs.c:4804
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:299
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:7218
double generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation, List *args, int varRelid, double default_selectivity)
Definition: selfuncs.c:918
#define VISITED_PAGES_LIMIT
void spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7846
Datum scalargtsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:1493
#define DEFAULT_PAGE_CPU_MULTIPLIER
Definition: selfuncs.c:144
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:4235
Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1555
Datum eqjoinsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:2287
double estimate_array_length(PlannerInfo *root, Node *arrayexpr)
Definition: selfuncs.c:2154
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:736
Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1713
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:5635
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:3326
Datum matchingsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:3287
static double eqjoinsel_inner(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2)
Definition: selfuncs.c:2452
Datum eqsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:231
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:5309
Datum scalargtjoinsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:2933
static double convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
Definition: selfuncs.c:4884
static Datum scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
Definition: selfuncs.c:1404
static double eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2, RelOptInfo *inner_rel)
Definition: selfuncs.c:2649
void gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:8201
static double convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
Definition: selfuncs.c:5114
static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
Definition: selfuncs.c:4741
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1798
double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
Definition: selfuncs.c:3456
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
Definition: selfuncs.c:4593
double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
Definition: selfuncs.c:1045
void genericcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
Definition: selfuncs.c:6995
List * estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner, List *hashclauses, Selectivity *innerbucketsize)
Definition: selfuncs.c:3808
Datum scalarltjoinsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:2915
static bool gincost_pattern(IndexOptInfo *index, int indexcol, Oid clause_op, Datum query, GinQualCounts *counts)
Definition: selfuncs.c:7921
void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:8591
void gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7791
Datum scalargejoinsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:2942
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:148
Datum matchingjoinsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:3305
static bool gincost_scalararrayopexpr(PlannerInfo *root, IndexOptInfo *index, int indexcol, ScalarArrayOpExpr *clause, double numIndexEntries, GinQualCounts *counts)
Definition: selfuncs.c:8085
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:827
Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
Definition: selfuncs.c:1516
static void examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, int indexcol, VariableStatData *vardata)
Definition: selfuncs.c:6106
Datum scalarlesel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:1484
Datum scalargesel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:1502
static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, Oid collation, VariableStatData *vardata, Datum constval, Oid consttype)
Definition: selfuncs.c:584
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi)
Definition: selfuncs.c:5071
Selectivity scalararraysel(PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1831
Datum scalarltsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:1475
static double btcost_correlation(IndexOptInfo *index, VariableStatData *vardata)
Definition: selfuncs.c:7249
double var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)
Definition: selfuncs.c:470
static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
Definition: selfuncs.c:6532
Datum scalarlejoinsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:2924
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:6209
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:6180
void hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7749
Datum neqjoinsel(PG_FUNCTION_ARGS)
Definition: selfuncs.c:2837
double estimate_hashagg_tablesize(PlannerInfo *root, Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
Definition: selfuncs.c:4194
void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
Definition: selfuncs.c:4075
static void convert_bytea_to_scalar(Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound)
Definition: selfuncs.c:5023
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:6941
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:147
Selectivity rowcomparesel(PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:2220
static bool gincost_opexpr(PlannerInfo *root, IndexOptInfo *index, int indexcol, OpExpr *clause, GinQualCounts *counts)
Definition: selfuncs.c:8035
static void ReleaseDummy(HeapTuple tuple)
Definition: selfuncs.c:5268
static char * convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
Definition: selfuncs.c:4935
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate)
Definition: selfuncs.c:240
static void get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc, Oid collation, int16 typLen, bool typByVal, Datum *min, Datum *max, bool *p_have_data)
Definition: selfuncs.c:6469
void get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
Definition: selfuncs.c:5240
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:56
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
bool(* get_relation_stats_hook_type)(PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)
Definition: selfuncs.h:140
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:55
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
bool(* get_index_stats_hook_type)(PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)
Definition: selfuncs.h:145
#define DEFAULT_EQ_SEL
Definition: selfuncs.h:34
#define DEFAULT_MATCHING_SEL
Definition: selfuncs.h:49
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:52
#define SELFLAG_USED_DEFAULT
Definition: selfuncs.h:76
#define SK_SEARCHNOTNULL
Definition: skey.h:122
#define SK_ISNULL
Definition: skey.h:115
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:50
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
uint16 StrategyNumber
Definition: stratnum.h:22
#define InvalidStrategy
Definition: stratnum.h:24
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Size transitionSpace
Definition: pathnodes.h:62
Index parent_relid
Definition: pathnodes.h:3104
int num_child_cols
Definition: pathnodes.h:3140
Datum * values
Definition: lsyscache.h:54
float4 * numbers
Definition: lsyscache.h:57
int nnumbers
Definition: lsyscache.h:58
BlockNumber revmapNumPages
Definition: brin.h:35
BlockNumber pagesPerRange
Definition: brin.h:34
uint32 flags
Definition: selfuncs.h:80
Definition: fmgr.h:57
Oid fn_oid
Definition: fmgr.h:59
Selectivity indexSelectivity
Definition: selfuncs.h:129
Cost indexStartupCost
Definition: selfuncs.h:127
double indexCorrelation
Definition: selfuncs.h:130
double spc_random_page_cost
Definition: selfuncs.h:135
double num_sa_scans
Definition: selfuncs.h:136
Cost indexTotalCost
Definition: selfuncs.h:128
double numIndexPages
Definition: selfuncs.h:133
double numIndexTuples
Definition: selfuncs.h:134
bool attHasNormalScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:7908
double exactEntries
Definition: selfuncs.c:7910
double arrayScans
Definition: selfuncs.c:7912
double partialEntries
Definition: selfuncs.c:7909
bool attHasFullScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:7907
double searchEntries
Definition: selfuncs.c:7911
BlockNumber nDataPages
Definition: gin.h:60
BlockNumber nPendingPages
Definition: gin.h:57
BlockNumber nEntryPages
Definition: gin.h:59
int64 nEntries
Definition: gin.h:61
BlockNumber nTotalPages
Definition: gin.h:58
RelOptInfo * rel
Definition: selfuncs.c:3320
double ndistinct
Definition: selfuncs.c:3321
bool isdefault
Definition: selfuncs.c:3322
Node * var
Definition: selfuncs.c:3319
AttrNumber indexcol
Definition: pathnodes.h:1920
List * indexquals
Definition: pathnodes.h:1918
List * indexclauses
Definition: pathnodes.h:1870
List * indexorderbys
Definition: pathnodes.h:1871
IndexOptInfo * indexinfo
Definition: pathnodes.h:1869
IndexTuple xs_itup
Definition: relscan.h:167
struct TupleDescData * xs_itupdesc
Definition: relscan.h:168
Definition: pg_list.h:54
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39
Definition: nodes.h:135
NullTestType nulltesttype
Definition: primnodes.h:1970
Oid opno
Definition: primnodes.h:837
List * args
Definition: primnodes.h:855
List * cte_plan_ids
Definition: pathnodes.h:323
Query * parse
Definition: pathnodes.h:220
Cost per_tuple
Definition: pathnodes.h:48
Cost startup
Definition: pathnodes.h:47
List * returningList
Definition: parsenodes.h:214
Node * setOperations
Definition: parsenodes.h:235
List * cteList
Definition: parsenodes.h:173
List * groupClause
Definition: parsenodes.h:216
List * targetList
Definition: parsenodes.h:198
List * groupingSets
Definition: parsenodes.h:219
List * distinctClause
Definition: parsenodes.h:225
char * ctename
Definition: parsenodes.h:1225
Index ctelevelsup
Definition: parsenodes.h:1227
RTEKind rtekind
Definition: parsenodes.h:1076
Relids relids
Definition: pathnodes.h:908
Index relid
Definition: pathnodes.h:954
List * statlist
Definition: pathnodes.h:978
Cardinality tuples
Definition: pathnodes.h:981
BlockNumber pages
Definition: pathnodes.h:980
List * indexlist
Definition: pathnodes.h:976
Oid userid
Definition: pathnodes.h:998
PlannerInfo * subroot
Definition: pathnodes.h:985
Cardinality rows
Definition: pathnodes.h:914
RTEKind rtekind
Definition: pathnodes.h:958
Expr * clause
Definition: pathnodes.h:2704
Relids syn_lefthand
Definition: pathnodes.h:3031
Relids min_righthand
Definition: pathnodes.h:3030
JoinType jointype
Definition: pathnodes.h:3033
Relids syn_righthand
Definition: pathnodes.h:3032
Bitmapset * keys
Definition: pathnodes.h:1342
Expr * expr
Definition: primnodes.h:2225
Definition: date.h:28
TimeADT time
Definition: date.h:29
int32 zone
Definition: date.h:30
Definition: primnodes.h:262
AttrNumber varattno
Definition: primnodes.h:274
int varno
Definition: primnodes.h:269
Index varlevelsup
Definition: primnodes.h:294
HeapTuple statsTuple
Definition: selfuncs.h:89
int32 atttypmod
Definition: selfuncs.h:94
RelOptInfo * rel
Definition: selfuncs.h:88
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:91
Definition: type.h:96
Definition: c.h:747
Definition: c.h:693
#define FirstLowInvalidHeapAttributeNumber
Definition: sysattr.h:27
#define TableOidAttributeNumber
Definition: sysattr.h:26
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:240
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:92
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:458
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define TYPECACHE_EQ_OPR
Definition: typcache.h:138
static Interval * DatumGetIntervalP(Datum X)
Definition: timestamp.h:40
static Timestamp DatumGetTimestamp(Datum X)
Definition: timestamp.h:28
static TimestampTz DatumGetTimestampTz(Datum X)
Definition: timestamp.h:34
Relids pull_varnos(PlannerInfo *root, Node *node)
Definition: var.c:114
List * pull_var_clause(Node *node, int flags)
Definition: var.c:653
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24