Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit efbebb4

Browse files
michaelpq and evanchaoli
committed
Add support for "mcv" in pg_restore_extended_stats()
This commit adds support for restoring extended statistics of the kind "mcv", aka most-common values. This format differs from the n_distinct and dependencies stat types in that it is the combination of three of the four different arrays from the pg_stats_ext view, which in turn require three different input parameters in pg_restore_extended_stats(). These are translated into three input arguments for the function: - "most_common_vals", acting as the leader of the others. It is a two-dimensional array that holds the common values. - "most_common_freqs", a one-dimensional float8[] array whose number of elements has to match "most_common_vals". - "most_common_base_freqs", a one-dimensional float8[] array whose number of elements has to match "most_common_vals". All three arrays are required to restore this type of extended statistics (if "most_common_vals" happens to be NULL in the catalogs, the rest is NULL by design). Note that "most_common_val_nulls" is not required as input; its data is rebuilt from the decomposition of the "most_common_vals" array based on its text[] representation. The initial versions of the patch accepted it as input, but it is not required, and leaving it out simplifies the result a lot. Support in pg_dump, when --statistics is used, is added down to v13, which is where support for this type of extended statistics was added. This means that upgrades and dumps can restore extended statistics data transparently, like "dependencies", "ndistinct", attribute and relation statistics. For MCV, the values are directly queried from the relevant catalogs. Author: Corey Huinker <[email protected]> Co-authored-by: Chao Li <[email protected]> Reviewed-by: Michael Paquier <[email protected]> Discussion: https://postgr.es/m/CADkLM=dpz3KFnqP-dgJ-zvRvtjsa8UZv8wDAQdqho=qN3kX0Zg@mail.gmail.com
1 parent e09e5ad commit efbebb4

File tree

7 files changed

+823
-5
lines changed

7 files changed

+823
-5
lines changed

doc/src/sgml/func/func-admin.sgml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2222,8 +2222,10 @@ SELECT pg_restore_attribute_stats(
22222222
Other arguments are the names and values of statistics corresponding
22232223
to columns in <link linkend="view-pg-stats-ext"><structname>pg_stats_ext</structname>
22242224
</link>.
2225-
This function currently supports <literal>n_distinct</literal> and
2226-
<literal>dependencies</literal>.
2225+
This function currently supports <literal>n_distinct</literal>,
2226+
<literal>dependencies</literal>, <literal>most_common_vals</literal>,
2227+
<literal>most_common_freqs</literal>,
2228+
and <literal>most_common_base_freqs</literal>.
22272229
</para>
22282230
<para>
22292231
Additionally, this function accepts argument name

src/backend/statistics/extended_stats_funcs.c

Lines changed: 284 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ enum extended_stats_argnum
4848
INHERITED_ARG,
4949
NDISTINCT_ARG,
5050
DEPENDENCIES_ARG,
51+
MOST_COMMON_VALS_ARG,
52+
MOST_COMMON_FREQS_ARG,
53+
MOST_COMMON_BASE_FREQS_ARG,
5154
NUM_EXTENDED_STATS_ARGS,
5255
};
5356

@@ -64,6 +67,9 @@ static struct StatsArgInfo extarginfo[] =
6467
[INHERITED_ARG] = {"inherited", BOOLOID},
6568
[NDISTINCT_ARG] = {"n_distinct", PG_NDISTINCTOID},
6669
[DEPENDENCIES_ARG] = {"dependencies", PG_DEPENDENCIESOID},
70+
[MOST_COMMON_VALS_ARG] = {"most_common_vals", TEXTARRAYOID},
71+
[MOST_COMMON_FREQS_ARG] = {"most_common_freqs", FLOAT8ARRAYOID},
72+
[MOST_COMMON_BASE_FREQS_ARG] = {"most_common_base_freqs", FLOAT8ARRAYOID},
6773
[NUM_EXTENDED_STATS_ARGS] = {0},
6874
};
6975

@@ -90,6 +96,16 @@ static void upsert_pg_statistic_ext_data(const Datum *values,
9096
const bool *nulls,
9197
const bool *replaces);
9298

99+
static bool check_mcvlist_array(const ArrayType *arr, int argindex,
100+
int required_ndims, int mcv_length);
101+
static Datum import_mcv(const ArrayType *mcv_arr,
102+
const ArrayType *freqs_arr,
103+
const ArrayType *base_freqs_arr,
104+
Oid *atttypids, int32 *atttypmods,
105+
Oid *atttypcolls, int numattrs,
106+
bool *ok);
107+
108+
93109
/*
94110
* Fetch a pg_statistic_ext row by name and namespace OID.
95111
*/
@@ -252,16 +268,32 @@ extended_statistics_update(FunctionCallInfo fcinfo)
252268
bool success = true;
253269
Datum exprdatum;
254270
bool isnull;
271+
List *exprs = NIL;
272+
int numattnums = 0;
255273
int numexprs = 0;
274+
int numattrs = 0;
256275

257276
/* arrays of type info, if we need them */
277+
Oid *atttypids = NULL;
278+
int32 *atttypmods = NULL;
279+
Oid *atttypcolls = NULL;
258280
Oid relid;
259281
Oid locked_table = InvalidOid;
260282

261283
/*
262284
* Fill out the StakindFlags "has" structure based on which parameters
263285
* were provided to the function.
286+
*
287+
* The MCV stats composite value is an array of record type, but this is
288+
* externally represented as three arrays that must be interleaved into
289+
* the array of records (pg_stats_ext stores four arrays,
290+
* most_common_val_nulls is built from the contents of most_common_vals).
291+
* Therefore, none of the three array values is meaningful unless the
292+
* other two are also present and in sync in terms of array length.
264293
*/
294+
has.mcv = (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) &&
295+
!PG_ARGISNULL(MOST_COMMON_FREQS_ARG) &&
296+
!PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG));
265297
has.ndistinct = !PG_ARGISNULL(NDISTINCT_ARG);
266298
has.dependencies = !PG_ARGISNULL(DEPENDENCIES_ARG);
267299

@@ -344,6 +376,7 @@ extended_statistics_update(FunctionCallInfo fcinfo)
344376

345377
/* Find out what extended statistics kinds we should expect. */
346378
expand_stxkind(tup, &enabled);
379+
numattnums = stxform->stxkeys.dim1;
347380

348381
/* decode expression (if any) */
349382
exprdatum = SysCacheGetAttr(STATEXTOID,
@@ -353,7 +386,6 @@ extended_statistics_update(FunctionCallInfo fcinfo)
353386
if (!isnull)
354387
{
355388
char *s;
356-
List *exprs;
357389

358390
s = TextDatumGetCString(exprdatum);
359391
exprs = (List *) stringToNode(s);
@@ -377,6 +409,8 @@ extended_statistics_update(FunctionCallInfo fcinfo)
377409
numexprs = list_length(exprs);
378410
}
379411

412+
numattrs = numattnums + numexprs;
413+
380414
/*
381415
* If the object cannot support ndistinct, we should not have data for it.
382416
*/
@@ -411,6 +445,115 @@ extended_statistics_update(FunctionCallInfo fcinfo)
411445
success = false;
412446
}
413447

448+
/*
449+
* If the object cannot hold an MCV value, but any of the MCV parameters
450+
* are set, then issue a WARNING and ensure that we do not try to load MCV
451+
* stats later. In pg_stats_ext, most_common_val_nulls, most_common_freqs
452+
* and most_common_base_freqs are NULL if most_common_vals is NULL.
453+
*/
454+
if (!enabled.mcv)
455+
{
456+
if (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) ||
457+
!PG_ARGISNULL(MOST_COMMON_FREQS_ARG) ||
458+
!PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG))
459+
{
460+
ereport(WARNING,
461+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
462+
errmsg("cannot specify parameters \"%s\", \"%s\" or \"%s\"",
463+
extarginfo[MOST_COMMON_VALS_ARG].argname,
464+
extarginfo[MOST_COMMON_FREQS_ARG].argname,
465+
extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname),
466+
errhint("Extended statistics object \"%s\".\"%s\" does not support statistics of this type.",
467+
quote_identifier(nspname),
468+
quote_identifier(stxname)));
469+
470+
has.mcv = false;
471+
success = false;
472+
}
473+
}
474+
else if (!has.mcv)
475+
{
476+
/*
477+
* If we do not have all of the MCV arrays set while the extended
478+
* statistics object expects something, something is wrong. This
479+
* issues a WARNING if a partial input has been provided.
480+
*/
481+
if (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) ||
482+
!PG_ARGISNULL(MOST_COMMON_FREQS_ARG) ||
483+
!PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG))
484+
{
485+
ereport(WARNING,
486+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
487+
errmsg("could not use \"%s\", \"%s\" and \"%s\": missing one or more parameters",
488+
extarginfo[MOST_COMMON_VALS_ARG].argname,
489+
extarginfo[MOST_COMMON_FREQS_ARG].argname,
490+
extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname));
491+
success = false;
492+
}
493+
}
494+
495+
/*
496+
* Either of these statistic types requires that we supply a semi-filled
497+
* VacAttrStatP array.
498+
*
499+
* It is not possible to use the existing lookup_var_attr_stats() and
500+
* examine_attribute() because these functions will skip attributes where
501+
* attstattarget is 0, and we may have statistics data to import for those
502+
* attributes.
503+
*/
504+
if (has.mcv)
505+
{
506+
atttypids = palloc0_array(Oid, numattrs);
507+
atttypmods = palloc0_array(int32, numattrs);
508+
atttypcolls = palloc0_array(Oid, numattrs);
509+
510+
/*
511+
* The leading stxkeys are attribute numbers up through numattnums.
512+
* These keys must be in ascending AttNumber order, but we do not rely
513+
* on that.
514+
*/
515+
for (int i = 0; i < numattnums; i++)
516+
{
517+
AttrNumber attnum = stxform->stxkeys.values[i];
518+
HeapTuple atup = SearchSysCache2(ATTNUM,
519+
ObjectIdGetDatum(relid),
520+
Int16GetDatum(attnum));
521+
522+
Form_pg_attribute attr;
523+
524+
/* Attribute not found */
525+
if (!HeapTupleIsValid(atup))
526+
elog(ERROR, "stxkeys references nonexistent attnum %d", attnum);
527+
528+
attr = (Form_pg_attribute) GETSTRUCT(atup);
529+
530+
if (attr->attisdropped)
531+
elog(ERROR, "stxkeys references dropped attnum %d", attnum);
532+
533+
atttypids[i] = attr->atttypid;
534+
atttypmods[i] = attr->atttypmod;
535+
atttypcolls[i] = attr->attcollation;
536+
ReleaseSysCache(atup);
537+
}
538+
539+
/*
540+
* After all the positive number attnums in stxkeys come the negative
541+
* numbers (if any) which represent expressions in the order that they
542+
* appear in stxdexprs. Because the expressions are always
543+
* monotonically decreasing from -1, there is no point in looking at
544+
* the values in stxkeys, it's enough to know how many of them there
545+
* are.
546+
*/
547+
for (int i = numattnums; i < numattrs; i++)
548+
{
549+
Node *expr = list_nth(exprs, i - numattnums);
550+
551+
atttypids[i] = exprType(expr);
552+
atttypmods[i] = exprTypmod(expr);
553+
atttypcolls[i] = exprCollation(expr);
554+
}
555+
}
556+
414557
/*
415558
* Populate the pg_statistic_ext_data result tuple.
416559
*/
@@ -471,16 +614,156 @@ extended_statistics_update(FunctionCallInfo fcinfo)
471614
statext_dependencies_free(dependencies);
472615
}
473616

617+
if (has.mcv)
618+
{
619+
Datum datum;
620+
bool val_ok = false;
621+
622+
datum = import_mcv(PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VALS_ARG),
623+
PG_GETARG_ARRAYTYPE_P(MOST_COMMON_FREQS_ARG),
624+
PG_GETARG_ARRAYTYPE_P(MOST_COMMON_BASE_FREQS_ARG),
625+
atttypids, atttypmods, atttypcolls, numattrs,
626+
&val_ok);
627+
628+
if (val_ok)
629+
{
630+
Assert(datum != (Datum) 0);
631+
values[Anum_pg_statistic_ext_data_stxdmcv - 1] = datum;
632+
nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = false;
633+
replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
634+
}
635+
else
636+
success = false;
637+
}
638+
474639
upsert_pg_statistic_ext_data(values, nulls, replaces);
475640

476641
cleanup:
477642
if (HeapTupleIsValid(tup))
478643
heap_freetuple(tup);
479644
if (pg_stext != NULL)
480645
table_close(pg_stext, RowExclusiveLock);
646+
if (atttypids != NULL)
647+
pfree(atttypids);
648+
if (atttypmods != NULL)
649+
pfree(atttypmods);
650+
if (atttypcolls != NULL)
651+
pfree(atttypcolls);
481652
return success;
482653
}
483654

655+
/*
656+
* Consistency checks to ensure that other mcvlist arrays are in alignment
657+
* with the mcv array.
658+
*/
659+
static bool
660+
check_mcvlist_array(const ArrayType *arr, int argindex, int required_ndims,
661+
int mcv_length)
662+
{
663+
if (ARR_NDIM(arr) != required_ndims)
664+
{
665+
ereport(WARNING,
666+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
667+
errmsg("could not parse array \"%s\": incorrect number of dimensions (%d required)",
668+
extarginfo[argindex].argname, required_ndims));
669+
return false;
670+
}
671+
672+
if (array_contains_nulls(arr))
673+
{
674+
ereport(WARNING,
675+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
676+
errmsg("could not parse array \"%s\": NULL value found",
677+
extarginfo[argindex].argname));
678+
return false;
679+
}
680+
681+
if (ARR_DIMS(arr)[0] != mcv_length)
682+
{
683+
ereport(WARNING,
684+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
685+
errmsg("could not parse array \"%s\": incorrect number of elements (same as \"%s\" required)",
686+
extarginfo[argindex].argname,
687+
extarginfo[MOST_COMMON_VALS_ARG].argname));
688+
return false;
689+
}
690+
691+
return true;
692+
}
693+
694+
/*
695+
* Create the stxdmcv datum from the equal-sized arrays of most common values,
696+
* their null flags, and the frequency and base frequency associated with
697+
* each value.
698+
*/
699+
static Datum
700+
import_mcv(const ArrayType *mcv_arr, const ArrayType *freqs_arr,
701+
const ArrayType *base_freqs_arr, Oid *atttypids, int32 *atttypmods,
702+
Oid *atttypcolls, int numattrs, bool *ok)
703+
{
704+
int nitems;
705+
Datum *mcv_elems;
706+
bool *mcv_nulls;
707+
int check_nummcv;
708+
Datum mcv = (Datum) 0;
709+
710+
*ok = false;
711+
712+
/*
713+
* mcv_arr is an array of arrays. Each inner array must have the same
714+
* number of elements "numattrs".
715+
*/
716+
if (ARR_NDIM(mcv_arr) != 2)
717+
{
718+
ereport(WARNING,
719+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
720+
errmsg("could not parse array \"%s\": incorrect number of dimensions (%d required)",
721+
extarginfo[MOST_COMMON_VALS_ARG].argname, 2));
722+
goto mcv_error;
723+
}
724+
725+
if (ARR_DIMS(mcv_arr)[1] != numattrs)
726+
{
727+
ereport(WARNING,
728+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
729+
errmsg("could not parse array \"%s\": found %d attributes but expected %d",
730+
extarginfo[MOST_COMMON_VALS_ARG].argname,
731+
ARR_DIMS(mcv_arr)[1], numattrs));
732+
goto mcv_error;
733+
}
734+
735+
/*
736+
* "most_common_freqs" and "most_common_base_freqs" arrays must be of the
737+
* same length, one-dimension and cannot contain NULLs. We use mcv_arr as
738+
* the reference array for determining their length.
739+
*/
740+
nitems = ARR_DIMS(mcv_arr)[0];
741+
if (!check_mcvlist_array(freqs_arr, MOST_COMMON_FREQS_ARG, 1, nitems) ||
742+
!check_mcvlist_array(base_freqs_arr, MOST_COMMON_BASE_FREQS_ARG, 1, nitems))
743+
{
744+
/* inconsistent input arrays found */
745+
goto mcv_error;
746+
}
747+
748+
/*
749+
* This part builds the contents for "most_common_val_nulls", based on the
750+
* values from "most_common_vals".
751+
*/
752+
deconstruct_array_builtin(mcv_arr, TEXTOID, &mcv_elems,
753+
&mcv_nulls, &check_nummcv);
754+
755+
mcv = statext_mcv_import(WARNING, numattrs,
756+
atttypids, atttypmods, atttypcolls,
757+
nitems, mcv_elems, mcv_nulls,
758+
(float8 *) ARR_DATA_PTR(freqs_arr),
759+
(float8 *) ARR_DATA_PTR(base_freqs_arr));
760+
761+
*ok = (mcv != (Datum) 0);
762+
763+
mcv_error:
764+
return mcv;
765+
}
766+
484767
/*
485768
* Remove an existing pg_statistic_ext_data row for a given pg_statistic_ext
486769
* row and "inherited" pair.

0 commit comments

Comments
 (0)