diff --git a/.gitignore b/.gitignore index ad06216..c1b6c2d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ pg_query_state--*.sql cscope.out tags Dockerfile +tmp_stress +isolation_output + diff --git a/.travis.yml b/.travis.yml index dc651bd..0812444 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,10 @@ sudo: required language: c +os: linux + +dist: focal + services: - docker @@ -18,14 +22,22 @@ notifications: on_failure: always env: - - PG_VERSION=11 LEVEL=hardcore - - PG_VERSION=11 - - PG_VERSION=10 LEVEL=hardcore + - PG_VERSION=17 + - PG_VERSION=16 LEVEL=hardcore USE_TPCDS=0 + - PG_VERSION=16 + - PG_VERSION=15 LEVEL=hardcore USE_TPCDS=0 + - PG_VERSION=15 + - PG_VERSION=14 LEVEL=hardcore USE_TPCDS=0 + - PG_VERSION=14 + - PG_VERSION=13 LEVEL=hardcore USE_TPCDS=0 + - PG_VERSION=13 + - PG_VERSION=12 LEVEL=hardcore USE_TPCDS=0 + - PG_VERSION=12 - PG_VERSION=10 - - PG_VERSION=9.6 LEVEL=hardcore - PG_VERSION=9.6 matrix: allow_failures: - - env: PG_VERSION=10 LEVEL=nightmare - - env: PG_VERSION=9.6 LEVEL=nightmare \ No newline at end of file + - env: PG_VERSION=13 LEVEL=hardcore USE_TPCDS=1 + - env: PG_VERSION=10 + - env: PG_VERSION=9.6 diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 43d3691..93b9833 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -2,12 +2,12 @@ FROM postgres:${PG_VERSION}-alpine # Install dependencies RUN apk add --no-cache \ - openssl curl \ + openssl curl git patch \ perl perl-ipc-run \ make musl-dev gcc bison flex coreutils \ zlib-dev libedit-dev \ - clang clang-analyzer \ - python2 python2-dev py2-virtualenv; + icu-dev clang clang-analyzer linux-headers \ + python3 python3-dev py3-virtualenv; # Install fresh valgrind @@ -35,4 +35,4 @@ ADD . /pg/testdir WORKDIR /pg/testdir USER postgres -ENTRYPOINT LEVEL=${LEVEL} /run.sh +ENTRYPOINT LEVEL=${LEVEL} USE_TPCDS=${USE_TPCDS} /run.sh diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7c10525 --- /dev/null +++ b/LICENSE @@ -0,0 +1,11 @@ +pg_query_state is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses. + +Copyright (c) 2016-2024, Postgres Professional +Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +Portions Copyright (c) 1994, The Regents of the University of California + +Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies. + +IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
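As a usage note for the CI matrix above: any cell can be reproduced locally by exporting the same three variables and rendering `Dockerfile.tmpl` through `mk_dockerfile.sh` (the exact commands appear in the README changes below). A minimal sketch; the chosen cell is illustrative:

```shell
# One cell of the .travis.yml matrix; any PG_VERSION/LEVEL/USE_TPCDS triple works.
export PG_VERSION=16
export LEVEL=hardcore
export USE_TPCDS=0

# Substitutes ${PG_VERSION}, ${LEVEL} and ${USE_TPCDS} into Dockerfile.tmpl,
# producing the Dockerfile that docker-compose builds.
./mk_dockerfile.sh
docker-compose build
docker-compose run tests
```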
diff --git a/Makefile b/Makefile index 08d24ac..c96aae2 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,4 @@ + # contrib/pg_query_state/Makefile MODULE_big = pg_query_state @@ -9,10 +10,14 @@ DATA_built = $(EXTENSION)--$(EXTVERSION).sql PGFILEDESC = "pg_query_state - facility to track progress of plan execution" EXTRA_CLEAN = ./isolation_output $(EXTENSION)--$(EXTVERSION).sql \ - Dockerfile ./tests/*.pyc + Dockerfile ./tests/*.pyc ./tmp_stress + +ISOLATION = corner_cases + +ISOLATION_OPTS = --load-extension=pg_query_state ifdef USE_PGXS -PG_CONFIG = pg_config +PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) else @@ -20,31 +25,35 @@ subdir = contrib/pg_query_state top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk +# needed to provide "make check" in case of an "in source" build +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/test.conf endif $(EXTENSION)--$(EXTVERSION).sql: init.sql cat $^ > $@ -check: isolationcheck +# +# Make conditional targets to save backward compatibility with PG11. +# +ifeq ($(MAJORVERSION),11) +ISOLATIONCHECKS = corner_cases -ISOLATIONCHECKS=corner_cases +check: isolationcheck -submake-isolation: - $(MAKE) -C $(top_builddir)/src/test/isolation all +installcheck: submake-isolation + $(MKDIR_P) isolation_output + $(pg_isolation_regress_installcheck) \ + --outputdir=isolation_output \ + $(ISOLATIONCHECKS) isolationcheck: | submake-isolation temp-install $(MKDIR_P) isolation_output $(pg_isolation_regress_check) \ - --temp-config $(top_srcdir)/contrib/pg_query_state/test.conf \ - --outputdir=isolation_output \ - $(ISOLATIONCHECKS) - -isolationcheck-install-force: all | submake-isolation temp-install - $(MKDIR_P) isolation_output - $(pg_isolation_regress_installcheck) \ - --outputdir=isolation_output \ - $(ISOLATIONCHECKS) + --outputdir=isolation_output \ + $(ISOLATIONCHECKS) -.PHONY: isolationcheck isolationcheck-install-force check +submake-isolation: + $(MAKE) -C $(top_builddir)/src/test/isolation all temp-install: EXTRA_INSTALL=contrib/pg_query_state +endif diff --git a/README.md b/README.md index 34ecdb0..6c983c1 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ -[![Build Status](https://travis-ci.org/postgrespro/pg_query_state.svg?branch=master)](https://travis-ci.org/postgrespro/pg_query_state) +[![Build Status](https://travis-ci.com/postgrespro/pg_query_state.svg?branch=master)](https://travis-ci.com/postgrespro/pg_query_state) +[![codecov](https://codecov.io/gh/postgrespro/pg_query_state/branch/master/graph/badge.svg)](https://codecov.io/gh/postgrespro/pg_query_state) # pg\_query\_state -The `pg_query_state` module provides facility to know the current state of query execution on working backend. To enable this extension you have to patch the latest stable version of PostgreSQL. Different branches are intended for different version numbers of PostgreSQL, e.g., branch _PG9_5_ corresponds to PostgreSQL 9.5. +The `pg_query_state` module provides a facility to inspect the current state of query execution on a working backend. To enable this extension you have to patch a stable version of PostgreSQL, recompile it and deploy the new binaries. All patch files are located in the `patches/` directory and tagged with the suffix of the corresponding PostgreSQL version number. ## Overview -Each nonutility query statement (SELECT/INSERT/UPDATE/DELETE) after optimization/planning stage is translated into plan tree which is kind of imperative representation of declarative SQL query.
EXPLAIN ANALYZE request allows to demonstrate execution statistics gathered from each node of plan tree (full time of execution, number rows emitted to upper nodes, etc). But this statistics is collected after execution of query. This module allows to show actual statistics of query running on external backend. At that, format of resulting output is almost identical to ordinal EXPLAIN ANALYZE. Thus users are able to track of query execution in progress. +Each non-utility query statement (SELECT/INSERT/UPDATE/DELETE), after the optimization/planning stage, is translated into a plan tree, which is a kind of imperative representation of the SQL query execution algorithm. An EXPLAIN ANALYZE request shows execution statistics gathered from each node of the plan tree (total execution time, number of rows emitted to upper nodes, etc.), but these statistics are collected only after the query has finished. This module shows the actual statistics of a running query, gathered from an external backend. The format of the resulting output is almost identical to that of an ordinary EXPLAIN ANALYZE, so users are able to track query execution in progress. -In fact, this module is able to explore external backend and determine its actual state. Particularly it's helpful when backend executes a heavy query or gets stuck. +In fact, this module is able to explore an external backend and determine its actual state. It is particularly helpful when a backend executes a heavy query and gets stuck. ## Use cases Using this module there can help in the following things: @@ -14,55 +15,88 @@ Using this module there can help in the following things: - overwatch the query execution ## Installation -To install `pg_query_state`, please apply corresponding patches `custom_signal_(PG_VERSION).patch` and `runtime_explain.patch` (or `runtime_explain_11.0.patch` for PG11) to reqired stable version of PostgreSQL and rebuild PostgreSQL. +To install `pg_query_state`, apply the corresponding patches `custom_signal_(PG_VERSION).patch` and `runtime_explain_(PG_VERSION).patch` (or `runtime_explain.patch` for PG version <= 10.0) from the `patches/` directory to the required stable version of PostgreSQL and rebuild PostgreSQL. +To do this, run the following commands from the PostgreSQL source directory: +``` +patch -p1 < path_to_pg_query_state_folder/patches/runtime_explain_(PG_VERSION).patch +patch -p1 < path_to_pg_query_state_folder/patches/custom_signals_(PG_VERSION).patch +``` Then execute this in the module's directory: ``` make install USE_PGXS=1 ``` +To execute the command correctly, make sure you have the `PATH` or `PG_CONFIG` variable set. +``` +export PATH=path_to_your_bin_folder:$PATH +# or +export PG_CONFIG=path_to_your_bin_folder/pg_config +``` + Add module name to the `shared_preload_libraries` parameter in `postgresql.conf`: ``` shared_preload_libraries = 'pg_query_state' ``` It is essential to restart the PostgreSQL instance. After that, execute the following query in psql: -``` +```sql CREATE EXTENSION pg_query_state; ``` Done! ## Tests -Tests using parallel sessions using python 2.7 script: - ``` - python tests/pg_qs_test_runner.py [OPTION]... - ``` +Run tests on parallel sessions using a Python 3+ compatible script: +```shell +python3 tests/pg_qs_test_runner.py [OPTION]...
+``` *prerequisite packages*: * `psycopg2` version 2.6 or later * `PyYAML` version 3.11 or later - +* `progressbar2` for stress test progress reporting + *options*: * *- -host* --- postgres server host, default value is *localhost* * *- -port* --- postgres server port, default value is *5432* * *- -database* --- database name, default value is *postgres* * *- -user* --- user name, default value is *postgres* * *- -password* --- user's password, default value is empty +* *- -tpc-ds-setup* --- set up the database to run the TPC-DS benchmark +* *- -tpc-ds-run* --- run only the stress tests on the TPC-DS benchmark + +Or run all tests in `Docker` using: + +```shell +export LEVEL=hardcore +export USE_TPCDS=1 +export PG_VERSION=12 + +./mk_dockerfile.sh + +docker-compose build +docker-compose run tests +``` + +There are different test levels: `hardcore`, `nightmare` (runs tests under `valgrind`) and `stress` (runs tests under `TPC-DS` load). ## Function pg\_query\_state ```plpgsql -pg_query_state(integer pid, - verbose boolean DEFAULT FALSE, - costs boolean DEFAULT FALSE, - timing boolean DEFAULT FALSE, - buffers boolean DEFAULT FALSE, - triggers boolean DEFAULT FALSE, - format text DEFAULT 'text') - returns TABLE ( pid integer, - frame_number integer, - query_text text, - plan text, - leader_pid integer) +pg_query_state( + integer pid, + verbose boolean DEFAULT FALSE, + costs boolean DEFAULT FALSE, + timing boolean DEFAULT FALSE, + buffers boolean DEFAULT FALSE, + triggers boolean DEFAULT FALSE, + format text DEFAULT 'text' +) returns TABLE ( + pid integer, + frame_number integer, + query_text text, + plan text, + leader_pid integer +) ``` -Extract current query state from backend with specified `pid`. Since parallel query can spawn multiple workers and function call causes nested subqueries so that state of execution may be viewed as stack of running queries, return value of `pg_query_state` has type `TABLE (pid integer, frame_number integer, query_text text, plan text, leader_pid integer)`. It represents tree structure consisting of leader process and its spawned workers identified by `pid`. Each worker refers to leader through `leader_pid` column. For leader process the value of this column is` null`. The state of each process is represented as stack of function calls. Each frame of that stack is specified as correspondence between `frame_number` starting from zero, `query_text` and `plan` with online statistics columns. +Extracts the current query state from the backend with the specified `pid`. Since a parallel query can spawn multiple workers, and a function call can cause nested subqueries, the state of execution may be viewed as a stack of running queries; accordingly, the return value of `pg_query_state` has type `TABLE (pid integer, frame_number integer, query_text text, plan text, leader_pid integer)`. It represents a tree structure consisting of the leader process and its spawned workers, identified by `pid`. Each worker refers to the leader through the `leader_pid` column; for the leader process the value of this column is `null`. The state of each process is represented as a stack of function calls, and each frame of that stack is given as a correspondence between `frame_number` (starting from zero), `query_text` and `plan` with online statistics columns. Thus, the user can see the states of the main query and of the queries generated from function calls, for the leader process and all workers spawned from it.
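As an illustration of the return-table structure just described (a hedged sketch; the pid `49265` is the example value used in the Examples section below), the leader/worker tree can be summarized straight from the function's output:

```sql
-- Count stack frames per process for backend 49265.
-- Workers reference the leader via leader_pid; the leader's leader_pid is null.
select pid, leader_pid, count(*) as frames
from pg_query_state(49265)
group by pid, leader_pid;
```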
@@ -79,7 +113,7 @@ Optional arguments: If callable backend is not executing any query the function prints INFO message about backend's state taken from `pg_stat_activity` view if it exists there. -Calling role have to be superuser or member of the role whose backend is being called. Otherwise function prints ERROR message `permission denied`. +**_Warning_**: The calling role has to be a superuser or a member of the role whose backend is being called. Otherwise the function prints the ERROR message `permission denied`. ## Configuration settings There are several user-accessible [GUC](https://www.postgresql.org/docs/9.5/static/config-setting.html) variables designed to toggle the whole module and the collecting of specific statistic parameters while query is running: @@ -92,11 +126,11 @@ This parameters is set on called side before running any queries whose states are ## Examples Set maximum number of parallel workers on `gather` node equals `2`: -``` +```sql postgres=# set max_parallel_workers_per_gather = 2; ``` Assume one backend with pid = 49265 performs a simple query: -``` +```sql postgres=# select pg_backend_pid(); pg_backend_pid ---------------- @@ -105,7 +139,7 @@ postgres=# select pg_backend_pid(); postgres=# select count(*) from foo join bar on foo.c1=bar.c1; ``` Other backend can extract intermediate state of execution that query: -``` +```sql postgres=# \x postgres=# select * from pg_query_state(49265); -[ RECORD 1 ]+------------------------------------------------------------------------------------------------------------------------- @@ -150,11 +184,11 @@ In example above working backend spawns two parallel workers with pids `49324` a `Seq Scan` node has statistics on passed loops (average number of rows delivered to `Nested Loop` and number of passed loops are shown) and statistics on current loop. Other nodes has statistics only for current loop as this loop is first (`loop number` = 1). Assume first backend executes some function: -``` +```sql postgres=# select n_join_foo_bar(); ``` Other backend can get the follow output: -``` +```sql postgres=# select * from pg_query_state(49265); -[ RECORD 1 ]+------------------------------------------------------------------------------------------------------------------ pid | 49265 @@ -180,7 +214,7 @@ leader_pid | (null) First row corresponds to function call, second - to query which is in the body of that function. We can get result plans in different format (e.g. `json`): -``` +```sql postgres=# select * from pg_query_state(pid := 49265, format := 'json'); -[ RECORD 1 ]+------------------------------------------------------------ pid | 49265 @@ -285,4 +319,5 @@ leader_pid | (null) Do not hesitate to post your issues, questions and new ideas at the [issues](https://github.com/postgrespro/pg_query_state/issues) page. ## Authors -Maksim Milyutin Postgres Professional Ltd., Russia +[Maksim Milyutin](https://github.com/maksm90) +Alexey Kondratov Postgres Professional Ltd., Russia diff --git a/docker-compose.yml b/docker-compose.yml index 67f1cee..550e2be 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,2 +1,3 @@ -tests: +services: + tests: build: .
\ No newline at end of file diff --git a/expected/corner_cases.out b/expected/corner_cases.out index 8320e1f..725addc 100644 --- a/expected/corner_cases.out +++ b/expected/corner_cases.out @@ -1,3 +1,6 @@ +unused step name: s1_enable_pg_qs +unused step name: s1_pg_qs_counterpart +unused step name: s2_save_pid Parsed test spec with 2 sessions starting permutation: s1_pg_qs_1 @@ -10,55 +13,78 @@ ERROR: attempt to extract state of current process starting permutation: s1_save_pid s2_pg_qs_counterpart step s1_save_pid: select save_own_pid(0); -save_own_pid +save_own_pid +------------ + +(1 row) - -INFO: state of backend is idle -step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); -pg_query_state +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state +-------------- +(0 rows) starting permutation: s1_save_pid s1_disable_pg_qs s2_pg_qs_counterpart step s1_save_pid: select save_own_pid(0); -save_own_pid +save_own_pid +------------ + +(1 row) - step s1_disable_pg_qs: set pg_query_state.enable to off; -INFO: query execution statistics disabled -step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); -pg_query_state +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: query execution statistics disabled +step s2_pg_qs_counterpart: <... completed> +pg_query_state +-------------- +(0 rows) starting permutation: s1_set_bob s2_set_bob s1_save_pid s2_pg_qs_counterpart step s1_set_bob: set role bob; step s2_set_bob: set role bob; step s1_save_pid: select save_own_pid(0); -save_own_pid +save_own_pid +------------ + +(1 row) - -INFO: state of backend is idle -step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); -pg_query_state +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state +-------------- +(0 rows) starting permutation: s1_set_bob s2_set_su s1_save_pid s2_pg_qs_counterpart step s1_set_bob: set role bob; step s2_set_su: set role super; step s1_save_pid: select save_own_pid(0); -save_own_pid +save_own_pid +------------ + +(1 row) - -INFO: state of backend is idle -step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); -pg_query_state +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state +-------------- +(0 rows) starting permutation: s1_set_bob s2_set_alice s1_save_pid s2_pg_qs_counterpart step s1_set_bob: set role bob; step s2_set_alice: set role alice; step s1_save_pid: select save_own_pid(0); -save_own_pid +save_own_pid +------------ + +(1 row) - -step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +step s2_pg_qs_counterpart: <... 
completed> ERROR: permission denied diff --git a/expected/corner_cases_2.out b/expected/corner_cases_2.out new file mode 100644 index 0000000..df7495f --- /dev/null +++ b/expected/corner_cases_2.out @@ -0,0 +1,69 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_pg_qs_1 +step s1_pg_qs_1: select pg_query_state(1); +ERROR: backend with pid=1 not found + +starting permutation: s1_pg_qs_2 +step s1_pg_qs_2: select pg_query_state(pg_backend_pid()); +ERROR: attempt to extract state of current process + +starting permutation: s1_save_pid s2_pg_qs_counterpart +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state + + +starting permutation: s1_save_pid s1_disable_pg_qs s2_pg_qs_counterpart +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s1_disable_pg_qs: set pg_query_state.enable to off; +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: query execution statistics disabled +step s2_pg_qs_counterpart: <... completed> +pg_query_state + + +starting permutation: s1_set_bob s2_set_bob s1_save_pid s2_pg_qs_counterpart +step s1_set_bob: set role bob; +step s2_set_bob: set role bob; +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state + + +starting permutation: s1_set_bob s2_set_su s1_save_pid s2_pg_qs_counterpart +step s1_set_bob: set role bob; +step s2_set_su: set role super; +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state + + +starting permutation: s1_set_bob s2_set_alice s1_save_pid s2_pg_qs_counterpart +step s1_set_bob: set role bob; +step s2_set_alice: set role alice; +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +step s2_pg_qs_counterpart: <... completed> +ERROR: permission denied diff --git a/expected/corner_cases_3.out b/expected/corner_cases_3.out new file mode 100644 index 0000000..8f6a8ef --- /dev/null +++ b/expected/corner_cases_3.out @@ -0,0 +1,72 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_pg_qs_1 +step s1_pg_qs_1: select pg_query_state(1); +ERROR: backend with pid=1 not found + +starting permutation: s1_pg_qs_2 +step s1_pg_qs_2: select pg_query_state(pg_backend_pid()); +ERROR: attempt to extract state of current process + +starting permutation: s1_save_pid s2_pg_qs_counterpart +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state + + +starting permutation: s1_save_pid s1_disable_pg_qs s2_pg_qs_counterpart +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s1_disable_pg_qs: set pg_query_state.enable to off; +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: query execution statistics disabled +step s2_pg_qs_counterpart: <... 
completed> +pg_query_state + + +starting permutation: s1_set_bob s2_set_bob s1_save_pid s2_pg_qs_counterpart +step s1_set_bob: set role bob; +step s2_set_bob: set role bob; +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state + + +starting permutation: s1_set_bob s2_set_su s1_save_pid s2_pg_qs_counterpart +step s1_set_bob: set role bob; +step s2_set_su: set role super; +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +s2: INFO: state of backend is idle +step s2_pg_qs_counterpart: <... completed> +pg_query_state + + +starting permutation: s1_set_bob s2_set_alice s1_save_pid s2_pg_qs_counterpart +step s1_set_bob: set role bob; +step s2_set_alice: set role alice; +step s1_save_pid: select save_own_pid(0); +save_own_pid + + +step s2_pg_qs_counterpart: select pg_query_state(counterpart_pid(0)); +step s2_pg_qs_counterpart: <... completed> +ERROR: permission denied +unused step name: s1_enable_pg_qs +unused step name: s1_pg_qs_counterpart +unused step name: s2_save_pid diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..b2d4248 --- /dev/null +++ b/meson.build @@ -0,0 +1,53 @@ +# Copyright (c) 2025, Postgres Professional + +# Does not support the PGXS infrastructure at this time. Please, compile as part +# of the contrib source tree. + +pg_query_state_sources = files( + 'pg_query_state.c', + 'signal_handler.c', +) + +if host_system == 'windows' + pg_query_state_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'pg_query_state', + '--FILEDESC', 'pg_query_state - provides facility to know the current state of query execution on working backend.',]) +endif + +pg_query_state = shared_module('pg_query_state', + pg_query_state_sources, + kwargs: contrib_mod_args, +) +contrib_targets += pg_query_state + +extversion = '1.1' +output_name = 'pg_query_state--' + extversion + '.sql' + +configure_file( + input: 'init.sql', + output: output_name, + copy: true, + install: true, + install_dir: contrib_data_args['install_dir'], +) + +install_data( + 'pg_query_state.control', + 'pg_query_state--1.0--1.1.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'pg_query_state', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'isolation': { + 'specs': [ + 'corner_cases', + ], + 'regress_args': [ + '--temp-config', files('test.conf'), + '--load-extension=pg_query_state', + ], + }, +} diff --git a/mk_dockerfile.sh b/mk_dockerfile.sh index f15433c..86f72a4 100755 --- a/mk_dockerfile.sh +++ b/mk_dockerfile.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env sh + if [ -z ${PG_VERSION+x} ]; then echo PG_VERSION is not set! 
exit 1 @@ -7,10 +9,16 @@ if [ -z ${LEVEL+x} ]; then LEVEL=scan-build fi +if [ -z ${USE_TPCDS+x} ]; then + USE_TPCDS=0 +fi + echo PG_VERSION=${PG_VERSION} echo LEVEL=${LEVEL} +echo USE_TPCDS=${USE_TPCDS} sed \ -e 's/${PG_VERSION}/'${PG_VERSION}/g \ -e 's/${LEVEL}/'${LEVEL}/g \ + -e 's/${USE_TPCDS}/'${USE_TPCDS}/g \ Dockerfile.tmpl > Dockerfile diff --git a/patches/custom_signals_12.0.patch b/patches/custom_signals_12.0.patch new file mode 100644 index 0000000..7c57130 --- /dev/null +++ b/patches/custom_signals_12.0.patch @@ -0,0 +1,196 @@ +diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c +index 7605b2c367..6a4327fe76 100644 +--- a/src/backend/storage/ipc/procsignal.c ++++ b/src/backend/storage/ipc/procsignal.c +@@ -60,12 +60,20 @@ typedef struct + */ + #define NumProcSignalSlots (MaxBackends + NUM_AUXPROCTYPES) + ++#define IsCustomProcSignalReason(reason) \ ++ ((reason) >= PROCSIG_CUSTOM_1 && (reason) <= PROCSIG_CUSTOM_N) ++ ++static bool CustomSignalPendings[NUM_CUSTOM_PROCSIGNALS]; ++static ProcSignalHandler_type CustomInterruptHandlers[NUM_CUSTOM_PROCSIGNALS]; ++ + static ProcSignalSlot *ProcSignalSlots = NULL; + static volatile ProcSignalSlot *MyProcSignalSlot = NULL; + + static bool CheckProcSignal(ProcSignalReason reason); + static void CleanupProcSignalState(int status, Datum arg); + ++static void CheckAndSetCustomSignalInterrupts(void); ++ + /* + * ProcSignalShmemSize + * Compute space needed for procsignal's shared memory +@@ -165,6 +173,36 @@ CleanupProcSignalState(int status, Datum arg) + slot->pss_pid = 0; + } + ++/* ++ * RegisterCustomProcSignalHandler ++ * Assign specific handler of custom process signal with new ++ * ProcSignalReason key. ++ * ++ * This function has to be called in _PG_init function of extensions at the ++ * stage of loading shared preloaded libraries. Otherwise it throws fatal error. ++ * ++ * Return INVALID_PROCSIGNAL if all slots for custom signals are occupied. ++ */ ++ProcSignalReason ++RegisterCustomProcSignalHandler(ProcSignalHandler_type handler) ++{ ++ ProcSignalReason reason; ++ ++ if (!process_shared_preload_libraries_in_progress) ++ ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), ++ errmsg("cannot register custom signal after startup"))); ++ ++ /* Iterate through custom signal slots to find a free one */ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ if (!CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1]) ++ { ++ CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1] = handler; ++ return reason; ++ } ++ ++ return INVALID_PROCSIGNAL; ++} ++ + /* + * SendProcSignal + * Send a signal to a Postgres process +@@ -292,9 +330,63 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) + RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + ++ CheckAndSetCustomSignalInterrupts(); ++ + SetLatch(MyLatch); + + latch_sigusr1_handler(); + + errno = save_errno; + } ++ ++/* ++ * Handle receipt of an interrupt indicating any of custom process signals. 
++ */ ++static void ++CheckAndSetCustomSignalInterrupts() ++{ ++ ProcSignalReason reason; ++ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ { ++ if (CheckProcSignal(reason)) ++ { ++ ++ /* set interrupt flags */ ++ InterruptPending = true; ++ CustomSignalPendings[reason - PROCSIG_CUSTOM_1] = true; ++ } ++ } ++ ++ SetLatch(MyLatch); ++} ++ ++/* ++ * CheckAndHandleCustomSignals ++ * Check custom signal flags and call handler assigned to that signal ++ * if it is not NULL ++ * ++ * This function is called within CHECK_FOR_INTERRUPTS if interrupt occurred. ++ */ ++void ++CheckAndHandleCustomSignals(void) ++{ ++ int i; ++ ++ /* Disable interrupts to avoid recursive calls */ ++ HOLD_INTERRUPTS(); ++ ++ /* Check on expiring of custom signals and call its handlers if exist */ ++ for (i = 0; i < NUM_CUSTOM_PROCSIGNALS; i++) ++ if (CustomSignalPendings[i]) ++ { ++ ProcSignalHandler_type handler; ++ ++ CustomSignalPendings[i] = false; ++ handler = CustomInterruptHandlers[i]; ++ if (handler != NULL) ++ handler(); ++ } ++ ++ RESUME_INTERRUPTS(); ++} +diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c +index c28cc37012..f5a48b98e8 100644 +--- a/src/backend/tcop/postgres.c ++++ b/src/backend/tcop/postgres.c +@@ -3139,6 +3139,8 @@ ProcessInterrupts(void) + + if (ParallelMessagePending) + HandleParallelMessages(); ++ ++ CheckAndHandleCustomSignals(); + } + + +diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h +index 05b186a05c..d961790b7e 100644 +--- a/src/include/storage/procsignal.h ++++ b/src/include/storage/procsignal.h +@@ -17,6 +17,8 @@ + #include "storage/backendid.h" + + ++#define NUM_CUSTOM_PROCSIGNALS 64 ++ + /* + * Reasons for signalling a Postgres child process (a backend or an auxiliary + * process, like checkpointer). We can cope with concurrent signals for different +@@ -29,6 +31,8 @@ + */ + typedef enum + { ++ INVALID_PROCSIGNAL = -1, /* Must be first */ ++ + PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ + PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ +@@ -42,9 +46,20 @@ typedef enum + PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + ++ PROCSIG_CUSTOM_1, ++ /* ++ * PROCSIG_CUSTOM_2, ++ * ..., ++ * PROCSIG_CUSTOM_N-1, ++ */ ++ PROCSIG_CUSTOM_N = PROCSIG_CUSTOM_1 + NUM_CUSTOM_PROCSIGNALS - 1, ++ + NUM_PROCSIGNALS /* Must be last! 
*/ + } ProcSignalReason; + ++/* Handler of custom process signal */ ++typedef void (*ProcSignalHandler_type) (void); ++ + /* + * prototypes for functions in procsignal.c + */ +@@ -52,9 +67,13 @@ extern Size ProcSignalShmemSize(void); + extern void ProcSignalShmemInit(void); + + extern void ProcSignalInit(int pss_idx); ++extern ProcSignalReason ++ RegisterCustomProcSignalHandler(ProcSignalHandler_type handler); + extern int SendProcSignal(pid_t pid, ProcSignalReason reason, + BackendId backendId); + ++extern void CheckAndHandleCustomSignals(void); ++ + extern void procsignal_sigusr1_handler(SIGNAL_ARGS); + + #endif /* PROCSIGNAL_H */ diff --git a/patches/custom_signals_13.0.patch b/patches/custom_signals_13.0.patch new file mode 100644 index 0000000..266cba8 --- /dev/null +++ b/patches/custom_signals_13.0.patch @@ -0,0 +1,246 @@ +diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c +index 4fa385b0ece..60854eee386 100644 +--- a/src/backend/storage/ipc/procsignal.c ++++ b/src/backend/storage/ipc/procsignal.c +@@ -88,12 +88,21 @@ typedef struct + (((flags) & (((uint32) 1) << (uint32) (type))) != 0) + + static ProcSignalHeader *ProcSignal = NULL; ++#define IsCustomProcSignalReason(reason) \ ++ ((reason) >= PROCSIG_CUSTOM_1 && (reason) <= PROCSIG_CUSTOM_N) ++ ++static bool CustomSignalPendings[NUM_CUSTOM_PROCSIGNALS]; ++static bool CustomSignalProcessing[NUM_CUSTOM_PROCSIGNALS]; ++static ProcSignalHandler_type CustomInterruptHandlers[NUM_CUSTOM_PROCSIGNALS]; ++ + static volatile ProcSignalSlot *MyProcSignalSlot = NULL; + + static bool CheckProcSignal(ProcSignalReason reason); + static void CleanupProcSignalState(int status, Datum arg); + static void ProcessBarrierPlaceholder(void); + ++static void CheckAndSetCustomSignalInterrupts(void); ++ + /* + * ProcSignalShmemSize + * Compute space needed for procsignal's shared memory +@@ -235,6 +244,36 @@ CleanupProcSignalState(int status, Datum arg) + slot->pss_pid = 0; + } + ++/* ++ * RegisterCustomProcSignalHandler ++ * Assign specific handler of custom process signal with new ++ * ProcSignalReason key. ++ * ++ * This function has to be called in _PG_init function of extensions at the ++ * stage of loading shared preloaded libraries. Otherwise it throws fatal error. ++ * ++ * Return INVALID_PROCSIGNAL if all slots for custom signals are occupied. ++ */ ++ProcSignalReason ++RegisterCustomProcSignalHandler(ProcSignalHandler_type handler) ++{ ++ ProcSignalReason reason; ++ ++ if (!process_shared_preload_libraries_in_progress) ++ ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), ++ errmsg("cannot register custom signal after startup"))); ++ ++ /* Iterate through custom signal slots to find a free one */ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ if (!CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1]) ++ { ++ CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1] = handler; ++ return reason; ++ } ++ ++ return INVALID_PROCSIGNAL; ++} ++ + /* + * SendProcSignal + * Send a signal to a Postgres process +@@ -585,9 +624,71 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) + RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + ++ CheckAndSetCustomSignalInterrupts(); ++ + SetLatch(MyLatch); + + latch_sigusr1_handler(); + + errno = save_errno; + } ++ ++/* ++ * Handle receipt of an interrupt indicating any of custom process signals. 
++ */ ++static void ++CheckAndSetCustomSignalInterrupts() ++{ ++ ProcSignalReason reason; ++ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ { ++ if (CheckProcSignal(reason)) ++ { ++ ++ /* set interrupt flags */ ++ InterruptPending = true; ++ CustomSignalPendings[reason - PROCSIG_CUSTOM_1] = true; ++ } ++ } ++ ++ SetLatch(MyLatch); ++} ++ ++/* ++ * CheckAndHandleCustomSignals ++ * Check custom signal flags and call handler assigned to that signal ++ * if it is not NULL ++ * ++ * This function is called within CHECK_FOR_INTERRUPTS if interrupt occurred. ++ */ ++void ++CheckAndHandleCustomSignals(void) ++{ ++ int i; ++ ++ /* ++ * This is invoked from ProcessInterrupts(), and since some of the ++ * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential ++ * for recursive calls if more signals are received while this runs, so ++ * let's block interrupts until done. ++ */ ++ HOLD_INTERRUPTS(); ++ ++ for (i = 0; i < NUM_CUSTOM_PROCSIGNALS; i++) ++ if (!CustomSignalProcessing[i] && CustomSignalPendings[i]) ++ { ++ ProcSignalHandler_type handler; ++ ++ CustomSignalPendings[i] = false; ++ handler = CustomInterruptHandlers[i]; ++ if (handler != NULL) ++ { ++ CustomSignalProcessing[i] = true; ++ handler(); ++ CustomSignalProcessing[i] = false; ++ } ++ } ++ ++ RESUME_INTERRUPTS(); ++} +diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c +index 7bc03ae4edc..3debd63bd7d 100644 +--- a/src/backend/tcop/postgres.c ++++ b/src/backend/tcop/postgres.c +@@ -5,6 +5,7 @@ + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California ++ * Portions Copyright (c) 2020-2021, Postgres Professional + * + * + * IDENTIFICATION +@@ -74,6 +75,7 @@ + #include "tcop/pquery.h" + #include "tcop/tcopprot.h" + #include "tcop/utility.h" ++#include "utils/builtins.h" + #include "utils/lsyscache.h" + #include "utils/memutils.h" + #include "utils/ps_status.h" +@@ -3231,6 +3233,8 @@ ProcessInterrupts(void) + + if (ParallelMessagePending) + HandleParallelMessages(); ++ ++ CheckAndHandleCustomSignals(); + } + + +@@ -3576,7 +3580,7 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, + * postmaster/postmaster.c (the option sets should not conflict) and with + * the common help() function in main/main.c. + */ +- while ((flag = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:v:W:-:")) != -1) ++ while ((flag = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:v:W:Z-:")) != -1) + { + switch (flag) + { +@@ -3712,6 +3716,10 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, + SetConfigOption("post_auth_delay", optarg, ctx, gucsource); + break; + ++ case 'Z': ++ /* ignored for consistency with the postmaster */ ++ break; ++ + case 'c': + case '-': + { +diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h +index 5cb39697f38..c05f60fa719 100644 +--- a/src/include/storage/procsignal.h ++++ b/src/include/storage/procsignal.h +@@ -17,6 +17,8 @@ + #include "storage/backendid.h" + + ++#define NUM_CUSTOM_PROCSIGNALS 64 ++ + /* + * Reasons for signaling a Postgres child process (a backend or an auxiliary + * process, like checkpointer). 
We can cope with concurrent signals for different +@@ -29,6 +31,8 @@ + */ + typedef enum + { ++ INVALID_PROCSIGNAL = -1, /* Must be first */ ++ + PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ + PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ +@@ -43,6 +47,14 @@ typedef enum + PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + ++ PROCSIG_CUSTOM_1, ++ /* ++ * PROCSIG_CUSTOM_2, ++ * ..., ++ * PROCSIG_CUSTOM_N-1, ++ */ ++ PROCSIG_CUSTOM_N = PROCSIG_CUSTOM_1 + NUM_CUSTOM_PROCSIGNALS - 1, ++ + NUM_PROCSIGNALS /* Must be last! */ + } ProcSignalReason; + +@@ -55,6 +67,8 @@ typedef enum + */ + PROCSIGNAL_BARRIER_PLACEHOLDER = 0 + } ProcSignalBarrierType; ++/* Handler of custom process signal */ ++typedef void (*ProcSignalHandler_type) (void); + + /* + * prototypes for functions in procsignal.c +@@ -63,12 +77,15 @@ extern Size ProcSignalShmemSize(void); + extern void ProcSignalShmemInit(void); + + extern void ProcSignalInit(int pss_idx); ++extern ProcSignalReason ++ RegisterCustomProcSignalHandler(ProcSignalHandler_type handler); + extern int SendProcSignal(pid_t pid, ProcSignalReason reason, + BackendId backendId); + + extern uint64 EmitProcSignalBarrier(ProcSignalBarrierType type); + extern void WaitForProcSignalBarrier(uint64 generation); + extern void ProcessProcSignalBarrier(void); ++extern void CheckAndHandleCustomSignals(void); + + extern void procsignal_sigusr1_handler(SIGNAL_ARGS); + diff --git a/patches/custom_signals_14.0.patch b/patches/custom_signals_14.0.patch new file mode 100644 index 0000000..d02f2b5 --- /dev/null +++ b/patches/custom_signals_14.0.patch @@ -0,0 +1,256 @@ +diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c +index defb75aa26a..cd7d44977ca 100644 +--- a/src/backend/storage/ipc/procsignal.c ++++ b/src/backend/storage/ipc/procsignal.c +@@ -6,6 +6,7 @@ + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California ++ * Portions Copyright (c) 2021, Postgres Professional + * + * IDENTIFICATION + * src/backend/storage/ipc/procsignal.c +@@ -96,6 +97,13 @@ typedef struct + ((flags) &= ~(((uint32) 1) << (uint32) (type))) + + static ProcSignalHeader *ProcSignal = NULL; ++#define IsCustomProcSignalReason(reason) \ ++ ((reason) >= PROCSIG_CUSTOM_1 && (reason) <= PROCSIG_CUSTOM_N) ++ ++static bool CustomSignalPendings[NUM_CUSTOM_PROCSIGNALS]; ++static bool CustomSignalProcessing[NUM_CUSTOM_PROCSIGNALS]; ++static ProcSignalHandler_type CustomInterruptHandlers[NUM_CUSTOM_PROCSIGNALS]; ++ + static ProcSignalSlot *MyProcSignalSlot = NULL; + + static bool CheckProcSignal(ProcSignalReason reason); +@@ -103,6 +111,8 @@ static void CleanupProcSignalState(int status, Datum arg); + static void ResetProcSignalBarrierBits(uint32 flags); + static bool ProcessBarrierPlaceholder(void); + ++static void CheckAndSetCustomSignalInterrupts(void); ++ + /* + * ProcSignalShmemSize + * Compute space needed for procsignal's shared memory +@@ -246,6 +256,36 @@ CleanupProcSignalState(int status, Datum arg) + slot->pss_pid = 0; + } + ++/* ++ * RegisterCustomProcSignalHandler ++ * Assign specific handler of custom process signal with new ++ * ProcSignalReason key. ++ * ++ * This function has to be called in _PG_init function of extensions at the ++ * stage of loading shared preloaded libraries. Otherwise it throws fatal error. 
++ * ++ * Return INVALID_PROCSIGNAL if all slots for custom signals are occupied. ++ */ ++ProcSignalReason ++RegisterCustomProcSignalHandler(ProcSignalHandler_type handler) ++{ ++ ProcSignalReason reason; ++ ++ if (!process_shared_preload_libraries_in_progress) ++ ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), ++ errmsg("cannot register custom signal after startup"))); ++ ++ /* Iterate through custom signal slots to find a free one */ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ if (!CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1]) ++ { ++ CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1] = handler; ++ return reason; ++ } ++ ++ return INVALID_PROCSIGNAL; ++} ++ + /* + * SendProcSignal + * Send a signal to a Postgres process +@@ -679,7 +719,72 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) + RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + ++ CheckAndSetCustomSignalInterrupts(); ++ + SetLatch(MyLatch); + + errno = save_errno; + } ++ ++/* ++ * Handle receipt of an interrupt indicating any of custom process signals. ++ */ ++static void ++CheckAndSetCustomSignalInterrupts() ++{ ++ ProcSignalReason reason; ++ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ { ++ if (CheckProcSignal(reason)) ++ { ++ ++ /* set interrupt flags */ ++ InterruptPending = true; ++ CustomSignalPendings[reason - PROCSIG_CUSTOM_1] = true; ++ } ++ } ++ ++ SetLatch(MyLatch); ++} ++ ++/* ++ * CheckAndHandleCustomSignals ++ * Check custom signal flags and call handler assigned to that signal ++ * if it is not NULL ++ * ++ * This function is called within CHECK_FOR_INTERRUPTS if interrupt occurred. ++ */ ++void ++CheckAndHandleCustomSignals(void) ++{ ++ int i; ++ ++ /* ++ * This is invoked from ProcessInterrupts(), and since some of the ++ * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential ++ * for recursive calls if more signals are received while this runs, so ++ * let's block interrupts until done. 
++ */ ++ HOLD_INTERRUPTS(); ++ ++ /* Check on expiring of custom signals and call its handlers if exist */ ++ for (i = 0; i < NUM_CUSTOM_PROCSIGNALS; i++) ++ { ++ if (!CustomSignalProcessing[i] && CustomSignalPendings[i]) ++ { ++ ProcSignalHandler_type handler; ++ ++ CustomSignalPendings[i] = false; ++ handler = CustomInterruptHandlers[i]; ++ if (handler != NULL) ++ { ++ CustomSignalProcessing[i] = true; ++ handler(); ++ CustomSignalProcessing[i] = false; ++ } ++ } ++ } ++ ++ RESUME_INTERRUPTS(); ++} +diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c +index 171f3a95006..e6fe26fb19a 100644 +--- a/src/backend/tcop/postgres.c ++++ b/src/backend/tcop/postgres.c +@@ -5,6 +5,7 @@ + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California ++ * Portions Copyright (c) 2021, Postgres Professional + * + * + * IDENTIFICATION +@@ -75,6 +76,7 @@ + #include "tcop/pquery.h" + #include "tcop/tcopprot.h" + #include "tcop/utility.h" ++#include "utils/builtins.h" + #include "utils/lsyscache.h" + #include "utils/memutils.h" + #include "utils/ps_status.h" +@@ -3366,6 +3368,8 @@ ProcessInterrupts(void) + + if (LogMemoryContextPending) + ProcessLogMemoryContextInterrupt(); ++ ++ CheckAndHandleCustomSignals(); + } + + +@@ -3711,7 +3715,7 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, + * postmaster/postmaster.c (the option sets should not conflict) and with + * the common help() function in main/main.c. + */ +- while ((flag = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:v:W:-:")) != -1) ++ while ((flag = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:v:W:Z-:")) != -1) + { + switch (flag) + { +@@ -3843,6 +3847,10 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, + SetConfigOption("post_auth_delay", optarg, ctx, gucsource); + break; + ++ case 'Z': ++ /* ignored for consistency with the postmaster */ ++ break; ++ + case 'c': + case '-': + { +diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h +index eec186be2ee..74af186bf53 100644 +--- a/src/include/storage/procsignal.h ++++ b/src/include/storage/procsignal.h +@@ -17,6 +17,8 @@ + #include "storage/backendid.h" + + ++#define NUM_CUSTOM_PROCSIGNALS 64 ++ + /* + * Reasons for signaling a Postgres child process (a backend or an auxiliary + * process, like checkpointer). We can cope with concurrent signals for different +@@ -29,6 +31,8 @@ + */ + typedef enum + { ++ INVALID_PROCSIGNAL = -1, /* Must be first */ ++ + PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ + PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ +@@ -44,6 +48,14 @@ typedef enum + PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + ++ PROCSIG_CUSTOM_1, ++ /* ++ * PROCSIG_CUSTOM_2, ++ * ..., ++ * PROCSIG_CUSTOM_N-1, ++ */ ++ PROCSIG_CUSTOM_N = PROCSIG_CUSTOM_1 + NUM_CUSTOM_PROCSIGNALS - 1, ++ + NUM_PROCSIGNALS /* Must be last! 
*/ + } ProcSignalReason; + +@@ -56,6 +68,8 @@ typedef enum + */ + PROCSIGNAL_BARRIER_PLACEHOLDER = 0 + } ProcSignalBarrierType; ++/* Handler of custom process signal */ ++typedef void (*ProcSignalHandler_type) (void); + + /* + * prototypes for functions in procsignal.c +@@ -64,12 +78,15 @@ extern Size ProcSignalShmemSize(void); + extern void ProcSignalShmemInit(void); + + extern void ProcSignalInit(int pss_idx); ++extern ProcSignalReason ++ RegisterCustomProcSignalHandler(ProcSignalHandler_type handler); + extern int SendProcSignal(pid_t pid, ProcSignalReason reason, + BackendId backendId); + + extern uint64 EmitProcSignalBarrier(ProcSignalBarrierType type); + extern void WaitForProcSignalBarrier(uint64 generation); + extern void ProcessProcSignalBarrier(void); ++extern void CheckAndHandleCustomSignals(void); + + extern void procsignal_sigusr1_handler(SIGNAL_ARGS); + diff --git a/patches/custom_signals_15.0.patch b/patches/custom_signals_15.0.patch new file mode 100644 index 0000000..4e99c69 --- /dev/null +++ b/patches/custom_signals_15.0.patch @@ -0,0 +1,217 @@ +diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c +index defb75a..4245d28 100644 +--- a/src/backend/storage/ipc/procsignal.c ++++ b/src/backend/storage/ipc/procsignal.c +@@ -96,6 +96,13 @@ typedef struct + #define BARRIER_CLEAR_BIT(flags, type) \ + ((flags) &= ~(((uint32) 1) << (uint32) (type))) + ++#define IsCustomProcSignalReason(reason) \ ++ ((reason) >= PROCSIG_CUSTOM_1 && (reason) <= PROCSIG_CUSTOM_N) ++ ++static bool CustomSignalPendings[NUM_CUSTOM_PROCSIGNALS]; ++static bool CustomSignalProcessing[NUM_CUSTOM_PROCSIGNALS]; ++static ProcSignalHandler_type CustomInterruptHandlers[NUM_CUSTOM_PROCSIGNALS]; ++ + static ProcSignalHeader *ProcSignal = NULL; + static ProcSignalSlot *MyProcSignalSlot = NULL; + +@@ -103,6 +110,8 @@ static bool CheckProcSignal(ProcSignalReason reason); + static void CleanupProcSignalState(int status, Datum arg); + static void ResetProcSignalBarrierBits(uint32 flags); + ++static void CheckAndSetCustomSignalInterrupts(void); ++ + /* + * ProcSignalShmemSize + * Compute space needed for ProcSignal's shared memory +@@ -246,6 +255,36 @@ CleanupProcSignalState(int status, Datum arg) + slot->pss_pid = 0; + } + ++/* ++ * RegisterCustomProcSignalHandler ++ * Assign specific handler of custom process signal with new ++ * ProcSignalReason key. ++ * ++ * This function has to be called in _PG_init function of extensions at the ++ * stage of loading shared preloaded libraries. Otherwise it throws fatal error. ++ * ++ * Return INVALID_PROCSIGNAL if all slots for custom signals are occupied. 
++ */ ++ProcSignalReason ++RegisterCustomProcSignalHandler(ProcSignalHandler_type handler) ++{ ++ ProcSignalReason reason; ++ ++ if (!process_shared_preload_libraries_in_progress) ++ ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), ++ errmsg("cannot register custom signal after startup"))); ++ ++ /* Iterate through custom signal slots to find a free one */ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ if (!CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1]) ++ { ++ CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1] = handler; ++ return reason; ++ } ++ ++ return INVALID_PROCSIGNAL; ++} ++ + /* + * SendProcSignal + * Send a signal to a Postgres process +@@ -675,7 +714,72 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) + RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + ++ CheckAndSetCustomSignalInterrupts(); ++ + SetLatch(MyLatch); + + errno = save_errno; + } ++ ++/* ++ * Handle receipt of an interrupt indicating any of custom process signals. ++ */ ++static void ++CheckAndSetCustomSignalInterrupts() ++{ ++ ProcSignalReason reason; ++ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ { ++ if (CheckProcSignal(reason)) ++ { ++ ++ /* set interrupt flags */ ++ InterruptPending = true; ++ CustomSignalPendings[reason - PROCSIG_CUSTOM_1] = true; ++ } ++ } ++ ++ SetLatch(MyLatch); ++} ++ ++/* ++ * CheckAndHandleCustomSignals ++ * Check custom signal flags and call handler assigned to that signal ++ * if it is not NULL ++ * ++ * This function is called within CHECK_FOR_INTERRUPTS if interrupt occurred. ++ */ ++void ++CheckAndHandleCustomSignals(void) ++{ ++ int i; ++ ++ /* ++ * This is invoked from ProcessInterrupts(), and since some of the ++ * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential ++ * for recursive calls if more signals are received while this runs, so ++ * let's block interrupts until done. ++ */ ++ HOLD_INTERRUPTS(); ++ ++ /* Check on expiring of custom signals and call its handlers if exist */ ++ for (i = 0; i < NUM_CUSTOM_PROCSIGNALS; i++) ++ { ++ if (!CustomSignalProcessing[i] && CustomSignalPendings[i]) ++ { ++ ProcSignalHandler_type handler; ++ ++ CustomSignalPendings[i] = false; ++ handler = CustomInterruptHandlers[i]; ++ if (handler != NULL) ++ { ++ CustomSignalProcessing[i] = true; ++ handler(); ++ CustomSignalProcessing[i] = false; ++ } ++ } ++ } ++ ++ RESUME_INTERRUPTS(); ++} ++ +diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c +index 8cea10c..dd77c98 100644 +--- a/src/backend/tcop/postgres.c ++++ b/src/backend/tcop/postgres.c +@@ -3402,6 +3402,8 @@ ProcessInterrupts(void) + if (ParallelMessagePending) + HandleParallelMessages(); + ++ CheckAndHandleCustomSignals(); ++ + if (LogMemoryContextPending) + ProcessLogMemoryContextInterrupt(); + } +diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h +index eec186b..74af186 100644 +--- a/src/include/storage/procsignal.h ++++ b/src/include/storage/procsignal.h +@@ -17,6 +17,8 @@ + #include "storage/backendid.h" + + ++#define NUM_CUSTOM_PROCSIGNALS 64 ++ + /* + * Reasons for signaling a Postgres child process (a backend or an auxiliary + * process, like checkpointer). 
We can cope with concurrent signals for different +@@ -29,6 +31,8 @@ + */ + typedef enum + { ++ INVALID_PROCSIGNAL = -1, /* Must be first */ ++ + PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ + PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ +@@ -44,6 +48,14 @@ typedef enum + PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + ++ PROCSIG_CUSTOM_1, ++ /* ++ * PROCSIG_CUSTOM_2, ++ * ..., ++ * PROCSIG_CUSTOM_N-1, ++ */ ++ PROCSIG_CUSTOM_N = PROCSIG_CUSTOM_1 + NUM_CUSTOM_PROCSIGNALS - 1, ++ + NUM_PROCSIGNALS /* Must be last! */ + } ProcSignalReason; + +@@ -51,6 +63,9 @@ typedef enum + { + PROCSIGNAL_BARRIER_SMGRRELEASE /* ask smgr to close files */ + } ProcSignalBarrierType; ++ ++/* Handler of custom process signal */ ++typedef void (*ProcSignalHandler_type) (void); + + /* + * prototypes for functions in procsignal.c +@@ -59,12 +74,15 @@ extern Size ProcSignalShmemSize(void); + extern void ProcSignalShmemInit(void); + + extern void ProcSignalInit(int pss_idx); ++extern ProcSignalReason ++ RegisterCustomProcSignalHandler(ProcSignalHandler_type handler); + extern int SendProcSignal(pid_t pid, ProcSignalReason reason, + BackendId backendId); + + extern uint64 EmitProcSignalBarrier(ProcSignalBarrierType type); + extern void WaitForProcSignalBarrier(uint64 generation); + extern void ProcessProcSignalBarrier(void); ++extern void CheckAndHandleCustomSignals(void); + + extern void procsignal_sigusr1_handler(SIGNAL_ARGS); + +-- +2.25.1 + diff --git a/patches/custom_signals_16.0.patch b/patches/custom_signals_16.0.patch new file mode 100644 index 0000000..3a2183f --- /dev/null +++ b/patches/custom_signals_16.0.patch @@ -0,0 +1,229 @@ +diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c +index c85cb5cc18..37ae4b3759 100644 +--- a/src/backend/storage/ipc/procsignal.c ++++ b/src/backend/storage/ipc/procsignal.c +@@ -6,6 +6,7 @@ + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California ++ * Portions Copyright (c) 2024, Postgres Professional + * + * IDENTIFICATION + * src/backend/storage/ipc/procsignal.c +@@ -97,6 +98,13 @@ typedef struct + #define BARRIER_CLEAR_BIT(flags, type) \ + ((flags) &= ~(((uint32) 1) << (uint32) (type))) + ++#define IsCustomProcSignalReason(reason) \ ++ ((reason) >= PROCSIG_CUSTOM_1 && (reason) <= PROCSIG_CUSTOM_N) ++ ++static bool CustomSignalPendings[NUM_CUSTOM_PROCSIGNALS]; ++static bool CustomSignalProcessing[NUM_CUSTOM_PROCSIGNALS]; ++static ProcSignalHandler_type CustomInterruptHandlers[NUM_CUSTOM_PROCSIGNALS]; ++ + static ProcSignalHeader *ProcSignal = NULL; + static ProcSignalSlot *MyProcSignalSlot = NULL; + +@@ -104,6 +112,8 @@ static bool CheckProcSignal(ProcSignalReason reason); + static void CleanupProcSignalState(int status, Datum arg); + static void ResetProcSignalBarrierBits(uint32 flags); + ++static void CheckAndSetCustomSignalInterrupts(void); ++ + /* + * ProcSignalShmemSize + * Compute space needed for ProcSignal's shared memory +@@ -247,6 +257,36 @@ CleanupProcSignalState(int status, Datum arg) + slot->pss_pid = 0; + } + ++/* ++ * RegisterCustomProcSignalHandler ++ * Assign specific handler of custom process signal with new ++ * ProcSignalReason key. ++ * ++ * This function has to be called in _PG_init function of extensions at the ++ * stage of loading shared preloaded libraries. 
Otherwise it throws fatal error. ++ * ++ * Return INVALID_PROCSIGNAL if all slots for custom signals are occupied. ++ */ ++ProcSignalReason ++RegisterCustomProcSignalHandler(ProcSignalHandler_type handler) ++{ ++ ProcSignalReason reason; ++ ++ if (!process_shared_preload_libraries_in_progress) ++ ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), ++ errmsg("cannot register custom signal after startup"))); ++ ++ /* Iterate through custom signal slots to find a free one */ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ if (!CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1]) ++ { ++ CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1] = handler; ++ return reason; ++ } ++ ++ return INVALID_PROCSIGNAL; ++} ++ + /* + * SendProcSignal + * Send a signal to a Postgres process +@@ -682,7 +722,72 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) + RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + ++ CheckAndSetCustomSignalInterrupts(); ++ + SetLatch(MyLatch); + + errno = save_errno; + } ++ ++/* ++ * Handle receipt of an interrupt indicating any of custom process signals. ++ */ ++static void ++CheckAndSetCustomSignalInterrupts() ++{ ++ ProcSignalReason reason; ++ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ { ++ if (CheckProcSignal(reason)) ++ { ++ ++ /* set interrupt flags */ ++ InterruptPending = true; ++ CustomSignalPendings[reason - PROCSIG_CUSTOM_1] = true; ++ } ++ } ++ ++ SetLatch(MyLatch); ++} ++ ++/* ++ * CheckAndHandleCustomSignals ++ * Check custom signal flags and call handler assigned to that signal ++ * if it is not NULL ++ * ++ * This function is called within CHECK_FOR_INTERRUPTS if interrupt occurred. ++ */ ++void ++CheckAndHandleCustomSignals(void) ++{ ++ int i; ++ ++ /* ++ * This is invoked from ProcessInterrupts(), and since some of the ++ * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential ++ * for recursive calls if more signals are received while this runs, so ++ * let's block interrupts until done. 
++ */ ++ HOLD_INTERRUPTS(); ++ ++ /* Check on expiring of custom signals and call its handlers if exist */ ++ for (i = 0; i < NUM_CUSTOM_PROCSIGNALS; i++) ++ { ++ if (!CustomSignalProcessing[i] && CustomSignalPendings[i]) ++ { ++ ProcSignalHandler_type handler; ++ ++ CustomSignalPendings[i] = false; ++ handler = CustomInterruptHandlers[i]; ++ if (handler != NULL) ++ { ++ CustomSignalProcessing[i] = true; ++ handler(); ++ CustomSignalProcessing[i] = false; ++ } ++ } ++ } ++ ++ RESUME_INTERRUPTS(); ++} +diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c +index 36cc99ec9c..a3acce427a 100644 +--- a/src/backend/tcop/postgres.c ++++ b/src/backend/tcop/postgres.c +@@ -3442,6 +3442,8 @@ ProcessInterrupts(void) + if (ParallelMessagePending) + HandleParallelMessages(); + ++ CheckAndHandleCustomSignals(); ++ + if (LogMemoryContextPending) + ProcessLogMemoryContextInterrupt(); + +diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h +index 2f52100b00..0e31a5771e 100644 +--- a/src/include/storage/procsignal.h ++++ b/src/include/storage/procsignal.h +@@ -6,6 +6,7 @@ + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California ++ * Portions Copyright (c) 2024, Postgres Professional + * + * src/include/storage/procsignal.h + * +@@ -17,6 +18,8 @@ + #include "storage/backendid.h" + + ++#define NUM_CUSTOM_PROCSIGNALS 64 ++ + /* + * Reasons for signaling a Postgres child process (a backend or an auxiliary + * process, like checkpointer). We can cope with concurrent signals for different +@@ -29,6 +32,8 @@ + */ + typedef enum + { ++ INVALID_PROCSIGNAL = -1, /* Must be first */ ++ + PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ + PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ +@@ -46,6 +51,14 @@ typedef enum + PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + ++ PROCSIG_CUSTOM_1, ++ /* ++ * PROCSIG_CUSTOM_2, ++ * ..., ++ * PROCSIG_CUSTOM_N-1, ++ */ ++ PROCSIG_CUSTOM_N = PROCSIG_CUSTOM_1 + NUM_CUSTOM_PROCSIGNALS - 1, ++ + NUM_PROCSIGNALS /* Must be last! 
*/ + } ProcSignalReason; + +@@ -54,6 +67,9 @@ typedef enum + PROCSIGNAL_BARRIER_SMGRRELEASE /* ask smgr to close files */ + } ProcSignalBarrierType; + ++/* Handler of custom process signal */ ++typedef void (*ProcSignalHandler_type) (void); ++ + /* + * prototypes for functions in procsignal.c + */ +@@ -61,12 +77,15 @@ extern Size ProcSignalShmemSize(void); + extern void ProcSignalShmemInit(void); + + extern void ProcSignalInit(int pss_idx); ++extern ProcSignalReason ++ RegisterCustomProcSignalHandler(ProcSignalHandler_type handler); + extern int SendProcSignal(pid_t pid, ProcSignalReason reason, + BackendId backendId); + + extern uint64 EmitProcSignalBarrier(ProcSignalBarrierType type); + extern void WaitForProcSignalBarrier(uint64 generation); + extern void ProcessProcSignalBarrier(void); ++extern void CheckAndHandleCustomSignals(void); + + extern void procsignal_sigusr1_handler(SIGNAL_ARGS); + diff --git a/patches/custom_signals_17.0.patch b/patches/custom_signals_17.0.patch new file mode 100644 index 0000000..d227104 --- /dev/null +++ b/patches/custom_signals_17.0.patch @@ -0,0 +1,227 @@ +diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c +index 4ed9ced..6e70892 100644 +--- a/src/backend/storage/ipc/procsignal.c ++++ b/src/backend/storage/ipc/procsignal.c +@@ -6,6 +6,7 @@ + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California ++ * Portions Copyright (c) 2024, Postgres Professional + * + * IDENTIFICATION + * src/backend/storage/ipc/procsignal.c +@@ -96,6 +97,13 @@ typedef struct + #define BARRIER_CLEAR_BIT(flags, type) \ + ((flags) &= ~(((uint32) 1) << (uint32) (type))) + ++#define IsCustomProcSignalReason(reason) \ ++ ((reason) >= PROCSIG_CUSTOM_1 && (reason) <= PROCSIG_CUSTOM_N) ++ ++static bool CustomSignalPendings[NUM_CUSTOM_PROCSIGNALS]; ++static bool CustomSignalProcessing[NUM_CUSTOM_PROCSIGNALS]; ++static ProcSignalHandler_type CustomInterruptHandlers[NUM_CUSTOM_PROCSIGNALS]; ++ + static ProcSignalHeader *ProcSignal = NULL; + static ProcSignalSlot *MyProcSignalSlot = NULL; + +@@ -103,6 +111,8 @@ static bool CheckProcSignal(ProcSignalReason reason); + static void CleanupProcSignalState(int status, Datum arg); + static void ResetProcSignalBarrierBits(uint32 flags); + ++static void CheckAndSetCustomSignalInterrupts(void); ++ + /* + * ProcSignalShmemSize + * Compute space needed for ProcSignal's shared memory +@@ -242,6 +252,36 @@ CleanupProcSignalState(int status, Datum arg) + slot->pss_pid = 0; + } + ++/* ++ * RegisterCustomProcSignalHandler ++ * Assign specific handler of custom process signal with new ++ * ProcSignalReason key. ++ * ++ * This function has to be called in _PG_init function of extensions at the ++ * stage of loading shared preloaded libraries. Otherwise it throws fatal error. ++ * ++ * Return INVALID_PROCSIGNAL if all slots for custom signals are occupied. 
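++ *
++ * A minimal usage sketch (illustrative only; the names my_reason and
++ * my_handler are hypothetical, not part of this patch):
++ *
++ *		static ProcSignalReason my_reason;
++ *
++ *		static void
++ *		my_handler(void)
++ *		{
++ *			elog(LOG, "custom signal received");
++ *		}
++ *
++ *		void
++ *		_PG_init(void)
++ *		{
++ *			my_reason = RegisterCustomProcSignalHandler(my_handler);
++ *			if (my_reason == INVALID_PROCSIGNAL)
++ *				elog(ERROR, "no free custom signal slot");
++ *		}
++ *
++ * Another backend can then be signalled with
++ * SendProcSignal(pid, my_reason, procNumber).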
++ */ ++ProcSignalReason ++RegisterCustomProcSignalHandler(ProcSignalHandler_type handler) ++{ ++ ProcSignalReason reason; ++ ++ if (!process_shared_preload_libraries_in_progress) ++ ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), ++ errmsg("cannot register custom signal after startup"))); ++ ++ /* Iterate through custom signal slots to find a free one */ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ if (!CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1]) ++ { ++ CustomInterruptHandlers[reason - PROCSIG_CUSTOM_1] = handler; ++ return reason; ++ } ++ ++ return INVALID_PROCSIGNAL; ++} ++ + /* + * SendProcSignal + * Send a signal to a Postgres process +@@ -676,5 +716,70 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) + HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + ++ CheckAndSetCustomSignalInterrupts(); ++ + SetLatch(MyLatch); + } ++ ++/* ++ * Handle receipt of an interrupt indicating any of custom process signals. ++ */ ++static void ++CheckAndSetCustomSignalInterrupts() ++{ ++ ProcSignalReason reason; ++ ++ for (reason = PROCSIG_CUSTOM_1; reason <= PROCSIG_CUSTOM_N; reason++) ++ { ++ if (CheckProcSignal(reason)) ++ { ++ ++ /* set interrupt flags */ ++ InterruptPending = true; ++ CustomSignalPendings[reason - PROCSIG_CUSTOM_1] = true; ++ } ++ } ++ ++ SetLatch(MyLatch); ++} ++ ++/* ++ * CheckAndHandleCustomSignals ++ * Check custom signal flags and call handler assigned to that signal ++ * if it is not NULL ++ * ++ * This function is called within CHECK_FOR_INTERRUPTS if interrupt occurred. ++ */ ++void ++CheckAndHandleCustomSignals(void) ++{ ++ int i; ++ ++ /* ++ * This is invoked from ProcessInterrupts(), and since some of the ++ * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential ++ * for recursive calls if more signals are received while this runs, so ++ * let's block interrupts until done. ++ */ ++ HOLD_INTERRUPTS(); ++ ++ /* Check on expiring of custom signals and call its handlers if exist */ ++ for (i = 0; i < NUM_CUSTOM_PROCSIGNALS; i++) ++ { ++ if (!CustomSignalProcessing[i] && CustomSignalPendings[i]) ++ { ++ ProcSignalHandler_type handler; ++ ++ CustomSignalPendings[i] = false; ++ handler = CustomInterruptHandlers[i]; ++ if (handler != NULL) ++ { ++ CustomSignalProcessing[i] = true; ++ handler(); ++ CustomSignalProcessing[i] = false; ++ } ++ } ++ } ++ ++ RESUME_INTERRUPTS(); ++} +diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c +index a750dc8..e1b0be5 100644 +--- a/src/backend/tcop/postgres.c ++++ b/src/backend/tcop/postgres.c +@@ -3492,6 +3492,8 @@ ProcessInterrupts(void) + if (ParallelMessagePending) + HandleParallelMessages(); + ++ CheckAndHandleCustomSignals(); ++ + if (LogMemoryContextPending) + ProcessLogMemoryContextInterrupt(); + +diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h +index 7d290ea..f262f0c 100644 +--- a/src/include/storage/procsignal.h ++++ b/src/include/storage/procsignal.h +@@ -6,6 +6,7 @@ + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California ++ * Portions Copyright (c) 2024, Postgres Professional + * + * src/include/storage/procsignal.h + * +@@ -17,6 +18,8 @@ + #include "storage/procnumber.h" + + ++#define NUM_CUSTOM_PROCSIGNALS 64 ++ + /* + * Reasons for signaling a Postgres child process (a backend or an auxiliary + * process, like checkpointer). 
We can cope with concurrent signals for different +@@ -29,6 +32,8 @@ + */ + typedef enum + { ++ INVALID_PROCSIGNAL = -1, /* Must be first */ ++ + PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */ + PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */ + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ +@@ -37,6 +42,14 @@ typedef enum + PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */ + PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */ + ++ PROCSIG_CUSTOM_1, ++ /* ++ * PROCSIG_CUSTOM_2, ++ * ..., ++ * PROCSIG_CUSTOM_N-1, ++ */ ++ PROCSIG_CUSTOM_N = PROCSIG_CUSTOM_1 + NUM_CUSTOM_PROCSIGNALS - 1, ++ + /* Recovery conflict reasons */ + PROCSIG_RECOVERY_CONFLICT_FIRST, + PROCSIG_RECOVERY_CONFLICT_DATABASE = PROCSIG_RECOVERY_CONFLICT_FIRST, +@@ -56,6 +69,9 @@ typedef enum + PROCSIGNAL_BARRIER_SMGRRELEASE, /* ask smgr to close files */ + } ProcSignalBarrierType; + ++/* Handler of custom process signal */ ++typedef void (*ProcSignalHandler_type) (void); ++ + /* + * prototypes for functions in procsignal.c + */ +@@ -63,12 +79,15 @@ extern Size ProcSignalShmemSize(void); + extern void ProcSignalShmemInit(void); + + extern void ProcSignalInit(void); ++extern ProcSignalReason ++ RegisterCustomProcSignalHandler(ProcSignalHandler_type handler); + extern int SendProcSignal(pid_t pid, ProcSignalReason reason, + ProcNumber procNumber); + + extern uint64 EmitProcSignalBarrier(ProcSignalBarrierType type); + extern void WaitForProcSignalBarrier(uint64 generation); + extern void ProcessProcSignalBarrier(void); ++extern void CheckAndHandleCustomSignals(void); + + extern void procsignal_sigusr1_handler(SIGNAL_ARGS); + diff --git a/patches/runtime_explain_11.0.patch b/patches/runtime_explain_11.0.patch index dddbcbe..9d12d5b 100644 --- a/patches/runtime_explain_11.0.patch +++ b/patches/runtime_explain_11.0.patch @@ -209,10 +209,9 @@ index 16a80a0ea1..b12906b005 100644 /* count the number of source rows */ - total = mtstate->mt_plans[0]->instrument->ntuples; -- other_path = mtstate->ps.instrument->ntuples2; + other_path = mtstate->ps.instrument->ntuples2; - insert_path = total - other_path; -+ other_path = mtstate->ps.instrument->nfiltered2; -+ + + /* + * Insert occurs after extracting row from subplan and in runtime mode + * we can appear between these two operations - situation when @@ -227,7 +226,7 @@ index 16a80a0ea1..b12906b005 100644 + insert_path = total - other_path; + ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es); + } - ++ - ExplainPropertyFloat("Tuples Inserted", NULL, - insert_path, 0, es); ExplainPropertyFloat("Conflicting Tuples", NULL, diff --git a/patches/runtime_explain_12.0.patch b/patches/runtime_explain_12.0.patch new file mode 100644 index 0000000..9aa8397 --- /dev/null +++ b/patches/runtime_explain_12.0.patch @@ -0,0 +1,263 @@ +From 8ac7d540edcb2dccc663e73ce7e856273f60fbe6 Mon Sep 17 00:00:00 2001 +From: Alexey Kondratov +Date: Tue, 24 Sep 2019 18:49:11 +0300 +Subject: [PATCH] EXPLAIN: print intermediate state of query execution + +EXPLAIN core patch required for pg_query_state +--- + src/backend/commands/explain.c | 152 +++++++++++++++++++++++++++------ + src/include/commands/explain.h | 2 + + 2 files changed, 129 insertions(+), 25 deletions(-) + +diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c +index 92969636b75..fab4267a2c1 100644 +--- a/src/backend/commands/explain.c ++++ b/src/backend/commands/explain.c +@@ -919,15 +919,36 @@ report_triggers(ResultRelInfo 
*rInfo, bool show_relname, ExplainState *es) + Instrumentation *instr = rInfo->ri_TrigInstrument + nt; + char *relname; + char *conname = NULL; ++ instr_time starttimespan; ++ double total; ++ double ntuples; ++ double ncalls; + ++ if (!es->runtime) ++ { + /* Must clean up instrumentation state */ + InstrEndLoop(instr); ++ } ++ ++ /* Collect statistic variables */ ++ if (!INSTR_TIME_IS_ZERO(instr->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, instr->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ ++ total = instr->total + INSTR_TIME_GET_DOUBLE(instr->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan); ++ ntuples = instr->ntuples + instr->tuplecount; ++ ncalls = ntuples + !INSTR_TIME_IS_ZERO(starttimespan); + + /* + * We ignore triggers that were never invoked; they likely aren't + * relevant to the current query type. + */ +- if (instr->ntuples == 0) ++ if (ncalls == 0) + continue; + + ExplainOpenGroup("Trigger", NULL, true, es); +@@ -953,9 +974,9 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + appendStringInfo(es->str, " on %s", relname); + if (es->timing) + appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", +- 1000.0 * instr->total, instr->ntuples); ++ 1000.0 * total, ncalls); + else +- appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples); ++ appendStringInfo(es->str, ": calls=%.0f\n", ncalls); + } + else + { +@@ -964,9 +985,8 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + ExplainPropertyText("Constraint Name", conname, es); + ExplainPropertyText("Relation", relname, es); + if (es->timing) +- ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3, +- es); +- ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es); ++ ExplainPropertyFloat("Time", "ms", 1000.0 * total, 3, es); ++ ExplainPropertyFloat("Calls", NULL, ncalls, 0, es); + } + + if (conname) +@@ -1501,8 +1521,11 @@ ExplainNode(PlanState *planstate, List *ancestors, + * instrumentation results the user didn't ask for. But we do the + * InstrEndLoop call anyway, if possible, to reduce the number of cases + * auto_explain has to contend with. ++ * ++ * If flag es->stateinfo is set, i.e. when printing the current execution ++ * state, this step of cleaning up is missed. + */ +- if (planstate->instrument) ++ if (planstate->instrument && !es->runtime) + InstrEndLoop(planstate->instrument); + + if (es->analyze && +@@ -1537,7 +1560,7 @@ ExplainNode(PlanState *planstate, List *ancestors, + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + } +- else if (es->analyze) ++ else if (es->analyze && !es->runtime) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, " (never executed)"); +@@ -1553,6 +1576,75 @@ ExplainNode(PlanState *planstate, List *ancestors, + } + } + ++ /* ++ * Print the progress of node execution at current loop. 
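++	 * In text format this appends a fragment such as (values illustrative):
++	 *		(Current loop: actual time=0.052..1.907 rows=1000, loop number=2)
++	 * to the node line while the node is still executing.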
++ */ ++ if (planstate->instrument && es->analyze && es->runtime) ++ { ++ instr_time starttimespan; ++ double startup_sec; ++ double total_sec; ++ double rows; ++ double loop_num; ++ bool finished; ++ ++ if (!INSTR_TIME_IS_ZERO(planstate->instrument->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, planstate->instrument->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ startup_sec = 1000.0 * planstate->instrument->firsttuple; ++ total_sec = 1000.0 * (INSTR_TIME_GET_DOUBLE(planstate->instrument->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan)); ++ rows = planstate->instrument->tuplecount; ++ loop_num = planstate->instrument->nloops + 1; ++ ++ finished = planstate->instrument->nloops > 0 ++ && !planstate->instrument->running ++ && INSTR_TIME_IS_ZERO(starttimespan); ++ ++ if (!finished) ++ { ++ ExplainOpenGroup("Current loop", "Current loop", true, es); ++ if (es->format == EXPLAIN_FORMAT_TEXT) ++ { ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ appendStringInfo(es->str, ++ " (Current loop: actual time=%.3f..%.3f rows=%.0f, loop number=%.0f)", ++ startup_sec, total_sec, rows, loop_num); ++ else ++ appendStringInfo(es->str, ++ " (Current loop: running time=%.3f actual rows=0, loop number=%.0f)", ++ total_sec, loop_num); ++ } ++ else ++ appendStringInfo(es->str, ++ " (Current loop: actual rows=%.0f, loop number=%.0f)", ++ rows, loop_num); ++ } ++ else ++ { ++ ExplainPropertyFloat("Actual Loop Number", NULL, loop_num, 0, es); ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ { ++ ExplainPropertyFloat("Actual Startup Time", NULL, startup_sec, 3, es); ++ ExplainPropertyFloat("Actual Total Time", NULL, total_sec, 3, es); ++ } ++ else ++ ExplainPropertyFloat("Running Time", NULL, total_sec, 3, es); ++ } ++ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); ++ } ++ ExplainCloseGroup("Current loop", "Current loop", true, es); ++ } ++ } ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); +@@ -1868,8 +1960,9 @@ ExplainNode(PlanState *planstate, List *ancestors, + if (es->buffers && planstate->instrument) + show_buffer_usage(es, &planstate->instrument->bufusage); + +- /* Show worker detail */ +- if (es->analyze && es->verbose && planstate->worker_instrument) ++ /* Show worker detail after query execution */ ++ if (es->analyze && es->verbose && planstate->worker_instrument ++ && !es->runtime) + { + WorkerInstrumentation *w = planstate->worker_instrument; + bool opened_group = false; +@@ -2758,20 +2851,17 @@ show_instrumentation_count(const char *qlabel, int which, + if (!es->analyze || !planstate->instrument) + return; + ++ nloops = planstate->instrument->nloops; + if (which == 2) +- nfiltered = planstate->instrument->nfiltered2; ++ nfiltered = ((nloops > 0) ? planstate->instrument->nfiltered2 / nloops : ++ 0); + else +- nfiltered = planstate->instrument->nfiltered1; +- nloops = planstate->instrument->nloops; ++ nfiltered = ((nloops > 0) ? 
planstate->instrument->nfiltered1 / nloops : ++ 0); + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) +- { +- if (nloops > 0) +- ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es); +- else +- ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es); +- } ++ ExplainPropertyFloat(qlabel, NULL, nfiltered, 0, es); + } + + /* +@@ -3290,15 +3380,27 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + double insert_path; + double other_path; + +- InstrEndLoop(mtstate->mt_plans[0]->instrument); ++ if (!es->runtime) ++ InstrEndLoop(mtstate->mt_plans[0]->instrument); + + /* count the number of source rows */ +- total = mtstate->mt_plans[0]->instrument->ntuples; + other_path = mtstate->ps.instrument->ntuples2; +- insert_path = total - other_path; + ++ /* ++ * Insert occurs after extracting row from subplan and in runtime mode ++ * we can appear between these two operations - situation when ++ * total > insert_path + other_path. Therefore we don't know exactly ++ * whether last row from subplan is inserted. ++ * We don't print inserted tuples in runtime mode in order to not print ++ * inconsistent data ++ */ ++ if (!es->runtime) ++ { ++ total = mtstate->mt_plans[0]->instrument->ntuples; ++ insert_path = total - other_path; ++ ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es); ++ } ++ +- ExplainPropertyFloat("Tuples Inserted", NULL, +- insert_path, 0, es); + ExplainPropertyFloat("Conflicting Tuples", NULL, + other_path, 0, es); + } +diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h +index f8b79ec120e..b6e2401065c 100644 +--- a/src/include/commands/explain.h ++++ b/src/include/commands/explain.h +@@ -37,6 +37,8 @@ typedef struct ExplainState + bool summary; /* print total planning and execution timing */ + bool settings; /* print modified settings */ + ExplainFormat format; /* output format */ ++ bool runtime; /* print intermediate state of query execution, ++ not after completion */ + /* state for output formatting --- not reset for each new plan tree */ + int indent; /* current indentation level */ + List *grouping_stack; /* format-specific grouping state */ +-- +2.17.1 + diff --git a/patches/runtime_explain_13.0.patch b/patches/runtime_explain_13.0.patch new file mode 100644 index 0000000..be29669 --- /dev/null +++ b/patches/runtime_explain_13.0.patch @@ -0,0 +1,260 @@ +diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c +index 20708db9f12..866948bd0c1 100644 +--- a/src/backend/commands/explain.c ++++ b/src/backend/commands/explain.c +@@ -955,14 +955,36 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + char *relname; + char *conname = NULL; + ++ instr_time starttimespan; ++ double total; ++ double ntuples; ++ double ncalls; ++ ++ if (!es->runtime) ++ { + /* Must clean up instrumentation state */ + InstrEndLoop(instr); ++ } ++ ++ /* Collect statistic variables */ ++ if (!INSTR_TIME_IS_ZERO(instr->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, instr->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ ++ total = instr->total + INSTR_TIME_GET_DOUBLE(instr->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan); ++ ntuples = instr->ntuples + instr->tuplecount; ++ ncalls = ntuples + !INSTR_TIME_IS_ZERO(starttimespan); + + /* + * We ignore triggers that were never invoked; they likely aren't + * relevant to the current query type. 
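++	 *
++	 * In runtime mode a trigger whose first invocation is still in flight
++	 * has ntuples == 0 but a nonzero starttime, so the ncalls value
++	 * computed above counts the in-flight call and such triggers are not
++	 * skipped here.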
+ */ +- if (instr->ntuples == 0) ++ if (ncalls == 0) + continue; + + ExplainOpenGroup("Trigger", NULL, true, es); +@@ -988,9 +1010,9 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + appendStringInfo(es->str, " on %s", relname); + if (es->timing) + appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", +- 1000.0 * instr->total, instr->ntuples); ++ 1000.0 * total, ncalls); + else +- appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples); ++ appendStringInfo(es->str, ": calls=%.0f\n", ncalls); + } + else + { +@@ -999,9 +1021,8 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + ExplainPropertyText("Constraint Name", conname, es); + ExplainPropertyText("Relation", relname, es); + if (es->timing) +- ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3, +- es); +- ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es); ++ ExplainPropertyFloat("Time", "ms", 1000.0 * total, 3, es); ++ ExplainPropertyFloat("Calls", NULL, ncalls, 0, es); + } + + if (conname) +@@ -1560,8 +1581,11 @@ ExplainNode(PlanState *planstate, List *ancestors, + * instrumentation results the user didn't ask for. But we do the + * InstrEndLoop call anyway, if possible, to reduce the number of cases + * auto_explain has to contend with. ++ * ++ * If flag es->stateinfo is set, i.e. when printing the current execution ++ * state, this step of cleaning up is missed. + */ +- if (planstate->instrument) ++ if (planstate->instrument && !es->runtime) + InstrEndLoop(planstate->instrument); + + if (es->analyze && +@@ -1596,7 +1620,7 @@ ExplainNode(PlanState *planstate, List *ancestors, + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + } +- else if (es->analyze) ++ else if (es->analyze && !es->runtime) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, " (never executed)"); +@@ -1612,6 +1636,75 @@ ExplainNode(PlanState *planstate, List *ancestors, + } + } + ++ /* ++ * Print the progress of node execution at current loop. 
++ */ ++ if (planstate->instrument && es->analyze && es->runtime) ++ { ++ instr_time starttimespan; ++ double startup_sec; ++ double total_sec; ++ double rows; ++ double loop_num; ++ bool finished; ++ ++ if (!INSTR_TIME_IS_ZERO(planstate->instrument->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, planstate->instrument->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ startup_sec = 1000.0 * planstate->instrument->firsttuple; ++ total_sec = 1000.0 * (INSTR_TIME_GET_DOUBLE(planstate->instrument->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan)); ++ rows = planstate->instrument->tuplecount; ++ loop_num = planstate->instrument->nloops + 1; ++ ++ finished = planstate->instrument->nloops > 0 ++ && !planstate->instrument->running ++ && INSTR_TIME_IS_ZERO(starttimespan); ++ ++ if (!finished) ++ { ++ ExplainOpenGroup("Current loop", "Current loop", true, es); ++ if (es->format == EXPLAIN_FORMAT_TEXT) ++ { ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ appendStringInfo(es->str, ++ " (Current loop: actual time=%.3f..%.3f rows=%.0f, loop number=%.0f)", ++ startup_sec, total_sec, rows, loop_num); ++ else ++ appendStringInfo(es->str, ++ " (Current loop: running time=%.3f actual rows=0, loop number=%.0f)", ++ total_sec, loop_num); ++ } ++ else ++ appendStringInfo(es->str, ++ " (Current loop: actual rows=%.0f, loop number=%.0f)", ++ rows, loop_num); ++ } ++ else ++ { ++ ExplainPropertyFloat("Actual Loop Number", NULL, loop_num, 0, es); ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ { ++ ExplainPropertyFloat("Actual Startup Time", NULL, startup_sec, 3, es); ++ ExplainPropertyFloat("Actual Total Time", NULL, total_sec, 3, es); ++ } ++ else ++ ExplainPropertyFloat("Running Time", NULL, total_sec, 3, es); ++ } ++ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); ++ } ++ ExplainCloseGroup("Current loop", "Current loop", true, es); ++ } ++ } ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); +@@ -1990,6 +2083,9 @@ ExplainNode(PlanState *planstate, List *ancestors, + + /* Prepare per-worker buffer/WAL usage */ + if (es->workers_state && (es->buffers || es->wal) && es->verbose) ++ /* Show worker detail after query execution */ ++ if (es->analyze && es->verbose && planstate->worker_instrument ++ && !es->runtime) + { + WorkerInstrumentation *w = planstate->worker_instrument; + +@@ -2960,6 +3056,11 @@ show_hash_info(HashState *hashstate, ExplainState *es) + memcpy(&hinstrument, hashstate->hinstrument, + sizeof(HashInstrumentation)); + ++ if (hashstate->hashtable) ++ { ++ ExecHashAccumInstrumentation(&hinstrument, hashstate->hashtable); ++ } ++ + /* + * Merge results from workers. In the parallel-oblivious case, the + * results from all participants should be identical, except where +@@ -3196,20 +3297,16 @@ show_instrumentation_count(const char *qlabel, int which, + if (!es->analyze || !planstate->instrument) + return; + ++ nloops = planstate->instrument->nloops; + if (which == 2) +- nfiltered = planstate->instrument->nfiltered2; ++ nfiltered = ((nloops > 0) ? planstate->instrument->nfiltered2 / nloops : 0); + else +- nfiltered = planstate->instrument->nfiltered1; ++ nfiltered = ((nloops > 0) ? 
planstate->instrument->nfiltered1 / nloops : 0); + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) +- { +- if (nloops > 0) +- ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es); +- else +- ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es); +- } ++ ExplainPropertyFloat(qlabel, NULL, nfiltered, 0, es); + } + + /* +@@ -3781,15 +3878,27 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + double insert_path; + double other_path; + +- InstrEndLoop(mtstate->mt_plans[0]->instrument); ++ if (!es->runtime) ++ InstrEndLoop(mtstate->mt_plans[0]->instrument); + + /* count the number of source rows */ +- total = mtstate->mt_plans[0]->instrument->ntuples; + other_path = mtstate->ps.instrument->ntuples2; +- insert_path = total - other_path; + ++ /* ++ * Insert occurs after extracting row from subplan and in runtime mode ++ * we can appear between these two operations - situation when ++ * total > insert_path + other_path. Therefore we don't know exactly ++ * whether last row from subplan is inserted. ++ * We don't print inserted tuples in runtime mode in order to not print ++ * inconsistent data ++ */ ++ if (!es->runtime) ++ { ++ total = mtstate->mt_plans[0]->instrument->ntuples; ++ insert_path = total - other_path; ++ ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es); ++ } ++ +- ExplainPropertyFloat("Tuples Inserted", NULL, +- insert_path, 0, es); + ExplainPropertyFloat("Conflicting Tuples", NULL, + other_path, 0, es); + } +diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h +index ba661d32a63..ee49febfce5 100644 +--- a/src/include/commands/explain.h ++++ b/src/include/commands/explain.h +@@ -47,6 +47,8 @@ typedef struct ExplainState + bool summary; /* print total planning and execution timing */ + bool settings; /* print modified settings */ + ExplainFormat format; /* output format */ ++ bool runtime; /* print intermediate state of query execution, ++ not after completion */ + /* state for output formatting --- not reset for each new plan tree */ + int indent; /* current indentation level */ + List *grouping_stack; /* format-specific grouping state */ +-- +2.25.1 + diff --git a/patches/runtime_explain_14.0.patch b/patches/runtime_explain_14.0.patch new file mode 100644 index 0000000..b266b15 --- /dev/null +++ b/patches/runtime_explain_14.0.patch @@ -0,0 +1,260 @@ +diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c +index 10644dfac4..7106ed4257 100644 +--- a/src/backend/commands/explain.c ++++ b/src/backend/commands/explain.c +@@ -984,14 +984,36 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + char *relname; + char *conname = NULL; + ++ instr_time starttimespan; ++ double total; ++ double ntuples; ++ double ncalls; ++ ++ if (!es->runtime) ++ { + /* Must clean up instrumentation state */ + InstrEndLoop(instr); ++ } ++ ++ /* Collect statistic variables */ ++ if (!INSTR_TIME_IS_ZERO(instr->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, instr->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ ++ total = instr->total + INSTR_TIME_GET_DOUBLE(instr->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan); ++ ntuples = instr->ntuples + instr->tuplecount; ++ ncalls = ntuples + !INSTR_TIME_IS_ZERO(starttimespan); + + /* + * We ignore triggers that were never invoked; they likely aren't + * relevant to the 
current query type. + */ +- if (instr->ntuples == 0) ++ if (ncalls == 0) + continue; + + ExplainOpenGroup("Trigger", NULL, true, es); +@@ -1017,9 +1039,9 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + appendStringInfo(es->str, " on %s", relname); + if (es->timing) + appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", +- 1000.0 * instr->total, instr->ntuples); ++ 1000.0 * total, ncalls); + else +- appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples); ++ appendStringInfo(es->str, ": calls=%.0f\n", ncalls); + } + else + { +@@ -1028,9 +1050,8 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + ExplainPropertyText("Constraint Name", conname, es); + ExplainPropertyText("Relation", relname, es); + if (es->timing) +- ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3, +- es); +- ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es); ++ ExplainPropertyFloat("Time", "ms", 1000.0 * total, 3, es); ++ ExplainPropertyFloat("Calls", NULL, ncalls, 0, es); + } + + if (conname) +@@ -1600,8 +1621,11 @@ ExplainNode(PlanState *planstate, List *ancestors, + * instrumentation results the user didn't ask for. But we do the + * InstrEndLoop call anyway, if possible, to reduce the number of cases + * auto_explain has to contend with. ++ * ++ * If flag es->stateinfo is set, i.e. when printing the current execution ++ * state, this step of cleaning up is missed. + */ +- if (planstate->instrument) ++ if (planstate->instrument && !es->runtime) + InstrEndLoop(planstate->instrument); + + if (es->analyze && +@@ -1636,7 +1660,7 @@ ExplainNode(PlanState *planstate, List *ancestors, + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + } +- else if (es->analyze) ++ else if (es->analyze && !es->runtime) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, " (never executed)"); +@@ -1652,6 +1676,75 @@ ExplainNode(PlanState *planstate, List *ancestors, + } + } + ++ /* ++ * Print the progress of node execution at current loop. 
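++	 * loop_num is nloops + 1 because instr->nloops only counts loops that
++	 * have already completed, whereas this report describes the loop that
++	 * is still in progress.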
++ */ ++ if (planstate->instrument && es->analyze && es->runtime) ++ { ++ instr_time starttimespan; ++ double startup_sec; ++ double total_sec; ++ double rows; ++ double loop_num; ++ bool finished; ++ ++ if (!INSTR_TIME_IS_ZERO(planstate->instrument->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, planstate->instrument->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ startup_sec = 1000.0 * planstate->instrument->firsttuple; ++ total_sec = 1000.0 * (INSTR_TIME_GET_DOUBLE(planstate->instrument->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan)); ++ rows = planstate->instrument->tuplecount; ++ loop_num = planstate->instrument->nloops + 1; ++ ++ finished = planstate->instrument->nloops > 0 ++ && !planstate->instrument->running ++ && INSTR_TIME_IS_ZERO(starttimespan); ++ ++ if (!finished) ++ { ++ ExplainOpenGroup("Current loop", "Current loop", true, es); ++ if (es->format == EXPLAIN_FORMAT_TEXT) ++ { ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ appendStringInfo(es->str, ++ " (Current loop: actual time=%.3f..%.3f rows=%.0f, loop number=%.0f)", ++ startup_sec, total_sec, rows, loop_num); ++ else ++ appendStringInfo(es->str, ++ " (Current loop: running time=%.3f actual rows=0, loop number=%.0f)", ++ total_sec, loop_num); ++ } ++ else ++ appendStringInfo(es->str, ++ " (Current loop: actual rows=%.0f, loop number=%.0f)", ++ rows, loop_num); ++ } ++ else ++ { ++ ExplainPropertyFloat("Actual Loop Number", NULL, loop_num, 0, es); ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ { ++ ExplainPropertyFloat("Actual Startup Time", NULL, startup_sec, 3, es); ++ ExplainPropertyFloat("Actual Total Time", NULL, total_sec, 3, es); ++ } ++ else ++ ExplainPropertyFloat("Running Time", NULL, total_sec, 3, es); ++ } ++ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); ++ } ++ ExplainCloseGroup("Current loop", "Current loop", true, es); ++ } ++ } ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); +@@ -2051,6 +2144,9 @@ ExplainNode(PlanState *planstate, List *ancestors, + + /* Prepare per-worker buffer/WAL usage */ + if (es->workers_state && (es->buffers || es->wal) && es->verbose) ++ /* Show worker detail after query execution */ ++ if (es->analyze && es->verbose && planstate->worker_instrument ++ && !es->runtime) + { + WorkerInstrumentation *w = planstate->worker_instrument; + +@@ -3015,6 +3111,11 @@ show_hash_info(HashState *hashstate, ExplainState *es) + memcpy(&hinstrument, hashstate->hinstrument, + sizeof(HashInstrumentation)); + ++ if (hashstate->hashtable) ++ { ++ ExecHashAccumInstrumentation(&hinstrument, hashstate->hashtable); ++ } ++ + /* + * Merge results from workers. In the parallel-oblivious case, the + * results from all participants should be identical, except where +@@ -3392,20 +3493,16 @@ show_instrumentation_count(const char *qlabel, int which, + if (!es->analyze || !planstate->instrument) + return; + ++ nloops = planstate->instrument->nloops; + if (which == 2) +- nfiltered = planstate->instrument->nfiltered2; ++ nfiltered = ((nloops > 0) ? planstate->instrument->nfiltered2 / nloops : 0); + else +- nfiltered = planstate->instrument->nfiltered1; ++ nfiltered = ((nloops > 0) ? 
planstate->instrument->nfiltered1 / nloops : 0); + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) +- { +- if (nloops > 0) +- ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es); +- else +- ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es); +- } ++ ExplainPropertyFloat(qlabel, NULL, nfiltered, 0, es); + } + + /* +@@ -3977,15 +4074,27 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + double insert_path; + double other_path; + +- InstrEndLoop(outerPlanState(mtstate)->instrument); ++ if (!es->runtime) ++ InstrEndLoop(outerPlanState(mtstate)->instrument); + + /* count the number of source rows */ +- total = outerPlanState(mtstate)->instrument->ntuples; + other_path = mtstate->ps.instrument->ntuples2; +- insert_path = total - other_path; + ++ /* ++ * Insert occurs after extracting row from subplan and in runtime mode ++ * we can appear between these two operations - situation when ++ * total > insert_path + other_path. Therefore we don't know exactly ++ * whether last row from subplan is inserted. ++ * We don't print inserted tuples in runtime mode in order to not print ++ * inconsistent data ++ */ ++ if (!es->runtime) ++ { ++ total = outerPlanState(mtstate)->instrument->ntuples; ++ insert_path = total - other_path; ++ ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es); ++ } ++ +- ExplainPropertyFloat("Tuples Inserted", NULL, +- insert_path, 0, es); + ExplainPropertyFloat("Conflicting Tuples", NULL, + other_path, 0, es); + } +diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h +index e94d9e49cf..6a157b8bc0 100644 +--- a/src/include/commands/explain.h ++++ b/src/include/commands/explain.h +@@ -47,6 +47,8 @@ typedef struct ExplainState + bool summary; /* print total planning and execution timing */ + bool settings; /* print modified settings */ + ExplainFormat format; /* output format */ ++ bool runtime; /* print intermediate state of query execution, ++ not after completion */ + /* state for output formatting --- not reset for each new plan tree */ + int indent; /* current indentation level */ + List *grouping_stack; /* format-specific grouping state */ +-- +2.25.1 + diff --git a/patches/runtime_explain_15.0.patch b/patches/runtime_explain_15.0.patch new file mode 100644 index 0000000..d60cea8 --- /dev/null +++ b/patches/runtime_explain_15.0.patch @@ -0,0 +1,260 @@ +diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c +index 10644dfac4..7106ed4257 100644 +--- a/src/backend/commands/explain.c ++++ b/src/backend/commands/explain.c +@@ -990,14 +990,36 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + char *relname; + char *conname = NULL; + ++ instr_time starttimespan; ++ double total; ++ double ntuples; ++ double ncalls; ++ ++ if (!es->runtime) ++ { + /* Must clean up instrumentation state */ + InstrEndLoop(instr); ++ } ++ ++ /* Collect statistic variables */ ++ if (!INSTR_TIME_IS_ZERO(instr->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, instr->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ ++ total = instr->total + INSTR_TIME_GET_DOUBLE(instr->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan); ++ ntuples = instr->ntuples + instr->tuplecount; ++ ncalls = ntuples + !INSTR_TIME_IS_ZERO(starttimespan); + + /* + * We ignore triggers that were never invoked; they likely aren't + * 
relevant to the current query type. + */ +- if (instr->ntuples == 0) ++ if (ncalls == 0) + continue; + + ExplainOpenGroup("Trigger", NULL, true, es); +@@ -1023,9 +1045,9 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + appendStringInfo(es->str, " on %s", relname); + if (es->timing) + appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", +- 1000.0 * instr->total, instr->ntuples); ++ 1000.0 * total, ncalls); + else +- appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples); ++ appendStringInfo(es->str, ": calls=%.0f\n", ncalls); + } + else + { +@@ -1034,9 +1056,8 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + ExplainPropertyText("Constraint Name", conname, es); + ExplainPropertyText("Relation", relname, es); + if (es->timing) +- ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3, +- es); +- ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es); ++ ExplainPropertyFloat("Time", "ms", 1000.0 * total, 3, es); ++ ExplainPropertyFloat("Calls", NULL, ncalls, 0, es); + } + + if (conname) +@@ -1609,8 +1630,11 @@ ExplainNode(PlanState *planstate, List *ancestors, + * instrumentation results the user didn't ask for. But we do the + * InstrEndLoop call anyway, if possible, to reduce the number of cases + * auto_explain has to contend with. ++ * ++ * If flag es->stateinfo is set, i.e. when printing the current execution ++ * state, this step of cleaning up is missed. + */ +- if (planstate->instrument) ++ if (planstate->instrument && !es->runtime) + InstrEndLoop(planstate->instrument); + + if (es->analyze && +@@ -1645,7 +1669,7 @@ ExplainNode(PlanState *planstate, List *ancestors, + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + } +- else if (es->analyze) ++ else if (es->analyze && !es->runtime) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, " (never executed)"); +@@ -1661,6 +1685,75 @@ ExplainNode(PlanState *planstate, List *ancestors, + } + } + ++ /* ++ * Print the progress of node execution at current loop. 
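++	 * Both timings are printed in milliseconds: startup_sec is the time to
++	 * the first tuple of the current loop, and total_sec adds the interval
++	 * elapsed since starttime to the time already accumulated in counter.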
++ */ ++ if (planstate->instrument && es->analyze && es->runtime) ++ { ++ instr_time starttimespan; ++ double startup_sec; ++ double total_sec; ++ double rows; ++ double loop_num; ++ bool finished; ++ ++ if (!INSTR_TIME_IS_ZERO(planstate->instrument->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, planstate->instrument->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ startup_sec = 1000.0 * planstate->instrument->firsttuple; ++ total_sec = 1000.0 * (INSTR_TIME_GET_DOUBLE(planstate->instrument->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan)); ++ rows = planstate->instrument->tuplecount; ++ loop_num = planstate->instrument->nloops + 1; ++ ++ finished = planstate->instrument->nloops > 0 ++ && !planstate->instrument->running ++ && INSTR_TIME_IS_ZERO(starttimespan); ++ ++ if (!finished) ++ { ++ ExplainOpenGroup("Current loop", "Current loop", true, es); ++ if (es->format == EXPLAIN_FORMAT_TEXT) ++ { ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ appendStringInfo(es->str, ++ " (Current loop: actual time=%.3f..%.3f rows=%.0f, loop number=%.0f)", ++ startup_sec, total_sec, rows, loop_num); ++ else ++ appendStringInfo(es->str, ++ " (Current loop: running time=%.3f actual rows=0, loop number=%.0f)", ++ total_sec, loop_num); ++ } ++ else ++ appendStringInfo(es->str, ++ " (Current loop: actual rows=%.0f, loop number=%.0f)", ++ rows, loop_num); ++ } ++ else ++ { ++ ExplainPropertyFloat("Actual Loop Number", NULL, loop_num, 0, es); ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ { ++ ExplainPropertyFloat("Actual Startup Time", NULL, startup_sec, 3, es); ++ ExplainPropertyFloat("Actual Total Time", NULL, total_sec, 3, es); ++ } ++ else ++ ExplainPropertyFloat("Running Time", NULL, total_sec, 3, es); ++ } ++ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); ++ } ++ ExplainCloseGroup("Current loop", "Current loop", true, es); ++ } ++ } ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); +@@ -2068,6 +2161,9 @@ ExplainNode(PlanState *planstate, List *ancestors, + + /* Prepare per-worker buffer/WAL usage */ + if (es->workers_state && (es->buffers || es->wal) && es->verbose) ++ /* Show worker detail after query execution */ ++ if (es->analyze && es->verbose && planstate->worker_instrument ++ && !es->runtime) + { + WorkerInstrumentation *w = planstate->worker_instrument; + +@@ -3032,6 +3128,11 @@ show_hash_info(HashState *hashstate, ExplainState *es) + memcpy(&hinstrument, hashstate->hinstrument, + sizeof(HashInstrumentation)); + ++ if (hashstate->hashtable) ++ { ++ ExecHashAccumInstrumentation(&hinstrument, hashstate->hashtable); ++ } ++ + /* + * Merge results from workers. In the parallel-oblivious case, the + * results from all participants should be identical, except where +@@ -3412,20 +3513,16 @@ show_instrumentation_count(const char *qlabel, int which, + if (!es->analyze || !planstate->instrument) + return; + ++ nloops = planstate->instrument->nloops; + if (which == 2) +- nfiltered = planstate->instrument->nfiltered2; ++ nfiltered = ((nloops > 0) ? planstate->instrument->nfiltered2 / nloops : 0); + else +- nfiltered = planstate->instrument->nfiltered1; ++ nfiltered = ((nloops > 0) ? 
planstate->instrument->nfiltered1 / nloops : 0); + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) +- { +- if (nloops > 0) +- ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es); +- else +- ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es); +- } ++ ExplainPropertyFloat(qlabel, NULL, nfiltered, 0, es); + } + + /* +@@ -4028,15 +4125,27 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + double insert_path; + double other_path; + +- InstrEndLoop(outerPlanState(mtstate)->instrument); ++ if (!es->runtime) ++ InstrEndLoop(outerPlanState(mtstate)->instrument); + + /* count the number of source rows */ +- total = outerPlanState(mtstate)->instrument->ntuples; + other_path = mtstate->ps.instrument->ntuples2; +- insert_path = total - other_path; + ++ /* ++ * Insert occurs after extracting row from subplan and in runtime mode ++ * we can appear between these two operations - situation when ++ * total > insert_path + other_path. Therefore we don't know exactly ++ * whether last row from subplan is inserted. ++ * We don't print inserted tuples in runtime mode in order to not print ++ * inconsistent data ++ */ ++ if (!es->runtime) ++ { ++ total = outerPlanState(mtstate)->instrument->ntuples; ++ insert_path = total - other_path; ++ ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es); ++ } ++ +- ExplainPropertyFloat("Tuples Inserted", NULL, +- insert_path, 0, es); + ExplainPropertyFloat("Conflicting Tuples", NULL, + other_path, 0, es); + } +diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h +index e94d9e49cf..6a157b8bc0 100644 +--- a/src/include/commands/explain.h ++++ b/src/include/commands/explain.h +@@ -47,6 +47,8 @@ typedef struct ExplainState + bool summary; /* print total planning and execution timing */ + bool settings; /* print modified settings */ + ExplainFormat format; /* output format */ ++ bool runtime; /* print intermediate state of query execution, ++ not after completion */ + /* state for output formatting --- not reset for each new plan tree */ + int indent; /* current indentation level */ + List *grouping_stack; /* format-specific grouping state */ +-- +2.25.1 + diff --git a/patches/runtime_explain_16.0.patch b/patches/runtime_explain_16.0.patch new file mode 100644 index 0000000..2b955e9 --- /dev/null +++ b/patches/runtime_explain_16.0.patch @@ -0,0 +1,257 @@ +diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c +index 6c2e5c8a4f..74be3944d1 100644 +--- a/src/backend/commands/explain.c ++++ b/src/backend/commands/explain.c +@@ -1023,14 +1023,36 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + char *relname; + char *conname = NULL; + ++ instr_time starttimespan; ++ double total; ++ double ntuples; ++ double ncalls; ++ ++ if (!es->runtime) ++ { + /* Must clean up instrumentation state */ + InstrEndLoop(instr); ++ } ++ ++ /* Collect statistic variables */ ++ if (!INSTR_TIME_IS_ZERO(instr->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, instr->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ ++ total = instr->total + INSTR_TIME_GET_DOUBLE(instr->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan); ++ ntuples = instr->ntuples + instr->tuplecount; ++ ncalls = ntuples + !INSTR_TIME_IS_ZERO(starttimespan); + + /* + * We ignore triggers that were never invoked; they likely aren't + * 
relevant to the current query type. + */ +- if (instr->ntuples == 0) ++ if (ncalls == 0) + continue; + + ExplainOpenGroup("Trigger", NULL, true, es); +@@ -1056,9 +1078,9 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + appendStringInfo(es->str, " on %s", relname); + if (es->timing) + appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", +- 1000.0 * instr->total, instr->ntuples); ++ 1000.0 * total, ncalls); + else +- appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples); ++ appendStringInfo(es->str, ": calls=%.0f\n", ncalls); + } + else + { +@@ -1067,9 +1089,8 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + ExplainPropertyText("Constraint Name", conname, es); + ExplainPropertyText("Relation", relname, es); + if (es->timing) +- ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3, +- es); +- ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es); ++ ExplainPropertyFloat("Time", "ms", 1000.0 * total, 3, es); ++ ExplainPropertyFloat("Calls", NULL, ncalls, 0, es); + } + + if (conname) +@@ -1645,8 +1666,11 @@ ExplainNode(PlanState *planstate, List *ancestors, + * instrumentation results the user didn't ask for. But we do the + * InstrEndLoop call anyway, if possible, to reduce the number of cases + * auto_explain has to contend with. ++ * ++ * If flag es->stateinfo is set, i.e. when printing the current execution ++ * state, this step of cleaning up is missed. + */ +- if (planstate->instrument) ++ if (planstate->instrument && !es->runtime) + InstrEndLoop(planstate->instrument); + + if (es->analyze && +@@ -1681,7 +1705,7 @@ ExplainNode(PlanState *planstate, List *ancestors, + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + } +- else if (es->analyze) ++ else if (es->analyze && !es->runtime) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, " (never executed)"); +@@ -1697,6 +1721,75 @@ ExplainNode(PlanState *planstate, List *ancestors, + } + } + ++ /* ++ * Print the progress of node execution at current loop. 
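++	 * The "running time=" form is used while the node has been started but
++	 * has not yet delivered a tuple in the current loop, so no actual rows
++	 * can be reported yet.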
++ */ ++ if (planstate->instrument && es->analyze && es->runtime) ++ { ++ instr_time starttimespan; ++ double startup_sec; ++ double total_sec; ++ double rows; ++ double loop_num; ++ bool finished; ++ ++ if (!INSTR_TIME_IS_ZERO(planstate->instrument->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, planstate->instrument->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ startup_sec = 1000.0 * planstate->instrument->firsttuple; ++ total_sec = 1000.0 * (INSTR_TIME_GET_DOUBLE(planstate->instrument->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan)); ++ rows = planstate->instrument->tuplecount; ++ loop_num = planstate->instrument->nloops + 1; ++ ++ finished = planstate->instrument->nloops > 0 ++ && !planstate->instrument->running ++ && INSTR_TIME_IS_ZERO(starttimespan); ++ ++ if (!finished) ++ { ++ ExplainOpenGroup("Current loop", "Current loop", true, es); ++ if (es->format == EXPLAIN_FORMAT_TEXT) ++ { ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ appendStringInfo(es->str, ++ " (Current loop: actual time=%.3f..%.3f rows=%.0f, loop number=%.0f)", ++ startup_sec, total_sec, rows, loop_num); ++ else ++ appendStringInfo(es->str, ++ " (Current loop: running time=%.3f actual rows=0, loop number=%.0f)", ++ total_sec, loop_num); ++ } ++ else ++ appendStringInfo(es->str, ++ " (Current loop: actual rows=%.0f, loop number=%.0f)", ++ rows, loop_num); ++ } ++ else ++ { ++ ExplainPropertyFloat("Actual Loop Number", NULL, loop_num, 0, es); ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ { ++ ExplainPropertyFloat("Actual Startup Time", NULL, startup_sec, 3, es); ++ ExplainPropertyFloat("Actual Total Time", NULL, total_sec, 3, es); ++ } ++ else ++ ExplainPropertyFloat("Running Time", NULL, total_sec, 3, es); ++ } ++ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); ++ } ++ ExplainCloseGroup("Current loop", "Current loop", true, es); ++ } ++ } ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); +@@ -2104,6 +2197,9 @@ ExplainNode(PlanState *planstate, List *ancestors, + + /* Prepare per-worker buffer/WAL usage */ + if (es->workers_state && (es->buffers || es->wal) && es->verbose) ++ /* Show worker detail after query execution */ ++ if (es->analyze && es->verbose && planstate->worker_instrument ++ && !es->runtime) + { + WorkerInstrumentation *w = planstate->worker_instrument; + +@@ -3068,6 +3164,11 @@ show_hash_info(HashState *hashstate, ExplainState *es) + memcpy(&hinstrument, hashstate->hinstrument, + sizeof(HashInstrumentation)); + ++ if (hashstate->hashtable) ++ { ++ ExecHashAccumInstrumentation(&hinstrument, hashstate->hashtable); ++ } ++ + /* + * Merge results from workers. In the parallel-oblivious case, the + * results from all participants should be identical, except where +@@ -3447,20 +3548,16 @@ show_instrumentation_count(const char *qlabel, int which, + if (!es->analyze || !planstate->instrument) + return; + ++ nloops = planstate->instrument->nloops; + if (which == 2) +- nfiltered = planstate->instrument->nfiltered2; ++ nfiltered = ((nloops > 0) ? planstate->instrument->nfiltered2 / nloops : 0); + else +- nfiltered = planstate->instrument->nfiltered1; ++ nfiltered = ((nloops > 0) ? 
planstate->instrument->nfiltered1 / nloops : 0); + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) +- { +- if (nloops > 0) +- ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es); +- else +- ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es); +- } ++ ExplainPropertyFloat(qlabel, NULL, nfiltered, 0, es); + } + + /* +@@ -4060,15 +4157,27 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + double insert_path; + double other_path; + +- InstrEndLoop(outerPlanState(mtstate)->instrument); ++ if (!es->runtime) ++ InstrEndLoop(outerPlanState(mtstate)->instrument); + + /* count the number of source rows */ +- total = outerPlanState(mtstate)->instrument->ntuples; + other_path = mtstate->ps.instrument->ntuples2; +- insert_path = total - other_path; + ++ /* ++ * Insert occurs after extracting row from subplan and in runtime mode ++ * we can appear between these two operations - situation when ++ * total > insert_path + other_path. Therefore we don't know exactly ++ * whether last row from subplan is inserted. ++ * We don't print inserted tuples in runtime mode in order to not print ++ * inconsistent data ++ */ ++ if (!es->runtime) ++ { ++ total = outerPlanState(mtstate)->instrument->ntuples; ++ insert_path = total - other_path; ++ ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es); ++ } ++ +- ExplainPropertyFloat("Tuples Inserted", NULL, +- insert_path, 0, es); + ExplainPropertyFloat("Conflicting Tuples", NULL, + other_path, 0, es); + } +diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h +index 3d3e632a0c..3eb7bf345d 100644 +--- a/src/include/commands/explain.h ++++ b/src/include/commands/explain.h +@@ -48,6 +48,8 @@ typedef struct ExplainState + bool settings; /* print modified settings */ + bool generic; /* generate a generic plan */ + ExplainFormat format; /* output format */ ++ bool runtime; /* print intermediate state of query execution, ++ not after completion */ + /* state for output formatting --- not reset for each new plan tree */ + int indent; /* current indentation level */ + List *grouping_stack; /* format-specific grouping state */ diff --git a/patches/runtime_explain_17.0.patch b/patches/runtime_explain_17.0.patch new file mode 100644 index 0000000..65e22b8 --- /dev/null +++ b/patches/runtime_explain_17.0.patch @@ -0,0 +1,265 @@ +diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c +index 18a5af6b919..73d3d6171eb 100644 +--- a/src/backend/commands/explain.c ++++ b/src/backend/commands/explain.c +@@ -18,6 +18,7 @@ + #include "commands/createas.h" + #include "commands/defrem.h" + #include "commands/prepare.h" ++#include "executor/nodeHash.h" + #include "foreign/fdwapi.h" + #include "jit/jit.h" + #include "libpq/pqformat.h" +@@ -1233,14 +1234,36 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) + char *relname; + char *conname = NULL; + ++ instr_time starttimespan; ++ double total; ++ double ntuples; ++ double ncalls; ++ ++ if (!es->runtime) ++ { + /* Must clean up instrumentation state */ + InstrEndLoop(instr); ++ } ++ ++ /* Collect statistic variables */ ++ if (!INSTR_TIME_IS_ZERO(instr->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, instr->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ ++ total = instr->total + INSTR_TIME_GET_DOUBLE(instr->counter) ++ + 
INSTR_TIME_GET_DOUBLE(starttimespan);
++	ntuples = instr->ntuples + instr->tuplecount;
++	ncalls = ntuples + !INSTR_TIME_IS_ZERO(starttimespan);
+ 
+ 	/*
+ 	 * We ignore triggers that were never invoked; they likely aren't
+ 	 * relevant to the current query type.
+ 	 */
+-	if (instr->ntuples == 0)
++	if (ncalls == 0)
+ 		continue;
+ 
+ 	ExplainOpenGroup("Trigger", NULL, true, es);
+@@ -1266,9 +1289,9 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es)
+ 			appendStringInfo(es->str, " on %s", relname);
+ 		if (es->timing)
+ 			appendStringInfo(es->str, ": time=%.3f calls=%.0f\n",
+-							 1000.0 * instr->total, instr->ntuples);
++							 1000.0 * total, ncalls);
+ 		else
+-			appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples);
++			appendStringInfo(es->str, ": calls=%.0f\n", ncalls);
+ 	}
+ 	else
+ 	{
+@@ -1277,9 +1300,8 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es)
+ 			ExplainPropertyText("Constraint Name", conname, es);
+ 		ExplainPropertyText("Relation", relname, es);
+ 		if (es->timing)
+-			ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3,
+-								 es);
+-		ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es);
++			ExplainPropertyFloat("Time", "ms", 1000.0 * total, 3, es);
++		ExplainPropertyFloat("Calls", NULL, ncalls, 0, es);
+ 	}
+ 
+ 	if (conname)
+@@ -1949,8 +1971,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
+ 	 * instrumentation results the user didn't ask for.  But we do the
+ 	 * InstrEndLoop call anyway, if possible, to reduce the number of cases
+ 	 * auto_explain has to contend with.
++	 *
++	 * If the es->runtime flag is set, i.e. when printing the current
++	 * execution state, this cleanup step is skipped.
+ 	 */
+-	if (planstate->instrument)
++	if (planstate->instrument && !es->runtime)
+ 		InstrEndLoop(planstate->instrument);
+ 
+ 	if (es->analyze &&
+@@ -1985,7 +2010,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
+ 			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+ 		}
+ 	}
+-	else if (es->analyze)
++	else if (es->analyze && !es->runtime)
+ 	{
+ 		if (es->format == EXPLAIN_FORMAT_TEXT)
+ 			appendStringInfoString(es->str, " (never executed)");
+@@ -2001,6 +2026,75 @@ ExplainNode(PlanState *planstate, List *ancestors,
+ 		}
+ 	}
+ 
++	/*
++	 * Print the progress of node execution at the current loop.
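++	 *
++	 * In text format this appends a fragment to the node's first output
++	 * line, e.g. (values are illustrative only):
++	 *   (Current loop: actual time=0.125..12.345 rows=1000, loop number=2)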
++ */ ++ if (planstate->instrument && es->analyze && es->runtime) ++ { ++ instr_time starttimespan; ++ double startup_sec; ++ double total_sec; ++ double rows; ++ double loop_num; ++ bool finished; ++ ++ if (!INSTR_TIME_IS_ZERO(planstate->instrument->starttime)) ++ { ++ INSTR_TIME_SET_CURRENT(starttimespan); ++ INSTR_TIME_SUBTRACT(starttimespan, planstate->instrument->starttime); ++ } ++ else ++ INSTR_TIME_SET_ZERO(starttimespan); ++ startup_sec = 1000.0 * planstate->instrument->firsttuple; ++ total_sec = 1000.0 * (INSTR_TIME_GET_DOUBLE(planstate->instrument->counter) ++ + INSTR_TIME_GET_DOUBLE(starttimespan)); ++ rows = planstate->instrument->tuplecount; ++ loop_num = planstate->instrument->nloops + 1; ++ ++ finished = planstate->instrument->nloops > 0 ++ && !planstate->instrument->running ++ && INSTR_TIME_IS_ZERO(starttimespan); ++ ++ if (!finished) ++ { ++ ExplainOpenGroup("Current loop", "Current loop", true, es); ++ if (es->format == EXPLAIN_FORMAT_TEXT) ++ { ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ appendStringInfo(es->str, ++ " (Current loop: actual time=%.3f..%.3f rows=%.0f, loop number=%.0f)", ++ startup_sec, total_sec, rows, loop_num); ++ else ++ appendStringInfo(es->str, ++ " (Current loop: running time=%.3f actual rows=0, loop number=%.0f)", ++ total_sec, loop_num); ++ } ++ else ++ appendStringInfo(es->str, ++ " (Current loop: actual rows=%.0f, loop number=%.0f)", ++ rows, loop_num); ++ } ++ else ++ { ++ ExplainPropertyFloat("Actual Loop Number", NULL, loop_num, 0, es); ++ if (es->timing) ++ { ++ if (planstate->instrument->running) ++ { ++ ExplainPropertyFloat("Actual Startup Time", NULL, startup_sec, 3, es); ++ ExplainPropertyFloat("Actual Total Time", NULL, total_sec, 3, es); ++ } ++ else ++ ExplainPropertyFloat("Running Time", NULL, total_sec, 3, es); ++ } ++ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); ++ } ++ ExplainCloseGroup("Current loop", "Current loop", true, es); ++ } ++ } ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); +@@ -2416,6 +2510,9 @@ ExplainNode(PlanState *planstate, List *ancestors, + + /* Prepare per-worker buffer/WAL usage */ + if (es->workers_state && (es->buffers || es->wal) && es->verbose) ++ /* Show worker detail after query execution */ ++ if (es->analyze && es->verbose && planstate->worker_instrument ++ && !es->runtime) + { + WorkerInstrumentation *w = planstate->worker_instrument; + +@@ -3403,6 +3500,11 @@ show_hash_info(HashState *hashstate, ExplainState *es) + memcpy(&hinstrument, hashstate->hinstrument, + sizeof(HashInstrumentation)); + ++ if (hashstate->hashtable) ++ { ++ ExecHashAccumInstrumentation(&hinstrument, hashstate->hashtable); ++ } ++ + /* + * Merge results from workers. In the parallel-oblivious case, the + * results from all participants should be identical, except where +@@ -3937,20 +4039,16 @@ show_instrumentation_count(const char *qlabel, int which, + if (!es->analyze || !planstate->instrument) + return; + ++ nloops = planstate->instrument->nloops; + if (which == 2) +- nfiltered = planstate->instrument->nfiltered2; ++ nfiltered = ((nloops > 0) ? planstate->instrument->nfiltered2 / nloops : 0); + else +- nfiltered = planstate->instrument->nfiltered1; ++ nfiltered = ((nloops > 0) ? 
planstate->instrument->nfiltered1 / nloops : 0); + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) +- { +- if (nloops > 0) +- ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es); +- else +- ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es); +- } ++ ExplainPropertyFloat(qlabel, NULL, nfiltered, 0, es); + } + + /* +@@ -4617,15 +4715,27 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + double insert_path; + double other_path; + +- InstrEndLoop(outerPlanState(mtstate)->instrument); ++ if (!es->runtime) ++ InstrEndLoop(outerPlanState(mtstate)->instrument); + + /* count the number of source rows */ +- total = outerPlanState(mtstate)->instrument->ntuples; + other_path = mtstate->ps.instrument->ntuples2; +- insert_path = total - other_path; + +- ExplainPropertyFloat("Tuples Inserted", NULL, +- insert_path, 0, es); ++ /* ++ * Insert occurs after extracting row from subplan and in runtime mode ++ * we can appear between these two operations - situation when ++ * total > insert_path + other_path. Therefore we don't know exactly ++ * whether last row from subplan is inserted. ++ * We don't print inserted tuples in runtime mode in order to not print ++ * inconsistent data ++ */ ++ if (!es->runtime) ++ { ++ total = outerPlanState(mtstate)->instrument->ntuples; ++ insert_path = total - other_path; ++ ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es); ++ } ++ + ExplainPropertyFloat("Conflicting Tuples", NULL, + other_path, 0, es); + } +diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h +index 3ab0aae78f7..3644c0db116 100644 +--- a/src/include/commands/explain.h ++++ b/src/include/commands/explain.h +@@ -57,6 +57,8 @@ typedef struct ExplainState + bool generic; /* generate a generic plan */ + ExplainSerializeOption serialize; /* serialize the query's output? 
*/ + ExplainFormat format; /* output format */ ++ bool runtime; /* print intermediate state of query execution, ++ not after completion */ + /* state for output formatting --- not reset for each new plan tree */ + int indent; /* current indentation level */ + List *grouping_stack; /* format-specific grouping state */ diff --git a/pg_query_state.c b/pg_query_state.c index 9bd6771..635b967 100644 --- a/pg_query_state.c +++ b/pg_query_state.c @@ -2,7 +2,7 @@ * pg_query_state.c * Extract information about query state from other backend * - * Copyright (c) 2016-2016, Postgres Professional + * Copyright (c) 2016-2024, Postgres Professional * * contrib/pg_query_state/pg_query_state.c * IDENTIFICATION @@ -33,11 +33,6 @@ PG_MODULE_MAGIC; #endif -#define PG_QS_MODULE_KEY 0xCA94B108 -#define PG_QUERY_STATE_KEY 0 - -#define MIN_TIMEOUT 5000 - #define TEXT_CSTR_CMP(text, cstr) \ (memcmp(VARDATA(text), (cstr), VARSIZE(text) - VARHDRSZ)) @@ -50,11 +45,9 @@ bool pg_qs_buffers = false; static ExecutorStart_hook_type prev_ExecutorStart = NULL; static ExecutorRun_hook_type prev_ExecutorRun = NULL; static ExecutorFinish_hook_type prev_ExecutorFinish = NULL; -static ExecutorEnd_hook_type prev_ExecutorEnd = NULL; static shmem_startup_hook_type prev_shmem_startup_hook = NULL; void _PG_init(void); -void _PG_fini(void); /* hooks defined in this module */ static void qs_ExecutorStart(QueryDesc *queryDesc, int eflags); @@ -65,7 +58,9 @@ static void qs_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once); #endif static void qs_ExecutorFinish(QueryDesc *queryDesc); -static void qs_ExecutorEnd(QueryDesc *queryDesc); + +static shm_mq_result receive_msg_by_parts(shm_mq_handle *mqh, Size *total, + void **datap, int64 timeout, int *rc, bool nowait); /* Global variables */ List *QueryDescStack = NIL; @@ -82,12 +77,14 @@ static const char *be_state_str[] = { /* BackendState -> string repr */ "idle in transaction (aborted)", /* STATE_IDLEINTRANSACTION_ABORTED */ "disabled", /* STATE_DISABLED */ }; +static int reqid = 0; typedef struct { slock_t mutex; /* protect concurrent access to `userid` */ Oid userid; Latch *caller; + pg_atomic_uint32 n_peers; } RemoteUserIdResult; static void SendCurrentUserId(void); @@ -104,10 +101,10 @@ static List *GetRemoteBackendQueryStates(PGPROC *leader, ExplainFormat format); /* Shared memory variables */ -shm_toc *toc = NULL; -RemoteUserIdResult *counterpart_userid = NULL; -pg_qs_params *params = NULL; -shm_mq *mq = NULL; +static shm_toc *toc = NULL; +static RemoteUserIdResult *counterpart_userid = NULL; +pg_qs_params *params = NULL; +shm_mq *mq = NULL; /* * Estimate amount of shared memory needed. @@ -152,6 +149,7 @@ pg_qs_shmem_startup(void) counterpart_userid = shm_toc_allocate(toc, sizeof(RemoteUserIdResult)); shm_toc_insert(toc, num_toc++, counterpart_userid); SpinLockInit(&counterpart_userid->mutex); + pg_atomic_init_u32(&counterpart_userid->n_peers, 0); params = shm_toc_allocate(toc, sizeof(pg_qs_params)); shm_toc_insert(toc, num_toc++, params); @@ -180,6 +178,11 @@ pg_qs_shmem_startup(void) module_initialized = true; } +#if PG_VERSION_NUM >= 150000 +static shmem_request_hook_type prev_shmem_request_hook = NULL; +static void pg_qs_shmem_request(void); +#endif + /* * Module load callback */ @@ -189,12 +192,12 @@ _PG_init(void) if (!process_shared_preload_libraries_in_progress) return; - /* - * Request additional shared resources. (These are no-ops if we're not in - * the postmaster process.) 
We'll allocate or attach to the shared - * resources in qs_shmem_startup(). - */ +#if PG_VERSION_NUM >= 150000 + prev_shmem_request_hook = shmem_request_hook; + shmem_request_hook = pg_qs_shmem_request; +#else RequestAddinShmemSpace(pg_qs_shmem_size()); +#endif /* Register interrupt on custom signal of polling query state */ UserIdPollReason = RegisterCustomProcSignalHandler(SendCurrentUserId); @@ -205,7 +208,7 @@ _PG_init(void) || UserIdPollReason == INVALID_PROCSIGNAL) { ereport(WARNING, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), - errmsg("pg_query_state isn't loaded: insufficient custom ProcSignal slots"))); + errmsg("pg_query_state isn't loaded: insufficient custom ProcSignal slots"))); return; } @@ -249,31 +252,20 @@ _PG_init(void) ExecutorRun_hook = qs_ExecutorRun; prev_ExecutorFinish = ExecutorFinish_hook; ExecutorFinish_hook = qs_ExecutorFinish; - prev_ExecutorEnd = ExecutorEnd_hook; - ExecutorEnd_hook = qs_ExecutorEnd; prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = pg_qs_shmem_startup; } -/* - * Module unload callback - */ -void -_PG_fini(void) +#if PG_VERSION_NUM >= 150000 +static void +pg_qs_shmem_request(void) { - module_initialized = false; - - /* clear global state */ - list_free(QueryDescStack); - AssignCustomProcSignalHandler(QueryStatePollReason, NULL); - - /* Uninstall hooks. */ - ExecutorStart_hook = prev_ExecutorStart; - ExecutorRun_hook = prev_ExecutorRun; - ExecutorFinish_hook = prev_ExecutorFinish; - ExecutorEnd_hook = prev_ExecutorEnd; - shmem_startup_hook = prev_shmem_startup_hook; + if (prev_shmem_request_hook) + prev_shmem_request_hook(); + + RequestAddinShmemSpace(pg_qs_shmem_size()); } +#endif /* * ExecutorStart hook: @@ -283,32 +275,20 @@ _PG_fini(void) static void qs_ExecutorStart(QueryDesc *queryDesc, int eflags) { - PG_TRY(); - { - /* Enable per-node instrumentation */ - if (pg_qs_enable && ((eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)) - { - queryDesc->instrument_options |= INSTRUMENT_ROWS; - if (pg_qs_timing) - queryDesc->instrument_options |= INSTRUMENT_TIMER; - if (pg_qs_buffers) - queryDesc->instrument_options |= INSTRUMENT_BUFFERS; - } - - if (prev_ExecutorStart) - prev_ExecutorStart(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); - - /* push structure about current query in global stack */ - QueryDescStack = lcons(queryDesc, QueryDescStack); - } - PG_CATCH(); + /* Enable per-node instrumentation */ + if (pg_qs_enable && ((eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)) { - QueryDescStack = NIL; - PG_RE_THROW(); + queryDesc->instrument_options |= INSTRUMENT_ROWS; + if (pg_qs_timing) + queryDesc->instrument_options |= INSTRUMENT_TIMER; + if (pg_qs_buffers) + queryDesc->instrument_options |= INSTRUMENT_BUFFERS; } - PG_END_TRY(); + + if (prev_ExecutorStart) + prev_ExecutorStart(queryDesc, eflags); + else + standard_ExecutorStart(queryDesc, eflags); } /* @@ -323,6 +303,8 @@ qs_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once) #endif { + QueryDescStack = lcons(queryDesc, QueryDescStack); + PG_TRY(); { if (prev_ExecutorRun) @@ -335,10 +317,11 @@ qs_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, else standard_ExecutorRun(queryDesc, direction, count, execute_once); #endif + QueryDescStack = list_delete_first(QueryDescStack); } PG_CATCH(); { - QueryDescStack = NIL; + QueryDescStack = list_delete_first(QueryDescStack); PG_RE_THROW(); } PG_END_TRY(); @@ -351,40 +334,19 @@ qs_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, static void 
qs_ExecutorFinish(QueryDesc *queryDesc) { + QueryDescStack = lcons(queryDesc, QueryDescStack); + PG_TRY(); { if (prev_ExecutorFinish) prev_ExecutorFinish(queryDesc); else standard_ExecutorFinish(queryDesc); - } - PG_CATCH(); - { - QueryDescStack = NIL; - PG_RE_THROW(); - } - PG_END_TRY(); -} - -/* - * ExecutorEnd hook: - * pop current query description from global stack - */ -static void -qs_ExecutorEnd(QueryDesc *queryDesc) -{ - PG_TRY(); - { QueryDescStack = list_delete_first(QueryDescStack); - - if (prev_ExecutorEnd) - prev_ExecutorEnd(queryDesc); - else - standard_ExecutorEnd(queryDesc); } PG_CATCH(); { - QueryDescStack = NIL; + QueryDescStack = list_delete_first(QueryDescStack); PG_RE_THROW(); } PG_END_TRY(); @@ -403,7 +365,19 @@ search_be_status(int pid) for (beid = 1; beid <= pgstat_fetch_stat_numbackends(); beid++) { +#if PG_VERSION_NUM >= 160000 + LocalPgBackendStatus *lbe_status = pgstat_get_local_beentry_by_index(beid); + PgBackendStatus *be_status; + + Assert(lbe_status); + #ifndef PGPRO_STD + be_status = &lbe_status->backendStatus; + #else + be_status = lbe_status->backendStatus; + #endif +#else PgBackendStatus *be_status = pgstat_fetch_stat_beentry(beid); +#endif if (be_status && be_status->st_procpid == pid) return be_status; @@ -412,20 +386,30 @@ search_be_status(int pid) return NULL; } -/* - * Init userlock - */ -static void -init_lock_tag(LOCKTAG *tag, uint32 key) + +void +UnlockShmem(LOCKTAG *tag) +{ + LockRelease(tag, ExclusiveLock, false); +} + +void +LockShmem(LOCKTAG *tag, uint32 key) { + LockAcquireResult result; tag->locktag_field1 = PG_QS_MODULE_KEY; tag->locktag_field2 = key; tag->locktag_field3 = 0; tag->locktag_field4 = 0; tag->locktag_type = LOCKTAG_USERLOCK; tag->locktag_lockmethodid = USER_LOCKMETHOD; + result = LockAcquire(tag, ExclusiveLock, false, false); + Assert(result == LOCKACQUIRE_OK); + elog(DEBUG1, "LockAcquireResult is not OK %d", result); } + + /* * Structure of stack frame of fucntion call which transfers through message queue */ @@ -463,7 +447,7 @@ deserialize_stack(char *src, int stack_depth) { List *result = NIL; char *curr_ptr = src; - int i; + int i; for (i = 0; i < stack_depth; i++) { @@ -517,6 +501,8 @@ pg_query_state(PG_FUNCTION_ARGS) shm_mq_msg *msg; List *bg_worker_procs = NIL; List *msgs; + instr_time start_time; + instr_time cur_time; if (!module_initialized) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -527,7 +513,14 @@ pg_query_state(PG_FUNCTION_ARGS) errmsg("attempt to extract state of current process"))); proc = BackendPidGetProc(pid); - if (!proc || proc->backendId == InvalidBackendId) + if (!proc || +#if PG_VERSION_NUM >= 170000 + proc->vxid.procNumber == INVALID_PROC_NUMBER || +#else + proc->backendId == InvalidBackendId || +#endif + proc->databaseId == InvalidOid || + proc->roleId == InvalidOid) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("backend with pid=%d not found", pid))); @@ -546,13 +539,36 @@ pg_query_state(PG_FUNCTION_ARGS) * init and acquire lock so that any other concurrent calls of this fuction * can not occupy shared queue for transfering query state */ - init_lock_tag(&tag, PG_QUERY_STATE_KEY); - LockAcquire(&tag, ExclusiveLock, false, false); + LockShmem(&tag, PG_QS_RCV_KEY); + + INSTR_TIME_SET_CURRENT(start_time); + + while (pg_atomic_read_u32(&counterpart_userid->n_peers) != 0) + { + pg_usleep(1000000); /* wait one second */ + CHECK_FOR_INTERRUPTS(); + + INSTR_TIME_SET_CURRENT(cur_time); + INSTR_TIME_SUBTRACT(cur_time, start_time); + + if (INSTR_TIME_GET_MILLISEC(cur_time) > 
MAX_RCV_TIMEOUT) + { + elog(WARNING, "pg_query_state: last request was interrupted"); + break; + } + } counterpart_user_id = GetRemoteBackendUserId(proc); if (!(superuser() || GetUserId() == counterpart_user_id)) + { + UnlockShmem(&tag); ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied"))); + } + + pg_atomic_write_u32(&counterpart_userid->n_peers, 1); + params->reqid = ++reqid; + pg_write_barrier(); bg_worker_procs = GetRemoteBackendWorkers(proc); @@ -569,7 +585,7 @@ pg_query_state(PG_FUNCTION_ARGS) if (list_length(msgs) == 0) { elog(WARNING, "backend does not reply"); - LockRelease(&tag, ExclusiveLock, false); + UnlockShmem(&tag); SRF_RETURN_DONE(funcctx); } @@ -586,12 +602,12 @@ pg_query_state(PG_FUNCTION_ARGS) else elog(INFO, "backend is not running query"); - LockRelease(&tag, ExclusiveLock, false); + UnlockShmem(&tag); SRF_RETURN_DONE(funcctx); } case STAT_DISABLED: elog(INFO, "query execution statistics disabled"); - LockRelease(&tag, ExclusiveLock, false); + UnlockShmem(&tag); SRF_RETURN_DONE(funcctx); case QS_RETURNED: { @@ -602,10 +618,10 @@ pg_query_state(PG_FUNCTION_ARGS) /* print warnings if exist */ if (msg->warnings & TIMINIG_OFF_WARNING) ereport(WARNING, (errcode(ERRCODE_WARNING), - errmsg("timing statistics disabled"))); + errmsg("timing statistics disabled"))); if (msg->warnings & BUFFERS_OFF_WARNING) ereport(WARNING, (errcode(ERRCODE_WARNING), - errmsg("buffers statistics disabled"))); + errmsg("buffers statistics disabled"))); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); @@ -615,17 +631,18 @@ pg_query_state(PG_FUNCTION_ARGS) foreach(i, msgs) { List *qs_stack; - shm_mq_msg *msg = (shm_mq_msg *) lfirst(i); + shm_mq_msg *current_msg = (shm_mq_msg *) lfirst(i); proc_state *p_state = (proc_state *) palloc(sizeof(proc_state)); - if (msg->result_code != QS_RETURNED) + if (current_msg->result_code != QS_RETURNED) continue; - AssertState(msg->result_code == QS_RETURNED); + Assert(current_msg->result_code == QS_RETURNED); - qs_stack = deserialize_stack(msg->stack, msg->stack_depth); + qs_stack = deserialize_stack(current_msg->stack, + current_msg->stack_depth); - p_state->proc = msg->proc; + p_state->proc = current_msg->proc; p_state->stack = qs_stack; p_state->frame_index = 0; p_state->frame_cursor = list_head(qs_stack); @@ -640,7 +657,11 @@ pg_query_state(PG_FUNCTION_ARGS) funcctx->max_calls = max_calls; /* Make tuple descriptor */ +#if PG_VERSION_NUM < 120000 tupdesc = CreateTemplateTupleDesc(N_ATTRS, false); +#else + tupdesc = CreateTemplateTupleDesc(N_ATTRS); +#endif TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "frame_number", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "query_text", TEXTOID, -1, 0); @@ -648,7 +669,7 @@ pg_query_state(PG_FUNCTION_ARGS) TupleDescInitEntry(tupdesc, (AttrNumber) 5, "leader_pid", INT4OID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); - LockRelease(&tag, ExclusiveLock, false); + UnlockShmem(&tag); MemoryContextSwitchTo(oldcontext); } break; @@ -681,11 +702,19 @@ pg_query_state(PG_FUNCTION_ARGS) tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); /* increment cursor */ +#if PG_VERSION_NUM >= 130000 + p_state->frame_cursor = lnext(p_state->stack, p_state->frame_cursor); +#else p_state->frame_cursor = lnext(p_state->frame_cursor); +#endif p_state->frame_index++; if (p_state->frame_cursor == NULL) +#if PG_VERSION_NUM >= 130000 + fctx->proc_cursor = lnext(fctx->procs, fctx->proc_cursor); +#else 
fctx->proc_cursor = lnext(fctx->proc_cursor); +#endif SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } @@ -716,7 +745,12 @@ GetRemoteBackendUserId(PGPROC *proc) { Oid result; +#if PG_VERSION_NUM >= 170000 + Assert(proc && proc->vxid.procNumber != INVALID_PROC_NUMBER); +#else Assert(proc && proc->backendId != InvalidBackendId); +#endif + Assert(UserIdPollReason != INVALID_PROCSIGNAL); Assert(counterpart_userid); @@ -724,7 +758,12 @@ GetRemoteBackendUserId(PGPROC *proc) counterpart_userid->caller = MyLatch; pg_write_barrier(); +#if PG_VERSION_NUM >= 170000 + SendProcSignal(proc->pid, UserIdPollReason, proc->vxid.procNumber); +#else SendProcSignal(proc->pid, UserIdPollReason, proc->backendId); +#endif + for (;;) { SpinLockAcquire(&counterpart_userid->mutex); @@ -736,8 +775,11 @@ GetRemoteBackendUserId(PGPROC *proc) #if PG_VERSION_NUM < 100000 WaitLatch(MyLatch, WL_LATCH_SET, 0); -#else +#elif PG_VERSION_NUM < 120000 WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_EXTENSION); +#else + WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0, + PG_WAIT_EXTENSION); #endif CHECK_FOR_INTERRUPTS(); ResetLatch(MyLatch); @@ -756,20 +798,20 @@ static shm_mq_result shm_mq_receive_with_timeout(shm_mq_handle *mqh, Size *nbytesp, void **datap, - long timeout) + int64 timeout) { int rc = 0; - long delay = timeout; + int64 delay = timeout; + instr_time start_time; + instr_time cur_time; + + INSTR_TIME_SET_CURRENT(start_time); for (;;) { - instr_time start_time; - instr_time cur_time; shm_mq_result mq_receive_result; - INSTR_TIME_SET_CURRENT(start_time); - - mq_receive_result = shm_mq_receive(mqh, nbytesp, datap, true); + mq_receive_result = receive_msg_by_parts(mqh, nbytesp, datap, timeout, &rc, true); if (mq_receive_result != SHM_MQ_WOULD_BLOCK) return mq_receive_result; if (rc & WL_TIMEOUT || delay <= 0) @@ -777,15 +819,22 @@ shm_mq_receive_with_timeout(shm_mq_handle *mqh, #if PG_VERSION_NUM < 100000 rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT, delay); +#elif PG_VERSION_NUM < 120000 + rc = WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT, + delay, PG_WAIT_EXTENSION); #else - rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT, delay, - PG_WAIT_EXTENSION); + rc = WaitLatch(MyLatch, + WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, + delay, PG_WAIT_EXTENSION); #endif INSTR_TIME_SET_CURRENT(cur_time); INSTR_TIME_SUBTRACT(cur_time, start_time); - delay = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time); + delay = timeout - (int64) INSTR_TIME_GET_MILLISEC(cur_time); + if (delay <= 0) + return SHM_MQ_WOULD_BLOCK; CHECK_FOR_INTERRUPTS(); ResetLatch(MyLatch); @@ -830,6 +879,7 @@ extract_running_bgworkers(PlanState *node, List **result) typedef struct { + int reqid; int number; pid_t pids[FLEXIBLE_ARRAY_MEMBER]; } BgWorkerPids; @@ -843,6 +893,10 @@ SendBgWorkerPids(void) int msg_len; int i; shm_mq_handle *mqh; + LOCKTAG tag; + shm_mq_result result; + + LockShmem(&tag, PG_QS_SND_KEY); mqh = shm_mq_attach(mq, NULL, NULL); @@ -858,17 +912,28 @@ SendBgWorkerPids(void) msg_len = offsetof(BgWorkerPids, pids) + sizeof(pid_t) * list_length(all_workers); msg = palloc(msg_len); + msg->reqid = params->reqid; msg->number = list_length(all_workers); i = 0; foreach(iter, all_workers) { pid_t current_pid = lfirst_int(iter); - AssertState(current_pid > 0); + Assert(current_pid > 0); msg->pids[i++] = current_pid; } - shm_mq_send(mqh, msg_len, msg, false); +#if PG_VERSION_NUM < 150000 + result = shm_mq_send(mqh, msg_len, msg, false); +#else + result = shm_mq_send(mqh, msg_len, msg, false, true); +#endif + + /* Check for 
failure. */
+	if(result == SHM_MQ_DETACHED)
+		elog(WARNING, "could not send message to shared-memory queue: receiver has been detached");
+
+	UnlockShmem(&tag);
 }
 
 /*
@@ -884,31 +949,44 @@ GetRemoteBackendWorkers(PGPROC *proc)
 	Size			msg_len;
 	int				i;
 	List			*result = NIL;
+	LOCKTAG			tag;
 
+#if PG_VERSION_NUM >= 170000
+	Assert(proc && proc->vxid.procNumber != INVALID_PROC_NUMBER);
+#else
 	Assert(proc && proc->backendId != InvalidBackendId);
+#endif
+
 	Assert(WorkerPollReason != INVALID_PROCSIGNAL);
 	Assert(mq);
 
+	LockShmem(&tag, PG_QS_SND_KEY);
 	mq = shm_mq_create(mq, QUEUE_SIZE);
 	shm_mq_set_sender(mq, proc);
 	shm_mq_set_receiver(mq, MyProc);
+	UnlockShmem(&tag);
 
+#if PG_VERSION_NUM >= 170000
+	sig_result = SendProcSignal(proc->pid, WorkerPollReason, proc->vxid.procNumber);
+#else
 	sig_result = SendProcSignal(proc->pid, WorkerPollReason, proc->backendId);
+#endif
+
 	if (sig_result == -1)
 		goto signal_error;
 
 	mqh = shm_mq_attach(mq, NULL, NULL);
 	mq_receive_result = shm_mq_receive(mqh, &msg_len, (void **) &msg, false);
-	if (mq_receive_result != SHM_MQ_SUCCESS)
+	if (mq_receive_result != SHM_MQ_SUCCESS || msg == NULL || msg->reqid != reqid || msg_len != offsetof(BgWorkerPids, pids) + msg->number*sizeof(pid_t))
 		goto mq_error;
 
 	for (i = 0; i < msg->number; i++)
 	{
 		pid_t	pid = msg->pids[i];
-		PGPROC *proc = BackendPidGetProc(pid);
-		if (!proc || !proc->pid)
+		PGPROC *current_proc = BackendPidGetProc(pid);
+		if (!current_proc || !current_proc->pid)
 			continue;
-		result = lcons(proc, result);
+		result = lcons(current_proc, result);
 	}
 
 #if PG_VERSION_NUM < 100000
@@ -921,10 +999,12 @@ GetRemoteBackendWorkers(PGPROC *proc)
 
 signal_error:
 	ereport(ERROR,
 			(errcode(ERRCODE_INTERNAL_ERROR),
-			 errmsg("invalid send signal")));
+			errmsg("invalid send signal")));
 mq_error:
 	ereport(ERROR,
 			(errcode(ERRCODE_INTERNAL_ERROR),
-			 errmsg("error in message queue data transmitting")));
+			errmsg("error in message queue data transmitting")));
+
+	return NIL;
 }
 
 static shm_mq_msg *
@@ -936,6 +1016,70 @@ copy_msg(shm_mq_msg *msg)
 	return result;
 }
 
+static shm_mq_result
+receive_msg_by_parts(shm_mq_handle *mqh, Size *total, void **datap,
+					 int64 timeout, int *rc, bool nowait)
+{
+	shm_mq_result	mq_receive_result;
+	shm_mq_msg		*buff;
+	int				offset;
+	Size			*expected;
+	Size			expected_data;
+	Size			len;
+
+	/* Get the expected number of bytes in message */
+	mq_receive_result = shm_mq_receive(mqh, &len, (void **) &expected, nowait);
+	if (mq_receive_result != SHM_MQ_SUCCESS)
+		return mq_receive_result;
+	Assert(len == sizeof(Size));
+
+	expected_data = *expected;
+	*datap = palloc0(expected_data);
+
+	/* Get the message itself */
+	for (offset = 0; offset < expected_data; )
+	{
+		int64 delay = timeout;
+		/* Keep receiving new messages until we assemble the full message */
+		for (;;)
+		{
+			mq_receive_result = shm_mq_receive(mqh, &len, ((void **) &buff), nowait);
+			if (mq_receive_result != SHM_MQ_SUCCESS)
+			{
+				if (nowait && mq_receive_result == SHM_MQ_WOULD_BLOCK)
+				{
+					/*
+					 * We can't leave this function with SHM_MQ_WOULD_BLOCK
+					 * while parts are still being read: the next call to
+					 * receive_msg_by_parts() would then resume mid-message
+					 * on the remaining unread parts. So we wait out the
+					 * whole MAX_RCV_TIMEOUT and only return the error after
+					 * that.
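+					 *
+					 * Illustrative timing: with PART_RCV_DELAY = 1000 ms, a
+					 * missing part is re-polled about once per second until
+					 * it arrives or the remaining delay budget is exhausted.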
+ */ + if (delay > 0) + { + pg_usleep(PART_RCV_DELAY * 1000); + delay -= PART_RCV_DELAY; + continue; + } + if (rc) + { /* Mark that the timeout has expired: */ + *rc |= WL_TIMEOUT; + } + } + return mq_receive_result; + } + break; + } + memcpy((char *) *datap + offset, buff, len); + offset += len; + } + + *total = offset; + + return mq_receive_result; +} + static List * GetRemoteBackendQueryStates(PGPROC *leader, List *pworkers, @@ -954,6 +1098,7 @@ GetRemoteBackendQueryStates(PGPROC *leader, shm_mq_result mq_receive_result; shm_mq_msg *msg; Size len; + LOCKTAG tag; Assert(QueryStatePollReason != INVALID_PROCSIGNAL); Assert(mq); @@ -968,15 +1113,26 @@ GetRemoteBackendQueryStates(PGPROC *leader, pg_write_barrier(); /* initialize message queue that will transfer query states */ + LockShmem(&tag, PG_QS_SND_KEY); mq = shm_mq_create(mq, QUEUE_SIZE); + shm_mq_set_sender(mq, leader); + shm_mq_set_receiver(mq, MyProc); + UnlockShmem(&tag); /* * send signal `QueryStatePollReason` to all processes and define all alive * ones */ +#if PG_VERSION_NUM >= 170000 + sig_result = SendProcSignal(leader->pid, + QueryStatePollReason, + leader->vxid.procNumber); +#else sig_result = SendProcSignal(leader->pid, QueryStatePollReason, leader->backendId); +#endif + if (sig_result == -1) goto signal_error; foreach(iter, pworkers) @@ -984,9 +1140,19 @@ GetRemoteBackendQueryStates(PGPROC *leader, PGPROC *proc = (PGPROC *) lfirst(iter); if (!proc || !proc->pid) continue; + + pg_atomic_add_fetch_u32(&counterpart_userid->n_peers, 1); + +#if PG_VERSION_NUM >= 170000 + sig_result = SendProcSignal(proc->pid, + QueryStatePollReason, + proc->vxid.procNumber); +#else sig_result = SendProcSignal(proc->pid, QueryStatePollReason, proc->backendId); +#endif + if (sig_result == -1) { if (errno != ESRCH) @@ -998,12 +1164,15 @@ GetRemoteBackendQueryStates(PGPROC *leader, } /* extract query state from leader process */ - shm_mq_set_sender(mq, leader); - shm_mq_set_receiver(mq, MyProc); mqh = shm_mq_attach(mq, NULL, NULL); - mq_receive_result = shm_mq_receive(mqh, &len, (void **) &msg, false); + elog(DEBUG1, "Wait response from leader %d", leader->pid); + mq_receive_result = receive_msg_by_parts(mqh, &len, (void **) &msg, + 0, NULL, false); if (mq_receive_result != SHM_MQ_SUCCESS) goto mq_error; + if (msg->reqid != reqid) + goto mq_error; + Assert(len == msg->length); result = lappend(result, copy_msg(msg)); #if PG_VERSION_NUM < 100000 @@ -1017,25 +1186,31 @@ GetRemoteBackendQueryStates(PGPROC *leader, */ foreach(iter, alive_procs) { - PGPROC *proc = (PGPROC *) lfirst(iter); + PGPROC *proc = (PGPROC *) lfirst(iter); /* prepare message queue to transfer data */ + elog(DEBUG1, "Wait response from worker %d", proc->pid); + LockShmem(&tag, PG_QS_SND_KEY); mq = shm_mq_create(mq, QUEUE_SIZE); shm_mq_set_sender(mq, proc); shm_mq_set_receiver(mq, MyProc); /* this function notifies the counterpart to come into data transfer */ + UnlockShmem(&tag); /* retrieve result data from message queue */ mqh = shm_mq_attach(mq, NULL, NULL); mq_receive_result = shm_mq_receive_with_timeout(mqh, &len, (void **) &msg, - MIN_TIMEOUT); + MAX_RCV_TIMEOUT); if (mq_receive_result != SHM_MQ_SUCCESS) - /* counterpart is died, not consider it */ - continue; - + { + /* counterpart is dead, not considering it */ + goto mq_error; + } + if (msg->reqid != reqid) + goto mq_error; Assert(len == msg->length); /* aggregate result data */ @@ -1047,13 +1222,28 @@ GetRemoteBackendQueryStates(PGPROC *leader, shm_mq_detach(mqh); #endif } - return result; signal_error: ereport(ERROR, 
(errcode(ERRCODE_INTERNAL_ERROR),
-			 errmsg("invalid send signal")));
+			errmsg("invalid send signal")));
 mq_error:
+#if PG_VERSION_NUM < 100000
+	shm_mq_detach(mq);
+#else
+	shm_mq_detach(mqh);
+#endif
 	ereport(ERROR,
 			(errcode(ERRCODE_INTERNAL_ERROR),
-			 errmsg("error in message queue data transmitting")));
+			errmsg("error in message queue data transmitting")));
+
+	return NIL;
+}
+
+void
+DetachPeer(void)
+{
+	int n_peers = pg_atomic_fetch_sub_u32(&counterpart_userid->n_peers, 1);
+	if (n_peers <= 0)
+		ereport(LOG, (errcode(ERRCODE_INTERNAL_ERROR),
+					errmsg("pg_query_state peer is not responding")));
 }
diff --git a/pg_query_state.h b/pg_query_state.h
index 959cd80..f632008 100644
--- a/pg_query_state.h
+++ b/pg_query_state.h
@@ -2,7 +2,7 @@
  * pg_query_state.h
  *		Headers for pg_query_state extension.
  *
- * Copyright (c) 2016-2016, Postgres Professional
+ * Copyright (c) 2016-2024, Postgres Professional
  *
  * IDENTIFICATION
  *	  contrib/pg_query_state/pg_query_state.h
@@ -14,13 +14,31 @@
 
 #include "commands/explain.h"
 #include "nodes/pg_list.h"
+#include "storage/procarray.h"
 #include "storage/shm_mq.h"
 
 #define QUEUE_SIZE			(16 * 1024)
+#define MSG_MAX_SIZE		1024
+#define WRITING_DELAY		(100 * 1000)	/* 100ms */
+#define NUM_OF_ATTEMPTS		6
 
 #define TIMINIG_OFF_WARNING	1
 #define BUFFERS_OFF_WARNING	2
 
+#define PG_QS_MODULE_KEY	0xCA94B108
+#define PG_QS_RCV_KEY		0
+#define PG_QS_SND_KEY		1
+
+/* Receive timeout should be larger than send timeout so that workers stop waiting before the polling process does */
+#define MAX_RCV_TIMEOUT		6000	/* 6 seconds */
+#define MAX_SND_TIMEOUT		3000	/* 3 seconds */
+
+/*
+ * Delay for receiving parts of full message (in case of SHM_MQ_WOULD_BLOCK),
+ * should be less than MAX_RCV_TIMEOUT
+ */
+#define PART_RCV_DELAY		1000	/* 1 second */
+
 /*
  * Result status on query state request from asked backend
 */
@@ -28,7 +46,7 @@ typedef enum
 {
 	QUERY_NOT_RUNNING,		/* Backend doesn't execute any query */
 	STAT_DISABLED,			/* Collection of execution statistics is disabled */
-	QS_RETURNED				/* Backend succesfully returned its query state */
+	QS_RETURNED				/* Backend successfully returned its query state */
 } PG_QS_RequestResult;
 
 /*
@@ -36,6 +54,7 @@ typedef enum
 */
 typedef struct
 {
+	int		reqid;
 	int		length;							/* size of message record, for sanity check */
 	PGPROC	*proc;
 	PG_QS_RequestResult	result_code;
@@ -50,6 +69,7 @@ typedef struct
 /* pg_query_state arguments */
 typedef struct
 {
+	int		reqid;
 	bool	verbose;
 	bool	costs;
 	bool	timing;
@@ -68,5 +88,8 @@ extern shm_mq *mq;
 
 /* signal_handler.c */
 extern void SendQueryState(void);
+extern void DetachPeer(void);
+extern void UnlockShmem(LOCKTAG *tag);
+extern void LockShmem(LOCKTAG *tag, uint32 key);
 
 #endif
diff --git a/run_tests.sh b/run_tests.sh
old mode 100755
new mode 100644
index bb7b75c..d330d1e
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 #
-# Copyright (c) 2018, Postgres Professional
+# Copyright (c) 2019-2024, Postgres Professional
 #
 # supported levels:
 #		* standard
@@ -13,6 +13,9 @@ set -ux
 
 status=0
 
+venv_path=tmp/env
+rm -rf "$venv_path"
+
 # global exports
 export PGPORT=55435
 export VIRTUAL_ENV_DISABLE_PROMPT=1
@@ -55,7 +58,7 @@ fi
 
 # build and install PostgreSQL
 if [ "$LEVEL" = "hardcore" ] || \
-   [ "$LEVEL" = "nightmare" ]; then
+	[ "$LEVEL" = "nightmare" ]; then
 
 	# enable Valgrind support
 	sed -i.bak "s/\/* #define USE_VALGRIND *\//#define USE_VALGRIND/g" src/include/pg_config_manual.h
@@ -99,12 +102,16 @@ if [ "$LEVEL" = "scan-build" ] || \
 fi
 
+# XXX: Hackish way to make it possible to run all contrib tests
+mkdir $CUSTOM_PG_SRC/contrib/pg_query_state
+cp -r * $CUSTOM_PG_SRC/contrib/pg_query_state/
+
 # don't forget to "make clean"
-make USE_PGXS=1 clean
+make -C $CUSTOM_PG_SRC/contrib/pg_query_state clean
 
 # build and install extension (using PG_CPPFLAGS and SHLIB_LINK for gcov)
-make USE_PGXS=1 PG_CPPFLAGS="-coverage" SHLIB_LINK="-coverage"
-make USE_PGXS=1 install
+make -C $CUSTOM_PG_SRC/contrib/pg_query_state PG_CPPFLAGS="-coverage" SHLIB_LINK="-coverage"
+make -C $CUSTOM_PG_SRC/contrib/pg_query_state install
 
 # initialize database
 initdb -D $PGDATA
@@ -136,17 +143,23 @@ if [ $status -ne 0 ]; then cat /tmp/postgres.log; exit 1; fi
 
 # run regression tests
 export PG_REGRESS_DIFF_OPTS="-w -U3" # for alpine's diff (BusyBox)
-make USE_PGXS=1 installcheck || status=$?
+cd $CUSTOM_PG_SRC/contrib/pg_query_state
+make installcheck || status=$?
 
 # show diff if it exists
 if [ -f regression.diffs ]; then cat regression.diffs; fi
 
 # run python tests
 set +x -e
-virtualenv /tmp/env && source /tmp/env/bin/activate &&
-pip install PyYAML && pip install psycopg2
+python3 -m venv "$venv_path" && source "$venv_path/bin/activate"
+pip3 install --upgrade -t "$venv_path" -r ./tests/requirements.txt
+#pip3 install -e "./$venv_path"
 set -e #exit virtualenv with error code
-python tests/pg_qs_test_runner.py --port $PGPORT
+python3 tests/pg_qs_test_runner.py --port $PGPORT
+if [[ "$USE_TPCDS" == "1" ]]; then
+	python3 tests/pg_qs_test_runner.py --port $PGPORT --tpc-ds-setup
+	python3 tests/pg_qs_test_runner.py --port $PGPORT --tpc-ds-run
+fi
 deactivate
 set -x
@@ -165,9 +178,10 @@ fi
 if [ $status -ne 0 ]; then exit 1; fi
 set +e
 
 # generate *.gcov files
-gcov *.c *.h
+gcov $CUSTOM_PG_SRC/contrib/pg_query_state/*.c $CUSTOM_PG_SRC/contrib/pg_query_state/*.h
 
 set +ux
 
 # send coverage stats to Codecov
-bash <(curl -s https://codecov.io/bash)
\ No newline at end of file
+export CODECOV_TOKEN=55ab7421-9277-45af-a329-d8b40db96b2a
+bash <(curl -s https://codecov.io/bash)
diff --git a/signal_handler.c b/signal_handler.c
index 0f2cf90..dfe8780 100644
--- a/signal_handler.c
+++ b/signal_handler.c
@@ -2,7 +2,7 @@
 * signal_handler.c
 *		Collect current query state and send it to requestor in custom signal handler
 *
- * Copyright (c) 2016-2016, Postgres Professional
+ * Copyright (c) 2016-2024, Postgres Professional
 *
 * IDENTIFICATION
 *	  contrib/pg_query_state/signal_handler.c
@@ -27,6 +27,17 @@ typedef struct
 	char	*plan;
 } stack_frame;
 
+/*
+ * A self-explanatory enum describing the send_msg_by_parts results
+ */
+typedef enum
+{
+	MSG_BY_PARTS_SUCCEEDED,
+	MSG_BY_PARTS_FAILED
+} msg_by_parts_result;
+
+static msg_by_parts_result send_msg_by_parts(shm_mq_handle *mqh, Size nbytes, const void *data);
+
 /*
 * Get List of stack_frames as a stack of function calls starting from outermost call.
 * Each entry contains query text and query state in form of EXPLAIN ANALYZE output.
@@ -149,6 +160,63 @@ serialize_stack(char *dest, List *qs_stack) } } +static msg_by_parts_result +shm_mq_send_nonblocking(shm_mq_handle *mqh, Size nbytes, const void *data, Size attempts) +{ + int i; + shm_mq_result res; + + for(i = 0; i < attempts; i++) + { +#if PG_VERSION_NUM < 150000 + res = shm_mq_send(mqh, nbytes, data, true); +#else + res = shm_mq_send(mqh, nbytes, data, true, true); +#endif + + if(res == SHM_MQ_SUCCESS) + break; + else if (res == SHM_MQ_DETACHED) + return MSG_BY_PARTS_FAILED; + + /* SHM_MQ_WOULD_BLOCK - sleeping for some delay */ + pg_usleep(WRITING_DELAY); + } + + if(i == attempts) + return MSG_BY_PARTS_FAILED; + + return MSG_BY_PARTS_SUCCEEDED; +} + +/* + * send_msg_by_parts sends data through the queue as a bunch of messages + * of smaller size + */ +static msg_by_parts_result +send_msg_by_parts(shm_mq_handle *mqh, Size nbytes, const void *data) +{ + int bytes_left; + int bytes_send; + int offset; + + /* Send the expected message length */ + if(shm_mq_send_nonblocking(mqh, sizeof(Size), &nbytes, NUM_OF_ATTEMPTS) == MSG_BY_PARTS_FAILED) + return MSG_BY_PARTS_FAILED; + + /* Send the message itself */ + for (offset = 0; offset < nbytes; offset += bytes_send) + { + bytes_left = nbytes - offset; + bytes_send = (bytes_left < MSG_MAX_SIZE) ? bytes_left : MSG_MAX_SIZE; + if(shm_mq_send_nonblocking(mqh, bytes_send, &(((unsigned char*)data)[offset]), NUM_OF_ATTEMPTS) + == MSG_BY_PARTS_FAILED) + return MSG_BY_PARTS_FAILED; + } + + return MSG_BY_PARTS_SUCCEEDED; +} + /* * Send state of current query to shared queue. * This function is called when fire custom signal QueryStatePollReason @@ -156,7 +224,14 @@ serialize_stack(char *dest, List *qs_stack) void SendQueryState(void) { - shm_mq_handle *mqh; + shm_mq_handle *mqh; + instr_time start_time; + instr_time cur_time; + int64 delay = MAX_SND_TIMEOUT; + int reqid = params->reqid; + LOCKTAG tag; + + INSTR_TIME_SET_CURRENT(start_time); /* wait until caller sets this process as sender to message queue */ for (;;) @@ -165,30 +240,52 @@ SendQueryState(void) break; #if PG_VERSION_NUM < 100000 - WaitLatch(MyLatch, WL_LATCH_SET, 0); + WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT, delay); +#elif PG_VERSION_NUM < 120000 + WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT, delay, PG_WAIT_IPC); #else - WaitLatch(MyLatch, WL_LATCH_SET, 0, PG_WAIT_IPC); + WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, delay, PG_WAIT_IPC); #endif + INSTR_TIME_SET_CURRENT(cur_time); + INSTR_TIME_SUBTRACT(cur_time, start_time); + + delay = MAX_SND_TIMEOUT - (int64) INSTR_TIME_GET_MILLISEC(cur_time); + if (delay <= 0) + { + elog(WARNING, "pg_query_state: failed to receive request from leader"); + DetachPeer(); + return; + } CHECK_FOR_INTERRUPTS(); ResetLatch(MyLatch); } + LockShmem(&tag, PG_QS_SND_KEY); + + elog(DEBUG1, "Worker %d receives pg_query_state request from %d", shm_mq_get_sender(mq)->pid, shm_mq_get_receiver(mq)->pid); mqh = shm_mq_attach(mq, NULL, NULL); + if (reqid != params->reqid || shm_mq_get_sender(mq) != MyProc) + { + UnlockShmem(&tag); + return; + } /* check if module is enabled */ if (!pg_qs_enable) { - shm_mq_msg msg = { BASE_SIZEOF_SHM_MQ_MSG, MyProc, STAT_DISABLED }; + shm_mq_msg msg = { reqid, BASE_SIZEOF_SHM_MQ_MSG, MyProc, STAT_DISABLED }; - shm_mq_send(mqh, msg.length, &msg, false); + if(send_msg_by_parts(mqh, msg.length, &msg) != MSG_BY_PARTS_SUCCEEDED) + goto connection_cleanup; } /* check if backend doesn't execute any query */ else if (list_length(QueryDescStack) == 0) { - shm_mq_msg msg = { BASE_SIZEOF_SHM_MQ_MSG, 
MyProc, QUERY_NOT_RUNNING }; + shm_mq_msg msg = { reqid, BASE_SIZEOF_SHM_MQ_MSG, MyProc, QUERY_NOT_RUNNING }; - shm_mq_send(mqh, msg.length, &msg, false); + if(send_msg_by_parts(mqh, msg.length, &msg) != MSG_BY_PARTS_SUCCEEDED) + goto connection_cleanup; } /* happy path */ @@ -198,6 +295,7 @@ SendQueryState(void) int msglen = sizeof(shm_mq_msg) + serialized_stack_length(qs_stack); shm_mq_msg *msg = palloc(msglen); + msg->reqid = reqid; msg->length = msglen; msg->proc = MyProc; msg->result_code = QS_RETURNED; @@ -210,6 +308,25 @@ SendQueryState(void) msg->stack_depth = list_length(qs_stack); serialize_stack(msg->stack, qs_stack); - shm_mq_send(mqh, msglen, msg, false); + + if(send_msg_by_parts(mqh, msglen, msg) != MSG_BY_PARTS_SUCCEEDED) + { + elog(WARNING, "pg_query_state: peer seems to have detached"); + goto connection_cleanup; + } } + elog(DEBUG1, "Worker %d sends response for pg_query_state to %d", shm_mq_get_sender(mq)->pid, shm_mq_get_receiver(mq)->pid); + DetachPeer(); + UnlockShmem(&tag); + + return; + +connection_cleanup: +#if PG_VERSION_NUM < 100000 + shm_mq_detach(mq); +#else + shm_mq_detach(mqh); +#endif + DetachPeer(); + UnlockShmem(&tag); } diff --git a/specs/corner_cases.spec b/specs/corner_cases.spec index 292b39d..315b676 100644 --- a/specs/corner_cases.spec +++ b/specs/corner_cases.spec @@ -1,6 +1,5 @@ setup { - CREATE EXTENSION pg_query_state; CREATE ROLE alice; CREATE ROLE bob; CREATE ROLE super SUPERUSER; @@ -31,7 +30,6 @@ teardown DROP ROLE super; DROP ROLE bob; DROP ROLE alice; - DROP EXTENSION pg_query_state; } session "s1" @@ -64,12 +62,12 @@ permutation "s1_pg_qs_1" permutation "s1_pg_qs_2" # Check idle -permutation "s1_save_pid" "s2_pg_qs_counterpart" +permutation "s1_save_pid" "s2_pg_qs_counterpart"(*) # Check module disable -permutation "s1_save_pid" "s1_disable_pg_qs" "s2_pg_qs_counterpart" +permutation "s1_save_pid" "s1_disable_pg_qs" "s2_pg_qs_counterpart"(*) # Check roles correspondence -permutation "s1_set_bob" "s2_set_bob" "s1_save_pid" "s2_pg_qs_counterpart" -permutation "s1_set_bob" "s2_set_su" "s1_save_pid" "s2_pg_qs_counterpart" -permutation "s1_set_bob" "s2_set_alice" "s1_save_pid" "s2_pg_qs_counterpart" +permutation "s1_set_bob" "s2_set_bob" "s1_save_pid" "s2_pg_qs_counterpart"(*) +permutation "s1_set_bob" "s2_set_su" "s1_save_pid" "s2_pg_qs_counterpart"(*) +permutation "s1_set_bob" "s2_set_alice" "s1_save_pid" "s2_pg_qs_counterpart"(*) diff --git a/tests/common.py b/tests/common.py new file mode 100644 index 0000000..6dab69a --- /dev/null +++ b/tests/common.py @@ -0,0 +1,167 @@ +''' +common.py +Copyright (c) 2016-2024, Postgres Professional +''' + +import psycopg2 +import psycopg2.extensions +import select +import time + +BACKEND_IS_IDLE_INFO = 'INFO: state of backend is idle\n' +BACKEND_IS_ACTIVE_INFO = 'INFO: state of backend is active\n' + +def wait(conn): + """wait for some event on connection to postgres""" + while 1: + state = conn.poll() + if state == psycopg2.extensions.POLL_OK: + break + elif state == psycopg2.extensions.POLL_WRITE: + select.select([], [conn.fileno()], []) + elif state == psycopg2.extensions.POLL_READ: + select.select([conn.fileno()], [], []) + else: + raise psycopg2.OperationalError("poll() returned %s" % state) + +def n_async_connect(config, n=1): + """establish n asynchronious connections to the postgres with specified config""" + + aconfig = config.copy() + aconfig['async'] = True + + result = [] + for _ in range(n): + conn = psycopg2.connect(**aconfig) + wait(conn) + result.append(conn) + return result + +def 
n_close(conns): + """close connections to postgres""" + + for conn in conns: + conn.close() + +def pg_query_state_locks(config, pid, conn, verbose=False, costs=False, timing=False, \ + buffers=False, triggers=False, format='text'): + """ + Get query state from backend with specified pid and optional parameters. + Save any warning, info, notice and log data in global variable 'notices' + """ + + curs = conn.cursor() + curs.callproc('pg_query_state', (pid, verbose, costs, timing, buffers, triggers, format)) + wait(conn) + result = curs.fetchall() + notices = conn.notices[:] + + return result, notices + +def pg_query_state(config, pid, verbose=False, costs=False, timing=False, \ + buffers=False, triggers=False, format='text'): + """ + Get query state from backend with specified pid and optional parameters. + Save any warning, info, notice and log data in global variable 'notices' + """ + + conn = psycopg2.connect(**config) + curs = conn.cursor() + curs.callproc('pg_query_state', (pid, verbose, costs, timing, buffers, triggers, format)) + result = curs.fetchall() + notices = conn.notices[:] + conn.close() + + return result, notices + +def onetime_query_state_locks(config, acon_query, acon_pg, query, args={}, num_workers=0): + """ + Get intermediate state of 'query' on connection 'acon_query' after number of 'steps' + of node executions from start of query + """ + + curs_query = acon_query.cursor() + curs_pg = acon_pg.cursor() + curs_query.execute("select pg_advisory_lock(1);") + curs_pg.execute("select pg_advisory_lock(2);") + wait(acon_query) + wait(acon_pg) + curs_pg.execute("select pg_advisory_lock(1);") + set_guc(acon_query, 'enable_mergejoin', 'off') + set_guc(acon_query, 'max_parallel_workers_per_gather', num_workers) + curs_query.execute(query) + # extract current state of query progress + MAX_PG_QS_RETRIES = 10 + DELAY_BETWEEN_RETRIES = 0.1 + pg_qs_args = { + 'config': config, + 'pid': acon_query.get_backend_pid(), + 'conn': acon_pg + } + for k, v in args.items(): + pg_qs_args[k] = v + n_retries = 0 + + wait(acon_pg) + + while True: + result, notices = pg_query_state_locks(**pg_qs_args) + n_retries += 1 + if len(result) > 0: + break + if n_retries >= MAX_PG_QS_RETRIES: + # pg_query_state callings don't return any result, more likely run + # query has completed + break + time.sleep(DELAY_BETWEEN_RETRIES) + + curs_pg.execute("select pg_advisory_unlock(2);") + wait(acon_pg) + wait(acon_query) + + set_guc(acon_query, 'enable_mergejoin', 'on') + curs_query.execute("select pg_advisory_unlock(2);") + curs_pg.execute("select pg_advisory_unlock(1);") + return result, notices + +def onetime_query_state(config, async_conn, query, args={}, num_workers=0): + """ + Get intermediate state of 'query' on connection 'async_conn' after number of 'steps' + of node executions from start of query + """ + + acurs = async_conn.cursor() + + set_guc(async_conn, 'enable_mergejoin', 'off') + set_guc(async_conn, 'max_parallel_workers_per_gather', num_workers) + acurs.execute(query) + + # extract current state of query progress + MAX_PG_QS_RETRIES = 10 + DELAY_BETWEEN_RETRIES = 0.1 + pg_qs_args = { + 'config': config, + 'pid': async_conn.get_backend_pid() + } + for k, v in args.items(): + pg_qs_args[k] = v + n_retries = 0 + while True: + result, notices = pg_query_state(**pg_qs_args) + n_retries += 1 + if len(result) > 0: + break + if n_retries >= MAX_PG_QS_RETRIES: + # pg_query_state callings don't return any result, more likely run + # query has completed + break + time.sleep(DELAY_BETWEEN_RETRIES) + 
wait(async_conn) + + set_guc(async_conn, 'enable_mergejoin', 'on') + return result, notices + +def set_guc(async_conn, param, value): + acurs = async_conn.cursor() + acurs.execute('set %s to %s' % (param, value)) + wait(async_conn) diff --git a/tests/pg_qs_test_runner.py b/tests/pg_qs_test_runner.py index 716719e..944f77f 100644 --- a/tests/pg_qs_test_runner.py +++ b/tests/pg_qs_test_runner.py @@ -1,13 +1,20 @@ ''' -pg_qs_test_cases.py - Tests extract query state from running backend (including concurrent extracts) -Copyright (c) 2016-2016, Postgres Professional +pg_qs_test_runner.py +Copyright (c) 2016-2024, Postgres Professional ''' import argparse -import psycopg2 +import getpass +import os import sys + +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(os.path.abspath('tmp/env')) + +import psycopg2 + from test_cases import * +import tpcds class PasswordPromptAction(argparse.Action): def __call__(self, parser, args, values, option_string=None): @@ -17,6 +24,20 @@ def __call__(self, parser, args, values, option_string=None): class SetupException(Exception): pass class TeardownException(Exception): pass +unlock_if_eq_1 = """ + CREATE OR REPLACE FUNCTION unlock_if_eq_1(x integer) RETURNS integer AS $$ + BEGIN + IF x = 1 THEN + perform pg_advisory_unlock(1); + perform pg_advisory_lock(2); + return 1; + ELSE + return x; + END IF; + END; + $$ LANGUAGE plpgsql + """ + setup_cmd = [ 'drop extension if exists pg_query_state cascade', 'drop table if exists foo cascade', @@ -28,77 +49,98 @@ class TeardownException(Exception): pass 'insert into bar select i, i%2=1 from generate_series(1, 500000) as i', 'analyze foo', 'analyze bar', - ] + unlock_if_eq_1, +] teardown_cmd = [ 'drop table foo cascade', 'drop table bar cascade', 'drop extension pg_query_state cascade', - ] +] tests = [ - test_deadlock, - test_simple_query, - test_concurrent_access, - test_nested_call, - test_trigger, - test_costs, - test_buffers, - test_timing, - test_formats, - test_timing_buffers_conflicts, - test_insert_on_conflict, - ] + test_deadlock, + test_simple_query, + test_concurrent_access, + test_nested_call, + test_trigger, + test_costs, + test_buffers, + test_timing, + test_formats, + test_timing_buffers_conflicts, + test_insert_on_conflict, +] def setup(con): ''' Creates pg_query_state extension, creates tables for tests, fills it with data ''' - print 'setting up...' + print('setting up...') try: cur = con.cursor() for cmd in setup_cmd: cur.execute(cmd) con.commit() cur.close() - except Exception, e: + except Exception as e: raise SetupException('Setup failed: %s' % e) - print 'done!' + print('done!') def teardown(con): ''' Drops table and extension ''' - print 'tearing down...' + print('tearing down...') try: cur = con.cursor() for cmd in teardown_cmd: cur.execute(cmd) con.commit() cur.close() - except Exception, e: + except Exception as e: raise TeardownException('Teardown failed: %s' % e) - print 'done!' 
+	print('done!')
 
 def main(config):
 	'''
 	Main test function
 	'''
-	con = psycopg2.connect(**config)
-	setup(con)
+	conn_params = {
+		key:config.__dict__[key] for key in ('host', 'port', 'user', 'database', 'password')
+	}
+
+	if config.tpcds_setup:
+		print('Setup database for TPC-DS bench')
+		tpcds.setup_tpcds(conn_params)
+		print('Database is setup successfully')
+		return
+
+	if config.tpcds_run:
+		print('Starting stress test')
+		tpcds.run_tpcds(conn_params)
+		print('Stress finished successfully')
+		return
+
+	# run default tests
+	init_conn = psycopg2.connect(**conn_params)
+	setup(init_conn)
 	for i, test in enumerate(tests):
 		if test.__doc__:
 			descr = test.__doc__
 		else:
 			descr = 'test case %d' % (i+1)
-		print ("%s..." % descr),; sys.stdout.flush()
-		test(config)
-		print 'ok!'
-
-	teardown(con)
-	con.close()
+		print(("%s..." % descr))
+		sys.stdout.flush()
+		test(conn_params)
+		print('ok!')
 
+	teardown(init_conn)
+	init_conn.close()
 
 if __name__ == '__main__':
 	parser = argparse.ArgumentParser(description='Query state of running backends tests')
+	parser.add_argument('--host', default='localhost', help='postgres server host')
 	parser.add_argument('--port', type=int, default=5432, help='postgres server port')
 	parser.add_argument('--user', dest='user', default='postgres', help='user name')
 	parser.add_argument('--database', dest='database', default='postgres', help='database name')
-	parser.add_argument('--password', dest='password', nargs=0, action=PasswordPromptAction, default='')
+	parser.add_argument('--password', dest='password', nargs=0, action=PasswordPromptAction, default='', help='password')
+	parser.add_argument('--tpc-ds-setup', dest='tpcds_setup', action='store_true', help='setup database to run TPC-DS benchmark')
+	parser.add_argument('--tpc-ds-run', dest='tpcds_run', action='store_true', help='run only stress test based on TPC-DS benchmark')
+
 	args = parser.parse_args()
-	main(args.__dict__)
+	main(args)
diff --git a/tests/prepare_stress.sh b/tests/prepare_stress.sh
new file mode 100755
index 0000000..3bdb2a5
--- /dev/null
+++ b/tests/prepare_stress.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env sh
+
+mkdir -p tmp_stress
+cd tmp_stress
+rm -rf ./*
+
+git clone --depth 1 --single-branch --branch master https://github.com/gregrahn/tpcds-kit.git # used for data and schema
+git clone --depth 1 --single-branch --branch master https://github.com/cwida/tpcds-result-reproduction.git # used for queries only
+
+cd tpcds-kit/tools
+
+# This is a palliative measure, since tpcds-kit is old and doesn't compile with modern ld.
+# Anyway, now it works and this is better than nothing.
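+# (-zmuldefs is passed through to the linker as -z muldefs, telling it to
+# tolerate the multiply-defined symbols in tpcds-kit instead of failing.)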
+make LDFLAGS=-zmuldefs -s + +# Generate data +./dsdgen -FORCE -VERBOSE -SCALE 1 + +# Prepare data +mkdir -p tables +for i in `ls *.dat`; do + echo "Preparing file" $i + sed 's/|$//' $i > tables/$i +done + +# Generate queries +./dsqgen -DIRECTORY ../query_templates \ + -INPUT ../query_templates/templates.lst \ + -VERBOSE Y \ + -QUALIFY Y \ + -SCALE 1 \ + -DIALECT netezza diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..ff6b4f4 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,3 @@ +PyYAML +psycopg2 +progressbar2 diff --git a/tests/test_cases.py b/tests/test_cases.py index 175dbd1..498484b 100644 --- a/tests/test_cases.py +++ b/tests/test_cases.py @@ -1,109 +1,23 @@ +''' +test_cases.py +Copyright (c) 2016-2024, Postgres Professional +''' + import json -import psycopg2 -import psycopg2.extensions import re import select import time import xml.etree.ElementTree as ET + +import psycopg2 import yaml -from time import sleep -def wait(conn): - """wait for some event on connection to postgres""" - while 1: - state = conn.poll() - if state == psycopg2.extensions.POLL_OK: - break - elif state == psycopg2.extensions.POLL_WRITE: - select.select([], [conn.fileno()], []) - elif state == psycopg2.extensions.POLL_READ: - select.select([conn.fileno()], [], []) - else: - raise psycopg2.OperationalError("poll() returned %s" % state) - -def n_async_connect(config, n=1): - """establish n asynchronious connections to the postgres with specified config""" - - aconfig = config.copy() - aconfig['async'] = True - - result = [] - for _ in xrange(n): - conn = psycopg2.connect(**aconfig) - wait(conn) - result.append(conn) - return result - -def n_close(conns): - """close connections to postgres""" - - for conn in conns: - conn.close() - -notices = [] - -def debug_output(qs, qs_len, pid, query, expected): - something_happened = False - if (qs_len and len(qs) != qs_len ): - print "len(qs): ", len(qs), ", expected: ", qs_len - something_happened = True - if (pid and qs[0][0] != pid): - print "qs[0][0]: ", qs[0][0], " = ", pid - something_happened = True - if (qs[0][1] != 0): - print "qs[0][1]: ", qs[0][1], ", expected: 0" - something_happened = True - if (qs[0][2] != query): - print "qs[0][2]:\n", qs[0][2] - print "Expected:\n", query - something_happened = True - if (not (re.match(expected, qs[0][3]))): - print "qs[0][3]:\n", qs[0][3] - print "Expected:\n", expected - something_happened = True - if (qs[0][4] != None): - print "qs[0][4]: ", qs[0][4], "Expected: None" - something_happened = True - if (qs_len and len(qs) > qs_len): - for i in range(qs_len, len(qs)): - print "qs[",i,"][0]: ", qs[i][0] - print "qs[",i,"][1]: ", qs[i][1] - print "qs[",i,"][2]: ", qs[i][2] - print "qs[",i,"][3]: ", qs[i][3] - print "qs[",i,"][4]: ", qs[i][4] - something_happened = True - if (something_happened): - print "If test have not crashed, then it's OK" - -def notices_warning(): - if (len(notices) > 0): - print("") - print("WARNING:") - print(notices) - -def pg_query_state(config, pid, verbose=False, costs=False, timing=False, \ - buffers=False, triggers=False, format='text'): - """ - Get query state from backend with specified pid and optional parameters. 
diff --git a/tests/test_cases.py b/tests/test_cases.py
index 175dbd1..498484b 100644
--- a/tests/test_cases.py
+++ b/tests/test_cases.py
@@ -1,109 +1,23 @@
+'''
+test_cases.py
+Copyright (c) 2016-2024, Postgres Professional
+'''
+
 import json
-import psycopg2
-import psycopg2.extensions
 import re
 import select
 import time
 import xml.etree.ElementTree as ET
+
+import psycopg2
 import yaml
-from time import sleep
 
-def wait(conn):
-    """wait for some event on connection to postgres"""
-    while 1:
-        state = conn.poll()
-        if state == psycopg2.extensions.POLL_OK:
-            break
-        elif state == psycopg2.extensions.POLL_WRITE:
-            select.select([], [conn.fileno()], [])
-        elif state == psycopg2.extensions.POLL_READ:
-            select.select([conn.fileno()], [], [])
-        else:
-            raise psycopg2.OperationalError("poll() returned %s" % state)
-
-def n_async_connect(config, n=1):
-    """establish n asynchronious connections to the postgres with specified config"""
-
-    aconfig = config.copy()
-    aconfig['async'] = True
-
-    result = []
-    for _ in xrange(n):
-        conn = psycopg2.connect(**aconfig)
-        wait(conn)
-        result.append(conn)
-    return result
-
-def n_close(conns):
-    """close connections to postgres"""
-
-    for conn in conns:
-        conn.close()
-
-notices = []
-
-def debug_output(qs, qs_len, pid, query, expected):
-    something_happened = False
-    if (qs_len and len(qs) != qs_len ):
-        print "len(qs): ", len(qs), ", expected: ", qs_len
-        something_happened = True
-    if (pid and qs[0][0] != pid):
-        print "qs[0][0]: ", qs[0][0], " = ", pid
-        something_happened = True
-    if (qs[0][1] != 0):
-        print "qs[0][1]: ", qs[0][1], ", expected: 0"
-        something_happened = True
-    if (qs[0][2] != query):
-        print "qs[0][2]:\n", qs[0][2]
-        print "Expected:\n", query
-        something_happened = True
-    if (not (re.match(expected, qs[0][3]))):
-        print "qs[0][3]:\n", qs[0][3]
-        print "Expected:\n", expected
-        something_happened = True
-    if (qs[0][4] != None):
-        print "qs[0][4]: ", qs[0][4], "Expected: None"
-        something_happened = True
-    if (qs_len and len(qs) > qs_len):
-        for i in range(qs_len, len(qs)):
-            print "qs[",i,"][0]: ", qs[i][0]
-            print "qs[",i,"][1]: ", qs[i][1]
-            print "qs[",i,"][2]: ", qs[i][2]
-            print "qs[",i,"][3]: ", qs[i][3]
-            print "qs[",i,"][4]: ", qs[i][4]
-        something_happened = True
-    if (something_happened):
-        print "If test have not crashed, then it's OK"
-
-def notices_warning():
-    if (len(notices) > 0):
-        print("")
-        print("WARNING:")
-        print(notices)
-
-def pg_query_state(config, pid, verbose=False, costs=False, timing=False, \
-                        buffers=False, triggers=False, format='text'):
-    """
-    Get query state from backend with specified pid and optional parameters.
-    Save any warning, info, notice and log data in global variable 'notices'
-    """
-
-    global notices
-
-    conn = psycopg2.connect(**config)
-    curs = conn.cursor()
-    result = []
-    while not result:
-        curs.callproc('pg_query_state', (pid, verbose, costs, timing, buffers, triggers, format))
-        result = curs.fetchall()
-    notices = conn.notices[:]
-    conn.close()
-    return result
+import common
 
 def test_deadlock(config):
    """test when two backends try to extract state of each other"""
-    acon1, acon2 = n_async_connect(config, 2)
+    acon1, acon2 = common.n_async_connect(config, 2)
     acurs1 = acon1.cursor()
     acurs2 = acon2.cursor()
 
@@ -115,143 +29,137 @@ def test_deadlock(config):
         r, w, x = select.select([acon1.fileno(), acon2.fileno()], [], [], 10)
         assert (r or w or x), "Deadlock is happened under cross reading of query states"
-        wait(acon1)
-        wait(acon2)
+        common.wait(acon1)
+        common.wait(acon2)
 
         # exit from loop if one backend could read state of execution 'pg_query_state'
         # from other backend
         if acurs1.fetchone() or acurs2.fetchone():
             break
 
-    n_close((acon1, acon2))
-
-def query_state(config, async_conn, query, args={}, num_workers=0):
-    """
-    Get intermediate state of 'query' on connection 'async_conn' after number of 'steps'
-    of node executions from start of query
-    """
-
-    acurs = async_conn.cursor()
-    conn = psycopg2.connect(**config)
-    curs = conn.cursor()
-
-    set_guc(async_conn, 'enable_mergejoin', 'off')
-    set_guc(async_conn, 'max_parallel_workers_per_gather', num_workers)
-    acurs.execute(query)
-
-    # extract current state of query progress
-    pg_qs_args = {
-        'config': config,
-        'pid': async_conn.get_backend_pid()
-    }
-    for k, v in args.iteritems():
-        pg_qs_args[k] = v
-    result = pg_query_state(**pg_qs_args)
-    wait(async_conn)
-
-    set_guc(async_conn, 'pg_query_state.executor_trace', 'off')
-    set_guc(async_conn, 'enable_mergejoin', 'on')
-
-    conn.close()
-    return result
+    common.n_close((acon1, acon2))
 
 def test_simple_query(config):
     """test statistics of simple query"""
-    acon, = n_async_connect(config)
-    query = 'select count(*) from foo join bar on foo.c1=bar.c1'
+    acon1, acon2 = common.n_async_connect(config, 2)
+    query = 'select count(*) from foo join bar on foo.c1=bar.c1 and unlock_if_eq_1(foo.c1)=bar.c1'
     expected = r"""Aggregate \(Current loop: actual rows=\d+, loop number=1\)
   ->  Hash Join \(Current loop: actual rows=\d+, loop number=1\)
         Hash Cond: \(foo.c1 = bar.c1\)
+        Join Filter: \(unlock_if_eq_1\(foo.c1\) = bar.c1\)
         ->  Seq Scan on foo \(Current loop: actual rows=\d+, loop number=1\)
         ->  Hash \(Current loop: actual rows=\d+, loop number=1\)
              Buckets: \d+  Batches: \d+  Memory Usage: \d+kB
              ->  Seq Scan on bar \(Current loop: actual rows=\d+, loop number=1\)"""
 
-    qs = query_state(config, acon, query)
-    debug_output(qs, 1, acon.get_backend_pid(), query, expected)
-    notices_warning()
-    #assert len(qs) == 1 #Skip this check while output of test can be different
-    assert qs[0][0] == acon.get_backend_pid() and qs[0][1] == 0 \
-        and qs[0][2] == query and re.match(expected, qs[0][3]) and qs[0][4] == None
+    qs, _ = common.onetime_query_state_locks(config, acon1, acon2, query)
+
+    assert qs[0][0] == acon1.get_backend_pid()
+    assert qs[0][1] == 0
+    assert qs[0][2] == query
+    assert re.match(expected, qs[0][3])
+    assert qs[0][4] == None
+    # assert qs[0][0] == acon.get_backend_pid() and qs[0][1] == 0 \
+    #     and qs[0][2] == query and re.match(expected, qs[0][3]) and qs[0][4] == None
 
-    n_close((acon,))
+    common.n_close((acon1, acon2))
 
 def test_concurrent_access(config):
     """test when two backends compete with each other to extract state from third running backend"""
-    acon1, acon2, acon3 = n_async_connect(config, 3)
+    acon1, acon2, acon3 = common.n_async_connect(config, 3)
     acurs1, acurs2, acurs3 = acon1.cursor(), acon2.cursor(), acon3.cursor()
     query = 'select count(*) from foo join bar on foo.c1=bar.c1'
 
-    set_guc(acon3, 'max_parallel_workers_per_gather', 0)
+    common.set_guc(acon3, 'max_parallel_workers_per_gather', 0)
     acurs3.execute(query)
     time.sleep(0.1)
     acurs1.callproc('pg_query_state', (acon3.get_backend_pid(),))
     acurs2.callproc('pg_query_state', (acon3.get_backend_pid(),))
-    wait(acon1)
-    wait(acon2)
-    wait(acon3)
+    common.wait(acon1)
+    common.wait(acon2)
+    common.wait(acon3)
 
     qs1, qs2 = acurs1.fetchall(), acurs2.fetchall()
-    assert len(qs1) == len(qs2) == 1 \
+    assert len(qs1) == len(qs2) == 1 \
         and qs1[0][0] == qs2[0][0] == acon3.get_backend_pid() \
         and qs1[0][1] == qs2[0][1] == 0 \
         and qs1[0][2] == qs2[0][2] == query \
        and len(qs1[0][3]) > 0 and len(qs2[0][3]) > 0 \
         and qs1[0][4] == qs2[0][4] == None
-    #assert len(notices) == 0
-    notices_warning()
 
-    n_close((acon1, acon2, acon3))
+    common.n_close((acon1, acon2, acon3))
 
 def test_nested_call(config):
     """test statistics under calling function"""
-    acon, = n_async_connect(config)
+    acon1, acon2 = common.n_async_connect(config, 2)
     util_conn = psycopg2.connect(**config)
     util_curs = util_conn.cursor()
     create_function = """
        create or replace function n_join_foo_bar() returns integer as $$
            begin
-               return (select count(*) from foo join bar on foo.c1=bar.c1);
+               return (select count(*) from foo join bar on foo.c1=bar.c1 and unlock_if_eq_1(foo.c1)=bar.c1);
            end;
        $$ language plpgsql"""
     drop_function = 'drop function n_join_foo_bar()'
     call_function = 'select * from n_join_foo_bar()'
-    nested_query = 'SELECT (select count(*) from foo join bar on foo.c1=bar.c1)'
+    nested_query1 = '(select count(*) from foo join bar on foo.c1=bar.c1 and unlock_if_eq_1(foo.c1)=bar.c1)'
+    nested_query2 = 'SELECT (select count(*) from foo join bar on foo.c1=bar.c1 and unlock_if_eq_1(foo.c1)=bar.c1)'
     expected = 'Function Scan on n_join_foo_bar (Current loop: actual rows=0, loop number=1)'
     expected_nested = r"""Result \(Current loop: actual rows=0, loop number=1\)
   InitPlan 1 \(returns \$0\)
     ->  Aggregate \(Current loop: actual rows=0, loop number=1\)
-          ->  Hash Join \(Current loop: actual rows=0, loop number=1\)
+          ->  Hash Join \(Current loop: actual rows=\d+, loop number=1\)
                Hash Cond: \(foo.c1 = bar.c1\)
-               ->  Seq Scan on foo \(Current loop: actual rows=1, loop number=1\)
-               ->  Hash \(Current loop: actual rows=0, loop number=1\)
+               Join Filter: \(unlock_if_eq_1\(foo.c1\) = bar.c1\)
+               ->  Seq Scan on foo \(Current loop: actual rows=\d+, loop number=1\)
+               ->  Hash \(Current loop: actual rows=500000, loop number=1\)
                     Buckets: \d+  Batches: \d+  Memory Usage: \d+kB
                     ->  Seq Scan on bar \(Current loop: actual rows=\d+, loop number=1\)"""
+    expected_nested_2 = r"""Result \(Current loop: actual rows=0, loop number=1\)
+  InitPlan 1
+    ->  Aggregate \(Current loop: actual rows=0, loop number=1\)
+          ->  Hash Join \(Current loop: actual rows=\d+, loop number=1\)
+                Hash Cond: \(foo.c1 = bar.c1\)
+                Join Filter: \(unlock_if_eq_1\(foo.c1\) = bar.c1\)
+                ->  Seq Scan on foo \(Current loop: actual rows=\d+, loop number=1\)
+                ->  Hash \(Current loop: actual rows=500000, loop number=1\)
+                      Buckets: \d+  Batches: \d+  Memory Usage: \d+kB
+                      ->  Seq Scan on bar \(Current loop: actual rows=\d+, loop number=1\)"""
+
+
     util_curs.execute(create_function)
     util_conn.commit()
 
-    qs = query_state(config, acon, call_function)
-    assert len(qs) == 2 \
-        and qs[0][0] == qs[1][0] == acon.get_backend_pid() \
-        and qs[0][1] == 0 and qs[1][1] == 1 \
-        and qs[0][2] == call_function and qs[0][3] == expected \
-        and qs[1][2] == nested_query and re.match(expected_nested, qs[1][3]) \
-        and qs[0][4] == qs[1][4] == None
-    assert len(notices) == 0
+    qs, notices = common.onetime_query_state_locks(config, acon1, acon2, call_function)
+
+    # Print some debug output before assertion
+    if len(qs) < 2:
+        print(qs)
+
+    assert len(qs) == 3
+    assert qs[0][0] == qs[1][0] == acon1.get_backend_pid()
+    assert qs[0][1] == 0
+    assert qs[1][1] == 1
+    assert qs[0][2] == call_function
+    assert qs[0][3] == expected
+    assert qs[1][2] == nested_query1 or qs[1][2] == nested_query2
+    assert re.match(expected_nested, qs[1][3]) or re.match(expected_nested_2, qs[1][3])
+    assert qs[0][4] == qs[1][4] == None
+    assert len(notices) == 0
 
     util_curs.execute(drop_function)
     util_conn.close()
-    n_close((acon,))
+    common.n_close((acon1, acon2))
 
 def test_insert_on_conflict(config):
     """test statistics on conflicting tuples under INSERT ON CONFLICT query"""
-    acon, = n_async_connect(config)
+    acon, = common.n_async_connect(config)
     util_conn = psycopg2.connect(**config)
     util_curs = util_conn.cursor()
     add_field_uniqueness = 'alter table foo add constraint unique_c1 unique(c1)'
@@ -266,30 +174,22 @@ def test_insert_on_conflict(config):
 
     util_curs.execute(add_field_uniqueness)
     util_conn.commit()
 
-    qs = query_state(config, acon, query)
+    qs, notices = common.onetime_query_state(config, acon, query)
 
-    debug_output(qs, 1, acon.get_backend_pid(), query, expected)
-    notices_warning()
-    #assert len(qs) == 1 \
-    assert qs[0][0] == acon.get_backend_pid() and qs[0][1] == 0 \
+    assert qs[0][0] == acon.get_backend_pid() and qs[0][1] == 0 \
         and qs[0][2] == query and re.match(expected, qs[0][3]) \
         and qs[0][4] == None
-    assert len(notices) == 0
+    assert len(notices) == 0
 
     util_curs.execute(drop_field_uniqueness)
     util_conn.close()
 
-    n_close((acon,))
-
-def set_guc(async_conn, param, value):
-    acurs = async_conn.cursor()
-    acurs.execute('set %s to %s' % (param, value))
-    wait(async_conn)
+    common.n_close((acon,))
 
 def test_trigger(config):
     """test trigger statistics"""
-    acon, = n_async_connect(config)
+    acon, = common.n_async_connect(config)
     acurs = acon.cursor()
     util_conn = psycopg2.connect(**config)
     util_curs = util_conn.cursor()
@@ -305,7 +205,7 @@ def test_trigger(config):
     create_trigger = """
        create trigger unique_foo_c1
            before insert or update of c1 on foo for row
-           execute procedure unique_c1_in_foo()"""
+           execute procedure unique_c1_in_foo()"""
     drop_temps = 'drop function unique_c1_in_foo() cascade'
     query = 'insert into foo select i, md5(random()::text) from generate_series(1, 10000) as i'
     expected_upper = r"""Insert on foo \(Current loop: actual rows=\d+, loop number=1\)
@@ -316,103 +216,113 @@
     util_curs.execute(create_trigger)
     util_conn.commit()
 
-    qs = query_state(config, acon, query, {'triggers': True})
-    debug_output(qs, None, acon.get_backend_pid(), query, expected_upper)
-    notices_warning()
+    qs, notices = common.onetime_query_state(config, acon, query, {'triggers': True})
     assert qs[0][0] == acon.get_backend_pid() and qs[0][1] == 0 \
         and qs[0][2] == query and re.match(expected_upper, qs[0][3]) \
         and qs[0][4] == None
-    assert len(notices) == 0
+    assert len(notices) == 0
 
-    qs = query_state(config, acon, query, {'triggers': False})
-    debug_output(qs, None, acon.get_backend_pid(), query, expected_upper)
-    notices_warning()
+    qs, notices = common.onetime_query_state(config, acon, query, {'triggers': False})
     assert qs[0][0] == acon.get_backend_pid() and qs[0][1] == 0 \
         and qs[0][2] == query and re.match(expected_upper, qs[0][3]) \
         and qs[0][4] == None
-    assert len(notices) == 0
+    assert len(notices) == 0
 
     util_curs.execute(drop_temps)
     util_conn.close()
 
-    n_close((acon,))
+    common.n_close((acon,))
 
 def test_costs(config):
     """test plan costs"""
-    acon, = n_async_connect(config)
-    query = 'select count(*) from foo join bar on foo.c1=bar.c1'
+    acon1, acon2 = common.n_async_connect(config, 2)
+    query = 'select count(*) from foo join bar on foo.c1=bar.c1 and unlock_if_eq_1(foo.c1)=bar.c1;'
+
     expected = r"""Aggregate \(cost=\d+.\d+..\d+.\d+ rows=\d+ width=8\) \(Current loop: actual rows=0, loop number=1\)
-  ->  Hash Join \(cost=\d+.\d+..\d+.\d+ rows=\d+ width=0\) \(Current loop: actual rows=0, loop number=1\)
+  ->  Hash Join \(cost=\d+.\d+..\d+.\d+ rows=\d+ width=0\) \(Current loop: actual rows=\d+, loop number=1\)
         Hash Cond: \(foo.c1 = bar.c1\)
-        ->  Seq Scan on foo \(cost=0.00..\d+.\d+ rows=\d+ width=4\) \(Current loop: actual rows=1, loop number=1\)
-        ->  Hash \(cost=\d+.\d+..\d+.\d+ rows=\d+ width=4\) \(Current loop: actual rows=0, loop number=1\)
+        Join Filter: \(unlock_if_eq_1\(foo.c1\) = bar.c1\)
+        ->  Seq Scan on foo \(cost=0.00..\d+.\d+ rows=\d+ width=4\) \(Current loop: actual rows=\d+, loop number=1\)
+        ->  Hash \(cost=\d+.\d+..\d+.\d+ rows=\d+ width=4\) \(Current loop: actual rows=500000, loop number=1\)
              Buckets: \d+  Batches: \d+  Memory Usage: \d+kB
              ->  Seq Scan on bar \(cost=0.00..\d+.\d+ rows=\d+ width=4\) \(Current loop: actual rows=\d+, loop number=1\)"""
 
-    qs = query_state(config, acon, query, {'costs': True})
-    debug_output(qs, 1, None, query, expected)
-    notices_warning()
-    assert len(qs) == 1 and re.match(expected, qs[0][3])
-    assert len(notices) == 0
+    qs, notices = common.onetime_query_state_locks(config, acon1, acon2, query, {'costs': True})
+
+    assert len(qs) == 2 and re.match(expected, qs[0][3])
+    assert len(notices) == 0
 
-    n_close((acon,))
+    common.n_close((acon1, acon2))
 
 def test_buffers(config):
     """test buffer statistics"""
-    acon, = n_async_connect(config)
-    query = 'select count(*) from foo join bar on foo.c1=bar.c1'
-    expected = r"""Aggregate \(Current loop: actual rows=0, loop number=1\)
-  ->  Hash Join \(Current loop: actual rows=0, loop number=1\)
+    acon1, acon2 = common.n_async_connect(config, 2)
+    query = 'select count(*) from foo join bar on foo.c1=bar.c1 and unlock_if_eq_1(foo.c1)=bar.c1'
+    temporary = r"""Aggregate \(Current loop: actual rows=0, loop number=1\)
+  ->  Hash Join \(Current loop: actual rows=\d+, loop number=1\)
         Hash Cond: \(foo.c1 = bar.c1\)
-        ->  Seq Scan on foo \(Current loop: actual rows=1, loop number=1\)
+        Join Filter: \(unlock_if_eq_1\(foo.c1\) = bar.c1\)"""
+    expected = temporary
+    expected_15 = temporary
+    expected += r"""
        Buffers: shared hit=\d+, temp read=\d+ written=\d+"""
+    expected_15 += r"""
        Buffers: shared hit=\d+, temp written=\d+"""
+    temporary = r"""
+        ->  Seq Scan on foo \(Current loop: actual rows=\d+, loop number=1\)
              Buffers: [^\n]*
-        ->  Hash \(Current loop: actual rows=0, loop number=1\)
+        ->  Hash \(Current loop: actual rows=500000, loop number=1\)
              Buckets: \d+  Batches: \d+  Memory Usage: \d+kB
+             Buffers: shared hit=\d+, temp written=\d+
             ->  Seq Scan on bar \(Current loop: actual rows=\d+, loop number=1\)
                   Buffers: .*"""
+    expected += temporary
+    expected_15 += temporary
+
+    common.set_guc(acon1, 'pg_query_state.enable_buffers', 'on')
 
-    set_guc(acon, 'pg_query_state.enable_buffers', 'on')
+    qs, notices = common.onetime_query_state_locks(config, acon1, acon2, query, {'buffers': True})
 
-    qs = query_state(config, acon, query, {'buffers': True})
-    debug_output(qs, 1, None, query, expected)
-    notices_warning()
-    assert len(qs) == 1 and re.match(expected, qs[0][3])
-    assert len(notices) == 0
+    assert len(qs) == 2
+    assert (re.match(expected, qs[0][3]) or re.match(expected_15, qs[0][3]))
+    assert len(notices) == 0
 
-    n_close((acon,))
+    common.n_close((acon1, acon2))
 
 def test_timing(config):
     """test timing statistics"""
-    acon, = n_async_connect(config)
-    query = 'select count(*) from foo join bar on foo.c1=bar.c1'
+    acon1, acon2 = common.n_async_connect(config, 2)
+    query = 'select count(*) from foo join bar on foo.c1=bar.c1 and unlock_if_eq_1(foo.c1)=bar.c1'
+
     expected = r"""Aggregate \(Current loop: running time=\d+.\d+ actual rows=0, loop number=1\)
-  ->  Hash Join \(Current loop: running time=\d+.\d+ actual rows=0, loop number=1\)
+  ->  Hash Join \(Current loop: actual time=\d+.\d+..\d+.\d+ rows=\d+, loop number=1\)
        Hash Cond: \(foo.c1 = bar.c1\)
-       ->  Seq Scan on foo \(Current loop: actual time=\d+.\d+..\d+.\d+ rows=1, loop number=1\)
-       ->  Hash \(Current loop: running time=\d+.\d+ actual rows=0, loop number=1\)
+       Join Filter: \(unlock_if_eq_1\(foo.c1\) = bar.c1\)
+       ->  Seq Scan on foo \(Current loop: actual time=\d+.\d+..\d+.\d+ rows=\d+, loop number=1\)
+       ->  Hash \(Current loop: actual time=\d+.\d+..\d+.\d+ rows=500000, loop number=1\)
             Buckets: \d+  Batches: \d+  Memory Usage: \d+kB
            ->  Seq Scan on bar \(Current loop: actual time=\d+.\d+..\d+.\d+ rows=\d+, loop number=1\)"""
 
-    set_guc(acon, 'pg_query_state.enable_timing', 'on')
+    common.set_guc(acon1, 'pg_query_state.enable_timing', 'on')
 
-    qs = query_state(config, acon, query, {'timing': True})
-    debug_output(qs, 1, None, query, expected)
-    notices_warning()
-    assert len(qs) == 1 and re.match(expected, qs[0][3])
-    assert len(notices) == 0
+    qs, notices = common.onetime_query_state_locks(config, acon1, acon2, query, {'timing': True})
 
-    n_close((acon,))
+    assert len(qs) == 2
+    assert re.match(expected, qs[0][3])
+    assert len(notices) == 0
+
+    common.n_close((acon1, acon2))
 
 def check_plan(plan):
-    assert plan.has_key('Current loop')
+    assert 'Current loop' in plan
     cur_loop = plan['Current loop']
-    assert cur_loop.has_key('Actual Loop Number') \
-        and cur_loop.has_key('Actual Rows')
+    assert 'Actual Loop Number' in cur_loop\
+        and 'Actual Rows' in cur_loop
 
-    if not plan.has_key('Plans'):
+    if not 'Plans' in plan:
        return
 
    for subplan in plan['Plans']:
@@ -422,14 +332,14 @@ def check_xml(root):
    prefix = '{http://www.postgresql.org/2009/explain}'
    for plan in root.iter(prefix + 'Plan'):
        cur_loop = plan.find(prefix + 'Current-loop')
-        assert cur_loop != None \
+        assert cur_loop != None \
            and cur_loop.find(prefix + 'Actual-Loop-Number') != None \
            and cur_loop.find(prefix + 'Actual-Rows') != None
 
 def test_formats(config):
    """test all formats of pg_query_state output"""
-    acon, = n_async_connect(config)
+    acon, = common.n_async_connect(config)
    query = 'select count(*) from foo join bar on foo.c1=bar.c1'
    expected = r"""Aggregate \(Current loop: actual rows=0, loop number=1\)
  ->  Hash Join \(Current loop: actual rows=0, loop number=1\)
@@ -439,61 +349,62 @@ def test_formats(config):
             Buckets: \d+  Batches: \d+  Memory Usage: \d+kB
             ->  Seq Scan on bar \(Current loop: actual rows=\d+, loop number=1\)"""
 
-    qs = query_state(config, acon, query, {'format': 'text'})
-    debug_output(qs, 1, None, query, expected)
-    notices_warning()
-    assert len(qs) == 1 and re.match(expected, qs[0][3])
-    assert len(notices) == 0
+    qs, notices = common.onetime_query_state(config, acon, query, {'format': 'text'})
+    assert len(qs) == 1 and re.match(expected, qs[0][3])
+    assert len(notices) == 0
 
-    qs = query_state(config, acon, query, {'format': 'json'})
+    qs, notices = common.onetime_query_state(config, acon, query, {'format': 'json'})
     try:
         js_obj = json.loads(qs[0][3])
     except ValueError:
         assert False, 'Invalid json format'
-    assert len(qs) == 1
-    assert len(notices) == 0
+    assert len(qs) == 1
+    assert len(notices) == 0
     check_plan(js_obj['Plan'])
 
-    qs = query_state(config, acon, query, {'format': 'xml'})
-    assert len(qs) == 1
-    assert len(notices) == 0
+    qs, notices = common.onetime_query_state(config, acon, query, {'format': 'xml'})
+    assert len(qs) == 1
+    assert len(notices) == 0
     try:
         xml_root = ET.fromstring(qs[0][3])
     except:
         assert False, 'Invalid xml format'
     check_xml(xml_root)
 
-    qs = query_state(config, acon, query, {'format': 'yaml'})
+    qs, _ = common.onetime_query_state(config, acon, query, {'format': 'yaml'})
     try:
-        yaml_doc = yaml.load(qs[0][3])
+        yaml_doc = yaml.load(qs[0][3], Loader=yaml.FullLoader)
     except:
         assert False, 'Invalid yaml format'
-    assert len(qs) == 1
-    assert len(notices) == 0
+    assert len(qs) == 1
+    assert len(notices) == 0
     check_plan(yaml_doc['Plan'])
 
-    n_close((acon,))
+    common.n_close((acon,))
 
 def test_timing_buffers_conflicts(config):
     """test when caller requests timing and buffers but counterpart turned off its"""
-    acon, = n_async_connect(config)
+    acon, = common.n_async_connect(config)
     query = 'select count(*) from foo join bar on foo.c1=bar.c1'
     timing_pattern = '(?:running time=\d+.\d+)|(?:actual time=\d+.\d+..\d+.\d+)'
     buffers_pattern = 'Buffers:'
 
-    qs = query_state(config, acon, query, {'timing': True, 'buffers': False})
-    assert len(qs) == 1 and not re.search(timing_pattern, qs[0][3])
+    common.set_guc(acon, 'pg_query_state.enable_timing', 'off')
+    common.set_guc(acon, 'pg_query_state.enable_buffers', 'off')
+
+    qs, notices = common.onetime_query_state(config, acon, query, {'timing': True, 'buffers': False})
+    assert len(qs) == 1 and not re.search(timing_pattern, qs[0][3])
     assert notices == ['WARNING:  timing statistics disabled\n']
 
-    qs = query_state(config, acon, query, {'timing': False, 'buffers': True})
-    assert len(qs) == 1 and not re.search(buffers_pattern, qs[0][3])
+    qs, notices = common.onetime_query_state(config, acon, query, {'timing': False, 'buffers': True})
+    assert len(qs) == 1 and not re.search(buffers_pattern, qs[0][3])
     assert notices == ['WARNING:  buffers statistics disabled\n']
 
-    qs = query_state(config, acon, query, {'timing': True, 'buffers': True})
-    assert len(qs) == 1 and not re.search(timing_pattern, qs[0][3]) \
+    qs, notices = common.onetime_query_state(config, acon, query, {'timing': True, 'buffers': True})
+    assert len(qs) == 1 and not re.search(timing_pattern, qs[0][3]) \
         and not re.search(buffers_pattern, qs[0][3])
     assert len(notices) == 2 and 'WARNING:  timing statistics disabled\n' in notices \
         and 'WARNING:  buffers statistics disabled\n' in notices
 
-    n_close((acon,))
+    common.n_close((acon,))
diff --git a/tests/tpcds.py b/tests/tpcds.py
new file mode 100644
index 0000000..bdeb408
--- /dev/null
+++ b/tests/tpcds.py
@@ -0,0 +1,145 @@
+'''
+tpcds.py
+Copyright (c) 2016-2024, Postgres Professional
+'''
+
+import os
+import subprocess
+import time
+
+import progressbar
+# This actually imports progressbar2, but `import progressbar2` itself doesn't work.
+# In case of problems with progressbar/progressbar2, check that progressbar2 is
+# installed and that its path (or the virtualenv containing it) is on sys.path.
+
+import psycopg2.extensions
+
+import common
+
+class DataLoadException(Exception): pass
+class StressTestException(Exception): pass
+
+def setup_tpcds(config):
+    print('Setting up TPC-DS test...')
+    subprocess.call(['./tests/prepare_stress.sh'])
+
+    try:
+        conn = psycopg2.connect(**config)
+        cur = conn.cursor()
+    except Exception as e:
+        raise DataLoadException('Load failed: %s' % e)
+
+    try:
+        # Create pg_query_state extension
+        cur.execute('CREATE EXTENSION IF NOT EXISTS pg_query_state')
+
+        # Create tables
+        with open('tmp_stress/tpcds-kit/tools/tpcds.sql', 'r') as f:
+            cur.execute(f.read())
+
+        # Copy table data from files
+        for table_datafile in os.listdir('tmp_stress/tpcds-kit/tools/'):
+            if table_datafile.endswith('.dat'):
+                table_name = os.path.splitext(os.path.basename(table_datafile))[0]
+
+                print('Loading table', table_name)
+                with open('tmp_stress/tpcds-kit/tools/tables/%s' % table_datafile) as f:
+                    cur.copy_from(f, table_name, sep='|', null='')
+
+        conn.commit()
+
+    except Exception as e:
+        cur.close()
+        conn.close()
+        raise DataLoadException('Load failed: %s' % e)
+
+    print('done!')
+
+def run_tpcds(config):
+    """TPC-DS stress test"""
+
+    TPC_DS_EXCLUDE_LIST = []          # ordinal numbers of TPC-DS queries to exclude
+    TPC_DS_STATEMENT_TIMEOUT = 20000  # statement_timeout in ms
+
+    print('Preparing TPC-DS queries...')
+    err_count = 0
+    queries = []
+    for query_file in sorted(os.listdir('tmp_stress/tpcds-result-reproduction/query_qualification/')):
+        with open('tmp_stress/tpcds-result-reproduction/query_qualification/%s' % query_file, 'r') as f:
+            queries.append(f.read())
+
+    acon, = common.n_async_connect(config)
+
+    print('Starting TPC-DS queries...')
+    timeout_list = []
+    bar = progressbar.ProgressBar(max_value=len(queries))
+    for i, query in enumerate(queries):
+        bar.update(i + 1)
+        if i + 1 in TPC_DS_EXCLUDE_LIST:
+            continue
+        try:
+            # Set query timeout to TPC_DS_STATEMENT_TIMEOUT / 1000 seconds
+            common.set_guc(acon, 'statement_timeout', TPC_DS_STATEMENT_TIMEOUT)
+
+            # run query
+            acurs = acon.cursor()
+            acurs.execute(query)
+
+            # periodically run pg_query_state on the running backend, trying to
+            # provoke a crash of PostgreSQL
+            MAX_FIRST_GETTING_QS_RETRIES = 10
+            PG_QS_DELAY, BEFORE_GETTING_QS_DELAY = 0.1, 0.1
+            BEFORE_GETTING_QS, GETTING_QS = range(2)
+            state, n_first_getting_qs_retries = BEFORE_GETTING_QS, 0
+
+            pg_qs_args = {
+                'config': config,
+                'pid': acon.get_backend_pid()
+            }
+
+            while True:
+                try:
+                    result, notices = common.pg_query_state(**pg_qs_args)
+                except Exception as e:
+                    # do not consider the test failed when "error in message
+                    # queue data transmitting" is received: it may happen with
+                    # some small probability, but if it happens too often it is
+                    # a problem; that case is handled after the loop
+                    if "error in message queue data transmitting" in e.pgerror:
+                        err_count += 1
+                    else:
+                        raise e
+
+                # run the state machine to detect the first successful read of
+                # the query state and the query completion
+                if state == BEFORE_GETTING_QS:
+                    if len(result) > 0 or common.BACKEND_IS_ACTIVE_INFO in notices:
+                        state = GETTING_QS
+                        continue
+                    n_first_getting_qs_retries += 1
+                    if n_first_getting_qs_retries >= MAX_FIRST_GETTING_QS_RETRIES:
+                        # pg_query_state calls don't return any result; most
+                        # likely the query has already completed
+                        break
+                    time.sleep(BEFORE_GETTING_QS_DELAY)
+                elif state == GETTING_QS:
+                    if common.BACKEND_IS_IDLE_INFO in notices:
+                        break
+                    time.sleep(PG_QS_DELAY)
+
+            # wait for real query completion
+            common.wait(acon)
+
+        except psycopg2.extensions.QueryCanceledError:
+            timeout_list.append(i + 1)
+
+    if err_count > 2:
+        print("\nERROR: error in message queue data transmitting")
+        raise Exception('the error was received %d times' % err_count)
+    elif err_count > 0:
+        print('\nThe error in message queue data transmitting was received %d times' % err_count)
+
+    common.n_close((acon,))
+
+    if len(timeout_list) > 0:
+        print('\nThere were pg_query_state timeouts (%s ms) on queries:' % TPC_DS_STATEMENT_TIMEOUT, timeout_list)
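Taken together, tests/prepare_stress.sh fetches and builds the TPC-DS tooling, setup_tpcds() loads the generated data, and run_tpcds() replays the qualification queries while polling pg_query_state on the executing backend. A minimal sketch of driving the two entry points directly from Python instead of through the runner's --tpc-ds-setup/--tpc-ds-run flags (assumes the working directory is the repository root, tests/ is on sys.path, and a local server accepts these hypothetical credentials):

    import tpcds  # tests/tpcds.py added by this patch

    conn_params = {'host': 'localhost', 'port': 5432, 'user': 'postgres',
                   'database': 'postgres', 'password': ''}
    tpcds.setup_tpcds(conn_params)  # runs tests/prepare_stress.sh, creates and loads the TPC-DS tables
    tpcds.run_tpcds(conn_params)    # fires the TPC-DS queries while polling pg_query_state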