From 42703d81d618a815a027336de230988d7ec466f4 Mon Sep 17 00:00:00 2001 From: Andrew Papsujko Date: Mon, 19 May 2025 17:31:03 +0300 Subject: [PATCH 1/6] fix: deadlock in send queue --- mamonsu/lib/queue.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/mamonsu/lib/queue.py b/mamonsu/lib/queue.py index 96ceadf1..e348fc49 100644 --- a/mamonsu/lib/queue.py +++ b/mamonsu/lib/queue.py @@ -10,25 +10,21 @@ def __init__(self): self.lock = threading.Lock() def add(self, metric): - self.lock.acquire() - self.queue.insert(0, metric) - self.lock.release() + with self.lock: + self.queue.insert(0, metric) # replace last metric def replace(self, metric): - self.lock.acquire() - self.queue.pop() - self.queue.append(metric) - self.lock.release() + with self.lock: + if self.queue: + self.queue.pop() + self.queue.append(metric) def size(self): - self.lock.acquire() - result = len(self.queue) - self.lock.release() - return result + with self.lock: + return len(self.queue) def flush(self): - self.lock.acquire() - result, self.queue = self.queue, [] - self.lock.release() - return result + with self.lock: + result, self.queue = self.queue, [] + return result From 8c198ef48118e70a1a0d7b1386068868ff2de6ce Mon Sep 17 00:00:00 2001 From: Andrew Papsujko Date: Wed, 21 May 2025 11:04:51 +0300 Subject: [PATCH 2/6] fix: added cgroup2 to excluded fs types --- mamonsu/plugins/system/linux/disk_sizes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mamonsu/plugins/system/linux/disk_sizes.py b/mamonsu/plugins/system/linux/disk_sizes.py index 898c2c04..d4618120 100644 --- a/mamonsu/plugins/system/linux/disk_sizes.py +++ b/mamonsu/plugins/system/linux/disk_sizes.py @@ -20,7 +20,7 @@ class DiskSizes(Plugin): ExcludeFsTypes = [ "none", "unknown", "rootfs", "iso9660", "squashfs", "udf", "romfs", "ramfs", "debugfs", "cgroup", "cgroup_root", - "pstore", "devtmpfs", "autofs", "cgroup", "configfs", "devpts", "efivarfs", "fusectl", 
"fuse.gvfsd-fuse", + "pstore", "devtmpfs", "autofs", "cgroup2", "configfs", "devpts", "efivarfs", "fusectl", "fuse.gvfsd-fuse", "hugetlbfs", "mqueue", "binfmt_misc", "nfsd", "proc", "pstore", "selinuxfs", "rpc_pipefs", "securityfs", "sysfs", "nsfs", "tmpfs", "tracefs" ] From 5299263670f22cf5682076e5f601427a30bb90f6 Mon Sep 17 00:00:00 2001 From: Andrew Papsujko Date: Fri, 16 May 2025 12:32:51 +0300 Subject: [PATCH 3/6] feat: added metric that shows bytes held by non-active replication slot and according trigger --- documentation/metrics.md | 64 +++++++++++++++++++++++++- mamonsu/lib/default_config.py | 2 + mamonsu/lib/platform.py | 1 + mamonsu/plugins/pgsql/driver/pool.py | 26 ++++++++++- mamonsu/plugins/pgsql/replication.py | 69 ++++++++++++++++++++++++++-- mamonsu/tools/bootstrap/sql.py | 17 +++++++ 6 files changed, 172 insertions(+), 7 deletions(-) diff --git a/documentation/metrics.md b/documentation/metrics.md index 15ef4a1a..d507a8e4 100644 --- a/documentation/metrics.md +++ b/documentation/metrics.md @@ -3691,7 +3691,8 @@ Default config: ### Replication Default config: -        lag_more_than_in_sec = 300 +        lag_more_than_in_sec = 300\ +        critical_bytes_held_by_non_active_slot = 1073741824 bytes ### Items @@ -3763,6 +3764,37 @@ Default config: *Non-active Replication Slots* calculates as count of slots with `false` active status. +- **Bytes Held By Non-active Replication Slots** + + Zabbix item: + + + + + + + + + + + + + + + + + + + + + + + + + +
NamePostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}
Keypgsql.replication.non_active_slots_held_bytes[{#NON_ACTIVE_SLOT_NAME}]
TypeNumeric (float)
UnitsBytes
DeltaAs Is
Supported Version10+
+ + *Bytes Held By Non-active Replication Slots* is calculated as the difference in bytes between the current WAL position (the last replayed position on a replica) and the `restart_lsn` of each slot with `false` active status. - **Streaming Replication Lag** @@ -3861,12 +3893,40 @@ Default config: +- **PostgreSQL Replication: Non-active Slots Discovery** + + Items: + + + + + + + + + + + + + + + + + + + + + +
NamePostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}
Keypgsql.replication.non_active_slots_held_bytes[{#NON_ACTIVE_SLOT_NAME}]
TypeNumeric (float)
UnitsBytes
DeltaAs Is
+ ### Triggers - **PostgreSQL Instance: server mode has been changed on {HOSTNAME} to {ITEM.LASTVALUE}** - **PostgreSQL number of non-active replication slots on {HOSTNAME} (value={ITEM.LASTVALUE})** - + Disabled by default +- **PostgreSQL Replication: bytes held by slot {#NON_ACTIVE_SLOT_NAME} is too high (value={ITEM.LASTVALUE})** + Triggers if *PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}* exceeds `critical_bytes_held_by_non_active_slot`. - **PostgreSQL streaming lag too high on {HOSTNAME} (value={ITEM.LASTVALUE})** Triggers if *PostgreSQL Replication: Streaming Replication Lag* exceeds `lag_more_than_in_sec`. diff --git a/mamonsu/lib/default_config.py b/mamonsu/lib/default_config.py index c7f2d985..12791a18 100644 --- a/mamonsu/lib/default_config.py +++ b/mamonsu/lib/default_config.py @@ -35,6 +35,8 @@ def default_host(): host = os.environ.get('PGHOST') or 'auto' if platform.FREEBSD: host = os.environ.get('PGHOST') or 'auto' + if platform.DARWIN: + host = os.environ.get('PGHOST') or 'auto' return host @staticmethod diff --git a/mamonsu/lib/platform.py b/mamonsu/lib/platform.py index 5ea5faa0..279200d1 100644 --- a/mamonsu/lib/platform.py +++ b/mamonsu/lib/platform.py @@ -3,5 +3,6 @@ LINUX = (sys.platform == 'linux' or sys.platform == 'linux2') WINDOWS = (sys.platform == 'win32' or sys.platform == 'win64') FREEBSD = ('freebsd' in sys.platform) +DARWIN = sys.platform == 'darwin' UNIX = LINUX or FREEBSD INTEGER_TYPES = int, diff --git a/mamonsu/plugins/pgsql/driver/pool.py b/mamonsu/plugins/pgsql/driver/pool.py index 6576f92a..a8433d98 100644 --- a/mamonsu/plugins/pgsql/driver/pool.py +++ b/mamonsu/plugins/pgsql/driver/pool.py @@ -86,7 +86,7 @@ class Pool(object): """ SELECT application_name, {0} - coalesce((pg_{1}_{2}_diff(pg_current_{1}_{2}(), replay_lsn))::int, 0) AS total_lag + coalesce((pg_{1}_{2}_diff(pg_current_{1}_{2}(), replay_{2}))::int, 0) AS total_lag FROM pg_stat_replication; """, """ @@ -95,6 +95,30 @@ class 
Pool(object): total_lag FROM mamonsu.count_{1}_lag_lsn(); """ + ), + "wal_held_bytes_master": ( + """ + SELECT slot_name, + coalesce((pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn))::int, 0) AS wal_held_bytes + FROM pg_replication_slots; + """, + """ + SELECT slot_name, + wal_held_bytes + FROM mamonsu.bytes_held_by_inactive_slot_on_master(); + """ + ), + "wal_held_bytes_replica": ( + """ + SELECT slot_name, + coalesce((pg_wal_lsn_diff(pg_last_wal_replay_lsn(), restart_lsn))::int, 0) AS wal_held_bytes + FROM pg_replication_slots; + """, + """ + SELECT slot_name, + wal_held_bytes + FROM mamonsu.bytes_held_by_inactive_slot_on_replica(); + """ ) } diff --git a/mamonsu/plugins/pgsql/replication.py b/mamonsu/plugins/pgsql/replication.py index 8a51889a..7ed701c1 100644 --- a/mamonsu/plugins/pgsql/replication.py +++ b/mamonsu/plugins/pgsql/replication.py @@ -13,7 +13,8 @@ class Replication(Plugin): AgentPluginType = "pg" # key: (macro, value) plugin_macros = { - "critical_lag_seconds": [("macro", "{$CRITICAL_LAG_SECONDS}"), ("value", 60 * 5)] + "critical_lag_seconds": [("macro", "{$CRITICAL_LAG_SECONDS}"), ("value", 60 * 5)], + "critical_bytes_held_by_none_active_slot": [("macro", "{$CRITICAL_BYTES_HELD_BY_NON_ACTIVE_SLOT}"), ("value", 1024 * 1024 * 1024)] } # get time of replication lag @@ -30,8 +31,15 @@ class Replication(Plugin): WHERE active = 'false'; """ + query_bytes_held_by_non_active_slot = """ + SELECT slot_name, coalesce(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::bigint, 0) AS wal_size_bytes + FROM pg_replication_slots + WHERE active = 'false'; + """ + # for discovery rule for name of each replica key_lsn_replication_discovery = "pgsql.replication.discovery{0}" + key_replication_non_active_slots_discovery = "pgsql.replication.non_active_slots_discovery{0}" key_total_lag = "pgsql.replication.total_lag{0}" # for PG 10 and higher key_flush = "pgsql.replication.flush_lag{0}" @@ -42,6 +50,7 @@ class Replication(Plugin): key_replication = 
"pgsql.replication_lag{0}" key_non_active_slots = "pgsql.replication.non_active_slots{0}" + key_non_active_slots_held_bytes = "pgsql.replication.non_active_slots_held_bytes{0}" def run(self, zbx): @@ -79,6 +88,14 @@ def run(self, zbx): zbx.send("pgsql.replication.replay_lag[{0}]".format(info[0]), float(info[5])) zbx.send("pgsql.replication.discovery[]", zbx.json({"data": lags})) del lags + bytes_held_by_non_active_slot = Pooler.run_sql_type("wal_held_bytes_master", args=[]) + if bytes_held_by_non_active_slot: + discovery = [] + for info in bytes_held_by_non_active_slot: + discovery.append({"{#NON_ACTIVE_SLOT_NAME}": info[0]}) + zbx.send("pgsql.replication.non_active_slots_held_bytes[{0}]".format(info[0]), int(info[1])) + zbx.send("pgsql.replication.non_active_slots_discovery[]", zbx.json({"data": discovery})) + del discovery elif Pooler.is_superuser() or Pooler.is_bootstraped(): result_lags = Pooler.run_sql_type("wal_lag_lsn", args=[" ", "xlog", "location"]) if result_lags: @@ -90,7 +107,15 @@ def run(self, zbx): del lags else: self.disable_and_exit_if_not_superuser() - + else: + bytes_held_by_non_active_slot = Pooler.run_sql_type("wal_held_bytes_replica", args=[]) + if bytes_held_by_non_active_slot: + discovery = [] + for info in bytes_held_by_non_active_slot: + discovery.append({"{#NON_ACTIVE_SLOT_NAME}": info[0]}) + zbx.send("pgsql.replication.non_active_slots_held_bytes[{0}]".format(info[0]), int(info[1])) + zbx.send("pgsql.replication.non_active_slots_discovery[]", zbx.json({"data": discovery})) + del discovery non_active_slots = Pooler.query(self.query_non_active_slots) zbx.send(self.key_non_active_slots.format("[]"), int(non_active_slots[0][0])) @@ -132,7 +157,8 @@ def triggers(self, template, dashboard=False): }) + template.trigger({ "name": "PostgreSQL Replication: number of non-active replication slots on {HOSTNAME} (value={ITEM.LASTVALUE})", "expression": "{#TEMPLATE:" + self.right_type(self.key_non_active_slots) + ".last()}>" + str( - 
NUMBER_NON_ACTIVE_SLOTS) + NUMBER_NON_ACTIVE_SLOTS), + "status": 1 }) return triggers @@ -198,7 +224,42 @@ def discovery_rules(self, template, dashboard=False): ] } ] - return template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs) + active_slots_discovery_rule = template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs) + + rule = { + "name": "PostgreSQL Replication: Non Active Slots Discovery", + "key": self.key_replication_non_active_slots_discovery.format("[{0}]".format(self.Macros[self.Type])) + } + if Plugin.old_zabbix: + conditions = [] + rule["filter"] = "{#NON_ACTIVE_SLOT_NAME}:.*" + else: + conditions = [{ + "condition": [ + {"macro": "{#NON_ACTIVE_SLOT_NAME}", + "value": ".*", + "operator": 8, + "formulaid": "A"} + ] + }] + items = [ + {"key": self.right_type(self.key_non_active_slots_held_bytes, var_discovery="{#NON_ACTIVE_SLOT_NAME},"), + "name": "PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}", + "value_type": Plugin.VALUE_TYPE.numeric_float, + "delay": self.plugin_config("interval"), + "drawtype": 2} + ] + graphs = [] + triggers = [ + { + "name": "PostgreSQL Replication: bytes held by slot {#NON_ACTIVE_SLOT_NAME} is too high (value={ITEM.LASTVALUE})", + "expression": "{#TEMPLATE:" + self.right_type(self.key_non_active_slots_held_bytes, var_discovery="{#NON_ACTIVE_SLOT_NAME},") + ".last()}>" + + self.plugin_macros["critical_bytes_held_by_none_active_slot"][0][1] + } + ] + non_active_slots_discovery_rule = template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs, triggers=triggers) + + return active_slots_discovery_rule + non_active_slots_discovery_rule def keys_and_queries(self, template_zabbix): result = [] diff --git a/mamonsu/tools/bootstrap/sql.py b/mamonsu/tools/bootstrap/sql.py index f37be0f0..bf99442a 100644 --- a/mamonsu/tools/bootstrap/sql.py +++ b/mamonsu/tools/bootstrap/sql.py @@ -236,6 +236,23 @@ 
coalesce((pg_{7}_diff(pg_current_{7}(), replay_{9}))::bigint, 0) AS total_lag FROM pg_stat_replication $$ LANGUAGE SQL SECURITY DEFINER; + +DROP FUNCTION IF EXISTS mamonsu.bytes_held_by_inactive_slot_on_master(); +CREATE OR REPLACE FUNCTION mamonsu.bytes_held_by_inactive_slot_on_master() +RETURNS TABLE(slot_name TEXT, wal_held_bytes BIGINT) AS $$ +SELECT slot_name::TEXT, coalesce((pg_{7}_diff(pg_current_wal_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes +FROM pg_replication_slots +WHERE active = 'false' +$$ LANGUAGE SQL SECURITY DEFINER; + +DROP FUNCTION IF EXISTS mamonsu.bytes_held_by_inactive_slot_on_replica(); +CREATE OR REPLACE FUNCTION mamonsu.bytes_held_by_inactive_slot_on_replica() +RETURNS TABLE(slot_name TEXT, wal_held_bytes BIGINT) AS $$ +SELECT slot_name::TEXT, coalesce((pg_{7}_diff(pg_last_wal_replay_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes +FROM pg_replication_slots +WHERE active = 'false' +$$ LANGUAGE SQL SECURITY DEFINER; + """ CreatePgBuffercacheFunctionsSQL = """ From 671d9a946e97a9831c2bb8cf951ffce693739f6c Mon Sep 17 00:00:00 2001 From: Ytipytiman <61265312+Ytipytiman@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:32:53 +0300 Subject: [PATCH 4/6] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit В процессе установки требуется скачать скрипт: ``` Install from repository using script: wget https://repo.postgrespro.ru/mamonsu/keys/pgpro-repo-add.sh ``` По данному пути скрипта нет, получаешь ошибку 404, скрипт можно скачать по ссылке: `https://repo.postgrespro.ru/mamonsu/mamonsu/keys/pgpro-repo-add.sh` Предлагаю исправить ошибку --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a4f3076a..fd6b6bfc 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ Pre-built _mamonsu_ packages are provided in official Postgres Pro repository: [ **Install from repository using script:** ```shell -$ wget 
https://repo.postgrespro.ru/mamonsu/keys/pgpro-repo-add.sh +$ wget https://repo.postgrespro.ru/mamonsu/mamonsu/keys/pgpro-repo-add.sh $ sudo chmod 700 ./pgpro-repo-add.sh $ sudo ./pgpro-repo-add.sh ``` From f218ed4ab5683f04b007b09f9960c481330e8c1a Mon Sep 17 00:00:00 2001 From: Andrew Papsujko Date: Thu, 29 May 2025 16:34:26 +0300 Subject: [PATCH 5/6] build: upgrade to 3.5.13 --- .../sources/agent_3.5.13.conf | 210 ++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 github-actions-tests/sources/agent_3.5.13.conf diff --git a/github-actions-tests/sources/agent_3.5.13.conf b/github-actions-tests/sources/agent_3.5.13.conf new file mode 100644 index 00000000..076419ed --- /dev/null +++ b/github-actions-tests/sources/agent_3.5.13.conf @@ -0,0 +1,210 @@ +# This is a configuration file for mamonsu +# To get more information about mamonsu, visit https://postgrespro.ru/docs/postgrespro/12/mamonsu + +######### Connection parameters sections ############## + +# specify connection parameters for the Postgres cluster +# in the user, password, and database fields, you must specify the mamonsu_user, mamonsu_password, +# and the mamonsu_database used for bootstrap, respectively. +# if you skipped the bootstrap, specify a superuser credentials and the database to connect to. + +[postgres] +enabled = True +user = mamonsu +password = mamonsu +database = mamonsu_test_db +host = localhost +port = 5432 +application_name = mamonsu +query_timeout = 10 + +# the address field must point to the running Zabbix server, while the client field must provide the name of +# the Zabbix host. You can find the list of hosts available for your account in the Zabbix web +# interface under Configuration > Hosts. 
+# re_send - True - in case of transmission error, mamonsu repeats sending metrics one by one to look in log metrics with error + +[zabbix] +enabled = True +client = pg-master +address = 127.0.0.1 +port = 10051 +timeout = 15 +re_send = False + +######### General parameters sections ############ + +# enable or disable collection of system metrics. + +[system] +enabled = True + +# control the queue size of the data to be sent to the Zabbix server + +[sender] +queue = 2048 + +# specify the location of mamonsu and whether it is allowed to access metrics from the command line + +[agent] +enabled = True +host = 127.0.0.1 +port = 10052 + +# specify custom plugins to be added for metrics collection + +[plugins] +enabled = False +directory = /etc/mamonsu/plugins + +# enable storing the collected metric data in text files locally. + +[metric_log] +enabled = False +directory = /var/log/mamonsu +max_size_mb = 1024 + +# specify logging settings for mamonsu + +[log] +file = /var/log/mamonsu/mamonsu.log +level = DEBUG +format = [%(levelname)s] %(asctime)s - %(name)s - %(message)s + +######### Individual Plugin Sections ############ + +# to disable any plugin set the enabled option to False. +# modify collection interval for each plugin in the interval field. +# set customer parameters for some plugins in the individual section. +# below listed all available parameters for each plugin to modify. + +[health] +interval = 60 + +[archivecommand] +interval = 60 + +# Besides standard autovacuum workers count, mamonsu also counts autovacuum utilization. +# But this metric is instantaneous, so recommended to run this plugin frequently +# to get a complete picture of autovacuum utilization. 
+[autovacuum] +interval = 30 + +[bgwriter] +interval = 60 + +[cfs] +force_enable = False +interval = 60 + +[checkpoint] +interval = 300 + +[connections] +interval = 60 + +[databases] +interval = 300 + +[pghealth] +interval = 60 + +[instance] +interval = 60 + +# This plugin allows detects possible memory leaks while working with PostgreSQL using /proc/pid/status and /proc/pid/statm +# We use RES and SHR difference to calculate approximate volume of private anonymous backend memory. +# If it exceeds private_anon_mem_threshold then that pid will be added to a message. An example is presented below +# statm - 'pid: {pid}, RES {RES} - SHR {SHR} more then {private_anon_mem_threshold}\n' +# Since Linux 4.5 RssAnon, RssFile and RssShmem have been added. +# They allows to distinguish types of memory such as private anonymous, file-backed, and shared anonymous memory. +# We are interested in RssAnon. If its value exceeds private_anon_mem_threshold then that pid will also be added to a message. +# By default this plugin disabled. To enable this plugin - set bellow "enabled = False" +# #interval - (onitoring frequency in seconds. 60 seconds by default +# private_anon_mem_threshold - memory volume threshold after which we need an investigation about memory leak. 1GB by default. +# Possible values MB, GB, TB. For example 1GB +[memoryleakdiagnostic] +enabled = True +interval = 15 +private_anon_mem_threshold = 1GB + +[oldest] +interval = 60 + +[pgbuffercache] +interval = 60 + +[pglocks] +interval = 60 + +# Get age (in seconds) of the oldest running prepared transaction and number of all prepared transactions for two-phase commit. +# https://www.postgresql.org/docs/current/sql-prepare-transaction.html +# https://www.postgresql.org/docs/12/view-pg-prepared-xacts.html +# max_prepared_transaction_time - age of prepared transaction in seconds. +# If pgsql.prepared.oldest exceeds max_prepared_transaction_time the trigger fires. 
+[preparedtransaction] +interval = 60 + +# Get size of relations defined in this section +# Relations - comma separated list of objects - tables and endexes (database_name.schema.relation) used to calculate relations size. +# Example: +# relations=postgres.pg_catalog.pg_class,postgres.pg_catalog.pg_user +# If the relation is blocked by some process such as vacuum full or create index, the result will be -1 +# by default this plugin disabled. To enable this plugin - set bellow "enabled = False" and define a list of relations. +[relationssize] +enabled = True +relations=postgres.pg_catalog.pg_class,mamonsu_test_db.mamonsu.config +interval = 15 + +[replication] +interval = 60 + +[statstatements] +interval = 60 + +[waitsampling] +interval = 60 + +[wal] +interval = 60 + +[disksizes] +interval = 60 + +[diskstats] +interval = 60 + +[la] +interval = 60 + +[memory] +interval = 60 + +[net] +interval = 60 + +[openfiles] +interval = 60 + +# Get size of backup catalogs stroring all WAL and backup files using pg_probackup +# (https://github.com/postgrespro/pg_probackup) +# Trigger fires if some backup has bad status e.g. (ERROR,CORRUPT,ORPHAN). 
+[pgprobackup] +enabled = False +interval = 300 +backup_dirs = /backup_dir1,/backup_dir2 +pg_probackup_path = /usr/bin/pg_probackup-11 + +[procstat] +interval = 60 + +[systemuptime] +interval = 60 + +[agentapi] +interval = 60 + +[logsender] +interval = 2 + +[zbxsender] +interval = 10 From 8f9655668b94d9f58eb7dfb22791c3b7fb5d3418 Mon Sep 17 00:00:00 2001 From: Andrew Papsujko Date: Thu, 29 May 2025 17:00:42 +0300 Subject: [PATCH 6/6] build: fix version --- .github/workflows/mamonsu-tests-dev.yml | 2 +- .github/workflows/mamonsu-tests-master.yml | 2 +- github-actions-tests/mamonsu_build.sh | 4 +- .../sources/agent_3.5.12.conf | 210 ------------------ mamonsu/__init__.py | 2 +- packaging/debian/changelog | 6 + packaging/rpm/SPECS/mamonsu.spec | 8 +- packaging/win/mamonsu.def.nsh | 2 +- 8 files changed, 19 insertions(+), 217 deletions(-) delete mode 100644 github-actions-tests/sources/agent_3.5.12.conf diff --git a/.github/workflows/mamonsu-tests-dev.yml b/.github/workflows/mamonsu-tests-dev.yml index 0336b7ce..2427a7d8 100644 --- a/.github/workflows/mamonsu-tests-dev.yml +++ b/.github/workflows/mamonsu-tests-dev.yml @@ -86,7 +86,7 @@ jobs: echo "zabbix_address=$(hostname -I | awk '{print $1}')" >> $GITHUB_OUTPUT id: zabbix_address - name: Edit Zabbix address in agent.conf - run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.12.conf + run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.13.conf - name: Copy test scripts to container run: docker exec $( echo "${{ matrix.docker_os }}" | sed 's/://' | sed 's/\.//' ) mkdir -p -m 755 /mamonsu/ diff --git a/.github/workflows/mamonsu-tests-master.yml b/.github/workflows/mamonsu-tests-master.yml index 6dfb86d3..e14042c1 100644 --- a/.github/workflows/mamonsu-tests-master.yml +++ b/.github/workflows/mamonsu-tests-master.yml @@ 
-91,7 +91,7 @@ jobs: echo "zabbix_address=$(hostname -I | awk '{print $1}')" >> $GITHUB_OUTPUT id: zabbix_address - name: Edit Zabbix address in agent.conf - run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.12.conf + run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.13.conf - name: Copy test scripts to container run: docker exec $( echo "${{ matrix.docker_os }}" | sed 's/://' | sed 's/\.//' ) mkdir -p -m 755 /mamonsu/ diff --git a/github-actions-tests/mamonsu_build.sh b/github-actions-tests/mamonsu_build.sh index a7668068..6c24eb99 100644 --- a/github-actions-tests/mamonsu_build.sh +++ b/github-actions-tests/mamonsu_build.sh @@ -41,7 +41,7 @@ if [ "${OS%:*}" = "centos" ]; then python3 setup.py build && python3 setup.py install make rpm sudo rpm -i ./mamonsu*.rpm - cat /mamonsu/github-actions-tests/sources/agent_3.5.12.conf > /etc/mamonsu/agent.conf + cat /mamonsu/github-actions-tests/sources/agent_3.5.13.conf > /etc/mamonsu/agent.conf # ensuring mamonsu can actually start sudo su -s /bin/bash -c "mamonsu bootstrap -x --user postgres -d mamonsu_test_db" mamonsu /etc/init.d/mamonsu restart @@ -65,7 +65,7 @@ elif [ "${OS%:*}" = "ubuntu" ]; then python3 setup.py build && python3 setup.py install make deb sudo dpkg -i ./mamonsu*.deb - cat /mamonsu/github-actions-tests/sources/agent_3.5.12.conf > /etc/mamonsu/agent.conf + cat /mamonsu/github-actions-tests/sources/agent_3.5.13.conf > /etc/mamonsu/agent.conf # ensuring mamonsu can actually start sudo su -s /bin/bash -c "mamonsu bootstrap -x --user postgres -d mamonsu_test_db" mamonsu service mamonsu restart diff --git a/github-actions-tests/sources/agent_3.5.12.conf b/github-actions-tests/sources/agent_3.5.12.conf deleted file mode 100644 index 076419ed..00000000 --- a/github-actions-tests/sources/agent_3.5.12.conf +++ 
/dev/null @@ -1,210 +0,0 @@ -# This is a configuration file for mamonsu -# To get more information about mamonsu, visit https://postgrespro.ru/docs/postgrespro/12/mamonsu - -######### Connection parameters sections ############## - -# specify connection parameters for the Postgres cluster -# in the user, password, and database fields, you must specify the mamonsu_user, mamonsu_password, -# and the mamonsu_database used for bootstrap, respectively. -# if you skipped the bootstrap, specify a superuser credentials and the database to connect to. - -[postgres] -enabled = True -user = mamonsu -password = mamonsu -database = mamonsu_test_db -host = localhost -port = 5432 -application_name = mamonsu -query_timeout = 10 - -# the address field must point to the running Zabbix server, while the client field must provide the name of -# the Zabbix host. You can find the list of hosts available for your account in the Zabbix web -# interface under Configuration > Hosts. -# re_send - True - in case of transmission error, mamonsu repeats sending metrics one by one to look in log metrics with error - -[zabbix] -enabled = True -client = pg-master -address = 127.0.0.1 -port = 10051 -timeout = 15 -re_send = False - -######### General parameters sections ############ - -# enable or disable collection of system metrics. - -[system] -enabled = True - -# control the queue size of the data to be sent to the Zabbix server - -[sender] -queue = 2048 - -# specify the location of mamonsu and whether it is allowed to access metrics from the command line - -[agent] -enabled = True -host = 127.0.0.1 -port = 10052 - -# specify custom plugins to be added for metrics collection - -[plugins] -enabled = False -directory = /etc/mamonsu/plugins - -# enable storing the collected metric data in text files locally. 
- -[metric_log] -enabled = False -directory = /var/log/mamonsu -max_size_mb = 1024 - -# specify logging settings for mamonsu - -[log] -file = /var/log/mamonsu/mamonsu.log -level = DEBUG -format = [%(levelname)s] %(asctime)s - %(name)s - %(message)s - -######### Individual Plugin Sections ############ - -# to disable any plugin set the enabled option to False. -# modify collection interval for each plugin in the interval field. -# set customer parameters for some plugins in the individual section. -# below listed all available parameters for each plugin to modify. - -[health] -interval = 60 - -[archivecommand] -interval = 60 - -# Besides standard autovacuum workers count, mamonsu also counts autovacuum utilization. -# But this metric is instantaneous, so recommended to run this plugin frequently -# to get a complete picture of autovacuum utilization. -[autovacuum] -interval = 30 - -[bgwriter] -interval = 60 - -[cfs] -force_enable = False -interval = 60 - -[checkpoint] -interval = 300 - -[connections] -interval = 60 - -[databases] -interval = 300 - -[pghealth] -interval = 60 - -[instance] -interval = 60 - -# This plugin allows detects possible memory leaks while working with PostgreSQL using /proc/pid/status and /proc/pid/statm -# We use RES and SHR difference to calculate approximate volume of private anonymous backend memory. -# If it exceeds private_anon_mem_threshold then that pid will be added to a message. An example is presented below -# statm - 'pid: {pid}, RES {RES} - SHR {SHR} more then {private_anon_mem_threshold}\n' -# Since Linux 4.5 RssAnon, RssFile and RssShmem have been added. -# They allows to distinguish types of memory such as private anonymous, file-backed, and shared anonymous memory. -# We are interested in RssAnon. If its value exceeds private_anon_mem_threshold then that pid will also be added to a message. -# By default this plugin disabled. To enable this plugin - set bellow "enabled = False" -# #interval - (onitoring frequency in seconds. 
60 seconds by default -# private_anon_mem_threshold - memory volume threshold after which we need an investigation about memory leak. 1GB by default. -# Possible values MB, GB, TB. For example 1GB -[memoryleakdiagnostic] -enabled = True -interval = 15 -private_anon_mem_threshold = 1GB - -[oldest] -interval = 60 - -[pgbuffercache] -interval = 60 - -[pglocks] -interval = 60 - -# Get age (in seconds) of the oldest running prepared transaction and number of all prepared transactions for two-phase commit. -# https://www.postgresql.org/docs/current/sql-prepare-transaction.html -# https://www.postgresql.org/docs/12/view-pg-prepared-xacts.html -# max_prepared_transaction_time - age of prepared transaction in seconds. -# If pgsql.prepared.oldest exceeds max_prepared_transaction_time the trigger fires. -[preparedtransaction] -interval = 60 - -# Get size of relations defined in this section -# Relations - comma separated list of objects - tables and endexes (database_name.schema.relation) used to calculate relations size. -# Example: -# relations=postgres.pg_catalog.pg_class,postgres.pg_catalog.pg_user -# If the relation is blocked by some process such as vacuum full or create index, the result will be -1 -# by default this plugin disabled. To enable this plugin - set bellow "enabled = False" and define a list of relations. -[relationssize] -enabled = True -relations=postgres.pg_catalog.pg_class,mamonsu_test_db.mamonsu.config -interval = 15 - -[replication] -interval = 60 - -[statstatements] -interval = 60 - -[waitsampling] -interval = 60 - -[wal] -interval = 60 - -[disksizes] -interval = 60 - -[diskstats] -interval = 60 - -[la] -interval = 60 - -[memory] -interval = 60 - -[net] -interval = 60 - -[openfiles] -interval = 60 - -# Get size of backup catalogs stroring all WAL and backup files using pg_probackup -# (https://github.com/postgrespro/pg_probackup) -# Trigger fires if some backup has bad status e.g. (ERROR,CORRUPT,ORPHAN). 
-[pgprobackup] -enabled = False -interval = 300 -backup_dirs = /backup_dir1,/backup_dir2 -pg_probackup_path = /usr/bin/pg_probackup-11 - -[procstat] -interval = 60 - -[systemuptime] -interval = 60 - -[agentapi] -interval = 60 - -[logsender] -interval = 2 - -[zbxsender] -interval = 10 diff --git a/mamonsu/__init__.py b/mamonsu/__init__.py index b43c4918..9264cb85 100644 --- a/mamonsu/__init__.py +++ b/mamonsu/__init__.py @@ -1,7 +1,7 @@ __author__ = 'Dmitry Vasilyev' __author_email__ = 'info@postgrespro.ru' __description__ = 'Monitoring agent for PostgreSQL' -__version__ = '3.5.12' +__version__ = '3.5.13' __licence__ = 'BSD' __url__ = 'https://github.com/postgrespro/mamonsu' diff --git a/packaging/debian/changelog b/packaging/debian/changelog index 218931a0..6efa0973 100644 --- a/packaging/debian/changelog +++ b/packaging/debian/changelog @@ -1,3 +1,9 @@ +mamonsu (3.5.13-1) stable; urgency=low + * Added a new metric that displays the bytes held by non-active replication slots, along with the corresponding trigger.; + * Set the trigger for 'number of non-active replication slots' to be disabled by default.; + * Fixed the Linux plugin to ensure compatibility with recent Linux versions that use cgroups2.; + * Resolved a deadlock issue in the send queue that caused Mamonsu to hang after network problems.; + mamonsu (3.5.12-1) stable; urgency=low * Port version parser code from public archive of pypa/pkg_resources; * Thread-safe implementation of connection cache; diff --git a/packaging/rpm/SPECS/mamonsu.spec b/packaging/rpm/SPECS/mamonsu.spec index dcc7c9f1..dcfd2bde 100644 --- a/packaging/rpm/SPECS/mamonsu.spec +++ b/packaging/rpm/SPECS/mamonsu.spec @@ -1,5 +1,5 @@ Name: mamonsu -Version: 3.5.12 +Version: 3.5.13 Release: 1%{?dist} Summary: Monitoring agent for PostgreSQL Group: Applications/Internet @@ -73,6 +73,12 @@ chown -R mamonsu:mamonsu /var/log/mamonsu chown -R mamonsu:mamonsu /etc/mamonsu %changelog +* Thu May 29 2025 Andrey Papsuyko - 3.5.13-1 + - Added a new 
metric that displays the bytes held by non-active replication slots, along with the corresponding trigger.; + - Set the trigger for 'number of non-active replication slots' to be disabled by default.; + - Fixed the Linux plugin to ensure compatibility with recent Linux versions that use cgroups2.; + - Resolved a deadlock issue in the send queue that caused Mamonsu to hang after network problems.; + * Wed Mar 5 2025 Maxim Styushin - 3.5.12-1 - Port version parser code from public archive of pypa/pkg_resources; - Thread-safe implementation of connection cache; diff --git a/packaging/win/mamonsu.def.nsh b/packaging/win/mamonsu.def.nsh index 1b60f1cf..5afbfdc5 100644 --- a/packaging/win/mamonsu.def.nsh +++ b/packaging/win/mamonsu.def.nsh @@ -1,5 +1,5 @@ !define NAME Mamonsu -!define VERSION 3.5.12 +!define VERSION 3.5.13 !define MAMONSU_REG_PATH "Software\PostgresPro\Mamonsu" !define MAMONSU_REG_UNINSTALLER_PATH "Software\Microsoft\Windows\CurrentVersion\Uninstall" !define EDB_REG "SOFTWARE\Postgresql"