Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c288af8

Browse files
Merge remote-tracking branch 'origin/D20250227_001--node-pid' into D20250226_001--ci_and_ssh
2 parents ebd6208 + 0402c4a commit c288af8

File tree

3 files changed

+209
-65
lines changed

3 files changed

+209
-65
lines changed

testgres/consts.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,3 +35,7 @@
3535

3636
# logical replication settings
3737
LOGICAL_REPL_MAX_CATCHUP_ATTEMPTS = 60
38+
39+
PG_CTL__STATUS__OK = 0
40+
PG_CTL__STATUS__NODE_IS_STOPPED = 3
41+
PG_CTL__STATUS__BAD_DATADIR = 4

testgres/node.py

Lines changed: 201 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,9 @@
4949
RECOVERY_CONF_FILE, \
5050
PG_LOG_FILE, \
5151
UTILS_LOG_FILE, \
52-
PG_PID_FILE
52+
PG_CTL__STATUS__OK, \
53+
PG_CTL__STATUS__NODE_IS_STOPPED, \
54+
PG_CTL__STATUS__BAD_DATADIR \
5355

5456
from .consts import \
5557
MAX_LOGICAL_REPLICATION_WORKERS, \
@@ -132,9 +134,6 @@ class PostgresNode(object):
132134
# a max number of node start attempts
133135
_C_MAX_START_ATEMPTS = 5
134136

135-
# a max number of read pid file attempts
136-
_C_MAX_GET_PID_ATEMPTS = 5
137-
138137
def __init__(self, name=None, base_dir=None, port=None, conn_params: ConnectionParams = ConnectionParams(),
139138
bin_dir=None, prefix=None):
140139
"""
@@ -211,40 +210,136 @@ def pid(self):
211210
Return postmaster's PID if node is running, else 0.
212211
"""
213212

214-
nAttempt = 0
215-
pid_file = os.path.join(self.data_dir, PG_PID_FILE)
216-
pid_s: str = None
213+
self__data_dir = self.data_dir
214+
215+
_params = [
216+
self._get_bin_path('pg_ctl'),
217+
"-D", self__data_dir,
218+
"status"
219+
] # yapf: disable
220+
221+
status_code, out, error = execute_utility2(
222+
self.os_ops,
223+
_params,
224+
self.utils_log_file,
225+
verbose=True,
226+
ignore_errors=True)
227+
228+
assert type(status_code) == int # noqa: E721
229+
assert type(out) == str # noqa: E721
230+
assert type(error) == str # noqa: E721
231+
232+
# -----------------
233+
if status_code == PG_CTL__STATUS__NODE_IS_STOPPED:
234+
return 0
235+
236+
# -----------------
237+
if status_code == PG_CTL__STATUS__BAD_DATADIR:
238+
return 0
239+
240+
# -----------------
241+
if status_code != PG_CTL__STATUS__OK:
242+
errMsg = "Getting of a node status [data_dir is {0}] failed.".format(self__data_dir)
243+
244+
raise ExecUtilException(
245+
message=errMsg,
246+
command=_params,
247+
exit_code=status_code,
248+
out=out,
249+
error=error,
250+
)
251+
252+
# -----------------
253+
assert status_code == PG_CTL__STATUS__OK
254+
255+
if out == "":
256+
__class__._throw_error__pg_ctl_returns_an_empty_string(
257+
_params
258+
)
259+
260+
C_PID_PREFIX = "(PID: "
261+
262+
i = out.find(C_PID_PREFIX)
263+
264+
if i == -1:
265+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
266+
out,
267+
_params
268+
)
269+
270+
assert i > 0
271+
assert i < len(out)
272+
assert len(C_PID_PREFIX) <= len(out)
273+
assert i <= len(out) - len(C_PID_PREFIX)
274+
275+
i += len(C_PID_PREFIX)
276+
start_pid_s = i
277+
217278
while True:
218-
if nAttempt == __class__._C_MAX_GET_PID_ATEMPTS:
219-
errMsg = "Can't read postmaster pid file [{0}].".format(pid_file)
220-
raise Exception(errMsg)
279+
if i == len(out):
280+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
281+
out,
282+
_params
283+
)
221284

222-
nAttempt += 1
285+
ch = out[i]
223286

224-
s1 = self.status()
225-
if s1 != NodeStatus.Running:
226-
return 0
287+
if ch == ")":
288+
break
227289

228-
try:
229-
lines = self.os_ops.readlines(pid_file)
230-
except Exception:
231-
s2 = self.status()
232-
if s2 == NodeStatus.Running:
233-
raise
234-
return 0
235-
236-
assert lines is not None # [2025-02-27] OK?
237-
assert type(lines) == list # noqa: E721
238-
if len(lines) == 0:
290+
if ch.isdigit():
291+
i += 1
239292
continue
240293

241-
pid_s = lines[0]
242-
assert type(pid_s) == str # noqa: E721
243-
if len(pid_s) == 0:
244-
continue
294+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
295+
out,
296+
_params
297+
)
298+
assert False
299+
300+
if i == start_pid_s:
301+
__class__._throw_error__pg_ctl_returns_an_unexpected_string(
302+
out,
303+
_params
304+
)
305+
306+
# TODO: Let's verify a length of pid string.
307+
308+
pid = int(out[start_pid_s:i])
309+
310+
if pid == 0:
311+
__class__._throw_error__pg_ctl_returns_a_zero_pid(
312+
out,
313+
_params
314+
)
315+
316+
assert pid != 0
317+
return pid
245318

246-
pid = int(pid_s)
247-
return pid
319+
@staticmethod
320+
def _throw_error__pg_ctl_returns_an_empty_string(_params):
321+
errLines = []
322+
errLines.append("Utility pg_ctl returns empty string.")
323+
errLines.append("Command line is {0}".format(_params))
324+
raise RuntimeError("\n".join(errLines))
325+
326+
@staticmethod
327+
def _throw_error__pg_ctl_returns_an_unexpected_string(out, _params):
328+
errLines = []
329+
errLines.append("Utility pg_ctl returns an unexpected string:")
330+
errLines.append(out)
331+
errLines.append("------------")
332+
errLines.append("Command line is {0}".format(_params))
333+
raise RuntimeError("\n".join(errLines))
334+
335+
@staticmethod
336+
def _throw_error__pg_ctl_returns_a_zero_pid(out, _params):
337+
errLines = []
338+
errLines.append("Utility pg_ctl returns a zero pid. Output string is:")
339+
errLines.append(out)
340+
errLines.append("------------")
341+
errLines.append("Command line is {0}".format(_params))
342+
raise RuntimeError("\n".join(errLines))
248343

249344
@property
250345
def auxiliary_pids(self):
@@ -367,41 +462,84 @@ def version(self):
367462
return self._pg_version
368463

369464
def _try_shutdown(self, max_attempts, with_force=False):
465+
assert type(max_attempts) == int # noqa: E721
466+
assert type(with_force) == bool # noqa: E721
467+
assert max_attempts > 0
468+
370469
attempts = 0
470+
471+
# try stopping server N times
472+
while attempts < max_attempts:
473+
attempts += 1
474+
try:
475+
self.stop()
476+
except ExecUtilException:
477+
continue # one more time
478+
except Exception:
479+
eprint('cannot stop node {}'.format(self.name))
480+
break
481+
482+
return # OK
483+
484+
# If force stopping is enabled and PID is valid
485+
if not with_force:
486+
return False
487+
371488
node_pid = self.pid
489+
assert node_pid is not None
490+
assert type(node_pid) == int # noqa: E721
372491

373-
if node_pid > 0:
374-
# try stopping server N times
375-
while attempts < max_attempts:
376-
try:
377-
self.stop()
378-
break # OK
379-
except ExecUtilException:
380-
pass # one more time
381-
except Exception:
382-
eprint('cannot stop node {}'.format(self.name))
383-
break
384-
385-
attempts += 1
386-
387-
# If force stopping is enabled and PID is valid
388-
if with_force and node_pid != 0:
389-
# If we couldn't stop the node
390-
p_status_output = self.os_ops.exec_command(cmd=f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8')
391-
if self.status() != NodeStatus.Stopped and p_status_output and str(node_pid) in p_status_output:
392-
try:
393-
eprint(f'Force stopping node {self.name} with PID {node_pid}')
394-
self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
395-
except Exception:
396-
# The node has already stopped
397-
pass
398-
399-
# Check that node stopped - print only column pid without headers
400-
p_status_output = self.os_ops.exec_command(f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8')
401-
if p_status_output and str(node_pid) in p_status_output:
402-
eprint(f'Failed to stop node {self.name}.')
403-
else:
404-
eprint(f'Node {self.name} has been stopped successfully.')
492+
if node_pid == 0:
493+
return
494+
495+
# TODO: [2025-02-28] It is really the old ugly code. We have to rewrite it!
496+
497+
ps_command = ['ps', '-o', 'pid=', '-p', str(node_pid)]
498+
499+
ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8')
500+
assert type(ps_output) == str # noqa: E721
501+
502+
if ps_output == "":
503+
return
504+
505+
if ps_output != str(node_pid):
506+
__class__._throw_bugcheck__unexpected_result_of_ps(
507+
ps_output,
508+
ps_command)
509+
510+
try:
511+
eprint('Force stopping node {0} with PID {1}'.format(self.name, node_pid))
512+
self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
513+
except Exception:
514+
# The node has already stopped
515+
pass
516+
517+
# Check that node stopped - print only column pid without headers
518+
ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8')
519+
assert type(ps_output) == str # noqa: E721
520+
521+
if ps_output == "":
522+
eprint('Node {0} has been stopped successfully.'.format(self.name))
523+
return
524+
525+
if ps_output == str(node_pid):
526+
eprint('Failed to stop node {0}.'.format(self.name))
527+
return
528+
529+
__class__._throw_bugcheck__unexpected_result_of_ps(
530+
ps_output,
531+
ps_command)
532+
533+
@staticmethod
534+
def _throw_bugcheck__unexpected_result_of_ps(result, cmd):
535+
assert type(result) == str # noqa: E721
536+
assert type(cmd) == list # noqa: E721
537+
errLines = []
538+
errLines.append("[BUG CHECK] Unexpected result of command ps:")
539+
errLines.append(result)
540+
errLines.append("-----")
541+
errLines.append("Command line is {0}".format(cmd))
542+
raise RuntimeError("\n".join(errLines))
405543

406544
def _assign_master(self, master):
407545
"""NOTE: this is a private method!"""

testgres/utils.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,11 +73,13 @@ def execute_utility(args, logfile=None, verbose=False):
7373
return execute_utility2(tconf.os_ops, args, logfile, verbose)
7474

7575

76-
def execute_utility2(os_ops: OsOperations, args, logfile=None, verbose=False):
76+
def execute_utility2(os_ops: OsOperations, args, logfile=None, verbose=False, ignore_errors=False):
7777
assert os_ops is not None
7878
assert isinstance(os_ops, OsOperations)
79+
assert type(verbose) == bool # noqa: E721
80+
assert type(ignore_errors) == bool # noqa: E721
7981

80-
exit_status, out, error = os_ops.exec_command(args, verbose=True)
82+
exit_status, out, error = os_ops.exec_command(args, verbose=True, ignore_errors=ignore_errors)
8183
# decode result
8284
out = '' if not out else out
8385
if isinstance(out, bytes):

0 commit comments

Comments
 (0)