|
49 | 49 | RECOVERY_CONF_FILE, \
|
50 | 50 | PG_LOG_FILE, \
|
51 | 51 | UTILS_LOG_FILE, \
|
52 |
| - PG_PID_FILE |
| 52 | + PG_CTL__STATUS__OK, \ |
| 53 | + PG_CTL__STATUS__NODE_IS_STOPPED, \ |
| 54 | + PG_CTL__STATUS__BAD_DATADIR \ |
53 | 55 |
|
54 | 56 | from .consts import \
|
55 | 57 | MAX_LOGICAL_REPLICATION_WORKERS, \
|
@@ -132,9 +134,6 @@ class PostgresNode(object):
|
132 | 134 | # a max number of node start attempts
|
133 | 135 | _C_MAX_START_ATEMPTS = 5
|
134 | 136 |
|
135 |
| - # a max number of read pid file attempts |
136 |
| - _C_MAX_GET_PID_ATEMPTS = 5 |
137 |
| - |
138 | 137 | def __init__(self, name=None, base_dir=None, port=None, conn_params: ConnectionParams = ConnectionParams(),
|
139 | 138 | bin_dir=None, prefix=None):
|
140 | 139 | """
|
@@ -211,40 +210,136 @@ def pid(self):
|
211 | 210 | Return postmaster's PID if node is running, else 0.
|
212 | 211 | """
|
213 | 212 |
|
214 |
| - nAttempt = 0 |
215 |
| - pid_file = os.path.join(self.data_dir, PG_PID_FILE) |
216 |
| - pid_s: str = None |
| 213 | + self__data_dir = self.data_dir |
| 214 | + |
| 215 | + _params = [ |
| 216 | + self._get_bin_path('pg_ctl'), |
| 217 | + "-D", self__data_dir, |
| 218 | + "status" |
| 219 | + ] # yapf: disable |
| 220 | + |
| 221 | + status_code, out, error = execute_utility2( |
| 222 | + self.os_ops, |
| 223 | + _params, |
| 224 | + self.utils_log_file, |
| 225 | + verbose=True, |
| 226 | + ignore_errors=True) |
| 227 | + |
| 228 | + assert type(status_code) == int # noqa: E721 |
| 229 | + assert type(out) == str # noqa: E721 |
| 230 | + assert type(error) == str # noqa: E721 |
| 231 | + |
| 232 | + # ----------------- |
| 233 | + if status_code == PG_CTL__STATUS__NODE_IS_STOPPED: |
| 234 | + return 0 |
| 235 | + |
| 236 | + # ----------------- |
| 237 | + if status_code == PG_CTL__STATUS__BAD_DATADIR: |
| 238 | + return 0 |
| 239 | + |
| 240 | + # ----------------- |
| 241 | + if status_code != PG_CTL__STATUS__OK: |
| 242 | + errMsg = "Getting of a node status [data_dir is {0}] failed.".format(self__data_dir) |
| 243 | + |
| 244 | + raise ExecUtilException( |
| 245 | + message=errMsg, |
| 246 | + command=_params, |
| 247 | + exit_code=status_code, |
| 248 | + out=out, |
| 249 | + error=error, |
| 250 | + ) |
| 251 | + |
| 252 | + # ----------------- |
| 253 | + assert status_code == PG_CTL__STATUS__OK |
| 254 | + |
| 255 | + if out == "": |
| 256 | + __class__._throw_error__pg_ctl_returns_an_empty_string( |
| 257 | + _params |
| 258 | + ) |
| 259 | + |
| 260 | + C_PID_PREFIX = "(PID: " |
| 261 | + |
| 262 | + i = out.find(C_PID_PREFIX) |
| 263 | + |
| 264 | + if i == -1: |
| 265 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 266 | + out, |
| 267 | + _params |
| 268 | + ) |
| 269 | + |
| 270 | + assert i > 0 |
| 271 | + assert i < len(out) |
| 272 | + assert len(C_PID_PREFIX) <= len(out) |
| 273 | + assert i <= len(out) - len(C_PID_PREFIX) |
| 274 | + |
| 275 | + i += len(C_PID_PREFIX) |
| 276 | + start_pid_s = i |
| 277 | + |
217 | 278 | while True:
|
218 |
| - if nAttempt == __class__._C_MAX_GET_PID_ATEMPTS: |
219 |
| - errMsg = "Can't read postmaster pid file [{0}].".format(pid_file) |
220 |
| - raise Exception(errMsg) |
| 279 | + if i == len(out): |
| 280 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 281 | + out, |
| 282 | + _params |
| 283 | + ) |
221 | 284 |
|
222 |
| - nAttempt += 1 |
| 285 | + ch = out[i] |
223 | 286 |
|
224 |
| - s1 = self.status() |
225 |
| - if s1 != NodeStatus.Running: |
226 |
| - return 0 |
| 287 | + if ch == ")": |
| 288 | + break |
227 | 289 |
|
228 |
| - try: |
229 |
| - lines = self.os_ops.readlines(pid_file) |
230 |
| - except Exception: |
231 |
| - s2 = self.status() |
232 |
| - if s2 == NodeStatus.Running: |
233 |
| - raise |
234 |
| - return 0 |
235 |
| - |
236 |
| - assert lines is not None # [2025-02-27] OK? |
237 |
| - assert type(lines) == list # noqa: E721 |
238 |
| - if len(lines) == 0: |
| 290 | + if ch.isdigit(): |
| 291 | + i += 1 |
239 | 292 | continue
|
240 | 293 |
|
241 |
| - pid_s = lines[0] |
242 |
| - assert type(pid_s) == str # noqa: E721 |
243 |
| - if len(pid_s) == 0: |
244 |
| - continue |
| 294 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 295 | + out, |
| 296 | + _params |
| 297 | + ) |
| 298 | + assert False |
| 299 | + |
| 300 | + if i == start_pid_s: |
| 301 | + __class__._throw_error__pg_ctl_returns_an_unexpected_string( |
| 302 | + out, |
| 303 | + _params |
| 304 | + ) |
| 305 | + |
| 306 | + # TODO: Let's verify a length of pid string. |
| 307 | + |
| 308 | + pid = int(out[start_pid_s:i]) |
| 309 | + |
| 310 | + if pid == 0: |
| 311 | + __class__._throw_error__pg_ctl_returns_a_zero_pid( |
| 312 | + out, |
| 313 | + _params |
| 314 | + ) |
| 315 | + |
| 316 | + assert pid != 0 |
| 317 | + return pid |
245 | 318 |
|
246 |
| - pid = int(pid_s) |
247 |
| - return pid |
| 319 | + @staticmethod |
| 320 | + def _throw_error__pg_ctl_returns_an_empty_string(_params): |
| 321 | + errLines = [] |
| 322 | + errLines.append("Utility pg_ctl returns empty string.") |
| 323 | + errLines.append("Command line is {0}".format(_params)) |
| 324 | + raise RuntimeError("\n".join(errLines)) |
| 325 | + |
| 326 | + @staticmethod |
| 327 | + def _throw_error__pg_ctl_returns_an_unexpected_string(out, _params): |
| 328 | + errLines = [] |
| 329 | + errLines.append("Utility pg_ctl returns an unexpected string:") |
| 330 | + errLines.append(out) |
| 331 | + errLines.append("------------") |
| 332 | + errLines.append("Command line is {0}".format(_params)) |
| 333 | + raise RuntimeError("\n".join(errLines)) |
| 334 | + |
| 335 | + @staticmethod |
| 336 | + def _throw_error__pg_ctl_returns_a_zero_pid(out, _params): |
| 337 | + errLines = [] |
| 338 | + errLines.append("Utility pg_ctl returns a zero pid. Output string is:") |
| 339 | + errLines.append(out) |
| 340 | + errLines.append("------------") |
| 341 | + errLines.append("Command line is {0}".format(_params)) |
| 342 | + raise RuntimeError("\n".join(errLines)) |
248 | 343 |
|
249 | 344 | @property
|
250 | 345 | def auxiliary_pids(self):
|
@@ -367,41 +462,84 @@ def version(self):
|
367 | 462 | return self._pg_version
|
368 | 463 |
|
369 | 464 | def _try_shutdown(self, max_attempts, with_force=False):
|
| 465 | + assert type(max_attempts) == int # noqa: E721 |
| 466 | + assert type(with_force) == bool # noqa: E721 |
| 467 | + assert max_attempts > 0 |
| 468 | + |
370 | 469 | attempts = 0
|
| 470 | + |
| 471 | + # try stopping server N times |
| 472 | + while attempts < max_attempts: |
| 473 | + attempts += 1 |
| 474 | + try: |
| 475 | + self.stop() |
| 476 | + except ExecUtilException: |
| 477 | + continue # one more time |
| 478 | + except Exception: |
| 479 | + eprint('cannot stop node {}'.format(self.name)) |
| 480 | + break |
| 481 | + |
| 482 | + return # OK |
| 483 | + |
| 484 | + # If force stopping is enabled and PID is valid |
| 485 | + if not with_force: |
| 486 | + return False |
| 487 | + |
371 | 488 | node_pid = self.pid
|
| 489 | + assert node_pid is not None |
| 490 | + assert type(node_pid) == int # noqa: E721 |
372 | 491 |
|
373 |
| - if node_pid > 0: |
374 |
| - # try stopping server N times |
375 |
| - while attempts < max_attempts: |
376 |
| - try: |
377 |
| - self.stop() |
378 |
| - break # OK |
379 |
| - except ExecUtilException: |
380 |
| - pass # one more time |
381 |
| - except Exception: |
382 |
| - eprint('cannot stop node {}'.format(self.name)) |
383 |
| - break |
384 |
| - |
385 |
| - attempts += 1 |
386 |
| - |
387 |
| - # If force stopping is enabled and PID is valid |
388 |
| - if with_force and node_pid != 0: |
389 |
| - # If we couldn't stop the node |
390 |
| - p_status_output = self.os_ops.exec_command(cmd=f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8') |
391 |
| - if self.status() != NodeStatus.Stopped and p_status_output and str(node_pid) in p_status_output: |
392 |
| - try: |
393 |
| - eprint(f'Force stopping node {self.name} with PID {node_pid}') |
394 |
| - self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False) |
395 |
| - except Exception: |
396 |
| - # The node has already stopped |
397 |
| - pass |
398 |
| - |
399 |
| - # Check that node stopped - print only column pid without headers |
400 |
| - p_status_output = self.os_ops.exec_command(f'ps -o pid= -p {node_pid}', shell=True, ignore_errors=True).decode('utf-8') |
401 |
| - if p_status_output and str(node_pid) in p_status_output: |
402 |
| - eprint(f'Failed to stop node {self.name}.') |
403 |
| - else: |
404 |
| - eprint(f'Node {self.name} has been stopped successfully.') |
| 492 | + if node_pid == 0: |
| 493 | + return |
| 494 | + |
| 495 | + # TODO: [2025-02-28] It is really the old ugly code. We have to rewrite it! |
| 496 | + |
| 497 | + ps_command = ['ps', '-o', 'pid=', '-p', str(node_pid)] |
| 498 | + |
| 499 | + ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8') |
| 500 | + assert type(ps_output) == str # noqa: E721 |
| 501 | + |
| 502 | + if ps_output == "": |
| 503 | + return |
| 504 | + |
| 505 | + if ps_output != str(node_pid): |
| 506 | + __class__._throw_bugcheck__unexpected_result_of_ps( |
| 507 | + ps_output, |
| 508 | + ps_command) |
| 509 | + |
| 510 | + try: |
| 511 | + eprint('Force stopping node {0} with PID {1}'.format(self.name, node_pid)) |
| 512 | + self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False) |
| 513 | + except Exception: |
| 514 | + # The node has already stopped |
| 515 | + pass |
| 516 | + |
| 517 | + # Check that node stopped - print only column pid without headers |
| 518 | + ps_output = self.os_ops.exec_command(cmd=ps_command, shell=True, ignore_errors=True).decode('utf-8') |
| 519 | + assert type(ps_output) == str # noqa: E721 |
| 520 | + |
| 521 | + if ps_output == "": |
| 522 | + eprint('Node {0} has been stopped successfully.'.format(self.name)) |
| 523 | + return |
| 524 | + |
| 525 | + if ps_output == str(node_pid): |
| 526 | + eprint('Failed to stop node {0}.'.format(self.name)) |
| 527 | + return |
| 528 | + |
| 529 | + __class__._throw_bugcheck__unexpected_result_of_ps( |
| 530 | + ps_output, |
| 531 | + ps_command) |
| 532 | + |
| 533 | + @staticmethod |
| 534 | + def _throw_bugcheck__unexpected_result_of_ps(result, cmd): |
| 535 | + assert type(result) == str # noqa: E721 |
| 536 | + assert type(cmd) == list # noqa: E721 |
| 537 | + errLines = [] |
| 538 | + errLines.append("[BUG CHECK] Unexpected result of command ps:") |
| 539 | + errLines.append(result) |
| 540 | + errLines.append("-----") |
| 541 | + errLines.append("Command line is {0}".format(cmd)) |
| 542 | + raise RuntimeError("\n".join(errLines)) |
405 | 543 |
|
406 | 544 | def _assign_master(self, master):
|
407 | 545 | """NOTE: this is a private method!"""
|
|
0 commit comments