Command server: two-stage start-up so liveness commands answer early.

commands_init() is now called twice from netdata_main(). The first call
starts the command server and records CMD_INIT_STATUS_INIT; the second
call promotes the state to CMD_INIT_STATUS_FULL. Every entry of
command_info_array declares the minimum stage it needs, and
execute_command() replies "Agent is initializing" when a command is
requested below its stage. Only help, ping and version are available in
the INIT stage.

The INIT stage is recorded only when start-up reaches its success return,
so every "goto after_error" path leaves the state at CMD_INIT_STATUS_OFF
and commands_exit() keeps returning early for a server that never started.

In main.c the liveness start-up is placed ahead of the "RRD structures"
timing marker so that marker still immediately precedes rrd_init().

diff --git a/src/daemon/commands.c b/src/daemon/commands.c
index cdc6ef75569d5c..334a02e01a7347 100644
--- a/src/daemon/commands.c
+++ b/src/daemon/commands.c
@@ -14,7 +14,7 @@ char cmd_prefix_by_status[] = {
     CMD_PREFIX_ERROR
 };
 
-static int command_server_initialized = 0;
+static cmd_init_status_t command_server_initialized = CMD_INIT_STATUS_OFF;
 static int command_thread_error;
 static int command_thread_shutdown;
 static unsigned clients = 0;
@@ -52,24 +52,24 @@ static cmd_status_t cmd_mark_stale_nodes_ephemeral(char *args, char **message);
 static cmd_status_t cmd_update_node_info(char *args, char **message);
 
 static command_info_t command_info_array[] = {
-    {"help", "", "Show this help menu.", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY}, // show help menu
-    {"reload-health", "", "Reload health configuration.", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL}, // reload health configuration
-    {"reopen-logs", "", "Close and reopen log files.", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL}, // Close and reopen log files
-    {"shutdown-agent", "", "Cleanup and exit the netdata agent.", cmd_exit_execute, CMD_TYPE_EXCLUSIVE}, // exit cleanly
-    {"fatal-agent", "", "Log the state and halt the netdata agent.", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY}, // exit with fatal error
-    {"reload-claiming-state", "", "Reload agent claiming state from disk.", cmd_reload_claiming_state_execute, CMD_TYPE_ORTHOGONAL}, // reload claiming state
-    {"reload-labels", "", "Reload all localhost labels.", cmd_reload_labels_execute, CMD_TYPE_ORTHOGONAL}, // reload the labels
-    {"read-config", "", "", cmd_read_config_execute, CMD_TYPE_CONCURRENT},
-    {"write-config", "", "", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL},
-    {"ping", "", "Return with 'pong' if agent is alive.", cmd_ping_execute, CMD_TYPE_ORTHOGONAL},
-    {"aclk-state", "[json]", "Returns current state of ACLK and Netdata Cloud connection. (optionally in json).", cmd_aclk_state, CMD_TYPE_ORTHOGONAL},
-    {"version", "", "Returns the netdata version.", cmd_version, CMD_TYPE_ORTHOGONAL},
-    {"dumpconfig", "", "Returns the current netdata.conf on stdout.", cmd_dumpconfig, CMD_TYPE_ORTHOGONAL},
+    {"help", "", "Show this help menu.", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY, CMD_INIT_STATUS_INIT}, // show help menu
+    {"reload-health", "", "Reload health configuration.", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // reload health configuration
+    {"reopen-logs", "", "Close and reopen log files.", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // Close and reopen log files
+    {"shutdown-agent", "", "Cleanup and exit the netdata agent.", cmd_exit_execute, CMD_TYPE_EXCLUSIVE, CMD_INIT_STATUS_FULL}, // exit cleanly
+    {"fatal-agent", "", "Log the state and halt the netdata agent.", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY, CMD_INIT_STATUS_FULL}, // exit with fatal error
+    {"reload-claiming-state", "", "Reload agent claiming state from disk.", cmd_reload_claiming_state_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // reload claiming state
+    {"reload-labels", "", "Reload all localhost labels.", cmd_reload_labels_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // reload the labels
+    {"read-config", "", "", cmd_read_config_execute, CMD_TYPE_CONCURRENT, CMD_INIT_STATUS_FULL},
+    {"write-config", "", "", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
+    {"ping", "", "Return with 'pong' if agent is alive.", cmd_ping_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_INIT}, // ping command
+    {"aclk-state", "[json]", "Returns current state of ACLK and Netdata Cloud connection. (optionally in json).", cmd_aclk_state, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
+    {"version", "", "Returns the netdata version.", cmd_version, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_INIT},
+    {"dumpconfig", "", "Returns the current netdata.conf on stdout.", cmd_dumpconfig, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
     {"mark-stale-nodes-ephemeral", "",
-        "Marks one or all disconnected nodes as ephemeral, while keeping their retention\n available for queries on both this Netdata Agent dashboard and Netdata Cloud", cmd_mark_stale_nodes_ephemeral, CMD_TYPE_ORTHOGONAL},
+        "Marks one or all disconnected nodes as ephemeral, while keeping their retention\n available for queries on both this Netdata Agent dashboard and Netdata Cloud", cmd_mark_stale_nodes_ephemeral, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
     {"remove-stale-node", "",
-        "Marks one or all disconnected nodes as ephemeral, and removes them\n so that they are no longer available for queries, from both this\n Netdata Agent dashboard and Netdata Cloud.", cmd_remove_stale_node, CMD_TYPE_ORTHOGONAL},
-    {"update-node-info", "", "Schedules an node update message for localhost to Netdata Cloud.", cmd_update_node_info, CMD_TYPE_ORTHOGONAL},
+        "Marks one or all disconnected nodes as ephemeral, and removes them\n so that they are no longer available for queries, from both this\n Netdata Agent dashboard and Netdata Cloud.", cmd_remove_stale_node, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
+    {"update-node-info", "", "Schedules an node update message for localhost to Netdata Cloud.", cmd_update_node_info, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
 };
 
 /* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */
@@ -599,7 +599,12 @@ cmd_status_t execute_command(cmd_t idx, char *args, char **message)
     cmd_type_t type = command_info_array[idx].type;
 
     cmd_lock_by_type[type](idx);
-    status = command_info_array[idx].func(args, message);
+    if (command_server_initialized >= command_info_array[idx].init_status)
+        status = command_info_array[idx].func(args, message);
+    else {
+        *message = strdupz("Agent is initializing");
+        status = CMD_STATUS_SUCCESS;
+    }
     cmd_unlock_by_type[type](idx);
 
     return status;
@@ -851,10 +856,18 @@ void commands_init(void)
     int error;
 
     sanity_check();
-    if (command_server_initialized)
+    if (command_server_initialized == CMD_INIT_STATUS_FULL)
         return;
 
-    netdata_log_info("Initializing command server.");
+    if (command_server_initialized == CMD_INIT_STATUS_INIT) {
+        // second call: start-up already succeeded, only unlock the full command set
+        netdata_log_info("Initializing full command server.");
+        command_server_initialized = CMD_INIT_STATUS_FULL;
+        return;
+    }
+
+    netdata_log_info("Initializing command server for liveness CHECK");
+
     for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) {
         fatal_assert(0 == netdata_mutex_init(&command_lock_array[i]));
     }
@@ -878,7 +891,7 @@ void commands_init(void)
         goto after_error;
     }
 
-    command_server_initialized = 1;
+    command_server_initialized = CMD_INIT_STATUS_INIT; // recorded on success only; after_error leaves the state OFF
     return;
 
 after_error:
@@ -889,7 +902,7 @@ void commands_exit(void)
 {
     cmd_t i;
 
-    if (!command_server_initialized)
+    if (command_server_initialized == CMD_INIT_STATUS_OFF)
         return;
 
     command_thread_shutdown = 1;
@@ -903,5 +916,5 @@ void commands_exit(void)
     }
     netdata_rwlock_destroy(&exclusive_rwlock);
     netdata_log_info("Command server has stopped.");
-    command_server_initialized = 0;
+    command_server_initialized = CMD_INIT_STATUS_OFF;
 }
diff --git a/src/daemon/commands.h b/src/daemon/commands.h
index 6b405708eb9350..7d50a0f3052283 100644
--- a/src/daemon/commands.h
+++ b/src/daemon/commands.h
@@ -65,12 +65,19 @@ typedef enum cmd_type {
  */
 typedef cmd_status_t (command_action_t) (char *args, char **message);
 
+typedef enum cmd_init_status {
+    CMD_INIT_STATUS_OFF,    // command server not started; keep the order, stages are compared with >=
+    CMD_INIT_STATUS_INIT,   // server started, only commands marked INIT are executed
+    CMD_INIT_STATUS_FULL,   // every command is executed
+} cmd_init_status_t;
+
 typedef struct command_info {
     char *cmd_str;              // the command string
     char *params;
     char *help;
     command_action_t *func;     // the function that executes the command
     cmd_type_t type;            // Concurrency control information for the command
+    cmd_init_status_t init_status; // command availability during start
 } command_info_t;
 
 typedef void (command_lock_t) (unsigned index);
diff --git a/src/daemon/main.c b/src/daemon/main.c
index 1d6548f467186b..121b2936d35e29 100644
--- a/src/daemon/main.c
+++ b/src/daemon/main.c
@@ -1055,6 +1055,10 @@ int netdata_main(int argc, char **argv) {
     // ----------------------------------------------------------------------------------------------------------------
+    delta_startup_time("commands liveness support");
+
+    commands_init();
+
     delta_startup_time("RRD structures");
 
     abort_on_fatal_disable();
     if (rrd_init(netdata_configured_hostname, system_info, false))
         fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
@@ -1098,7 +1102,7 @@ int netdata_main(int argc, char **argv) {
     ml_start_threads();
 
     // ----------------------------------------------------------------------------------------------------------------
-    delta_startup_time("commands API");
+    delta_startup_time("commands full API");
 
     commands_init();
 