Skip to content

Commit 8ca14bc

Browse files
authored
Initialize cli earlier during agent startup (#21188)
Initialize the CLI command server in two phases: support the ping command while the agent is initializing, then enable the full CLI once the agent has finished initialization.
1 parent fc39809 commit 8ca14bc

File tree

3 files changed

+48
-24
lines changed

3 files changed

+48
-24
lines changed

src/daemon/commands.c

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ char cmd_prefix_by_status[] = {
1414
CMD_PREFIX_ERROR
1515
};
1616

17-
static int command_server_initialized = 0;
17+
static cmd_init_status_t command_server_initialized = CMD_INIT_STATUS_OFF;
1818
static int command_thread_error;
1919
static int command_thread_shutdown;
2020
static unsigned clients = 0;
@@ -52,24 +52,24 @@ static cmd_status_t cmd_mark_stale_nodes_ephemeral(char *args, char **message);
5252
static cmd_status_t cmd_update_node_info(char *args, char **message);
5353

5454
static command_info_t command_info_array[] = {
55-
{"help", "", "Show this help menu.", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY}, // show help menu
56-
{"reload-health", "", "Reload health configuration.", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL}, // reload health configuration
57-
{"reopen-logs", "", "Close and reopen log files.", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL}, // Close and reopen log files
58-
{"shutdown-agent", "", "Cleanup and exit the netdata agent.", cmd_exit_execute, CMD_TYPE_EXCLUSIVE}, // exit cleanly
59-
{"fatal-agent", "", "Log the state and halt the netdata agent.", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY}, // exit with fatal error
60-
{"reload-claiming-state", "", "Reload agent claiming state from disk.", cmd_reload_claiming_state_execute, CMD_TYPE_ORTHOGONAL}, // reload claiming state
61-
{"reload-labels", "", "Reload all localhost labels.", cmd_reload_labels_execute, CMD_TYPE_ORTHOGONAL}, // reload the labels
62-
{"read-config", "", "", cmd_read_config_execute, CMD_TYPE_CONCURRENT},
63-
{"write-config", "", "", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL},
64-
{"ping", "", "Return with 'pong' if agent is alive.", cmd_ping_execute, CMD_TYPE_ORTHOGONAL},
65-
{"aclk-state", "[json]", "Returns current state of ACLK and Netdata Cloud connection. (optionally in json).", cmd_aclk_state, CMD_TYPE_ORTHOGONAL},
66-
{"version", "", "Returns the netdata version.", cmd_version, CMD_TYPE_ORTHOGONAL},
67-
{"dumpconfig", "", "Returns the current netdata.conf on stdout.", cmd_dumpconfig, CMD_TYPE_ORTHOGONAL},
55+
{"help", "", "Show this help menu.", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY, CMD_INIT_STATUS_INIT}, // show help menu
56+
{"reload-health", "", "Reload health configuration.", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // reload health configuration
57+
{"reopen-logs", "", "Close and reopen log files.", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // Close and reopen log files
58+
{"shutdown-agent", "", "Cleanup and exit the netdata agent.", cmd_exit_execute, CMD_TYPE_EXCLUSIVE, CMD_INIT_STATUS_FULL}, // exit cleanly
59+
{"fatal-agent", "", "Log the state and halt the netdata agent.", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY, CMD_INIT_STATUS_FULL}, // exit with fatal error
60+
{"reload-claiming-state", "", "Reload agent claiming state from disk.", cmd_reload_claiming_state_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // reload claiming state
61+
{"reload-labels", "", "Reload all localhost labels.", cmd_reload_labels_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL}, // reload the labels
62+
{"read-config", "", "", cmd_read_config_execute, CMD_TYPE_CONCURRENT, CMD_INIT_STATUS_FULL},
63+
{"write-config", "", "", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
64+
{"ping", "", "Return with 'pong' if agent is alive.", cmd_ping_execute, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_INIT}, // ping command
65+
{"aclk-state", "[json]", "Returns current state of ACLK and Netdata Cloud connection. (optionally in json).", cmd_aclk_state, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
66+
{"version", "", "Returns the netdata version.", cmd_version, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_INIT},
67+
{"dumpconfig", "", "Returns the current netdata.conf on stdout.", cmd_dumpconfig, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
6868
{"mark-stale-nodes-ephemeral", "<node_id | machine_guid | hostname | ALL_NODES>",
69-
"Marks one or all disconnected nodes as ephemeral, while keeping their retention\n available for queries on both this Netdata Agent dashboard and Netdata Cloud", cmd_mark_stale_nodes_ephemeral, CMD_TYPE_ORTHOGONAL},
69+
"Marks one or all disconnected nodes as ephemeral, while keeping their retention\n available for queries on both this Netdata Agent dashboard and Netdata Cloud", cmd_mark_stale_nodes_ephemeral, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
7070
{"remove-stale-node", "<node_id | machine_guid | hostname | ALL_NODES>",
71-
"Marks one or all disconnected nodes as ephemeral, and removes them\n so that they are no longer available for queries, from both this\n Netdata Agent dashboard and Netdata Cloud.", cmd_remove_stale_node, CMD_TYPE_ORTHOGONAL},
72-
{"update-node-info", "", "Schedules an node update message for localhost to Netdata Cloud.", cmd_update_node_info, CMD_TYPE_ORTHOGONAL},
71+
"Marks one or all disconnected nodes as ephemeral, and removes them\n so that they are no longer available for queries, from both this\n Netdata Agent dashboard and Netdata Cloud.", cmd_remove_stale_node, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
72+
{"update-node-info", "", "Schedules an node update message for localhost to Netdata Cloud.", cmd_update_node_info, CMD_TYPE_ORTHOGONAL, CMD_INIT_STATUS_FULL},
7373
};
7474

7575
/* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */
@@ -599,7 +599,12 @@ cmd_status_t execute_command(cmd_t idx, char *args, char **message)
599599
cmd_type_t type = command_info_array[idx].type;
600600

601601
cmd_lock_by_type[type](idx);
602-
status = command_info_array[idx].func(args, message);
602+
if (command_server_initialized >= command_info_array[idx].init_status)
603+
status = command_info_array[idx].func(args, message);
604+
else {
605+
*message = strdupz("Agent is initializing");
606+
status = CMD_STATUS_SUCCESS;
607+
}
603608
cmd_unlock_by_type[type](idx);
604609

605610
return status;
@@ -851,10 +856,19 @@ void commands_init(void)
851856
int error;
852857

853858
sanity_check();
854-
if (command_server_initialized)
859+
if (command_server_initialized == CMD_INIT_STATUS_FULL)
855860
return;
856861

857-
netdata_log_info("Initializing command server.");
862+
if (command_server_initialized == CMD_INIT_STATUS_OFF) {
863+
netdata_log_info("Initializing command server for liveness CHECK");
864+
command_server_initialized = CMD_INIT_STATUS_INIT;
865+
}
866+
else {
867+
netdata_log_info("Initializing full command server.");
868+
command_server_initialized = CMD_INIT_STATUS_FULL;
869+
return;
870+
}
871+
858872
for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) {
859873
fatal_assert(0 == netdata_mutex_init(&command_lock_array[i]));
860874
}
@@ -878,7 +892,6 @@ void commands_init(void)
878892
goto after_error;
879893
}
880894

881-
command_server_initialized = 1;
882895
return;
883896

884897
after_error:
@@ -889,7 +902,7 @@ void commands_exit(void)
889902
{
890903
cmd_t i;
891904

892-
if (!command_server_initialized)
905+
if (command_server_initialized == CMD_INIT_STATUS_OFF)
893906
return;
894907

895908
command_thread_shutdown = 1;
@@ -903,5 +916,5 @@ void commands_exit(void)
903916
}
904917
netdata_rwlock_destroy(&exclusive_rwlock);
905918
netdata_log_info("Command server has stopped.");
906-
command_server_initialized = 0;
919+
command_server_initialized = CMD_INIT_STATUS_OFF;
907920
}

src/daemon/commands.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,19 @@ typedef enum cmd_type {
6565
*/
6666
typedef cmd_status_t (command_action_t) (char *args, char **message);
6767

68+
typedef enum cmd_init_status {
69+
CMD_INIT_STATUS_OFF,
70+
CMD_INIT_STATUS_INIT,
71+
CMD_INIT_STATUS_FULL,
72+
} cmd_init_status_t;
73+
6874
typedef struct command_info {
6975
char *cmd_str; // the command string
7076
char *params;
7177
char *help;
7278
command_action_t *func; // the function that executes the command
7379
cmd_type_t type; // Concurrency control information for the command
80+
cmd_init_status_t init_status; // command availability during start
7481
} command_info_t;
7582

7683
typedef void (command_lock_t) (unsigned index);

src/daemon/main.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,10 @@ int netdata_main(int argc, char **argv) {
10551055
// ----------------------------------------------------------------------------------------------------------------
10561056
delta_startup_time("RRD structures");
10571057

1058+
delta_startup_time("commands liveness support");
1059+
1060+
commands_init();
1061+
10581062
abort_on_fatal_disable();
10591063
if (rrd_init(netdata_configured_hostname, system_info, false))
10601064
fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
@@ -1098,7 +1102,7 @@ int netdata_main(int argc, char **argv) {
10981102
ml_start_threads();
10991103

11001104
// ----------------------------------------------------------------------------------------------------------------
1101-
delta_startup_time("commands API");
1105+
delta_startup_time("commands full API");
11021106

11031107
commands_init();
11041108

0 commit comments

Comments
 (0)