@@ -19,6 +19,11 @@ set -o pipefail
1919
2020# CRICTL is the path of crictl
2121CRICTL=${CRICTL:- " crictl" }
22+ # INITIAL_WAIT_ATTEMPTS is the number of initial attempts made before
23+ # the regular health check starts. cri-containerd and containerd are
24+ # started at around the same time as the health monitor, so they may
25+ # not be ready yet when the health monitor starts.
26+ INITIAL_WAIT_ATTEMPTS=${INITIAL_WAIT_ATTEMPTS:- 5}
2227# COMMAND_TIMEOUT is the timeout for the health check command.
2328COMMAND_TIMEOUT=${COMMAND_TIMEOUT:- 60}
2429# CHECK_PERIOD is the health check period.
@@ -27,13 +32,21 @@ CHECK_PERIOD=${CHECK_PERIOD:-10}
2732# and containerd.
2833SLEEP_SECONDS=${SLEEP_SECONDS:- 120}
2934
# Initial wait: keep running "${CRICTL} pods" (each run bounded by
# COMMAND_TIMEOUT via timeout) until it succeeds or the attempt counter
# equals INITIAL_WAIT_ATTEMPTS.
35+ attempt=1
# The command runs first on every pass; the counter is only compared when
# the command fails (short-circuit ||).
# NOTE(review): the exit test is an equality check and attempt starts at 1
# and only increases, so a limit below 1 would never match and the loop
# would then end only when the command succeeds - consider >=.
36+ until timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null || (( attempt == INITIAL_WAIT_ATTEMPTS ))
37+ do
38+ echo " $attempt initial attempt \" $CRICTL pods\" ! Trying again in $attempt seconds..."
# Post-increment: sleep for the current attempt number of seconds, then
# bump the counter, so the delay grows by one second per failed attempt.
39+ sleep $(( attempt++ ))
40+ done
41+
42+ echo " Start performing health check."
3043while true ; do
3144 # Use crictl pods because it requires both containerd and
3245 # cri-containerd to be working.
3346 if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null; then
34- echo " crictl pods timeout !"
35- pkill containerd
36- pkill cri- containerd
47+ echo " \" $CRICTL pods\" failed !"
48+ pkill -x cri- containerd
49+ pkill -x containerd
3750 # Wait for a while, as we don't want to kill it again before it is really up.
3851 sleep ${SLEEP_SECONDS}
3952 else
0 commit comments