Skip to content

Commit 206b239

Browse files
Random-Liu authored and estesp committed
Add initial wait for health-monitor and use pkill -x.
Signed-off-by: Lantao Liu <[email protected]>
1 parent fc561a2 commit 206b239

1 file changed

Lines changed: 16 additions & 3 deletions

File tree

contrib/health-monitor.sh

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,11 @@ set -o pipefail
1919

2020
# CRICTL is the path of crictl
2121
CRICTL=${CRICTL:-"crictl"}
22+
# INITIAL_WAIT_ATTEMPTS is the number of attempts to make before we start
23+
# performing health check. The problem is that cri-containerd
24+
# and containerd are started around the same time with health
25+
# monitor, so they may not be ready yet when health-monitor is started.
26+
INITIAL_WAIT_ATTEMPTS=${INITIAL_WAIT_ATTEMPTS:-5}
2227
# COMMAND_TIMEOUT is the timeout for the health check command.
2328
COMMAND_TIMEOUT=${COMMAND_TIMEOUT:-60}
2429
# CHECK_PERIOD is the health check period.
@@ -27,13 +32,21 @@ CHECK_PERIOD=${CHECK_PERIOD:-10}
2732
# and containerd.
2833
SLEEP_SECONDS=${SLEEP_SECONDS:-120}
2934

35+
# Initial wait: containerd and cri-containerd are started at about the same
# time as this monitor, so give them a bounded number of chances to come up
# before the health-check loop below starts killing them. A failed attempt k
# is followed by a k-second sleep (linear backoff); the wait ends on the
# first successful "crictl pods" or once INITIAL_WAIT_ATTEMPTS attempts have
# been made, whichever comes first.
attempt=1
# CRICTL is left unquoted on purpose so that a value carrying extra
# arguments still word-splits into a command line.
# shellcheck disable=SC2086
until timeout "${COMMAND_TIMEOUT}" ${CRICTL} pods > /dev/null \
  || (( attempt >= INITIAL_WAIT_ATTEMPTS )); do
  echo "$attempt initial attempt \"$CRICTL pods\"! Trying again in $attempt seconds..."
  # Post-increment: sleep for the current attempt number, then advance it.
  # The ">=" test above (rather than "==") keeps the wait bounded even when
  # INITIAL_WAIT_ATTEMPTS is configured as 0 or a negative number.
  sleep $(( attempt++ ))
done
41+
42+
echo "Start performing health check."
3043
while true; do
3144
# Use crictl pods because it requires both containerd and
3245
# cri-containerd to be working.
3346
if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null; then
34-
echo "crictl pods timeout!"
35-
pkill containerd
36-
pkill cri-containerd
47+
echo "\"$CRICTL pods\" failed!"
48+
pkill -x cri-containerd
49+
pkill -x containerd
3750
# Wait for a while, as we don't want to kill it again before it is really up.
3851
sleep ${SLEEP_SECONDS}
3952
else

0 commit comments

Comments
 (0)