Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b4baa8f
feat: Gracefully drain listeners before envoy shutdown on pod termina…
davidalger Feb 16, 2024
2b9d79c
Setup hooks to manage graceful Envoy shutdown process
davidalger Feb 17, 2024
fa8ad3a
Implement graceful drain process in shutdown-manager
davidalger Feb 19, 2024
9a8b290
Send logs from exec prestop hook to stdout of main process
davidalger Feb 19, 2024
666275e
Merge branch 'main' into algerdev/graceful-pod-termination
davidalger Feb 21, 2024
367d4a3
Make linter happy
davidalger Feb 21, 2024
b2e68b1
Minor cleanup
davidalger Feb 21, 2024
4a3b90f
Stop polling when ready-timeout exceeded
davidalger Feb 22, 2024
fa0f1ec
Container configuration for shutdown manager
davidalger Feb 23, 2024
23144c9
Setup health probes
davidalger Feb 23, 2024
62ec399
Configurable shutdown timeouts
davidalger Feb 23, 2024
b3bc785
Correct exitAtConnections logic
davidalger Feb 23, 2024
561530a
Lower shutdown timeouts for conformance tests
davidalger Feb 23, 2024
1ab6e6c
Merge branch 'main' into algerdev/graceful-pod-termination
davidalger Feb 23, 2024
ac5bac1
Integrate with latest from main
davidalger Feb 23, 2024
8cdc14f
Describe node used in test runs
davidalger Feb 23, 2024
0c543b6
Use TAG=latest for conformance tests
davidalger Feb 23, 2024
b3dcffd
Update shutdown/ready logic and misc cleanup
davidalger Feb 23, 2024
c6bc7fb
Shutdown manager config test
davidalger Feb 23, 2024
2ffd947
Test coverage FileLogger
davidalger Feb 23, 2024
cf106fa
Require use of patch field to override config on shutdown-manager con…
davidalger Feb 26, 2024
ae829d0
Merge branch 'main' into algerdev/graceful-pod-termination
davidalger Feb 26, 2024
f06cc01
Update docs
davidalger Feb 26, 2024
fb4e3ed
Remove knob for ExitAtConnections
davidalger Feb 26, 2024
f1b4be4
Pass image version for shutdown-manager in from build
davidalger Feb 26, 2024
24f9484
Fix generated content
davidalger Feb 26, 2024
0c701bd
Recombine image consts
davidalger Feb 26, 2024
eafa8a5
Lower default min-drain-duration to 5 seconds
davidalger Feb 26, 2024
7696402
Update generated docs
davidalger Feb 26, 2024
3a5071a
Merge branch 'main' into algerdev/graceful-pod-termination
davidalger Feb 26, 2024
f249d73
Fail health checks to support fast failure when active health checking
davidalger Feb 26, 2024
fa9bcd3
Update tests
davidalger Feb 26, 2024
013099d
Merge branch 'main' into algerdev/graceful-pod-termination
davidalger Feb 26, 2024
afcbf23
Move drain type config to correct place
davidalger Feb 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions api/v1alpha1/envoyproxy_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

autoscalingv2 "k8s.io/api/autoscaling/v2"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/utils/ptr"
)

Expand Down Expand Up @@ -120,3 +121,13 @@ func (logging *ProxyLogging) GetEnvoyProxyComponentLevel() string {

return strings.Join(args, ",")
}

// DefaultShutdownManagerContainerResourceRequirements builds a fresh
// ResourceRequirements value for the shutdown manager container.
//
// Only resource requests are populated (CPU and memory, taken from the
// DefaultShutdownManager*ResourceRequests constants); no limits are set.
// A new value is allocated on every call.
func DefaultShutdownManagerContainerResourceRequirements() *v1.ResourceRequirements {
	requests := v1.ResourceList{}
	requests[v1.ResourceCPU] = resource.MustParse(DefaultShutdownManagerCPUResourceRequests)
	requests[v1.ResourceMemory] = resource.MustParse(DefaultShutdownManagerMemoryResourceRequests)

	return &v1.ResourceRequirements{Requests: requests}
}
19 changes: 19 additions & 0 deletions api/v1alpha1/envoyproxy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ type EnvoyProxySpec struct {
//
// +optional
MergeGateways *bool `json:"mergeGateways,omitempty"`

// Shutdown defines configuration for graceful envoy shutdown process.
//
// +optional
Shutdown *ShutdownConfig `json:"shutdown,omitempty"`
}

type ProxyTelemetry struct {
Expand Down Expand Up @@ -115,6 +120,20 @@ type EnvoyProxyProvider struct {
Kubernetes *EnvoyProxyKubernetesProvider `json:"kubernetes,omitempty"`
}

// ShutdownConfig defines configuration for graceful envoy shutdown process.
type ShutdownConfig struct {
// DrainTimeout defines the graceful drain timeout. This should be less than the pod's terminationGracePeriodSeconds.
// If unspecified, defaults to 600 seconds.
//
// +optional
DrainTimeout *metav1.Duration `json:"drainTimeout,omitempty"`
// MinDrainDuration defines the minimum drain duration allowing time for endpoint deprogramming to complete.
// If unspecified, defaults to 5 seconds.
//
// +optional
Copy link
Copy Markdown
Contributor

@arkodg arkodg Feb 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need MinDrainDuration & ExitAtConnections in the first iteration ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed ExitAtConnections in fb4e3ed but MinDrainDuration really should stay. It's an important knob that should be configurable. I will be using it when rolling this out to infra I manage, and it's being used in test/config/gatewayclass.yaml to prevent a conservative default of 15 seconds from slowing CI processes.

Copy link
Copy Markdown
Contributor

@arkodg arkodg Feb 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you elaborate a little more on the endpoint deprogramming part ? this timeout is added to ensure the proxy's IP is removed from kube-proxy or other external loadbalancers ?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if this value is present to deal with eventual consistency of the various distributed networking components, prefer if we dropped this to 5s

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to deal with eventual consistency of the various distributed networking components

That's exactly what it's for. I've lowered the default to 5s

MinDrainDuration *metav1.Duration `json:"minDrainDuration,omitempty"`
}

// EnvoyProxyKubernetesProvider defines configuration for the Kubernetes resource
// provider.
type EnvoyProxyKubernetesProvider struct {
Expand Down
6 changes: 6 additions & 0 deletions api/v1alpha1/shared_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ const (
DefaultDeploymentMemoryResourceRequests = "512Mi"
// DefaultEnvoyProxyImage is the default image used by envoyproxy
DefaultEnvoyProxyImage = "envoyproxy/envoy:distroless-dev"
// DefaultShutdownManagerCPUResourceRequests is the default CPU resource request for the shutdown manager container.
DefaultShutdownManagerCPUResourceRequests = "10m"
// DefaultShutdownManagerMemoryResourceRequests is the default memory resource request for the shutdown manager container.
DefaultShutdownManagerMemoryResourceRequests = "32Mi"
// DefaultShutdownManagerImage is the default image used for the shutdown manager.
DefaultShutdownManagerImage = "envoyproxy/gateway-dev:latest"
// DefaultRateLimitImage is the default image used by ratelimit.
DefaultRateLimitImage = "envoyproxy/ratelimit:master"
// HTTPProtocol is the commonly used HTTP protocol.
Expand Down
30 changes: 30 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -6631,6 +6631,21 @@ spec:
required:
- type
type: object
shutdown:
description: Shutdown defines configuration for graceful envoy shutdown
process.
properties:
drainTimeout:
description: DrainTimeout defines the graceful drain timeout.
This should be less than the pod's terminationGracePeriodSeconds.
If unspecified, defaults to 600 seconds.
type: string
minDrainDuration:
description: MinDrainDuration defines the minimum drain duration
allowing time for endpoint deprogramming to complete. If unspecified,
defaults to 5 seconds.
type: string
type: object
telemetry:
description: Telemetry defines telemetry parameters for managed proxies.
properties:
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
go.opentelemetry.io/proto/otlp v1.1.0
go.uber.org/zap v1.26.0
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
golang.org/x/sys v0.17.0
google.golang.org/grpc v1.61.1
google.golang.org/protobuf v1.32.0
gopkg.in/yaml.v3 v3.0.1
Expand Down Expand Up @@ -109,7 +110,6 @@ require (
golang.org/x/net v0.20.0 // indirect
golang.org/x/oauth2 v0.16.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sys v0.17.0 // indirect
golang.org/x/term v0.16.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: default/eg/http
perConnectionBufferLimitBytes: 32768
- activeState:
Expand Down Expand Up @@ -881,6 +882,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: default/eg/grpc
perConnectionBufferLimitBytes: 32768
- activeState:
Expand All @@ -903,6 +905,7 @@ xds:
socketAddress:
address: 0.0.0.0
portValue: 8443
drainType: MODIFY_ONLY
filterChains:
- filterChainMatch:
serverNames:
Expand Down Expand Up @@ -948,6 +951,7 @@ xds:
socketAddress:
address: 0.0.0.0
portValue: 1234
drainType: MODIFY_ONLY
filterChains:
- filters:
- name: envoy.filters.network.tcp_proxy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: default/eg/http
perConnectionBufferLimitBytes: 32768
- '@type': type.googleapis.com/envoy.admin.v3.RoutesConfigDump
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@
}
]
},
"drainType": "MODIFY_ONLY",
"name": "default/eg/http",
"perConnectionBufferLimitBytes": 32768
}
Expand Down Expand Up @@ -679,6 +680,7 @@
}
]
},
"drainType": "MODIFY_ONLY",
"name": "default/eg/grpc",
"perConnectionBufferLimitBytes": 32768
}
Expand Down Expand Up @@ -715,6 +717,7 @@
"portValue": 8443
}
},
"drainType": "MODIFY_ONLY",
"filterChains": [
{
"filterChainMatch": {
Expand Down Expand Up @@ -792,6 +795,7 @@
"portValue": 1234
}
},
"drainType": "MODIFY_ONLY",
"filterChains": [
{
"filters": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: default/eg/http
perConnectionBufferLimitBytes: 32768
- activeState:
Expand Down Expand Up @@ -407,6 +408,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: default/eg/grpc
perConnectionBufferLimitBytes: 32768
- activeState:
Expand All @@ -429,6 +431,7 @@ xds:
socketAddress:
address: 0.0.0.0
portValue: 8443
drainType: MODIFY_ONLY
filterChains:
- filterChainMatch:
serverNames:
Expand Down Expand Up @@ -474,6 +477,7 @@ xds:
socketAddress:
address: 0.0.0.0
portValue: 1234
drainType: MODIFY_ONLY
filterChains:
- filters:
- name: envoy.filters.network.tcp_proxy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: default/eg/http
perConnectionBufferLimitBytes: 32768
- activeState:
Expand Down Expand Up @@ -124,6 +125,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: default/eg/grpc
perConnectionBufferLimitBytes: 32768
- activeState:
Expand All @@ -146,6 +148,7 @@ xds:
socketAddress:
address: 0.0.0.0
portValue: 8443
drainType: MODIFY_ONLY
filterChains:
- filterChainMatch:
serverNames:
Expand Down Expand Up @@ -191,6 +194,7 @@ xds:
socketAddress:
address: 0.0.0.0
portValue: 1234
drainType: MODIFY_ONLY
filterChains:
- filters:
- name: envoy.filters.network.tcp_proxy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@
}
]
},
"drainType": "MODIFY_ONLY",
"name": "envoy-gateway-system/eg/http",
"perConnectionBufferLimitBytes": 32768
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: envoy-gateway-system/eg/http
perConnectionBufferLimitBytes: 32768
- '@type': type.googleapis.com/envoy.admin.v3.RoutesConfigDump
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,6 @@ xds:
serverHeaderTransformation: PASS_THROUGH
statPrefix: http
useRemoteAddress: true
drainType: MODIFY_ONLY
name: envoy-gateway-system/eg/http
perConnectionBufferLimitBytes: 32768
71 changes: 71 additions & 0 deletions internal/cmd/envoy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright Envoy Gateway Authors
// SPDX-License-Identifier: Apache-2.0
// The full text of the Apache license is available in the LICENSE file at
// the root of the repo.

package cmd

import (
"time"

"github.com/spf13/cobra"

"github.com/envoyproxy/gateway/internal/cmd/envoy"
)

// getEnvoyCommand builds the parent "envoy" cobra command and attaches the
// "shutdown" and "shutdown-manager" subcommands to it. The parent command
// defines no run function of its own.
func getEnvoyCommand() *cobra.Command {
	envoyCmd := &cobra.Command{
		Use:   "envoy",
		Short: "Envoy proxy management",
	}

	envoyCmd.AddCommand(
		getShutdownCommand(),
		getShutdownManagerCommand(),
	)

	return envoyCmd
}

// getShutdownCommand builds the "shutdown" cobra command.
//
// The command exposes three flags and forwards their parsed values to
// envoy.Shutdown when run:
//   - --drain-timeout (default 600s)
//   - --min-drain-duration (default 5s)
//   - --exit-at-connections (default 0)
func getShutdownCommand() *cobra.Command {
	var (
		drainTimeout      time.Duration
		minDrainDuration  time.Duration
		exitAtConnections int
	)

	shutdownCmd := &cobra.Command{
		Use:   "shutdown",
		Short: "Gracefully drain open connections prior to pod shutdown.",
		// The flag variables are captured by reference, so they hold the
		// parsed values by the time RunE executes.
		RunE: func(_ *cobra.Command, _ []string) error {
			return envoy.Shutdown(drainTimeout, minDrainDuration, exitAtConnections)
		},
	}

	flags := shutdownCmd.PersistentFlags()
	flags.DurationVar(&drainTimeout, "drain-timeout", 600*time.Second,
		"Graceful shutdown timeout. This should be less than the pod's terminationGracePeriodSeconds.")
	flags.DurationVar(&minDrainDuration, "min-drain-duration", 5*time.Second,
		"Minimum drain duration allowing time for endpoint deprogramming to complete.")
	flags.IntVar(&exitAtConnections, "exit-at-connections", 0,
		"Number of connections to wait for when monitoring Envoy listener drain process.")

	return shutdownCmd
}

// getShutdownManagerCommand builds the "shutdown-manager" cobra command.
//
// The command exposes a single --ready-timeout flag (default 610s) and passes
// its parsed value to envoy.ShutdownManager when run.
func getShutdownManagerCommand() *cobra.Command {
	var readyTimeout time.Duration

	managerCmd := &cobra.Command{
		Use:   "shutdown-manager",
		Short: "Provides HTTP endpoint used in preStop hook to block until ready for pod shutdown.",
		// readyTimeout is captured by reference and is populated by flag
		// parsing before RunE executes.
		RunE: func(_ *cobra.Command, _ []string) error {
			return envoy.ShutdownManager(readyTimeout)
		},
	}

	flags := managerCmd.PersistentFlags()
	flags.DurationVar(&readyTimeout, "ready-timeout", 610*time.Second,
		"Shutdown ready timeout. This should be greater than shutdown's drain-timeout and less than the pod's terminationGracePeriodSeconds.")

	return managerCmd
}
Loading