-
Notifications
You must be signed in to change notification settings - Fork 883
trace connections using pinned eBPF map #2057
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a3b0086
f70c674
09ca232
2f054f0
4e251b6
62cd5f7
5f958e2
d228e08
886db13
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* | ||
| * Copyright (c) 2019 Fastly Inc. | ||
| * | ||
| * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| * of this software and associated documentation files (the "Software"), to | ||
| * deal in the Software without restriction, including without limitation the | ||
| * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
| * sell copies of the Software, and to permit persons to whom the Software is | ||
| * furnished to do so, subject to the following conditions: | ||
| * | ||
| * The above copyright notice and this permission notice shall be included in | ||
| * all copies or substantial portions of the Software. | ||
| * | ||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
| * IN THE SOFTWARE. | ||
| */ | ||
| #ifndef h2o__ebpf_h | ||
| #define h2o__ebpf_h | ||
|
|
||
/*
 * Lookup key identifying one connection in the eBPF tracing map.
 * The code that builds a key zero-fills the whole struct before setting the fields, so bytes that are not
 * explicitly written are 0.
 */
| typedef struct h2o_ebpf_map_key_t { | ||
/* address obtained through h2o_socket_getsockname() */
| struct { | ||
/* raw address bytes; an IPv4 address occupies only the first 4 bytes */
| uint8_t ip[16]; | ||
/* port exactly as stored in sin_port / sin6_port (no byte-order conversion is applied) */
| uint16_t port; | ||
| } source; | ||
/* address obtained through h2o_socket_getpeername(); same layout as `source` */
| struct { | ||
| uint8_t ip[16]; | ||
| uint16_t port; | ||
| } destination; | ||
/* 6 when the local address family is AF_INET6, otherwise 4 */
| uint8_t family; | ||
/* socket type reported by getsockopt(SO_TYPE), narrowed to 8 bits */
| uint8_t protocol; | ||
| } h2o_ebpf_map_key_t; | ||
|
|
||
| #define H2O_EBPF_MAP_PATH "/sys/fs/bpf/h2o_map" | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -108,6 +108,11 @@ struct st_h2o_socket_t { | |
| * total bytes written (above the TLS layer) | ||
| */ | ||
| size_t bytes_written; | ||
| /** | ||
| * ternary flag to track if sock is being traced. 0 indicates value unknown, 1 means is traced, -1 is not traced | ||
| */ | ||
| int _is_traced; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure if this should be a ternary flag. If I understand correctly, the intent of defining this as a ternary flag is to lazy-load the information from the eBPF map. However, I am not sure if that should be done, because the map is an LRU. I think we should try to load the information as soon as the server-side of the socket is created, to avoid the risk of the entry corresponding to the created socket being evicted from the eBPF map. The positive side effect of making such a change would be that this can then be an ordinary boolean flag, because the "unknown" state becomes unnecessary. WDYT?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes exactly - but the LRU's relatively small data retention is a good point indeed! My initial idea behind the lazily-loaded ternary state was to completely isolate the tracing path from the rest of the code. This works if […] I'm not sure what to conclude from here. I implemented the non-ternary-state version in this commit - feel free to tell me what you think, I can cherry-pick it to this branch if you think it's a better setup 👍
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for the non-ternary-state version. Applied. |
||
|
|
||
| struct { | ||
| void (*cb)(void *data); | ||
| void *data; | ||
|
|
@@ -339,6 +344,10 @@ void h2o_ssl_register_alpn_protocols(SSL_CTX *ctx, const h2o_iovec_t *protocols) | |
| * registers the protocol list to be used for NPN | ||
| */ | ||
| void h2o_ssl_register_npn_protocols(SSL_CTX *ctx, const char *protocols); | ||
| /** | ||
| * helper to check if socket is to be traced according to eBPF map | ||
| */ | ||
| int h2o_socket_is_traced(h2o_socket_t *sock); | ||
|
|
||
| void h2o_socket__write_pending(h2o_socket_t *sock); | ||
| void h2o_socket__write_on_complete(h2o_socket_t *sock, int status); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1470,3 +1470,116 @@ void h2o_sliding_counter_stop(h2o_sliding_counter_t *counter, uint64_t now) | |
| /* recalc average */ | ||
| counter->average = counter->prev.sum / (sizeof(counter->prev.slots) / sizeof(counter->prev.slots[0])); | ||
| } | ||
|
|
||
| #if H2O_USE_DTRACE && defined(__linux__) | ||
| #include <linux/bpf.h> | ||
| #include <linux/unistd.h> | ||
| #include "h2o-probes.h" | ||
| #include "include/h2o/ebpf.h" | ||
| #include <sys/stat.h> | ||
|
|
||
/* per-thread file descriptor of the pinned tracing map; -1 while the map is not open */
| static __thread int tracing_map_fd = -1; | ||
/* value of h2o_now() recorded at the last attempt to open the map; 0 until the first attempt */
| static __thread uint64_t tracing_map_last_attempt = 0; | ||
|
|
||
| static void open_tracing_map(h2o_socket_t *sock) | ||
| { | ||
| // only check every second | ||
| uint64_t now = h2o_now(h2o_socket_get_loop(sock)); | ||
| if (tracing_map_last_attempt - now < 1000) | ||
| return; | ||
|
|
||
| tracing_map_last_attempt = now; | ||
|
|
||
| // check if map exists at path | ||
| struct stat s; | ||
| if (stat(&H2O_EBPF_MAP_PATH[0], &s) == -1) { | ||
| // map path unavailable, cleanup fd if needed and leave | ||
| if (tracing_map_fd >= 0) { | ||
| close(tracing_map_fd); | ||
| tracing_map_fd = -1; | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| if (tracing_map_fd >= 0) | ||
| return; // map still exists and we have a fd | ||
|
|
||
| // map exists, try connect | ||
| union bpf_attr attr; | ||
| memset(&attr, 0, sizeof(attr)); | ||
| attr.pathname = (uint64_t)&H2O_EBPF_MAP_PATH[0]; | ||
| tracing_map_fd = syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr)); | ||
| } | ||
|
|
||
| static int lookup_map(const void *key, const void *value) | ||
| { | ||
| union bpf_attr attr; | ||
| memset(&attr, 0, sizeof(attr)); | ||
| attr.map_fd = tracing_map_fd; | ||
| attr.key = (uint64_t)key; | ||
| attr.value = (uint64_t)value; | ||
| return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) == -1 ? -1 : 1; // return 1 if found, -1 otherwise | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the returned value a boolean? Assuming that it is, I think using 1 (true) / 0 (false) would make the code consistent.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The syscall returns I changed this to a |
||
| } | ||
|
|
||
/**
 * Copies the address and port of `sa` into one endpoint of an eBPF map key.
 *
 * @param sa   socket address to read; only AF_INET and AF_INET6 are handled
 * @param ip   destination for the raw address bytes: 4 bytes are written for AF_INET, 16 for AF_INET6
 * @param port destination for the port, copied exactly as stored in the sockaddr (no byte-order conversion)
 *
 * For any other address family neither `ip` nor `port` is touched.
 */
static inline void set_ebpf_map_key_tuples(const struct sockaddr *sa, uint8_t *ip, uint16_t *port)
{
    switch (sa->sa_family) {
    case AF_INET: {
        const struct sockaddr_in *sin = (const void *)sa;
        memcpy(ip, &sin->sin_addr, sizeof(sin->sin_addr));
        *port = sin->sin_port;
    } break;
    case AF_INET6: {
        const struct sockaddr_in6 *sin6 = (const void *)sa;
        memcpy(ip, &sin6->sin6_addr, sizeof(sin6->sin6_addr));
        *port = sin6->sin6_port;
    } break;
    default:
        // unsupported family: leave the output untouched
        break;
    }
}
|
|
||
| static inline int init_ebpf_map_key(h2o_ebpf_map_key_t *key, h2o_socket_t *sock) | ||
| { | ||
| struct sockaddr_storage sockname, peername; | ||
| unsigned int sock_type, sock_type_len = sizeof(sock_type_len); | ||
| memset(key, 0, sizeof(*key)); | ||
|
|
||
| // fetch sock/peer name and socket type | ||
| if (h2o_socket_getsockname(sock, (void *)&sockname) == 0 || | ||
| h2o_socket_getpeername(sock, (void *)&peername) == 0 || | ||
| getsockopt(h2o_socket_get_fd(sock), SOL_SOCKET, SO_TYPE, &sock_type, &sock_type_len) == -1) | ||
| return 0; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we might call Assuming that that could happen, I think we might call
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand here - is the idea to early exit if we can't determine the socket type ?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FTR, the problem was that we were failing to setup the eBPF key when the socket was unix socket. Fixed in 262f9c9. |
||
|
|
||
| set_ebpf_map_key_tuples((void *)&sockname, &key->source.ip[0], &key->source.port); | ||
| set_ebpf_map_key_tuples((void *)&peername, &key->destination.ip[0], &key->destination.port); | ||
| key->family = sockname.ss_family == AF_INET6 ? 6 : 4; | ||
| key->protocol = sock_type; | ||
| return 1; | ||
| } | ||
|
|
||
| int h2o_socket_is_traced(h2o_socket_t *sock) | ||
| { | ||
| if (sock == NULL) | ||
| return 0; | ||
|
|
||
| if (sock->_is_traced != 0) | ||
| return sock->_is_traced; | ||
|
|
||
| // try open map if not opened | ||
| open_tracing_map(sock); | ||
| if (tracing_map_fd <= 0) | ||
| return 1; // map is not connected, fallback accepting probe | ||
|
|
||
| // define key/vals - we are only interrested in presence of the key, discard values | ||
| h2o_ebpf_map_key_t key; | ||
| void *vals = NULL; | ||
|
|
||
| // init key - fallback refusing probe if key can't be initialized | ||
| if (init_ebpf_map_key(&key, sock) == 0) | ||
| return 0; | ||
|
|
||
| // lookup map for our key | ||
| return sock->_is_traced = lookup_map(&key, &vals); | ||
| } | ||
| #else | ||
/* Fallback compiled when the `#if H2O_USE_DTRACE && defined(__linux__)` branch above is not: `sock` is ignored and 1 is always returned. */
| int h2o_socket_is_traced(h2o_socket_t *sock) | ||
| { | ||
| return 1; | ||
| } | ||
| #endif | ||
Uh oh!
There was an error while loading. Please reload this page.