Skip to content

Commit eccc1bb

Browse files
stephen hemminger authored and davem330 committed
tunnel: drop packet if ECN present with not-ECT
Linux tunnels were written before RFC6040 and therefore never implemented the corner case of ECN getting set in the outer header and the inner header not being ready for it.

Section 4.2. Default Tunnel Egress Behaviour.

o If the inner ECN field is Not-ECT, the decapsulator MUST NOT propagate any other ECN codepoint onwards. This is because the inner Not-ECT marking is set by transports that rely on dropped packets as an indication of congestion and would not understand or respond to any other ECN codepoint [RFC4774]. Specifically:

  * If the inner ECN field is Not-ECT and the outer ECN field is CE, the decapsulator MUST drop the packet.

  * If the inner ECN field is Not-ECT and the outer ECN field is Not-ECT, ECT(0), or ECT(1), the decapsulator MUST forward the outgoing packet with the ECN field cleared to Not-ECT.

This patch moves the ECN decap logic out of the individual tunnels into a common place. It also adds logging to allow detecting broken systems that set ECN bits incorrectly when tunneling (or an intermediate router might be changing the header). It overloads the rx_frame_errors counter to keep track of ECN-related errors.

Thanks to Chris Wright who caught this while reviewing the new VXLAN tunnel.

This code was tested by injecting faulty logic into the GRE implementation at the other end so that it sent incorrectly encapsulated packets.

Signed-off-by: Stephen Hemminger <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent b0558ef commit eccc1bb

4 files changed

Lines changed: 147 additions & 63 deletions

File tree

include/net/inet_ecn.h

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ enum {
1515
INET_ECN_MASK = 3,
1616
};
1717

18+
extern int sysctl_tunnel_ecn_log;
19+
1820
static inline int INET_ECN_is_ce(__u8 dsfield)
1921
{
2022
return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
@@ -145,4 +147,78 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
145147
return 0;
146148
}
147149

150+
/*
151+
* RFC 6040 4.2
152+
* To decapsulate the inner header at the tunnel egress, a compliant
153+
* tunnel egress MUST set the outgoing ECN field to the codepoint at the
154+
* intersection of the appropriate arriving inner header (row) and outer
155+
* header (column) in Figure 4
156+
*
157+
* +---------+------------------------------------------------+
158+
* |Arriving | Arriving Outer Header |
159+
* | Inner +---------+------------+------------+------------+
160+
* | Header | Not-ECT | ECT(0) | ECT(1) | CE |
161+
* +---------+---------+------------+------------+------------+
162+
* | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
163+
* | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE |
164+
* | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE |
165+
* | CE | CE | CE | CE(!!!)| CE |
166+
* +---------+---------+------------+------------+------------+
167+
*
168+
* Figure 4: New IP in IP Decapsulation Behaviour
169+
*
170+
* returns 0 on success
171+
* 1 if something is broken and should be logged (!!! above)
172+
* 2 if packet should be dropped
173+
*/
174+
/*
 * INET_ECN_decapsulate - reconcile outer and inner ECN fields at tunnel egress
 * @skb:   packet whose inner header may be marked CE
 * @outer: DS field (TOS / traffic class) taken from the outer header
 * @inner: DS field taken from the inner header
 *
 * Returns 0 when the packet may be forwarded, 1 when it may be forwarded
 * but the combination is suspicious and worth logging, and 2 when the
 * packet must be dropped.
 *
 * Only the CE codepoint is ever copied inwards.  An outer ECT(1) is not
 * written onto an inner ECT(0), and an inner CE paired with an outer
 * ECT(1) is accepted silently (returns 0).
 */
static inline int INET_ECN_decapsulate(struct sk_buff *skb,
				       __u8 outer, __u8 inner)
{
	if (INET_ECN_is_not_ect(inner)) {
		const __u8 outer_ecn = outer & INET_ECN_MASK;

		/* Inner transport is not ECN-capable: never mark it. */
		if (outer_ecn == INET_ECN_NOT_ECT)
			return 0;

		/* Outer claims ECT although inner does not: log only. */
		if (outer_ecn == INET_ECN_ECT_0 || outer_ecn == INET_ECN_ECT_1)
			return 1;

		/* Congestion was signalled but cannot be delivered: drop. */
		if (outer_ecn == INET_ECN_CE)
			return 2;
	}

	/* Inner header is ECN-capable: carry a CE mark across. */
	if (INET_ECN_is_ce(outer))
		INET_ECN_set_ce(skb);

	return 0;
}
194+
195+
static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
196+
struct sk_buff *skb)
197+
{
198+
__u8 inner;
199+
200+
if (skb->protocol == htons(ETH_P_IP))
201+
inner = ip_hdr(skb)->tos;
202+
else if (skb->protocol == htons(ETH_P_IPV6))
203+
inner = ipv6_get_dsfield(ipv6_hdr(skb));
204+
else
205+
return 0;
206+
207+
return INET_ECN_decapsulate(skb, oiph->tos, inner);
208+
}
209+
210+
static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
211+
struct sk_buff *skb)
212+
{
213+
__u8 inner;
214+
215+
if (skb->protocol == htons(ETH_P_IP))
216+
inner = ip_hdr(skb)->tos;
217+
else if (skb->protocol == htons(ETH_P_IPV6))
218+
inner = ipv6_get_dsfield(ipv6_hdr(skb));
219+
else
220+
return 0;
221+
222+
return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
223+
}
148224
#endif

net/ipv4/ip_gre.c

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@
120120
Alexey Kuznetsov.
121121
*/
122122

123+
static bool log_ecn_error = true;
124+
module_param(log_ecn_error, bool, 0644);
125+
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126+
123127
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
124128
static int ipgre_tunnel_init(struct net_device *dev);
125129
static void ipgre_tunnel_setup(struct net_device *dev);
@@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
204208
tot->rx_crc_errors = dev->stats.rx_crc_errors;
205209
tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
206210
tot->rx_length_errors = dev->stats.rx_length_errors;
211+
tot->rx_frame_errors = dev->stats.rx_frame_errors;
207212
tot->rx_errors = dev->stats.rx_errors;
213+
208214
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
209215
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
210216
tot->tx_dropped = dev->stats.tx_dropped;
@@ -587,17 +593,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
587593
t->err_time = jiffies;
588594
}
589595

590-
static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
591-
{
592-
if (INET_ECN_is_ce(iph->tos)) {
593-
if (skb->protocol == htons(ETH_P_IP)) {
594-
IP_ECN_set_ce(ip_hdr(skb));
595-
} else if (skb->protocol == htons(ETH_P_IPV6)) {
596-
IP6_ECN_set_ce(ipv6_hdr(skb));
597-
}
598-
}
599-
}
600-
601596
static inline u8
602597
ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
603598
{
@@ -620,6 +615,7 @@ static int ipgre_rcv(struct sk_buff *skb)
620615
struct ip_tunnel *tunnel;
621616
int offset = 4;
622617
__be16 gre_proto;
618+
int err;
623619

624620
if (!pskb_may_pull(skb, 16))
625621
goto drop;
@@ -723,17 +719,27 @@ static int ipgre_rcv(struct sk_buff *skb)
723719
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
724720
}
725721

722+
__skb_tunnel_rx(skb, tunnel->dev);
723+
724+
skb_reset_network_header(skb);
725+
err = IP_ECN_decapsulate(iph, skb);
726+
if (unlikely(err)) {
727+
if (log_ecn_error)
728+
net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
729+
&iph->saddr, iph->tos);
730+
if (err > 1) {
731+
++tunnel->dev->stats.rx_frame_errors;
732+
++tunnel->dev->stats.rx_errors;
733+
goto drop;
734+
}
735+
}
736+
726737
tstats = this_cpu_ptr(tunnel->dev->tstats);
727738
u64_stats_update_begin(&tstats->syncp);
728739
tstats->rx_packets++;
729740
tstats->rx_bytes += skb->len;
730741
u64_stats_update_end(&tstats->syncp);
731742

732-
__skb_tunnel_rx(skb, tunnel->dev);
733-
734-
skb_reset_network_header(skb);
735-
ipgre_ecn_decapsulate(iph, skb);
736-
737743
netif_rx(skb);
738744

739745
return 0;

net/ipv4/ipip.c

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@
120120
#define HASH_SIZE 16
121121
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
122122

123+
static bool log_ecn_error = true;
124+
module_param(log_ecn_error, bool, 0644);
125+
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126+
123127
static int ipip_net_id __read_mostly;
124128
struct ipip_net {
125129
struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
@@ -400,28 +404,18 @@ static int ipip_err(struct sk_buff *skb, u32 info)
400404
return err;
401405
}
402406

403-
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
404-
struct sk_buff *skb)
405-
{
406-
struct iphdr *inner_iph = ip_hdr(skb);
407-
408-
if (INET_ECN_is_ce(outer_iph->tos))
409-
IP_ECN_set_ce(inner_iph);
410-
}
411-
412407
static int ipip_rcv(struct sk_buff *skb)
413408
{
414409
struct ip_tunnel *tunnel;
415410
const struct iphdr *iph = ip_hdr(skb);
411+
int err;
416412

417413
tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418414
if (tunnel != NULL) {
419415
struct pcpu_tstats *tstats;
420416

421-
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
422-
kfree_skb(skb);
423-
return 0;
424-
}
417+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
418+
goto drop;
425419

426420
secpath_reset(skb);
427421

@@ -430,21 +424,35 @@ static int ipip_rcv(struct sk_buff *skb)
430424
skb->protocol = htons(ETH_P_IP);
431425
skb->pkt_type = PACKET_HOST;
432426

427+
__skb_tunnel_rx(skb, tunnel->dev);
428+
429+
err = IP_ECN_decapsulate(iph, skb);
430+
if (unlikely(err)) {
431+
if (log_ecn_error)
432+
net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
433+
&iph->saddr, iph->tos);
434+
if (err > 1) {
435+
++tunnel->dev->stats.rx_frame_errors;
436+
++tunnel->dev->stats.rx_errors;
437+
goto drop;
438+
}
439+
}
440+
433441
tstats = this_cpu_ptr(tunnel->dev->tstats);
434442
u64_stats_update_begin(&tstats->syncp);
435443
tstats->rx_packets++;
436444
tstats->rx_bytes += skb->len;
437445
u64_stats_update_end(&tstats->syncp);
438446

439-
__skb_tunnel_rx(skb, tunnel->dev);
440-
441-
ipip_ecn_decapsulate(iph, skb);
442-
443447
netif_rx(skb);
444448
return 0;
445449
}
446450

447451
return -1;
452+
453+
drop:
454+
kfree_skb(skb);
455+
return 0;
448456
}
449457

450458
/*

net/ipv6/ip6_gre.c

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@
5656
#include <net/ip6_tunnel.h>
5757

5858

59+
static bool log_ecn_error = true;
60+
module_param(log_ecn_error, bool, 0644);
61+
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
62+
5963
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
6064
#define IPV6_TCLASS_SHIFT 20
6165

@@ -149,7 +153,9 @@ static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
149153
tot->rx_crc_errors = dev->stats.rx_crc_errors;
150154
tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
151155
tot->rx_length_errors = dev->stats.rx_length_errors;
156+
tot->rx_frame_errors = dev->stats.rx_frame_errors;
152157
tot->rx_errors = dev->stats.rx_errors;
158+
153159
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
154160
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
155161
tot->tx_dropped = dev->stats.tx_dropped;
@@ -489,28 +495,6 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
489495
t->err_time = jiffies;
490496
}
491497

492-
static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t,
493-
const struct ipv6hdr *ipv6h, struct sk_buff *skb)
494-
{
495-
__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
496-
497-
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
498-
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
499-
500-
if (INET_ECN_is_ce(dsfield))
501-
IP_ECN_set_ce(ip_hdr(skb));
502-
}
503-
504-
static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t,
505-
const struct ipv6hdr *ipv6h, struct sk_buff *skb)
506-
{
507-
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
508-
ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
509-
510-
if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
511-
IP6_ECN_set_ce(ipv6_hdr(skb));
512-
}
513-
514498
static int ip6gre_rcv(struct sk_buff *skb)
515499
{
516500
const struct ipv6hdr *ipv6h;
@@ -522,6 +506,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
522506
struct ip6_tnl *tunnel;
523507
int offset = 4;
524508
__be16 gre_proto;
509+
int err;
525510

526511
if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
527512
goto drop;
@@ -625,20 +610,29 @@ static int ip6gre_rcv(struct sk_buff *skb)
625610
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
626611
}
627612

613+
__skb_tunnel_rx(skb, tunnel->dev);
614+
615+
skb_reset_network_header(skb);
616+
617+
err = IP6_ECN_decapsulate(ipv6h, skb);
618+
if (unlikely(err)) {
619+
if (log_ecn_error)
620+
net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
621+
&ipv6h->saddr,
622+
ipv6_get_dsfield(ipv6h));
623+
if (err > 1) {
624+
++tunnel->dev->stats.rx_frame_errors;
625+
++tunnel->dev->stats.rx_errors;
626+
goto drop;
627+
}
628+
}
629+
628630
tstats = this_cpu_ptr(tunnel->dev->tstats);
629631
u64_stats_update_begin(&tstats->syncp);
630632
tstats->rx_packets++;
631633
tstats->rx_bytes += skb->len;
632634
u64_stats_update_end(&tstats->syncp);
633635

634-
__skb_tunnel_rx(skb, tunnel->dev);
635-
636-
skb_reset_network_header(skb);
637-
if (skb->protocol == htons(ETH_P_IP))
638-
ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb);
639-
else if (skb->protocol == htons(ETH_P_IPV6))
640-
ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb);
641-
642636
netif_rx(skb);
643637

644638
return 0;

0 commit comments

Comments
 (0)