Add SA-19:08 and EN-19:11.

Approved by:	so
This commit is contained in:
Gordon Tetlow 2019-06-19 16:54:06 +00:00
parent 0050b1446c
commit dcab058f7e
Notes: svn2git 2020-12-08 03:00:23 +00:00
svn path=/head/; revision=53171
8 changed files with 641 additions and 0 deletions

View file

@ -0,0 +1,114 @@
--- sys/net/if.c.orig
+++ sys/net/if.c
@@ -62,6 +62,8 @@
#include <sys/domain.h>
#include <sys/jail.h>
#include <sys/priv.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
#include <machine/stdarg.h>
#include <vm/uma.h>
@@ -1755,6 +1757,30 @@
ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
}
+struct ifnet_read_lock {
+ struct mtx mtx; /* lock protecting tracker below */
+ struct epoch_tracker et;
+};
+
+DPCPU_DEFINE_STATIC(struct ifnet_read_lock, ifnet_addr_read_lock);
+DPCPU_DEFINE_STATIC(struct ifnet_read_lock, ifnet_maddr_read_lock);
+
+static void
+ifnet_read_lock_init(void __unused *arg)
+{
+ struct ifnet_read_lock *pifrl;
+ int cpu;
+
+ CPU_FOREACH(cpu) {
+ pifrl = DPCPU_ID_PTR(cpu, ifnet_addr_read_lock);
+ mtx_init(&pifrl->mtx, "ifnet_addr_read_lock", NULL, MTX_DEF);
+
+ pifrl = DPCPU_ID_PTR(cpu, ifnet_maddr_read_lock);
+ mtx_init(&pifrl->mtx, "ifnet_maddr_read_lock", NULL, MTX_DEF);
+ }
+}
+SYSINIT(ifnet_read_lock_init, SI_SUB_CPU + 1, SI_ORDER_FIRST, &ifnet_read_lock_init, NULL);
+
/*
* Wrapper functions for struct ifnet address list locking macros. These are
* used by kernel modules to avoid encoding programming interface or binary
@@ -1764,35 +1790,47 @@
void
if_addr_rlock(struct ifnet *ifp)
{
- MPASS(*(uint64_t *)&ifp->if_addr_et == 0);
- epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et);
+ struct ifnet_read_lock *pifrl;
+
+ sched_pin();
+ pifrl = DPCPU_PTR(ifnet_addr_read_lock);
+ mtx_lock(&pifrl->mtx);
+ epoch_enter_preempt(net_epoch_preempt, &pifrl->et);
}
void
if_addr_runlock(struct ifnet *ifp)
{
- epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et);
-#ifdef INVARIANTS
- bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker));
-#endif
+ struct ifnet_read_lock *pifrl;
+
+ pifrl = DPCPU_PTR(ifnet_addr_read_lock);
+
+ epoch_exit_preempt(net_epoch_preempt, &pifrl->et);
+ mtx_unlock(&pifrl->mtx);
+ sched_unpin();
}
void
if_maddr_rlock(if_t ifp)
{
+ struct ifnet_read_lock *pifrl;
- MPASS(*(uint64_t *)&ifp->if_maddr_et == 0);
- epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et);
+ sched_pin();
+ pifrl = DPCPU_PTR(ifnet_maddr_read_lock);
+ mtx_lock(&pifrl->mtx);
+ epoch_enter_preempt(net_epoch_preempt, &pifrl->et);
}
void
if_maddr_runlock(if_t ifp)
{
+ struct ifnet_read_lock *pifrl;
- epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et);
-#ifdef INVARIANTS
- bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker));
-#endif
+ pifrl = DPCPU_PTR(ifnet_maddr_read_lock);
+
+ epoch_exit_preempt(net_epoch_preempt, &pifrl->et);
+ mtx_unlock(&pifrl->mtx);
+ sched_unpin();
}
/*
--- sys/net/if_var.h.orig
+++ sys/net/if_var.h
@@ -381,8 +381,7 @@
*/
struct netdump_methods *if_netdump_methods;
struct epoch_context if_epoch_ctx;
- struct epoch_tracker if_addr_et;
- struct epoch_tracker if_maddr_et;
+ void *if_unused[4];
/*
* Spare fields to be added before branching a stable branch, so

View file

@ -0,0 +1,18 @@
-----BEGIN PGP SIGNATURE-----
iQKTBAABCgB9FiEE/A6HiuWv54gCjWNV05eS9J6n5cIFAl0KZ0lfFIAAAAAALgAo
aXNzdWVyLWZwckBub3RhdGlvbnMub3BlbnBncC5maWZ0aGhvcnNlbWFuLm5ldEZD
MEU4NzhBRTVBRkU3ODgwMjhENjM1NUQzOTc5MkY0OUVBN0U1QzIACgkQ05eS9J6n
5cK+MQ//UXhOeoBsuv5BC6tRlXO3685gNeVBrv3AUW4P11eDNoRRKJ5zzUx4NoIs
PdGLuhJzqPHx3rBEWldhORfdNGl7207CS9LHMmf/zGLnx5h0Sveuef70QIzjBWT/
GjIRQ/wkbsWRXH9CgLw/OgnBRvtO2EYL2+evsxpir471ehF+5/zQ2a/5jczhDYnR
v0wX9AV5gINm3RSwWBTX7vNaQfCvR1pfD4lZUu/o8fYEP8YQeCZUplf2BE1APoNc
zmKqn21aGXWLhP1+lGR0yBNRGYEZVvNLf3URhfJOQqMWf3LXIsR6XOGbYPZcUg22
EY3oKYtLzUZINPW/hDEzKKw8mx+KXwN7fIe4r/m7IY5093QdQLKRanl8AwWhEcuE
aDxe6lv4Kg9staT5Jmy4z06dl/DOGlCvi/k1Wmiuk6svxS2BQ6SWJpoGbZDgUeLO
0mYnWRrSLr/rfy7YfYUW4UAY7I2GoGzXnWSXq54BiSQd4saB+1NYvSV+GzRmdpgU
OtD3o59rjleWtS/FboqWrL7ViVbzvJRjoKGHFPh/olc/OW0vwTleFo0xG7iXOJJK
kfAw1KVC79PF7PFec1pDEEmhSlkaBSto+QNKE2cKC7pBbSAysHHnZDwMRcZFWm48
3zmq/jjwYNnjfmWBhRIMgqPjZdGzOiv2+KN8X53TWwiy4iio4GQ=
=kUNo
-----END PGP SIGNATURE-----

View file

@ -0,0 +1,190 @@
--- sys/netinet/tcp_stacks/rack.c.orig
+++ sys/netinet/tcp_stacks/rack.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2016-2018
+ * Copyright (c) 2016-2019
* Netflix Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -203,6 +203,7 @@
static int32_t rack_sack_block_limit = 128;
static int32_t rack_use_sack_filter = 1;
static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
+static uint32_t rack_map_split_limit = 0; /* unlimited by default */
/* Rack specific counters */
counter_u64_t rack_badfr;
@@ -228,6 +229,8 @@
counter_u64_t rack_to_alloc;
counter_u64_t rack_to_alloc_hard;
counter_u64_t rack_to_alloc_emerg;
+counter_u64_t rack_alloc_limited_conns;
+counter_u64_t rack_split_limited;
counter_u64_t rack_sack_proc_all;
counter_u64_t rack_sack_proc_short;
@@ -261,6 +264,8 @@
rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack,
struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery);
static struct rack_sendmap *rack_alloc(struct tcp_rack *rack);
+static struct rack_sendmap *rack_alloc_limit(struct tcp_rack *rack,
+ uint8_t limit_type);
static struct rack_sendmap *
rack_check_recovery_mode(struct tcpcb *tp,
uint32_t tsused);
@@ -445,6 +450,8 @@
counter_u64_zero(rack_sack_proc_short);
counter_u64_zero(rack_sack_proc_restart);
counter_u64_zero(rack_to_alloc);
+ counter_u64_zero(rack_alloc_limited_conns);
+ counter_u64_zero(rack_split_limited);
counter_u64_zero(rack_find_high);
counter_u64_zero(rack_runt_sacks);
counter_u64_zero(rack_used_tlpmethod);
@@ -622,6 +629,11 @@
OID_AUTO, "pktdelay", CTLFLAG_RW,
&rack_pkt_delay, 1,
"Extra RACK time (in ms) besides reordering thresh");
+ SYSCTL_ADD_U32(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_sysctl_root),
+ OID_AUTO, "split_limit", CTLFLAG_RW,
+ &rack_map_split_limit, 0,
+ "Is there a limit on the number of map split entries (0=unlimited)");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
OID_AUTO, "inc_var", CTLFLAG_RW,
@@ -757,7 +769,19 @@
SYSCTL_CHILDREN(rack_sysctl_root),
OID_AUTO, "allocemerg", CTLFLAG_RD,
&rack_to_alloc_emerg,
- "Total alocations done from emergency cache");
+ "Total allocations done from emergency cache");
+ rack_alloc_limited_conns = counter_u64_alloc(M_WAITOK);
+ SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_sysctl_root),
+ OID_AUTO, "alloc_limited_conns", CTLFLAG_RD,
+ &rack_alloc_limited_conns,
+ "Connections with allocations dropped due to limit");
+ rack_split_limited = counter_u64_alloc(M_WAITOK);
+ SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
+ SYSCTL_CHILDREN(rack_sysctl_root),
+ OID_AUTO, "split_limited", CTLFLAG_RD,
+ &rack_split_limited,
+ "Split allocations dropped due to limit");
rack_sack_proc_all = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
@@ -1121,10 +1145,11 @@
{
struct rack_sendmap *rsm;
- counter_u64_add(rack_to_alloc, 1);
- rack->r_ctl.rc_num_maps_alloced++;
rsm = uma_zalloc(rack_zone, M_NOWAIT);
if (rsm) {
+alloc_done:
+ counter_u64_add(rack_to_alloc, 1);
+ rack->r_ctl.rc_num_maps_alloced++;
return (rsm);
}
if (rack->rc_free_cnt) {
@@ -1132,14 +1157,46 @@
rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_next);
rack->rc_free_cnt--;
- return (rsm);
+ goto alloc_done;
}
return (NULL);
}
+/* wrapper to allocate a sendmap entry, subject to a specific limit */
+static struct rack_sendmap *
+rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type)
+{
+ struct rack_sendmap *rsm;
+
+ if (limit_type) {
+ /* currently there is only one limit type */
+ if (rack_map_split_limit > 0 &&
+ rack->r_ctl.rc_num_split_allocs >= rack_map_split_limit) {
+ counter_u64_add(rack_split_limited, 1);
+ if (!rack->alloc_limit_reported) {
+ rack->alloc_limit_reported = 1;
+ counter_u64_add(rack_alloc_limited_conns, 1);
+ }
+ return (NULL);
+ }
+ }
+
+ /* allocate and mark in the limit type, if set */
+ rsm = rack_alloc(rack);
+ if (rsm != NULL && limit_type) {
+ rsm->r_limit_type = limit_type;
+ rack->r_ctl.rc_num_split_allocs++;
+ }
+ return (rsm);
+}
+
static void
rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm)
{
+ if (rsm->r_limit_type) {
+ /* currently there is only one limit type */
+ rack->r_ctl.rc_num_split_allocs--;
+ }
rack->r_ctl.rc_num_maps_alloced--;
if (rack->r_ctl.rc_tlpsend == rsm)
rack->r_ctl.rc_tlpsend = NULL;
@@ -3955,7 +4012,7 @@
/*
* Need to split this in two pieces the before and after.
*/
- nrsm = rack_alloc(rack);
+ nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT);
if (nrsm == NULL) {
/*
* failed XXXrrs what can we do but loose the sack
@@ -4016,7 +4073,7 @@
goto do_rest_ofb;
}
/* Ok we need to split off this one at the tail */
- nrsm = rack_alloc(rack);
+ nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT);
if (nrsm == NULL) {
/* failed rrs what can we do but loose the sack info? */
goto out;
--- sys/netinet/tcp_stacks/tcp_rack.h.orig
+++ sys/netinet/tcp_stacks/tcp_rack.h
@@ -55,8 +55,10 @@
uint8_t r_sndcnt; /* Retran count, not limited by
* RACK_NUM_OF_RETRANS */
uint8_t r_in_tmap; /* Flag to see if its in the r_tnext array */
- uint8_t r_resv[3];
+ uint8_t r_limit_type; /* is this entry counted against a limit? */
+ uint8_t r_resv[2];
};
+#define RACK_LIMIT_TYPE_SPLIT 1
TAILQ_HEAD(rack_head, rack_sendmap);
@@ -242,7 +244,7 @@
uint32_t rc_num_maps_alloced; /* Number of map blocks (sacks) we
* have allocated */
uint32_t rc_rcvtime; /* When we last received data */
- uint32_t rc_notused;
+ uint32_t rc_num_split_allocs; /* num split map entries allocated */
uint32_t rc_last_output_to;
uint32_t rc_went_idle_time;
@@ -311,7 +313,8 @@
uint8_t rack_tlp_threshold_use;
uint8_t rc_allow_data_af_clo: 1,
delayed_ack : 1,
- rc_avail : 6;
+ alloc_limit_reported : 1,
+ rc_avail : 5;
uint8_t r_resv[2]; /* Fill to cache line boundary */
/* Cache line 2 0x40 */
struct rack_control r_ctl;

View file

@ -0,0 +1,18 @@
-----BEGIN PGP SIGNATURE-----
iQKTBAABCgB9FiEE/A6HiuWv54gCjWNV05eS9J6n5cIFAl0KZ0ZfFIAAAAAALgAo
aXNzdWVyLWZwckBub3RhdGlvbnMub3BlbnBncC5maWZ0aGhvcnNlbWFuLm5ldEZD
MEU4NzhBRTVBRkU3ODgwMjhENjM1NUQzOTc5MkY0OUVBN0U1QzIACgkQ05eS9J6n
5cJOQg/+Jd8CDSaVJ+s6mB6ZWEfwPlLOn2t8eRr0Wm1+JgcWvLZyXfDKkyBmO998
SAV8eIKveF+hvA9CRy8/ZHU+NLLERqS6PdzTtFhITMbS1Jnn7foPNzr3B45hZMmC
g08fMvQB8gbOMrBJc0KZWgQywyMmNcr9Mudo6rj+D75tYTSnimxevOny7cSfixL/
MtASHue0cU3OcPC/Z9tDptDnsFNKpXIrK4iHKN6jO5lrn+kZnWVHAPHlB2fxC9ny
nuwfoXxABdYAhpG5Bh9IV5wfd9TEyg4WWUtR/t2LvxDRJaovlz6IT0buI4j/Ulqs
UlXQ8FHBt36b8TGzx1pZYUAYK4dZlil6UTGERs7Bxoi8+OR7kaYHCCmAq4ql0d5/
8gPAJqb/wbsM48jCV9nvl0j8QuDrLObmEVWgXON9ZxpXwzL3RdyuI58rklIOTXoh
5Du1rkBL3CD1gXUynroTWLjCBabT4nLT97wd1xbg9OyxRclW/N1/v+PALARG4o6A
zG6YlSpTqZp/bdiAweEqTiuTCGdSJMkbJOox1jZD6MK570vojoqS2xhlWZzGPEk2
cKlpiTZowIEVQEeWvOj3doLD9bfkShWpnjYLUnh0dAY+l9cD27JlJwHqoumMZMv4
CHZ9CO5crPhi0TKBP+uHaLpk6QRCHETH9mZ7n5OLtjVbncFBmsk=
=5bec
-----END PGP SIGNATURE-----