1315 lines
38 KiB
Diff
1315 lines
38 KiB
Diff
--- share/man/man4/inet.4.orig
|
|
+++ share/man/man4/inet.4
|
|
@@ -28,7 +28,7 @@
|
|
.\" From: @(#)inet.4 8.1 (Berkeley) 6/5/93
|
|
.\" $FreeBSD$
|
|
.\"
|
|
-.Dd Feb 4, 2016
|
|
+.Dd August 14, 2018
|
|
.Dt INET 4
|
|
.Os
|
|
.Sh NAME
|
|
@@ -229,15 +229,38 @@
|
|
cycle greatly.
|
|
Default is 0 (sequential IP IDs).
|
|
IPv6 flow IDs and fragment IDs are always random.
|
|
+.It Va ip.maxfrags
|
|
+Integer: maximum number of fragments the host will accept and simultaneously
|
|
+hold across all reassembly queues in all VNETs.
|
|
+If set to 0, reassembly is disabled.
|
|
+If set to \-1, this limit is not applied.
|
|
+This limit is recalculated when the number of mbuf clusters is changed.
|
|
+This is a global limit.
|
|
.It Va ip.maxfragpackets
|
|
-Integer: maximum number of fragmented packets the host will accept and hold
|
|
-in the reassembling queue simultaneously.
|
|
-0 means that the host will not accept any fragmented packets.
|
|
-\-1 means that the host will accept as many fragmented packets as it receives.
|
|
+Integer: maximum number of fragmented packets the host will accept and
|
|
+simultaneously hold in the reassembly queue for a particular VNET.
|
|
+0 means that the host will not accept any fragmented packets for that VNET.
|
|
+\-1 means that the host will not apply this limit for that VNET.
|
|
+This limit is recalculated when the number of mbuf clusters is changed.
|
|
+This is a per-VNET limit.
|
|
+.It Va ip.maxfragbucketsize
|
|
+Integer: maximum number of reassembly queues per bucket.
|
|
+Fragmented packets are hashed to buckets.
|
|
+Each bucket has a list of reassembly queues.
|
|
+The system must compare the incoming packets to the existing reassembly queues
|
|
+in the bucket to find a matching reassembly queue.
|
|
+To preserve system resources, the system limits the number of reassembly
|
|
+queues allowed in each bucket.
|
|
+This limit is recalculated when the number of mbuf clusters is changed or
|
|
+when the value of
|
|
+.Va ip.maxfragpackets
|
|
+changes.
|
|
+This is a per-VNET limit.
|
|
.It Va ip.maxfragsperpacket
|
|
Integer: maximum number of fragments the host will accept and hold
|
|
-in the reassembling queue for a packet.
|
|
-0 means that the host will not accept any fragmented packets.
|
|
+in the reassembly queue for a packet.
|
|
+0 means that the host will not accept any fragmented packets for the VNET.
|
|
+This is a per-VNET limit.
|
|
.El
|
|
.Sh SEE ALSO
|
|
.Xr ioctl 2 ,
|
|
--- share/man/man4/inet6.4.orig
|
|
+++ share/man/man4/inet6.4
|
|
@@ -29,7 +29,7 @@
|
|
.\"
|
|
.\" $FreeBSD$
|
|
.\"
|
|
-.Dd September 2, 2009
|
|
+.Dd August 14, 2018
|
|
.Dt INET6 4
|
|
.Os
|
|
.Sh NAME
|
|
@@ -219,12 +219,41 @@
|
|
This value applies to all the transport protocols on top of
|
|
.Tn IPv6 .
|
|
There are APIs to override the value.
|
|
+.It Dv IPV6CTL_MAXFRAGS
|
|
+.Pq ip6.maxfrags
|
|
+Integer: maximum number of fragments the host will accept and simultaneously
|
|
+hold across all reassembly queues in all VNETs.
|
|
+If set to 0, fragment reassembly is disabled.
|
|
+If set to -1, this limit is not applied.
|
|
+This limit is recalculated when the number of mbuf clusters is changed.
|
|
+This is a global limit.
|
|
.It Dv IPV6CTL_MAXFRAGPACKETS
|
|
.Pq ip6.maxfragpackets
|
|
-Integer: default maximum number of fragmented packets the node will accept.
|
|
-0 means that the node will not accept any fragmented packets.
|
|
--1 means that the node will accept as many fragmented packets as it receives.
|
|
-The flag is provided basically for avoiding possible DoS attacks.
|
|
+Integer: maximum number of fragmented packets the node will accept and
|
|
+simultaneously hold in the reassembly queue for a particular VNET.
|
|
+0 means that the node will not accept any fragmented packets for that VNET.
|
|
+-1 means that the node will not apply this limit for that VNET.
|
|
+This limit is recalculated when the number of mbuf clusters is changed.
|
|
+This is a per-VNET limit.
|
|
+.It Dv IPV6CTL_MAXFRAGBUCKETSIZE
|
|
+.Pq ip6.maxfragbucketsize
|
|
+Integer: maximum number of reassembly queues per bucket.
|
|
+Fragmented packets are hashed to buckets.
|
|
+Each bucket has a list of reassembly queues.
|
|
+The system must compare the incoming packets to the existing reassembly queues
|
|
+in the bucket to find a matching reassembly queue.
|
|
+To preserve system resources, the system limits the number of reassembly
|
|
+queues allowed in each bucket.
|
|
+This limit is recalculated when the number of mbuf clusters is changed or
|
|
+when the value of
|
|
+.Va ip6.maxfragpackets
|
|
+changes.
|
|
+This is a per-VNET limit.
|
|
+.It Dv IPV6CTL_MAXFRAGSPERPACKET
|
|
+.Pq ip6.maxfragsperpacket
|
|
+Integer: maximum number of fragments the host will accept and hold in the
|
|
+reassembly queue for a packet.
|
|
+This is a per-VNET limit.
|
|
.It Dv IPV6CTL_ACCEPT_RTADV
|
|
.Pq ip6.accept_rtadv
|
|
Boolean: the default value of a per-interface flag to
|
|
--- sys/netinet/ip_reass.c.orig
|
|
+++ sys/netinet/ip_reass.c
|
|
@@ -42,6 +42,7 @@
|
|
#include <sys/hash.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/malloc.h>
|
|
+#include <sys/limits.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/sysctl.h>
|
|
@@ -63,13 +64,14 @@
|
|
/*
|
|
* Reassembly headers are stored in hash buckets.
|
|
*/
|
|
-#define IPREASS_NHASH_LOG2 6
|
|
+#define IPREASS_NHASH_LOG2 10
|
|
#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
|
|
#define IPREASS_HMASK (IPREASS_NHASH - 1)
|
|
|
|
struct ipqbucket {
|
|
TAILQ_HEAD(ipqhead, ipq) head;
|
|
struct mtx lock;
|
|
+ int count;
|
|
};
|
|
|
|
static VNET_DEFINE(struct ipqbucket, ipq[IPREASS_NHASH]);
|
|
@@ -82,6 +84,9 @@
|
|
#define IPQ_UNLOCK(i) mtx_unlock(&V_ipq[i].lock)
|
|
#define IPQ_LOCK_ASSERT(i) mtx_assert(&V_ipq[i].lock, MA_OWNED)
|
|
|
|
+static VNET_DEFINE(int, ipreass_maxbucketsize);
|
|
+#define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize)
|
|
+
|
|
void ipreass_init(void);
|
|
void ipreass_drain(void);
|
|
void ipreass_slowtimo(void);
|
|
@@ -89,27 +94,53 @@
|
|
void ipreass_destroy(void);
|
|
#endif
|
|
static int sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS);
|
|
+static int sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS);
|
|
static void ipreass_zone_change(void *);
|
|
static void ipreass_drain_tomax(void);
|
|
-static void ipq_free(struct ipqhead *, struct ipq *);
|
|
+static void ipq_free(struct ipqbucket *, struct ipq *);
|
|
static struct ipq * ipq_reuse(int);
|
|
|
|
static inline void
|
|
-ipq_timeout(struct ipqhead *head, struct ipq *fp)
|
|
+ipq_timeout(struct ipqbucket *bucket, struct ipq *fp)
|
|
{
|
|
|
|
IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
|
|
- ipq_free(head, fp);
|
|
+ ipq_free(bucket, fp);
|
|
}
|
|
|
|
static inline void
|
|
-ipq_drop(struct ipqhead *head, struct ipq *fp)
|
|
+ipq_drop(struct ipqbucket *bucket, struct ipq *fp)
|
|
{
|
|
|
|
IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
|
|
- ipq_free(head, fp);
|
|
+ ipq_free(bucket, fp);
|
|
}
|
|
|
|
+/*
|
|
+ * By default, limit the number of IP fragments across all reassembly
|
|
+ * queues to 1/32 of the total number of mbuf clusters.
|
|
+ *
|
|
+ * Limit the total number of reassembly queues per VNET to the
|
|
+ * IP fragment limit, but ensure the limit will not allow any bucket
|
|
+ * to grow above 100 items. (The bucket limit is
|
|
+ * IP_MAXFRAGPACKETS / (IPREASS_NHASH / 2), so the 50 is the correct
|
|
+ * multiplier to reach a 100-item limit.)
|
|
+ * The 100-item limit was chosen as brief testing seems to show that
|
|
+ * this produces "reasonable" performance on some subset of systems
|
|
+ * under DoS attack.
|
|
+ */
|
|
+#define IP_MAXFRAGS (nmbclusters / 32)
|
|
+#define IP_MAXFRAGPACKETS (imin(IP_MAXFRAGS, IPREASS_NHASH * 50))
|
|
+
|
|
+static int maxfrags;
|
|
+static volatile u_int nfrags;
|
|
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW,
|
|
+ &maxfrags, 0,
|
|
+ "Maximum number of IPv4 fragments allowed across all reassembly queues");
|
|
+SYSCTL_UINT(_net_inet_ip, OID_AUTO, curfrags, CTLFLAG_RD,
|
|
+ __DEVOLATILE(u_int *, &nfrags), 0,
|
|
+ "Current number of IPv4 fragments across all reassembly queues");
|
|
+
|
|
static VNET_DEFINE(uma_zone_t, ipq_zone);
|
|
#define V_ipq_zone VNET(ipq_zone)
|
|
SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_VNET |
|
|
@@ -127,6 +158,10 @@
|
|
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
|
|
&VNET_NAME(maxfragsperpacket), 0,
|
|
"Maximum number of IPv4 fragments allowed per packet");
|
|
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragbucketsize,
|
|
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
|
|
+ sysctl_maxfragbucketsize, "I",
|
|
+ "Maximum number of IPv4 fragment reassembly queue entries per bucket");
|
|
|
|
/*
|
|
* Take incoming datagram fragment and try to reassemble it into
|
|
@@ -146,9 +181,9 @@
|
|
struct mbuf *p, *q, *nq, *t;
|
|
struct ipq *fp;
|
|
struct ipqhead *head;
|
|
- int i, hlen, next;
|
|
+ int i, hlen, next, tmpmax;
|
|
u_int8_t ecn, ecn0;
|
|
- uint32_t hash;
|
|
+ uint32_t hash, hashkey[3];
|
|
#ifdef RSS
|
|
uint32_t rss_hash, rss_type;
|
|
#endif
|
|
@@ -156,8 +191,12 @@
|
|
/*
|
|
* If no reassembling or maxfragsperpacket are 0,
|
|
* never accept fragments.
|
|
+ * Also, drop packet if it would exceed the maximum
|
|
+ * number of fragments.
|
|
*/
|
|
- if (V_noreass == 1 || V_maxfragsperpacket == 0) {
|
|
+ tmpmax = maxfrags;
|
|
+ if (V_noreass == 1 || V_maxfragsperpacket == 0 ||
|
|
+ (tmpmax >= 0 && nfrags >= (u_int)tmpmax)) {
|
|
IPSTAT_INC(ips_fragments);
|
|
IPSTAT_INC(ips_fragdropped);
|
|
m_freem(m);
|
|
@@ -202,8 +241,12 @@
|
|
m->m_data += hlen;
|
|
m->m_len -= hlen;
|
|
|
|
- hash = ip->ip_src.s_addr ^ ip->ip_id;
|
|
- hash = jenkins_hash32(&hash, 1, V_ipq_hashseed) & IPREASS_HMASK;
|
|
+ hashkey[0] = ip->ip_src.s_addr;
|
|
+ hashkey[1] = ip->ip_dst.s_addr;
|
|
+ hashkey[2] = (uint32_t)ip->ip_p << 16;
|
|
+ hashkey[2] += ip->ip_id;
|
|
+ hash = jenkins_hash32(hashkey, nitems(hashkey), V_ipq_hashseed);
|
|
+ hash &= IPREASS_HMASK;
|
|
head = &V_ipq[hash].head;
|
|
IPQ_LOCK(hash);
|
|
|
|
@@ -224,9 +267,12 @@
|
|
* If first fragment to arrive, create a reassembly queue.
|
|
*/
|
|
if (fp == NULL) {
|
|
- fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
|
|
+ if (V_ipq[hash].count < V_ipreass_maxbucketsize)
|
|
+ fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
|
|
if (fp == NULL)
|
|
fp = ipq_reuse(hash);
|
|
+ if (fp == NULL)
|
|
+ goto dropfrag;
|
|
#ifdef MAC
|
|
if (mac_ipq_init(fp, M_NOWAIT) != 0) {
|
|
uma_zfree(V_ipq_zone, fp);
|
|
@@ -236,7 +282,9 @@
|
|
mac_ipq_create(m, fp);
|
|
#endif
|
|
TAILQ_INSERT_HEAD(head, fp, ipq_list);
|
|
+ V_ipq[hash].count++;
|
|
fp->ipq_nfrags = 1;
|
|
+ atomic_add_int(&nfrags, 1);
|
|
fp->ipq_ttl = IPFRAGTTL;
|
|
fp->ipq_p = ip->ip_p;
|
|
fp->ipq_id = ip->ip_id;
|
|
@@ -247,6 +295,7 @@
|
|
goto done;
|
|
} else {
|
|
fp->ipq_nfrags++;
|
|
+ atomic_add_int(&nfrags, 1);
|
|
#ifdef MAC
|
|
mac_ipq_update(m, fp);
|
|
#endif
|
|
@@ -323,6 +372,7 @@
|
|
m->m_nextpkt = nq;
|
|
IPSTAT_INC(ips_fragdropped);
|
|
fp->ipq_nfrags--;
|
|
+ atomic_subtract_int(&nfrags, 1);
|
|
m_freem(q);
|
|
}
|
|
|
|
@@ -340,7 +390,7 @@
|
|
for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
|
|
if (ntohs(GETIP(q)->ip_off) != next) {
|
|
if (fp->ipq_nfrags > V_maxfragsperpacket)
|
|
- ipq_drop(head, fp);
|
|
+ ipq_drop(&V_ipq[hash], fp);
|
|
goto done;
|
|
}
|
|
next += ntohs(GETIP(q)->ip_len);
|
|
@@ -348,7 +398,7 @@
|
|
/* Make sure the last packet didn't have the IP_MF flag */
|
|
if (p->m_flags & M_IP_FRAG) {
|
|
if (fp->ipq_nfrags > V_maxfragsperpacket)
|
|
- ipq_drop(head, fp);
|
|
+ ipq_drop(&V_ipq[hash], fp);
|
|
goto done;
|
|
}
|
|
|
|
@@ -359,7 +409,7 @@
|
|
ip = GETIP(q);
|
|
if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
|
|
IPSTAT_INC(ips_toolong);
|
|
- ipq_drop(head, fp);
|
|
+ ipq_drop(&V_ipq[hash], fp);
|
|
goto done;
|
|
}
|
|
|
|
@@ -388,6 +438,7 @@
|
|
while (m->m_pkthdr.csum_data & 0xffff0000)
|
|
m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
|
|
(m->m_pkthdr.csum_data >> 16);
|
|
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
|
|
#ifdef MAC
|
|
mac_ipq_reassemble(fp, m);
|
|
mac_ipq_destroy(fp);
|
|
@@ -402,6 +453,7 @@
|
|
ip->ip_src = fp->ipq_src;
|
|
ip->ip_dst = fp->ipq_dst;
|
|
TAILQ_REMOVE(head, fp, ipq_list);
|
|
+ V_ipq[hash].count--;
|
|
uma_zfree(V_ipq_zone, fp);
|
|
m->m_len += (ip->ip_hl << 2);
|
|
m->m_data -= (ip->ip_hl << 2);
|
|
@@ -447,8 +499,10 @@
|
|
|
|
dropfrag:
|
|
IPSTAT_INC(ips_fragdropped);
|
|
- if (fp != NULL)
|
|
+ if (fp != NULL) {
|
|
fp->ipq_nfrags--;
|
|
+ atomic_subtract_int(&nfrags, 1);
|
|
+ }
|
|
m_freem(m);
|
|
done:
|
|
IPQ_UNLOCK(hash);
|
|
@@ -463,21 +517,27 @@
|
|
void
|
|
ipreass_init(void)
|
|
{
|
|
+ int max;
|
|
|
|
for (int i = 0; i < IPREASS_NHASH; i++) {
|
|
TAILQ_INIT(&V_ipq[i].head);
|
|
mtx_init(&V_ipq[i].lock, "IP reassembly", NULL,
|
|
MTX_DEF | MTX_DUPOK);
|
|
+ V_ipq[i].count = 0;
|
|
}
|
|
V_ipq_hashseed = arc4random();
|
|
V_maxfragsperpacket = 16;
|
|
V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
|
|
NULL, UMA_ALIGN_PTR, 0);
|
|
- uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
|
|
+ max = IP_MAXFRAGPACKETS;
|
|
+ max = uma_zone_set_max(V_ipq_zone, max);
|
|
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
|
|
|
|
- if (IS_DEFAULT_VNET(curvnet))
|
|
+ if (IS_DEFAULT_VNET(curvnet)) {
|
|
+ maxfrags = IP_MAXFRAGS;
|
|
EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change,
|
|
NULL, EVENTHANDLER_PRI_ANY);
|
|
+ }
|
|
}
|
|
|
|
/*
|
|
@@ -492,7 +552,7 @@
|
|
IPQ_LOCK(i);
|
|
TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, tmp)
|
|
if (--fp->ipq_ttl == 0)
|
|
- ipq_timeout(&V_ipq[i].head, fp);
|
|
+ ipq_timeout(&V_ipq[i], fp);
|
|
IPQ_UNLOCK(i);
|
|
}
|
|
}
|
|
@@ -507,7 +567,10 @@
|
|
for (int i = 0; i < IPREASS_NHASH; i++) {
|
|
IPQ_LOCK(i);
|
|
while(!TAILQ_EMPTY(&V_ipq[i].head))
|
|
- ipq_drop(&V_ipq[i].head, TAILQ_FIRST(&V_ipq[i].head));
|
|
+ ipq_drop(&V_ipq[i], TAILQ_FIRST(&V_ipq[i].head));
|
|
+ KASSERT(V_ipq[i].count == 0,
|
|
+ ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i,
|
|
+ V_ipq[i].count, V_ipq));
|
|
IPQ_UNLOCK(i);
|
|
}
|
|
}
|
|
@@ -535,8 +598,22 @@
|
|
static void
|
|
ipreass_drain_tomax(void)
|
|
{
|
|
+ struct ipq *fp;
|
|
int target;
|
|
|
|
+ /*
|
|
+ * Make sure each bucket is under the new limit. If
|
|
+ * necessary, drop enough of the oldest elements from
|
|
+ * each bucket to get under the new limit.
|
|
+ */
|
|
+ for (int i = 0; i < IPREASS_NHASH; i++) {
|
|
+ IPQ_LOCK(i);
|
|
+ while (V_ipq[i].count > V_ipreass_maxbucketsize &&
|
|
+ (fp = TAILQ_LAST(&V_ipq[i].head, ipqhead)) != NULL)
|
|
+ ipq_timeout(&V_ipq[i], fp);
|
|
+ IPQ_UNLOCK(i);
|
|
+ }
|
|
+
|
|
/*
|
|
* If we are over the maximum number of fragments,
|
|
* drain off enough to get down to the new limit,
|
|
@@ -545,13 +622,11 @@
|
|
*/
|
|
target = uma_zone_get_max(V_ipq_zone);
|
|
while (uma_zone_get_cur(V_ipq_zone) > target) {
|
|
- struct ipq *fp;
|
|
-
|
|
for (int i = 0; i < IPREASS_NHASH; i++) {
|
|
IPQ_LOCK(i);
|
|
fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
|
|
if (fp != NULL)
|
|
- ipq_timeout(&V_ipq[i].head, fp);
|
|
+ ipq_timeout(&V_ipq[i], fp);
|
|
IPQ_UNLOCK(i);
|
|
}
|
|
}
|
|
@@ -560,9 +635,20 @@
|
|
static void
|
|
ipreass_zone_change(void *tag)
|
|
{
|
|
-
|
|
- uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
|
|
- ipreass_drain_tomax();
|
|
+ VNET_ITERATOR_DECL(vnet_iter);
|
|
+ int max;
|
|
+
|
|
+ maxfrags = IP_MAXFRAGS;
|
|
+ max = IP_MAXFRAGPACKETS;
|
|
+ VNET_LIST_RLOCK_NOSLEEP();
|
|
+ VNET_FOREACH(vnet_iter) {
|
|
+ CURVNET_SET(vnet_iter);
|
|
+ max = uma_zone_set_max(V_ipq_zone, max);
|
|
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
|
|
+ ipreass_drain_tomax();
|
|
+ CURVNET_RESTORE();
|
|
+ }
|
|
+ VNET_LIST_RUNLOCK_NOSLEEP();
|
|
}
|
|
|
|
/*
|
|
@@ -590,6 +676,7 @@
|
|
* and place an extreme upper bound.
|
|
*/
|
|
max = uma_zone_set_max(V_ipq_zone, max);
|
|
+ V_ipreass_maxbucketsize = imax(max / (IPREASS_NHASH / 2), 1);
|
|
ipreass_drain_tomax();
|
|
V_noreass = 0;
|
|
} else if (max == 0) {
|
|
@@ -598,6 +685,7 @@
|
|
} else if (max == -1) {
|
|
V_noreass = 0;
|
|
uma_zone_set_max(V_ipq_zone, 0);
|
|
+ V_ipreass_maxbucketsize = INT_MAX;
|
|
} else
|
|
return (EINVAL);
|
|
return (0);
|
|
@@ -611,49 +699,72 @@
|
|
ipq_reuse(int start)
|
|
{
|
|
struct ipq *fp;
|
|
- int i;
|
|
+ int bucket, i;
|
|
|
|
IPQ_LOCK_ASSERT(start);
|
|
|
|
- for (i = start;; i++) {
|
|
- if (i == IPREASS_NHASH)
|
|
- i = 0;
|
|
- if (i != start && IPQ_TRYLOCK(i) == 0)
|
|
+ for (i = 0; i < IPREASS_NHASH; i++) {
|
|
+ bucket = (start + i) % IPREASS_NHASH;
|
|
+ if (bucket != start && IPQ_TRYLOCK(bucket) == 0)
|
|
continue;
|
|
- fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
|
|
+ fp = TAILQ_LAST(&V_ipq[bucket].head, ipqhead);
|
|
if (fp) {
|
|
struct mbuf *m;
|
|
|
|
IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
|
|
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
|
|
while (fp->ipq_frags) {
|
|
m = fp->ipq_frags;
|
|
fp->ipq_frags = m->m_nextpkt;
|
|
m_freem(m);
|
|
}
|
|
- TAILQ_REMOVE(&V_ipq[i].head, fp, ipq_list);
|
|
- if (i != start)
|
|
- IPQ_UNLOCK(i);
|
|
- IPQ_LOCK_ASSERT(start);
|
|
- return (fp);
|
|
+ TAILQ_REMOVE(&V_ipq[bucket].head, fp, ipq_list);
|
|
+ V_ipq[bucket].count--;
|
|
+ if (bucket != start)
|
|
+ IPQ_UNLOCK(bucket);
|
|
+ break;
|
|
}
|
|
- if (i != start)
|
|
- IPQ_UNLOCK(i);
|
|
+ if (bucket != start)
|
|
+ IPQ_UNLOCK(bucket);
|
|
}
|
|
+ IPQ_LOCK_ASSERT(start);
|
|
+ return (fp);
|
|
}
|
|
|
|
/*
|
|
* Free a fragment reassembly header and all associated datagrams.
|
|
*/
|
|
static void
|
|
-ipq_free(struct ipqhead *fhp, struct ipq *fp)
|
|
+ipq_free(struct ipqbucket *bucket, struct ipq *fp)
|
|
{
|
|
struct mbuf *q;
|
|
|
|
+ atomic_subtract_int(&nfrags, fp->ipq_nfrags);
|
|
while (fp->ipq_frags) {
|
|
q = fp->ipq_frags;
|
|
fp->ipq_frags = q->m_nextpkt;
|
|
m_freem(q);
|
|
}
|
|
- TAILQ_REMOVE(fhp, fp, ipq_list);
|
|
+ TAILQ_REMOVE(&bucket->head, fp, ipq_list);
|
|
+ bucket->count--;
|
|
uma_zfree(V_ipq_zone, fp);
|
|
}
|
|
+
|
|
+/*
|
|
+ * Get or set the maximum number of reassembly queues per bucket.
|
|
+ */
|
|
+static int
|
|
+sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS)
|
|
+{
|
|
+ int error, max;
|
|
+
|
|
+ max = V_ipreass_maxbucketsize;
|
|
+ error = sysctl_handle_int(oidp, &max, 0, req);
|
|
+ if (error || !req->newptr)
|
|
+ return (error);
|
|
+ if (max <= 0)
|
|
+ return (EINVAL);
|
|
+ V_ipreass_maxbucketsize = max;
|
|
+ ipreass_drain_tomax();
|
|
+ return (0);
|
|
+}
|
|
--- sys/netinet6/frag6.c.orig
|
|
+++ sys/netinet6/frag6.c
|
|
@@ -36,6 +36,7 @@
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
+#include <sys/hash.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/domain.h>
|
|
@@ -47,6 +48,8 @@
|
|
#include <sys/kernel.h>
|
|
#include <sys/syslog.h>
|
|
|
|
+#include <machine/atomic.h>
|
|
+
|
|
#include <net/if.h>
|
|
#include <net/if_var.h>
|
|
#include <net/netisr.h>
|
|
@@ -63,58 +66,110 @@
|
|
|
|
#include <security/mac/mac_framework.h>
|
|
|
|
-static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
|
|
-static void frag6_deq(struct ip6asfrag *);
|
|
-static void frag6_insque(struct ip6q *, struct ip6q *);
|
|
-static void frag6_remque(struct ip6q *);
|
|
-static void frag6_freef(struct ip6q *);
|
|
-
|
|
-static struct mtx ip6qlock;
|
|
/*
|
|
- * These fields all protected by ip6qlock.
|
|
+ * Reassembly headers are stored in hash buckets.
|
|
*/
|
|
-static VNET_DEFINE(u_int, frag6_nfragpackets);
|
|
-static VNET_DEFINE(u_int, frag6_nfrags);
|
|
-static VNET_DEFINE(struct ip6q, ip6q); /* ip6 reassemble queue */
|
|
+#define IP6REASS_NHASH_LOG2 10
|
|
+#define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2)
|
|
+#define IP6REASS_HMASK (IP6REASS_NHASH - 1)
|
|
+
|
|
+static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
|
|
+ uint32_t bucket __unused);
|
|
+static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
|
|
+static void frag6_insque_head(struct ip6q *, struct ip6q *,
|
|
+ uint32_t bucket);
|
|
+static void frag6_remque(struct ip6q *, uint32_t bucket);
|
|
+static void frag6_freef(struct ip6q *, uint32_t bucket);
|
|
+
|
|
+struct ip6qbucket {
|
|
+ struct ip6q ip6q;
|
|
+ struct mtx lock;
|
|
+ int count;
|
|
+};
|
|
+
|
|
+static VNET_DEFINE(volatile u_int, frag6_nfragpackets);
|
|
+volatile u_int frag6_nfrags = 0;
|
|
+static VNET_DEFINE(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
|
|
+static VNET_DEFINE(uint32_t, ip6q_hashseed);
|
|
|
|
#define V_frag6_nfragpackets VNET(frag6_nfragpackets)
|
|
-#define V_frag6_nfrags VNET(frag6_nfrags)
|
|
#define V_ip6q VNET(ip6q)
|
|
+#define V_ip6q_hashseed VNET(ip6q_hashseed)
|
|
|
|
-#define IP6Q_LOCK_INIT() mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
|
|
-#define IP6Q_LOCK() mtx_lock(&ip6qlock)
|
|
-#define IP6Q_TRYLOCK() mtx_trylock(&ip6qlock)
|
|
-#define IP6Q_LOCK_ASSERT() mtx_assert(&ip6qlock, MA_OWNED)
|
|
-#define IP6Q_UNLOCK() mtx_unlock(&ip6qlock)
|
|
+#define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock)
|
|
+#define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock)
|
|
+#define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
|
|
+#define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock)
|
|
+#define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q)
|
|
|
|
static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
|
|
|
|
+/*
|
|
+ * By default, limit the number of IP6 fragments across all reassembly
|
|
+ * queues to 1/32 of the total number of mbuf clusters.
|
|
+ *
|
|
+ * Limit the total number of reassembly queues per VNET to the
|
|
+ * IP6 fragment limit, but ensure the limit will not allow any bucket
|
|
+ * to grow above 100 items. (The bucket limit is
|
|
+ * IP6_MAXFRAGPACKETS / (IP6REASS_NHASH / 2), so the 50 is the correct
|
|
+ * multiplier to reach a 100-item limit.)
|
|
+ * The 100-item limit was chosen as brief testing seems to show that
|
|
+ * this produces "reasonable" performance on some subset of systems
|
|
+ * under DoS attack.
|
|
+ */
|
|
+#define IP6_MAXFRAGS (nmbclusters / 32)
|
|
+#define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
|
|
+
|
|
/*
|
|
* Initialise reassembly queue and fragment identifier.
|
|
*/
|
|
+void
|

+frag6_set_bucketsize(void)
|

+{
|

+ int i;
|

+
|

+ if ((i = V_ip6_maxfragpackets) > 0)
|

+ V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
|

+}
|
|
+
|
|
static void
|
|
frag6_change(void *tag)
|
|
{
|
|
+ VNET_ITERATOR_DECL(vnet_iter);
|
|
|
|
- V_ip6_maxfragpackets = nmbclusters / 4;
|
|
- V_ip6_maxfrags = nmbclusters / 4;
|
|
+ ip6_maxfrags = IP6_MAXFRAGS;
|
|
+ VNET_LIST_RLOCK_NOSLEEP();
|
|
+ VNET_FOREACH(vnet_iter) {
|
|
+ CURVNET_SET(vnet_iter);
|
|
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
|
|
+ frag6_set_bucketsize();
|
|
+ CURVNET_RESTORE();
|
|
+ }
|
|
+ VNET_LIST_RUNLOCK_NOSLEEP();
|
|
}
|
|
|
|
void
|
|
frag6_init(void)
|
|
{
|
|
-
|
|
- V_ip6_maxfragpackets = nmbclusters / 4;
|
|
- V_ip6_maxfrags = nmbclusters / 4;
|
|
- V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q;
|
|
-
|
|
+ struct ip6q *q6;
|
|
+ int i;
|
|
+
|
|
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
|
|
+ frag6_set_bucketsize();
|
|
+ for (i = 0; i < IP6REASS_NHASH; i++) {
|
|
+ q6 = IP6Q_HEAD(i);
|
|
+ q6->ip6q_next = q6->ip6q_prev = q6;
|
|
+ mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF);
|
|
+ V_ip6q[i].count = 0;
|
|
+ }
|
|
+ V_ip6q_hashseed = arc4random();
|
|
+ V_ip6_maxfragsperpacket = 64;
|
|
if (!IS_DEFAULT_VNET(curvnet))
|
|
return;
|
|
|
|
+ ip6_maxfrags = IP6_MAXFRAGS;
|
|
EVENTHANDLER_REGISTER(nmbclusters_change,
|
|
frag6_change, NULL, EVENTHANDLER_PRI_ANY);
|
|
-
|
|
- IP6Q_LOCK_INIT();
|
|
}
|
|
|
|
/*
|
|
@@ -155,12 +210,13 @@
|
|
struct mbuf *m = *mp, *t;
|
|
struct ip6_hdr *ip6;
|
|
struct ip6_frag *ip6f;
|
|
- struct ip6q *q6;
|
|
+ struct ip6q *head, *q6;
|
|
struct ip6asfrag *af6, *ip6af, *af6dwn;
|
|
struct in6_ifaddr *ia;
|
|
int offset = *offp, nxt, i, next;
|
|
int first_frag = 0;
|
|
int fragoff, frgpartlen; /* must be larger than u_int16_t */
|
|
+ uint32_t hash, hashkey[sizeof(struct in6_addr) * 2 / sizeof(uint32_t) + 1], *hashkeyp;
|
|
struct ifnet *dstifp;
|
|
u_int8_t ecn, ecn0;
|
|
#ifdef RSS
|
|
@@ -229,19 +285,38 @@
|
|
return (ip6f->ip6f_nxt);
|
|
}
|
|
|
|
- IP6Q_LOCK();
|
|
+ /* Get fragment length and discard 0-byte fragments. */
|
|
+ frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
|
|
+ if (frgpartlen == 0) {
|
|
+ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
|
|
+ offsetof(struct ip6_hdr, ip6_plen));
|
|
+ in6_ifstat_inc(dstifp, ifs6_reass_fail);
|
|
+ IP6STAT_INC(ip6s_fragdropped);
|
|
+ return IPPROTO_DONE;
|
|
+ }
|
|
+
|
|
+ hashkeyp = hashkey;
|
|
+ memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
|
|
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
|
|
+ memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
|
|
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
|
|
+ *hashkeyp = ip6f->ip6f_ident;
|
|
+ hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed);
|
|
+ hash &= IP6REASS_HMASK;
|
|
+ head = IP6Q_HEAD(hash);
|
|
+ IP6Q_LOCK(hash);
|
|
|
|
/*
|
|
* Enforce upper bound on number of fragments.
|
|
* If maxfrag is 0, never accept fragments.
|
|
* If maxfrag is -1, accept all fragments without limitation.
|
|
*/
|
|
- if (V_ip6_maxfrags < 0)
|
|
+ if (ip6_maxfrags < 0)
|
|
;
|
|
- else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags)
|
|
+ else if (frag6_nfrags >= (u_int)ip6_maxfrags)
|
|
goto dropfrag;
|
|
|
|
- for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next)
|
|
+ for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
|
|
if (ip6f->ip6f_ident == q6->ip6q_ident &&
|
|
IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
|
|
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
|
|
@@ -251,7 +326,7 @@
|
|
)
|
|
break;
|
|
|
|
- if (q6 == &V_ip6q) {
|
|
+ if (q6 == head) {
|
|
/*
|
|
* the first fragment to arrive, create a reassembly queue.
|
|
*/
|
|
@@ -266,9 +341,10 @@
|
|
*/
|
|
if (V_ip6_maxfragpackets < 0)
|
|
;
|
|
- else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets)
|
|
+ else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize ||
|
|
+ V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets)
|
|
goto dropfrag;
|
|
- V_frag6_nfragpackets++;
|
|
+ atomic_add_int(&V_frag6_nfragpackets, 1);
|
|
q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
|
|
M_NOWAIT);
|
|
if (q6 == NULL)
|
|
@@ -281,7 +357,7 @@
|
|
}
|
|
mac_ip6q_create(m, q6);
|
|
#endif
|
|
- frag6_insque(q6, &V_ip6q);
|
|
+ frag6_insque_head(q6, head, hash);
|
|
|
|
/* ip6q_nxt will be filled afterwards, from 1st fragment */
|
|
q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
|
|
@@ -315,21 +391,20 @@
|
|
* in size.
|
|
* If it would exceed, discard the fragment and return an ICMP error.
|
|
*/
|
|
- frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
|
|
if (q6->ip6q_unfrglen >= 0) {
|
|
/* The 1st fragment has already arrived. */
|
|
if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
|
|
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
|
|
offset - sizeof(struct ip6_frag) +
|
|
offsetof(struct ip6_frag, ip6f_offlg));
|
|
- IP6Q_UNLOCK();
|
|
+ IP6Q_UNLOCK(hash);
|
|
return (IPPROTO_DONE);
|
|
}
|
|
} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
|
|
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
|
|
offset - sizeof(struct ip6_frag) +
|
|
offsetof(struct ip6_frag, ip6f_offlg));
|
|
- IP6Q_UNLOCK();
|
|
+ IP6Q_UNLOCK(hash);
|
|
return (IPPROTO_DONE);
|
|
}
|
|
/*
|
|
@@ -348,7 +423,7 @@
|
|
int erroff = af6->ip6af_offset;
|
|
|
|
/* dequeue the fragment. */
|
|
- frag6_deq(af6);
|
|
+ frag6_deq(af6, hash);
|
|
free(af6, M_FTABLE);
|
|
|
|
/* adjust pointer. */
|
|
@@ -446,7 +521,7 @@
|
|
}
|
|
af6 = af6->ip6af_down;
|
|
m_freem(IP6_REASS_MBUF(af6->ip6af_up));
|
|
- frag6_deq(af6->ip6af_up);
|
|
+ frag6_deq(af6->ip6af_up, hash);
|
|
}
|
|
#else
|
|
/*
|
|
@@ -495,29 +570,38 @@
|
|
/*
|
|
* Stick new segment in its place;
|
|
* check for complete reassembly.
|
|
+ * If not complete, check fragment limit.
|
|
* Move to front of packet queue, as we are
|
|
* the most recently active fragmented packet.
|
|
*/
|
|
- frag6_enq(ip6af, af6->ip6af_up);
|
|
- V_frag6_nfrags++;
|
|
+ frag6_enq(ip6af, af6->ip6af_up, hash);
|
|
+ atomic_add_int(&frag6_nfrags, 1);
|
|
q6->ip6q_nfrag++;
|
|
#if 0 /* xxx */
|
|
- if (q6 != V_ip6q.ip6q_next) {
|
|
- frag6_remque(q6);
|
|
- frag6_insque(q6, &V_ip6q);
|
|
+ if (q6 != head->ip6q_next) {
|
|
+ frag6_remque(q6, hash);
|
|
+ frag6_insque_head(q6, head, hash);
|
|
}
|
|
#endif
|
|
next = 0;
|
|
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
|
|
af6 = af6->ip6af_down) {
|
|
if (af6->ip6af_off != next) {
|
|
- IP6Q_UNLOCK();
|
|
+ if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
|
|
+ IP6STAT_INC(ip6s_fragdropped);
|
|
+ frag6_freef(q6, hash);
|
|
+ }
|
|
+ IP6Q_UNLOCK(hash);
|
|
return IPPROTO_DONE;
|
|
}
|
|
next += af6->ip6af_frglen;
|
|
}
|
|
if (af6->ip6af_up->ip6af_mff) {
|
|
- IP6Q_UNLOCK();
|
|
+ if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
|
|
+ IP6STAT_INC(ip6s_fragdropped);
|
|
+ frag6_freef(q6, hash);
|
|
+ }
|
|
+ IP6Q_UNLOCK(hash);
|
|
return IPPROTO_DONE;
|
|
}
|
|
|
|
@@ -527,7 +611,7 @@
|
|
ip6af = q6->ip6q_down;
|
|
t = m = IP6_REASS_MBUF(ip6af);
|
|
af6 = ip6af->ip6af_down;
|
|
- frag6_deq(ip6af);
|
|
+ frag6_deq(ip6af, hash);
|
|
while (af6 != (struct ip6asfrag *)q6) {
|
|
m->m_pkthdr.csum_flags &=
|
|
IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
|
|
@@ -535,7 +619,7 @@
|
|
IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
|
|
|
|
af6dwn = af6->ip6af_down;
|
|
- frag6_deq(af6);
|
|
+ frag6_deq(af6, hash);
|
|
while (t->m_next)
|
|
t = t->m_next;
|
|
m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
|
|
@@ -562,13 +646,13 @@
|
|
#endif
|
|
|
|
if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
|
|
- frag6_remque(q6);
|
|
- V_frag6_nfrags -= q6->ip6q_nfrag;
|
|
+ frag6_remque(q6, hash);
|
|
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
|
|
#ifdef MAC
|
|
mac_ip6q_destroy(q6);
|
|
#endif
|
|
free(q6, M_FTABLE);
|
|
- V_frag6_nfragpackets--;
|
|
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
|
|
|
|
goto dropfrag;
|
|
}
|
|
@@ -579,14 +663,14 @@
|
|
m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
|
|
(caddr_t)&nxt);
|
|
|
|
- frag6_remque(q6);
|
|
- V_frag6_nfrags -= q6->ip6q_nfrag;
|
|
+ frag6_remque(q6, hash);
|
|
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
|
|
#ifdef MAC
|
|
mac_ip6q_reassemble(q6, m);
|
|
mac_ip6q_destroy(q6);
|
|
#endif
|
|
free(q6, M_FTABLE);
|
|
- V_frag6_nfragpackets--;
|
|
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
|
|
|
|
if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
|
|
int plen = 0;
|
|
@@ -608,7 +692,7 @@
|
|
m_tag_prepend(m, mtag);
|
|
#endif
|
|
|
|
- IP6Q_UNLOCK();
|
|
+ IP6Q_UNLOCK(hash);
|
|
IP6STAT_INC(ip6s_reassembled);
|
|
in6_ifstat_inc(dstifp, ifs6_reass_ok);
|
|
|
|
@@ -630,7 +714,7 @@
|
|
return nxt;
|
|
|
|
dropfrag:
|
|
- IP6Q_UNLOCK();
|
|
+ IP6Q_UNLOCK(hash);
|
|
in6_ifstat_inc(dstifp, ifs6_reass_fail);
|
|
IP6STAT_INC(ip6s_fragdropped);
|
|
m_freem(m);
|
|
@@ -641,19 +725,19 @@
|
|
* Free a fragment reassembly header and all
|
|
* associated datagrams.
|
|
*/
|
|
-void
|
|
-frag6_freef(struct ip6q *q6)
|
|
+static void
|
|
+frag6_freef(struct ip6q *q6, uint32_t bucket)
|
|
{
|
|
struct ip6asfrag *af6, *down6;
|
|
|
|
- IP6Q_LOCK_ASSERT();
|
|
+ IP6Q_LOCK_ASSERT(bucket);
|
|
|
|
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
|
|
af6 = down6) {
|
|
struct mbuf *m = IP6_REASS_MBUF(af6);
|
|
|
|
down6 = af6->ip6af_down;
|
|
- frag6_deq(af6);
|
|
+ frag6_deq(af6, bucket);
|
|
|
|
/*
|
|
* Return ICMP time exceeded error for the 1st fragment.
|
|
@@ -675,24 +759,25 @@
|
|
m_freem(m);
|
|
free(af6, M_FTABLE);
|
|
}
|
|
- frag6_remque(q6);
|
|
- V_frag6_nfrags -= q6->ip6q_nfrag;
|
|
+ frag6_remque(q6, bucket);
|
|
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
|
|
#ifdef MAC
|
|
mac_ip6q_destroy(q6);
|
|
#endif
|
|
free(q6, M_FTABLE);
|
|
- V_frag6_nfragpackets--;
|
|
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
|
|
}
|
|
|
|
/*
|
|
* Put an ip fragment on a reassembly chain.
|
|
* Like insque, but pointers in middle of structure.
|
|
*/
|
|
-void
|
|
-frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
|
|
+static void
|
|
+frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
|
|
+ uint32_t bucket __unused)
|
|
{
|
|
|
|
- IP6Q_LOCK_ASSERT();
|
|
+ IP6Q_LOCK_ASSERT(bucket);
|
|
|
|
af6->ip6af_up = up6;
|
|
af6->ip6af_down = up6->ip6af_down;
|
|
@@ -703,36 +788,41 @@
|
|
/*
|
|
* To frag6_enq as remque is to insque.
|
|
*/
|
|
-void
|
|
-frag6_deq(struct ip6asfrag *af6)
|
|
+static void
|
|
+frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
|
|
{
|
|
|
|
- IP6Q_LOCK_ASSERT();
|
|
+ IP6Q_LOCK_ASSERT(bucket);
|
|
|
|
af6->ip6af_up->ip6af_down = af6->ip6af_down;
|
|
af6->ip6af_down->ip6af_up = af6->ip6af_up;
|
|
}
|
|
|
|
-void
|
|
-frag6_insque(struct ip6q *new, struct ip6q *old)
|
|
+static void
|
|
+frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
|
|
{
|
|
|
|
- IP6Q_LOCK_ASSERT();
|
|
+ IP6Q_LOCK_ASSERT(bucket);
|
|
+ KASSERT(IP6Q_HEAD(bucket) == old,
|
|
+ ("%s: attempt to insert at head of wrong bucket"
|
|
+ " (bucket=%u, old=%p)", __func__, bucket, old));
|
|
|
|
new->ip6q_prev = old;
|
|
new->ip6q_next = old->ip6q_next;
|
|
old->ip6q_next->ip6q_prev= new;
|
|
old->ip6q_next = new;
|
|
+ V_ip6q[bucket].count++;
|
|
}
|
|
|
|
-void
|
|
-frag6_remque(struct ip6q *p6)
|
|
+static void
|
|
+frag6_remque(struct ip6q *p6, uint32_t bucket)
|
|
{
|
|
|
|
- IP6Q_LOCK_ASSERT();
|
|
+ IP6Q_LOCK_ASSERT(bucket);
|
|
|
|
p6->ip6q_prev->ip6q_next = p6->ip6q_next;
|
|
p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
|
|
+ V_ip6q[bucket].count--;
|
|
}
|
|
|
|
/*
|
|
@@ -744,37 +834,71 @@
|
|
frag6_slowtimo(void)
|
|
{
|
|
VNET_ITERATOR_DECL(vnet_iter);
|
|
- struct ip6q *q6;
|
|
+ struct ip6q *head, *q6;
|
|
+ int i;
|
|
|
|
VNET_LIST_RLOCK_NOSLEEP();
|
|
- IP6Q_LOCK();
|
|
VNET_FOREACH(vnet_iter) {
|
|
CURVNET_SET(vnet_iter);
|
|
- q6 = V_ip6q.ip6q_next;
|
|
- if (q6)
|
|
- while (q6 != &V_ip6q) {
|
|
+ for (i = 0; i < IP6REASS_NHASH; i++) {
|
|
+ IP6Q_LOCK(i);
|
|
+ head = IP6Q_HEAD(i);
|
|
+ q6 = head->ip6q_next;
|
|
+ if (q6 == NULL) {
|
|
+ /*
|
|
+ * XXXJTL: This should never happen. This
|
|
+ * should turn into an assertion.
|
|
+ */
|
|
+ IP6Q_UNLOCK(i);
|
|
+ continue;
|
|
+ }
|
|
+ while (q6 != head) {
|
|
--q6->ip6q_ttl;
|
|
q6 = q6->ip6q_next;
|
|
if (q6->ip6q_prev->ip6q_ttl == 0) {
|
|
IP6STAT_INC(ip6s_fragtimeout);
|
|
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
|
|
- frag6_freef(q6->ip6q_prev);
|
|
+ frag6_freef(q6->ip6q_prev, i);
|
|
}
|
|
}
|
|
+ /*
|
|
+ * If this bucket holds more reassembly queues than
|
|
+ * maxfragbucketsize allows (e.g. the limit was lowered),
|
|
+ * free queues from the tail until it is within the limit.
|
|
+ * Note that we drain all reassembly queues if
|
|
+ * maxfragpackets is 0 (no fragmented packets are accepted),
|
|
+ * and don't enforce a limit when maxfragpackets
|
|
+ * is negative.
|
|
+ */
|
|
+ while ((V_ip6_maxfragpackets == 0 ||
|
|
+ (V_ip6_maxfragpackets > 0 &&
|
|
+ V_ip6q[i].count > V_ip6_maxfragbucketsize)) &&
|
|
+ head->ip6q_prev != head) {
|
|
+ IP6STAT_INC(ip6s_fragoverflow);
|
|
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
|
|
+ frag6_freef(head->ip6q_prev, i);
|
|
+ }
|
|
+ IP6Q_UNLOCK(i);
|
|
+ }
|
|
/*
|
|
- * If we are over the maximum number of fragments
|
|
- * (due to the limit being lowered), drain off
|
|
- * enough to get down to the new limit.
|
|
+ * If we are still over the maximum number of fragmented
|
|
+ * packets, drain off enough to get down to the new limit.
|
|
*/
|
|
- while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets &&
|
|
- V_ip6q.ip6q_prev) {
|
|
- IP6STAT_INC(ip6s_fragoverflow);
|
|
- /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
|
|
- frag6_freef(V_ip6q.ip6q_prev);
|
|
+ i = 0;
|
|
+ while (V_ip6_maxfragpackets >= 0 &&
|
|
+ V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets) {
|
|
+ IP6Q_LOCK(i);
|
|
+ head = IP6Q_HEAD(i);
|
|
+ if (head->ip6q_prev != head) {
|
|
+ IP6STAT_INC(ip6s_fragoverflow);
|
|
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
|
|
+ frag6_freef(head->ip6q_prev, i);
|
|
+ }
|
|
+ IP6Q_UNLOCK(i);
|
|
+ i = (i + 1) % IP6REASS_NHASH;
|
|
}
|
|
CURVNET_RESTORE();
|
|
}
|
|
- IP6Q_UNLOCK();
|
|
VNET_LIST_RUNLOCK_NOSLEEP();
|
|
}
|
|
|
|
@@ -785,22 +909,25 @@
|
|
frag6_drain(void)
|
|
{
|
|
VNET_ITERATOR_DECL(vnet_iter);
|
|
+ struct ip6q *head;
|
|
+ int i;
|
|
|
|
VNET_LIST_RLOCK_NOSLEEP();
|
|
- if (IP6Q_TRYLOCK() == 0) {
|
|
- VNET_LIST_RUNLOCK_NOSLEEP();
|
|
- return;
|
|
- }
|
|
VNET_FOREACH(vnet_iter) {
|
|
CURVNET_SET(vnet_iter);
|
|
- while (V_ip6q.ip6q_next != &V_ip6q) {
|
|
- IP6STAT_INC(ip6s_fragdropped);
|
|
- /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
|
|
- frag6_freef(V_ip6q.ip6q_next);
|
|
+ for (i = 0; i < IP6REASS_NHASH; i++) {
|
|
+ if (IP6Q_TRYLOCK(i) == 0)
|
|
+ continue;
|
|
+ head = IP6Q_HEAD(i);
|
|
+ while (head->ip6q_next != head) {
|
|
+ IP6STAT_INC(ip6s_fragdropped);
|
|
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
|
|
+ frag6_freef(head->ip6q_next, i);
|
|
+ }
|
|
+ IP6Q_UNLOCK(i);
|
|
}
|
|
CURVNET_RESTORE();
|
|
}
|
|
- IP6Q_UNLOCK();
|
|
VNET_LIST_RUNLOCK_NOSLEEP();
|
|
}
|
|
|
|
--- sys/netinet6/in6.h.orig
|
|
+++ sys/netinet6/in6.h
|
|
@@ -637,7 +637,9 @@
|
|
#define IPV6CTL_INTRQMAXLEN 51 /* max length of IPv6 netisr queue */
|
|
#define IPV6CTL_INTRDQMAXLEN 52 /* max length of direct IPv6 netisr
|
|
* queue */
|
|
-#define IPV6CTL_MAXID 53
|
|
+#define IPV6CTL_MAXFRAGSPERPACKET 53 /* Max fragments per packet */
|
|
+#define IPV6CTL_MAXFRAGBUCKETSIZE 54 /* Max reassembly queues per bucket */
|
|
+#define IPV6CTL_MAXID 55
|
|
#endif /* __BSD_VISIBLE */
|
|
|
|
/*
|
|
--- sys/netinet6/in6_proto.c.orig
|
|
+++ sys/netinet6/in6_proto.c
|
|
@@ -383,7 +383,9 @@
|
|
VNET_DEFINE(int, ip6_norbit_raif) = 0;
|
|
VNET_DEFINE(int, ip6_rfc6204w3) = 0;
|
|
VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
|
|
-VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */
|
|
+int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */
|
|
+VNET_DEFINE(int, ip6_maxfragbucketsize);/* initialized in frag6.c:frag6_init() */
|
|
+VNET_DEFINE(int, ip6_maxfragsperpacket); /* initialized in frag6.c:frag6_init() */
|
|
VNET_DEFINE(int, ip6_log_interval) = 5;
|
|
VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we
|
|
* process? */
|
|
@@ -470,6 +472,20 @@
|
|
return (0);
|
|
}
|
|
|
|
+static int
|
|
+sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
|
|
+{
|
|
+ int error, val;
|
|
+
|
|
+ val = V_ip6_maxfragpackets;
|
|
+ error = sysctl_handle_int(oidp, &val, 0, req);
|
|
+ if (error != 0 || !req->newptr)
|
|
+ return (error);
|
|
+ V_ip6_maxfragpackets = val;
|
|
+ frag6_set_bucketsize();
|
|
+ return (0);
|
|
+}
|
|
+
|
|
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
|
|
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
|
|
"Enable forwarding of IPv6 packets between interfaces");
|
|
@@ -482,8 +498,9 @@
|
|
SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
|
|
ip6stat,
|
|
"IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
|
|
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
|
|
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0,
|
|
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
|
|
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
|
|
+ sysctl_ip6_maxfragpackets, "I",
|
|
"Default maximum number of outstanding fragmented IPv6 packets. "
|
|
"A value of 0 means no fragmented packets will be accepted, while a "
|
|
"a value of -1 means no limit");
|
|
@@ -557,8 +574,16 @@
|
|
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
|
|
"Use the default scope zone when none is specified");
|
|
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
|
|
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0,
|
|
- "Maximum allowed number of outstanding IPv6 packet fragments");
|
|
+ CTLFLAG_RW, &ip6_maxfrags, 0,
|
|
+ "Maximum allowed number of outstanding IPv6 packet fragments. "
|
|
+ "A value of 0 means no fragmented packets will be accepted, while a "
|
|
+ "a value of -1 means no limit");
|
|
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
|
|
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
|
|
+ "Maximum number of reassembly queues per hash bucket");
|
|
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
|
|
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
|
|
+ "Maximum allowed number of fragments per packet");
|
|
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
|
|
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
|
|
"Enable path MTU discovery for multicast packets");
|
|
--- sys/netinet6/ip6_var.h.orig
|
|
+++ sys/netinet6/ip6_var.h
|
|
@@ -296,8 +296,10 @@
|
|
VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */
|
|
VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly
|
|
* queue */
|
|
-VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly
|
|
+extern int ip6_maxfrags; /* Maximum fragments in reassembly
|
|
* queue */
|
|
+VNET_DECLARE(int, ip6_maxfragbucketsize); /* Maximum reassembly queues per bucket */
|
|
+VNET_DECLARE(int, ip6_maxfragsperpacket); /* Maximum fragments per packet */
|
|
VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */
|
|
VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */
|
|
VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
|
|
@@ -312,7 +314,8 @@
|
|
#define V_ip6_mrouter VNET(ip6_mrouter)
|
|
#define V_ip6_sendredirects VNET(ip6_sendredirects)
|
|
#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
|
|
-#define V_ip6_maxfrags VNET(ip6_maxfrags)
|
|
+#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize)
|
|
+#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket)
|
|
#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv)
|
|
#define V_ip6_no_radr VNET(ip6_no_radr)
|
|
#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
|
|
@@ -399,6 +402,7 @@
|
|
|
|
int route6_input(struct mbuf **, int *, int);
|
|
|
|
+void frag6_set_bucketsize(void);
|
|
void frag6_init(void);
|
|
int frag6_input(struct mbuf **, int *, int);
|
|
void frag6_slowtimo(void);
|