netfilter related issues with PAQ patch

Rusty Russell rusty@rustcorp.com.au
Mon, 29 Oct 2001 13:51:22 +1100


In message <Pine.LNX.4.21.0110132303270.19448-100000@w-sridhar2.des.sequent.com
> you write:
> Hi Rusty,
> 
> Sometime back I contacted you to inform about a performance issue that causes
> zerocopy sends to be disabled when iptables is configured. You said that you
> had a patch to fix this issue, but it needs more work and asked me to remind
> you in a month. Hope you have time to look into this now.

Alexey accused the netfilter team of being lazy: that hurt, because
it's true (for me at least 8).

Please find attached a patch against v2.4.13 (should apply to any recent
kernel) which pushes the responsibility for linearizing the skb to the
individual hooks.  This patch changes nothing BUT it allows for hooks
which don't need to linearize not to (actually, the ingress code may
now run a little faster).

For iptables, it could be pushed down further to the individual
modules, but that would break source compatibility for 2.4, so I'm not
likely to do that.

If this works fine for everyone, I'll send it to Harald Welte for
entry into the official kernel.

Cheers,
Rusty.
--
Premature optmztion is rt of all evl. --DK

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/core/netfilter.c working-2.4.13-nfnonlinear/net/core/netfilter.c
--- linux-2.4.13-official/net/core/netfilter.c	Sat Apr 28 07:15:01 2001
+++ working-2.4.13-nfnonlinear/net/core/netfilter.c	Mon Oct 29 12:03:41 2001
@@ -451,11 +451,6 @@
 	unsigned int verdict;
 	int ret = 0;
 
-	/* This stopgap cannot be removed until all the hooks are audited. */
-	if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
-		kfree_skb(skb);
-		return -ENOMEM;
-	}
 	if (skb->ip_summed == CHECKSUM_HW) {
 		if (outdev == NULL) {
 			skb->ip_summed = CHECKSUM_NONE;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv4/netfilter/ip_conntrack_core.c working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.4.13-official/net/ipv4/netfilter/ip_conntrack_core.c	Wed Aug  8 01:30:50 2001
+++ working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_conntrack_core.c	Mon Oct 29 13:14:28 2001
@@ -631,6 +631,9 @@
 	int set_reply;
 	int ret;
 
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	/* FIXME: Do this right please. --RR */
 	(*pskb)->nfcache |= NFC_UNKNOWN;
 
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv4/netfilter/ip_conntrack_standalone.c working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_conntrack_standalone.c
--- linux-2.4.13-official/net/ipv4/netfilter/ip_conntrack_standalone.c	Mon Oct  1 05:26:08 2001
+++ working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_conntrack_standalone.c	Mon Oct 29 13:13:28 2001
@@ -169,6 +169,9 @@
 			       const struct net_device *out,
 			       int (*okfn)(struct sk_buff *))
 {
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	/* We've seen it coming out the other side: confirm it */
 	return ip_conntrack_confirm(*pskb);
 }
@@ -181,6 +184,9 @@
 {
 	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	/* We've seen it coming out the other side: confirm */
 	if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
 		return NF_DROP;
@@ -202,6 +208,8 @@
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
 {
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv4/netfilter/ip_fw_compat.c working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_fw_compat.c
--- linux-2.4.13-official/net/ipv4/netfilter/ip_fw_compat.c	Sat Apr 28 07:15:01 2001
+++ working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_fw_compat.c	Mon Oct 29 13:30:44 2001
@@ -79,6 +79,9 @@
 	int ret = FW_BLOCK;
 	u_int16_t redirpt;
 
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	/* Assume worse case: any hook could change packet */
 	(*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
 	if ((*pskb)->ip_summed == CHECKSUM_HW)
@@ -183,6 +186,8 @@
 			       const struct net_device *out,
 			       int (*okfn)(struct sk_buff *))
 {
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 	return ip_conntrack_confirm(*pskb);
 }
 
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv4/netfilter/ip_nat_standalone.c working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_nat_standalone.c
--- linux-2.4.13-official/net/ipv4/netfilter/ip_nat_standalone.c	Mon Oct  1 05:26:08 2001
+++ working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_nat_standalone.c	Mon Oct 29 13:27:13 2001
@@ -56,6 +56,9 @@
 	/* maniptype == SRC for postrouting. */
 	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	/* We never see fragments: conntrack defrags on pre-routing
 	   and local-out, and ip_nat_out protects post-routing. */
 	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
@@ -141,6 +144,9 @@
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
@@ -203,6 +209,9 @@
 {
 	u_int32_t saddr, daddr;
 	unsigned int ret;
+
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv4/netfilter/ip_tables.c working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_tables.c
--- linux-2.4.13-official/net/ipv4/netfilter/ip_tables.c	Mon Oct  1 05:26:08 2001
+++ working-2.4.13-nfnonlinear/net/ipv4/netfilter/ip_tables.c	Mon Oct 29 13:19:31 2001
@@ -264,6 +264,11 @@
 	void *table_base;
 	struct ipt_entry *e, *back;
 
+	/* This check cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb))
+		BUG();
+
 	/* Initialization */
 	ip = (*pskb)->nh.iph;
 	protohdr = (u_int32_t *)ip + ip->ihl;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv4/netfilter/iptable_filter.c working-2.4.13-nfnonlinear/net/ipv4/netfilter/iptable_filter.c
--- linux-2.4.13-official/net/ipv4/netfilter/iptable_filter.c	Mon Oct  1 05:26:08 2001
+++ working-2.4.13-nfnonlinear/net/ipv4/netfilter/iptable_filter.c	Mon Oct 29 13:22:26 2001
@@ -93,6 +93,11 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
 }
 
@@ -103,6 +108,11 @@
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
+
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
 	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv4/netfilter/iptable_mangle.c working-2.4.13-nfnonlinear/net/ipv4/netfilter/iptable_mangle.c
--- linux-2.4.13-official/net/ipv4/netfilter/iptable_mangle.c	Mon Oct  1 05:26:08 2001
+++ working-2.4.13-nfnonlinear/net/ipv4/netfilter/iptable_mangle.c	Mon Oct 29 13:23:57 2001
@@ -89,6 +89,10 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
 }
 
@@ -131,6 +135,11 @@
 	u_int8_t tos;
 	u_int32_t saddr, daddr;
 	unsigned long nfmark;
+
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv6/netfilter/ip6table_filter.c working-2.4.13-nfnonlinear/net/ipv6/netfilter/ip6table_filter.c
--- linux-2.4.13-official/net/ipv6/netfilter/ip6table_filter.c	Mon Oct  1 05:26:08 2001
+++ working-2.4.13-nfnonlinear/net/ipv6/netfilter/ip6table_filter.c	Mon Oct 29 13:32:01 2001
@@ -93,6 +93,10 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
 }
 
@@ -103,6 +107,10 @@
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 #if 0
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.13-official/net/ipv6/netfilter/ip6table_mangle.c working-2.4.13-nfnonlinear/net/ipv6/netfilter/ip6table_mangle.c
--- linux-2.4.13-official/net/ipv6/netfilter/ip6table_mangle.c	Mon Oct  1 05:26:09 2001
+++ working-2.4.13-nfnonlinear/net/ipv6/netfilter/ip6table_mangle.c	Mon Oct 29 13:34:39 2001
@@ -89,6 +89,10 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
 }
 
@@ -106,6 +110,10 @@
 	u_int8_t hop_limit;
 	u_int32_t flowlabel;
 
+	/* This stopgap cannot be removed until all the extensions are
+           audited. */
+	if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
+		return NF_DROP;
 #if 0
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)