[PATCH] aggressive early_drop and reserved conntrack entries

Jozsef Kadlecsik kadlec at blackhole.kfki.hu
Mon Dec 13 13:14:32 CET 2004


On Sat, 11 Dec 2004, Jozsef Kadlecsik wrote:

> On Sat, 11 Dec 2004, Martin Josefsson wrote:
> > @@ -1107,6 +1117,10 @@
> >  			add_timer(&ct->timeout);
> >  		}
> >  		ct_add_counters(ct, ctinfo, skb);
> > +		if (set_assured) {
> > +			set_bit(IPS_ASSURED_BIT, &ct->status);
> > +			list_del(&ct->unassured);
> > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> > Huh? No check to see if we already are assured or not?
> > Not needed for icmp, tcp or sctp, but udp and the generic handler need it.
>
> Oops, absolutely correct, I tested the code with TCP only. Fortunately
> it's easy to fix :-)

Attached is the patch which takes into account that the assured bit might
already be set (and thus we are no longer on the unassured list).

Best regards,
Jozsef
-
E-mail  : kadlec at blackhole.kfki.hu, kadlec at sunserv.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
          H-1525 Budapest 114, POB. 49, Hungary
-------------- next part --------------
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.9-early_drop/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.6.9-orig/include/linux/netfilter_ipv4/ip_conntrack.h	2004-10-18 23:55:21.000000000 +0200
+++ linux-2.6.9-early_drop/include/linux/netfilter_ipv4/ip_conntrack.h	2004-12-09 16:08:29.000000000 +0100
@@ -196,6 +196,9 @@
 	/* Helper, if any. */
 	struct ip_conntrack_helper *helper;
 
+	/* List of unassured connections */
+	struct list_head unassured;
+
 	/* Storage reserved for other modules: */
 	union ip_conntrack_proto proto;
 
@@ -260,7 +263,8 @@
 extern void ip_ct_refresh_acct(struct ip_conntrack *ct,
 			       enum ip_conntrack_info ctinfo,
 			       const struct sk_buff *skb,
-			       unsigned long extra_jiffies);
+			       unsigned long extra_jiffies,
+			       int set_assured);
 
 /* These are for NAT.  Icky. */
 /* Update TCP window tracking data when NAT mangles the packet */
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_core.c	2004-10-18 23:53:05.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_core.c	2004-12-12 13:20:33.000000000 +0100
@@ -66,6 +66,7 @@
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
+static LIST_HEAD(unassured_list);
 struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
 static LIST_HEAD(helpers);
 unsigned int ip_conntrack_htable_size = 0;
@@ -313,6 +314,10 @@
 		}
 		kmem_cache_free(ip_conntrack_expect_cachep, ct->master);
 	}
+	/* Delete from the list of unassured conntracks */
+	if (!test_bit(IPS_ASSURED_BIT, &ct->status)
+	    && test_bit(IPS_CONFIRMED_BIT, &ct->status))
+		list_del(&ct->unassured);
 	WRITE_UNLOCK(&ip_conntrack_lock);
 
 	if (master)
@@ -436,6 +441,7 @@
 		add_timer(&ct->timeout);
 		atomic_inc(&ct->ct_general.use);
 		set_bit(IPS_CONFIRMED_BIT, &ct->status);
+		list_add_tail(&ct->unassured, &unassured_list);
 		WRITE_UNLOCK(&ip_conntrack_lock);
 		CONNTRACK_STAT_INC(insert);
 		return NF_ACCEPT;
@@ -461,34 +467,36 @@
 	return h != NULL;
 }
 
-/* There's a small race here where we may free a just-assured
-   connection.  Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
+static int early_drop(void)
 {
-	return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
-}
-
-static int early_drop(struct list_head *chain)
-{
-	/* Traverse backwards: gives us oldest, which is roughly LRU */
-	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct = NULL;
+	struct list_head *entry;
 	int dropped = 0;
 
+    	/* There's a small race here where we may free a just-assured
+	   connection.  Too bad: we're in trouble anyway. */
 	READ_LOCK(&ip_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
-	if (h)
-		atomic_inc(&h->ctrack->ct_general.use);
+	__list_for_each(entry, &unassured_list) {
+		ct = list_entry(entry,
+			        struct ip_conntrack, unassured);
+		atomic_inc(&ct->ct_general.use);
+		break;
+	}
 	READ_UNLOCK(&ip_conntrack_lock);
 
-	if (!h)
+	if (!ct)
 		return dropped;
 
-	if (del_timer(&h->ctrack->timeout)) {
-		death_by_timeout((unsigned long)h->ctrack);
+	if (del_timer(&ct->timeout)) {
+		death_by_timeout((unsigned long)ct);
 		dropped = 1;
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "ip_conntrack: table full, dropping"
+			       " unassured connection.\n");
 		CONNTRACK_STAT_INC(early_drop);
 	}
-	ip_conntrack_put(h->ctrack);
+	ip_conntrack_put(ct);
 	return dropped;
 }
 
@@ -526,8 +534,8 @@
 
 	if (ip_conntrack_max
 	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
-		/* Try dropping from this hash chain. */
-		if (!early_drop(&ip_conntrack_hash[hash])) {
+		/* Try to drop unassured connection. */
+		if (!early_drop()) {
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "ip_conntrack: table full, dropping"
@@ -564,6 +572,7 @@
 	conntrack->timeout.function = death_by_timeout;
 
 	INIT_LIST_HEAD(&conntrack->sibling_list);
+	INIT_LIST_HEAD(&conntrack->unassured);
 
 	WRITE_LOCK(&ip_conntrack_lock);
 	/* Need finding and deleting of expected ONLY if we win race */
@@ -1091,7 +1100,8 @@
 void ip_ct_refresh_acct(struct ip_conntrack *ct, 
 		        enum ip_conntrack_info ctinfo,
 			const struct sk_buff *skb,
-			unsigned long extra_jiffies)
+			unsigned long extra_jiffies,
+			int set_assured)
 {
 	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
 
@@ -1107,6 +1117,11 @@
 			add_timer(&ct->timeout);
 		}
 		ct_add_counters(ct, ctinfo, skb);
+		if (set_assured 
+		    && !test_bit(IPS_ASSURED_BIT, &ct->status)) {
+			set_bit(IPS_ASSURED_BIT, &ct->status);
+			list_del(&ct->unassured);
+		}
 		WRITE_UNLOCK(&ip_conntrack_lock);
 	}
 }
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_generic.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2004-10-18 23:53:46.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_generic.c	2004-12-09 16:01:30.000000000 +0100
@@ -52,7 +52,8 @@
 		  const struct sk_buff *skb,
 		  enum ip_conntrack_info ctinfo)
 {
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout,
+			   CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY);
 	return NF_ACCEPT;
 }
 
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2004-10-18 23:53:06.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_icmp.c	2004-12-09 15:34:32.000000000 +0100
@@ -102,7 +102,7 @@
 			ct->timeout.function((unsigned long)ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
-		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout, 0);
 	}
 
 	return NF_ACCEPT;
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c	2004-10-18 23:55:07.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_sctp.c	2004-12-09 15:37:52.000000000 +0100
@@ -319,6 +319,7 @@
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
 	char map[256 / sizeof (char)] = {0};
+	int set_assured = 0;
 
 	DEBUGP(__FUNCTION__);
 	DEBUGP("\n");
@@ -408,14 +409,14 @@
 		WRITE_UNLOCK(&sctp_lock);
 	}
 
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
-
 	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
 		&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
 		&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
 		DEBUGP("Setting assured bit\n");
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+		set_assured = 1;
 	}
+	ip_ct_refresh_acct(conntrack, ctinfo, skb,
+			   *sctp_timeouts[newconntrack], set_assured);
 
 	return NF_ACCEPT;
 }
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2004-10-18 23:55:29.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2004-12-09 15:36:36.000000000 +0100
@@ -834,6 +834,7 @@
 	struct tcphdr *th, _tcph;
 	unsigned long timeout;
 	unsigned int index;
+	int set_assured = 0;
 	
 	th = skb_header_pointer(skb, iph->ihl * 4,
 				sizeof(_tcph), &_tcph);
@@ -962,9 +963,9 @@
 		/* Set ASSURED if we see see valid ack in ESTABLISHED 
 		   after SYN_RECV or a valid answer for a picked up 
 		   connection. */
-			set_bit(IPS_ASSURED_BIT, &conntrack->status);
+		set_assured = 1;
 	}
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+	ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout, set_assured);
 
 	return NF_ACCEPT;
 }
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2004-10-18 23:53:05.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_udp.c	2004-12-09 15:35:16.000000000 +0100
@@ -70,12 +70,12 @@
 	/* If we've seen traffic both ways, this is some kind of UDP
 	   stream.  Extend timeout. */
 	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, 
-				   ip_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+		ip_ct_refresh_acct(conntrack, ctinfo, skb, 
+				   ip_ct_udp_timeout_stream, 1);
 	} else
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+		ip_ct_refresh_acct(conntrack, ctinfo, skb,
+				   ip_ct_udp_timeout, 0);
 
 	return NF_ACCEPT;
 }


More information about the netfilter-devel mailing list