[netfilter-cvslog] r3708 - in trunk/patch-o-matic-ng/nf_conntrack/linux-2.6: include/linux/netfilter net/netfilter

laforge at netfilter.org laforge at netfilter.org
Tue Feb 15 04:17:45 CET 2005


Author: laforge at netfilter.org
Date: 2005-02-15 04:17:44 +0100 (Tue, 15 Feb 2005)
New Revision: 3708

Modified:
   trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack.h
   trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_core.h
   trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_ftp.h
   trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_helper.h
   trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_core.c
   trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_ftp.c
   trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_standalone.c
Log:
[NETFILTER]: Simplify expect handling

Now we've changed expect handling, we can simplify it significantly.

1) struct ip_conntrack_expect only exists until the connection
   matching it is created.  Now NAT is done directly at the time the
   expectation is matched, we don't need to keep this information
   around.

2) The term 'master' is used everywhere to mean the connection that
   expected this connection.  The 'master' field in the new connection
   points straight to the master connection, and holds a reference.

3) There is no direct link from the connection to the expectations it
   has created: we walk the global list to find them if we need to
   clean them up.  Each expectation holds a reference.

4) The ip_conntrack_expect_tuple_lock is now a proper subset of
   ip_conntrack_lock, so we can eliminate it.

5) Remove flags from helper: the policy of evicting the oldest
   expectation seems to be appropriate for everyone.

6) ip_conntrack_expect_find_get() and ip_conntrack_expect_put() are no
   longer required.

7) Remove reference count from expectations, and don't free when we
   fail ip_conntrack_expect_related(): have user call
   ip_conntrack_expect_free().

Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>
Signed-off-by: David S. Miller <davem at davemloft.net>


Modified: trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack.h
===================================================================
--- trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack.h	2005-02-15 02:38:48 UTC (rev 3707)
+++ trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack.h	2005-02-15 03:17:44 UTC (rev 3708)
@@ -106,33 +106,19 @@
 	/* Internal linked list (global expectation list) */
 	struct list_head list;
 
-	/* reference count */
-	atomic_t use;
+	/* We expect this tuple, with the following mask */
+	struct nf_conntrack_tuple tuple, mask;
+ 
+	/* Function to call after setup and insertion */
+	void (*expectfn)(struct nf_conn *new,
+			 struct nf_conntrack_expect *this);
 
-	/* expectation list for this master */
-	struct list_head expected_list;
-
 	/* The conntrack of the master connection */
-	struct nf_conn *expectant;
+	struct nf_conn *master;
 
-	/* The conntrack of the sibling connection, set after
-	 * expectation arrived */
-	struct nf_conn *sibling;
-
-	/* Tuple saved for conntrack */
-	struct nf_conntrack_tuple ct_tuple;
-
 	/* Timer function; deletes the expectation. */
 	struct timer_list timeout;
 
-	/* Data filled out by the conntrack helpers follow: */
-
-	/* We expect this tuple, with the following mask */
-	struct nf_conntrack_tuple tuple, mask;
-
-	/* Function to call after setup and insertion */
-	void (*expectfn)(struct nf_conn *new);
-
 #ifdef CONFIG_NF_NAT_NEEDED
 	/* This is the original per-proto part, used to map the
 	 * expected connection the way the recipient expects. */
@@ -140,8 +126,6 @@
 	/* Direction relative to the master connection. */
 	enum nf_conntrack_dir dir;
 #endif
-  
-	union nf_conntrack_expect_proto proto;
 };
 
 struct nf_conntrack_counter
@@ -173,17 +157,12 @@
 	/* Accounting Information (same cache line as other written members) */
 	struct nf_conntrack_counter counters[NF_CT_DIR_MAX];
 #endif
-
-	/* If we're expecting another related connection, this will be
-           in expected linked list */
-	struct list_head sibling_list;
+	/* If we were expected by an expectation, this will be it */
+	struct nf_conn *master;
 	
 	/* Current number of expected connections */
 	unsigned int expecting;
 
-	/* If we were expected by an expectation, this will be it */
-	struct nf_conntrack_expect *master;
-
 	/* Helper. if any */
 	struct nf_conntrack_helper *helper;
 
@@ -210,7 +189,7 @@
 };
 
 /* get master conntrack via master expectation */
-#define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL)
+#define master_ct(conntr) (conntr->master)
 
 /* Alter reply tuple (maybe alter helper). */
 extern void
@@ -234,13 +213,6 @@
 /* decrement reference count on a conntrack */
 extern inline void nf_ct_put(struct nf_conn *ct);
 
-/* find unconfirmed expectation based on tuple */
-struct nf_conntrack_expect *
-nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple);
-
-/* decrement reference count on an expectation */
-void nf_conntrack_expect_put(struct nf_conntrack_expect *exp);
-
 /* call to create an explicit dependency on nf_conntrack. */
 extern void need_nf_conntrack(void);
 

Modified: trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_core.h
===================================================================
--- trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_core.h	2005-02-15 02:38:48 UTC (rev 3707)
+++ trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_core.h	2005-02-15 03:17:44 UTC (rev 3708)
@@ -67,5 +67,4 @@
 extern struct list_head *nf_conntrack_hash;
 extern struct list_head nf_conntrack_expect_list;
 DECLARE_RWLOCK_EXTERN(nf_conntrack_lock);
-DECLARE_RWLOCK_EXTERN(nf_conntrack_expect_tuple_lock);
 #endif /* _NF_CONNTRACK_CORE_H */

Modified: trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_ftp.h
===================================================================
--- trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_ftp.h	2005-02-15 02:38:48 UTC (rev 3707)
+++ trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_ftp.h	2005-02-15 03:17:44 UTC (rev 3708)
@@ -49,12 +49,11 @@
 
 /* For NAT to hook in when we find a packet which describes what other
  * connection we should expect. */
-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
-				       struct ip_conntrack *ct,
-				       enum ip_conntrack_info ctinfo,
-				       enum ip_ct_ftp_type type,
+extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+				       enum nf_conntrack_info ctinfo,
+				       enum nf_ct_ftp_type type,
 				       unsigned int matchoff,
 				       unsigned int matchlen,
-				       struct ip_conntrack_expect *exp,
+				       struct nf_conntrack_expect *exp,
 				       u32 *seq);
 #endif /* _NF_CONNTRACK_FTP_H */

Modified: trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_helper.h
===================================================================
--- trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_helper.h	2005-02-15 02:38:48 UTC (rev 3707)
+++ trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/include/linux/netfilter/nf_conntrack_helper.h	2005-02-15 03:17:44 UTC (rev 3708)
@@ -13,15 +13,11 @@
 
 struct module;
 
-/* Reuse expectation when max_expected reached */
-#define NF_CT_HELPER_F_REUSE_EXPECT	0x01
-
 struct nf_conntrack_helper
 {	
 	struct list_head list; 		/* Internal use. */
 
 	const char *name;		/* name of the module */
-	unsigned char flags;		/* Flags (see above) */
 	struct module *me;		/* pointer to self */
 	unsigned int max_expected;	/* Maximum number of concurrent 
 					 * expected connections */
@@ -45,11 +41,12 @@
 extern struct nf_conntrack_helper *nf_ct_find_helper(const struct nf_conntrack_tuple *tuple);
 
 /* Allocate space for an expectation: this is mandatory before calling
-   ip_conntrack_expect_related. */
+   nf_conntrack_expect_related. */
 extern struct nf_conntrack_expect *nf_conntrack_expect_alloc(void);
+extern void nf_conntrack_expect_free(struct nf_conntrack_expect *exp);
+
 /* Add an expected connection: can have more than one per connection */
-extern int nf_conntrack_expect_related(struct nf_conntrack_expect *exp,
-				       struct nf_conn *related_to);
+extern int nf_conntrack_expect_related(struct nf_conntrack_expect *exp);
 extern void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp);
 
 #endif /*_NF_CONNTRACK_HELPER_H*/

Modified: trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_core.c
===================================================================
--- trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_core.c	2005-02-15 02:38:48 UTC (rev 3707)
+++ trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_core.c	2005-02-15 03:17:44 UTC (rev 3708)
@@ -63,7 +63,6 @@
 #endif
 
 DECLARE_RWLOCK(nf_conntrack_lock);
-DECLARE_RWLOCK(nf_conntrack_expect_tuple_lock);
 
 /* nf_conntrack_standalone needs this */
 atomic_t nf_conntrack_count = ATOMIC_INIT(0);
@@ -355,128 +354,70 @@
 }
 
 /* nf_conntrack_expect helper functions */
-
-/* Compare tuple parts depending on mask. */
-static inline int expect_cmp(const struct nf_conntrack_expect *i,
-			     const struct nf_conntrack_tuple *tuple)
+static void destroy_expect(struct nf_conntrack_expect *exp)
 {
-	MUST_BE_READ_LOCKED(&nf_conntrack_expect_tuple_lock);
-	return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
-}
-
-static void
-destroy_expect(struct nf_conntrack_expect *exp)
-{
-	DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
-	NF_CT_ASSERT(atomic_read(&exp->use) == 0);
+	nf_conntrack_put(exp->master);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
-
 	kmem_cache_free(nf_conntrack_expect_cachep, exp);
 	NF_CT_STAT_INC(expect_delete);
 }
 
-inline void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
+static void unlink_expect(struct nf_conntrack_expect *exp)
 {
-	NF_CT_ASSERT(exp);
-
-	if (atomic_dec_and_test(&exp->use)) {
-		/* usage count dropped to zero */
-		destroy_expect(exp);
-	}
+	MUST_BE_WRITE_LOCKED(&nf_conntrack_lock);
+	list_del(&exp->list);
+	/* Logically in destroy_expect, but we hold the lock here. */
+	exp->master->expecting--;
 }
 
-static inline struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
+static void expectation_timed_out(unsigned long ul_expect)
 {
-	MUST_BE_READ_LOCKED(&nf_conntrack_lock);
-	MUST_BE_READ_LOCKED(&nf_conntrack_expect_tuple_lock);
-	return LIST_FIND(&nf_conntrack_expect_list, expect_cmp, 
-			 struct nf_conntrack_expect *, tuple);
-}
+	struct nf_conntrack_expect *exp = (void *)ul_expect;
 
-/* Find a expectation corresponding to a tuple. */
-struct nf_conntrack_expect *
-nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
-{
-	struct nf_conntrack_expect *exp;
-
-	READ_LOCK(&nf_conntrack_lock);
-	READ_LOCK(&nf_conntrack_expect_tuple_lock);
-	exp = __nf_ct_expect_find(tuple);
-	if (exp)
-		atomic_inc(&exp->use);
-	READ_UNLOCK(&nf_conntrack_expect_tuple_lock);
-	READ_UNLOCK(&nf_conntrack_lock);
-
-	return exp;
+	WRITE_LOCK(&nf_conntrack_lock);
+	unlink_expect(exp);
+	WRITE_UNLOCK(&nf_conntrack_lock);
+	destroy_expect(exp);
 }
 
-/* remove one specific expectation from all lists and drop refcount,
- * does _NOT_ delete the timer. */
-static void __unexpect_related(struct nf_conntrack_expect *expect)
+/* If an expectation for this connection is found, it gets deleted from
+ * the global list and is then returned. */
+static struct nf_conntrack_expect *
+find_expectation(const struct nf_conntrack_tuple *tuple)
 {
-	DEBUGP("unexpect_related(%p)\n", expect);
-	MUST_BE_WRITE_LOCKED(&nf_conntrack_lock);
+	struct nf_conntrack_expect *i;
 
-	/* we're not allowed to unexpect a confirmed expectation! */
-	NF_CT_ASSERT(!expect->sibling);
-
-	/* delete from global and local lists */
-	list_del(&expect->list);
-	list_del(&expect->expected_list);
-
-	/* decrement expect-count of master conntrack */
-	if (expect->expectant)
-		expect->expectant->expecting--;
-
-	nf_conntrack_expect_put(expect);
+	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+	/* If master is not in hash table yet (ie. packet hasn't left
+	   this machine yet), how can other end know about expected?
+	   Hence these are not the droids you are looking for (if
+	   master ct never got confirmed, we'd hold a reference to it
+	   and weird things would happen to future packets). */
+		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
+		    && is_confirmed(i->master)
+		    && (!i->timeout.function || del_timer(&i->timeout))) {
+			unlink_expect(i);
+			return i;
+		}
+	}
+	return NULL;
 }
 
-/* remove one specific expecatation from all lists, drop refcount
- * and expire timer. 
- * This function can _NOT_ be called for confirmed expects! */
-static void unexpect_related(struct nf_conntrack_expect *expect)
-{
-	NF_CT_ASSERT(expect->expectant);
-	NF_CT_ASSERT(expect->expectant->helper);
-	/* if we are supposed to have a timer, but we can't delete
-	 * it: race condition.  __unexpect_related will
-	 * be calledd by timeout function */
-	if (expect->expectant->helper->timeout && !del_timer(&expect->timeout))
-		return;
-
-	__unexpect_related(expect);
-}
-
 /* delete all unconfirmed expectations for this conntrack */
-static void remove_expectations(struct nf_conn *ct, int drop_refcount)
+static void remove_expectations(struct nf_conn *ct)
 {
-	struct list_head *exp_entry, *next;
-	struct nf_conntrack_expect *exp;
+	struct nf_conntrack_expect *i, *tmp;
 
-	DEBUGP("remove_expectations(%p)\n", ct);
+	/* Optimization: most connections never expect any others. */
+	if (ct->expecting == 0)
+		return;
 
-	list_for_each_safe(exp_entry, next, &ct->sibling_list) {
-		exp = list_entry(exp_entry, struct nf_conntrack_expect,
-				 expected_list);
-
-		/* we skip established expectations, as we want to delete
-		 * the un-established ones only */
-		if (exp->sibling) {
-			DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
-			if (drop_refcount) {
-				/* Indicate that this expectations parent is dead */
-				nf_ct_put(exp->expectant);
-				exp->expectant = NULL;
-			}
-			continue;
-		}
-
-		NF_CT_ASSERT(list_inlist(&nf_conntrack_expect_list, exp));
-		NF_CT_ASSERT(exp->expectant == ct);
-
-		/* delete expectation from global and private lists */
-		unexpect_related(exp);
+	list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
+		if (i->master == ct
+		    && (!i->timeout.function || del_timer(&i->timeout))) {
+			unlink_expect(i);
+			destroy_expect(i);
+ 		}
 	}
 }
 
@@ -494,7 +435,7 @@
 	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[NF_CT_DIR_REPLY]);
 
 	/* Destroy all un-established, pending expectations */
-	remove_expectations(ct, 1);
+	remove_expectations(ct);
 }
 
 static void
@@ -533,25 +474,15 @@
 
 	/* We overload first tuple to link into unconfirmed list. */
 	if (!is_confirmed(ct)) {
-		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+		BUG_ON(list_empty(&ct->tuplehash[NF_CT_DIR_ORIGINAL].list));
+		list_del(&ct->tuplehash[NF_CT_DIR_ORIGINAL].list);
 	}
 
-	/* Delete our master expectation */
-	if (ct->master) {
-		if (ct->master->expectant) {
-			/* can't call __unexpect_related here,
-			 * since it would screw up expect_list */
-			list_del(&ct->master->expected_list);
-			master = ct->master->expectant;
-		}
-		kmem_cache_free(nf_conntrack_expect_cachep, ct->master);
-	}
 	NF_CT_STAT_INC(delete);
 	WRITE_UNLOCK(&nf_conntrack_lock);
 
-	if (master)
-		nf_ct_put(master);
+	if (ct->master)
+		nf_ct_put(ct->master);
 
 	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
 	free_conntrack(ct);
@@ -665,7 +596,7 @@
 			  struct nf_conntrack_tuple_hash *,
 			  &ct->tuplehash[NF_CT_DIR_REPLY].tuple, NULL)) {
 		/* Remove from unconfirmed list */
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+		list_del(&ct->tuplehash[NF_CT_DIR_ORIGINAL].list);
 
 		list_prepend(&nf_conntrack_hash[hash],
 			     &ct->tuplehash[NF_CT_DIR_ORIGINAL]);
@@ -761,7 +692,7 @@
 	struct nf_conn *conntrack;
 	struct nf_conntrack_tuple repl_tuple;
 	size_t hash;
-	struct nf_conntrack_expect *expected;
+	struct nf_conntrack_expect *exp;
 	u_int32_t features = 0;
 	int helper_used = 0;
 
@@ -821,79 +752,44 @@
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
 
-	INIT_LIST_HEAD(&conntrack->sibling_list);
-
 	WRITE_LOCK(&nf_conntrack_lock);
-	/* Need finding and deleting of expected ONLY if we win race */
-	READ_LOCK(&nf_conntrack_expect_tuple_lock);
-	expected = LIST_FIND(&nf_conntrack_expect_list, expect_cmp,
-			     struct nf_conntrack_expect *, tuple);
-	READ_UNLOCK(&nf_conntrack_expect_tuple_lock);
 
-	if (expected) {
-		/* If master is not in hash table yet (ie. packet hasn't left
-		   this machine yet), how can other end know about expected?
-		   Hence these are not the droids you are looking for (if
-		   master ct never got confirmed, we'd hold a reference to it
-		   and weird things would happen to future packets). */
-		if (!is_confirmed(expected->expectant)) {
-			/* This avoids timing problem. helper may be unloaded
-			   after allocating conntrack */
-			if (helper_used)
-				conntrack->helper =
-					nf_ct_find_helper(&repl_tuple);
-			goto end;
-		}
+	exp = find_expectation(tuple);
 
-		/* Expectation is dying... */
-		if (expected->expectant->helper->timeout
-		    && !del_timer(&expected->timeout))
-			goto end;
-
+	if (exp) {
 		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
-			conntrack, expected);
+			conntrack, exp);
 		/* Welcome, Mr. Bond.  We've been expecting you... */
-		NF_CT_ASSERT(expected->expectant);
 		__set_bit(NF_S_EXPECTED_BIT, &conntrack->status);
-		conntrack->master = expected;
-		expected->sibling = conntrack;
+		conntrack->master = conntrack;
 #if CONFIG_NF_CONNTRACK_MARK
-		conntrack->mark = expected->expectant->mark;
+		conntrack->mark = exp->master->mark;
 #endif
-		LIST_DELETE(&nf_conntrack_expect_list, expected);
-		expected->expectant->expecting--;
-		nf_conntrack_get(&master_ct(conntrack)->ct_general);
-
-		/* this is a braindead... --pablo */
-		atomic_inc(&nf_conntrack_count);
-
-		/* Overload tuple linked list to put us in unconfirmed list. */
-		list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list,
-			 &unconfirmed);
-		WRITE_UNLOCK(&nf_conntrack_lock);
-
-		if (expected->expectfn)
-			expected->expectfn(conntrack);
-
+		nf_conntrack_get(&conntrack->master->ct_general);
 		NF_CT_STAT_INC(expect_new);
 
 		goto ret;
-	} else  {
+	} else {
 		/* This avoids timing problem. helper may be unloaded
 		   after allocating conntrack */
 		if (helper_used)
 			conntrack->helper = nf_ct_find_helper(&repl_tuple);
         }
 
-end:	
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+	list_add(&conntrack->tuplehash[NF_CT_DIR_ORIGINAL].list, &unconfirmed);
 
 	NF_CT_STAT_INC(new);
 	atomic_inc(&nf_conntrack_count);
 	WRITE_UNLOCK(&nf_conntrack_lock);
 
-ret:	return &conntrack->tuplehash[NF_CT_DIR_ORIGINAL];
+	if (exp) {
+		if (exp->expectfn)
+			exp->expectfn(conntrack, exp);
+		destroy_expect(exp);
+	}
+
+	return &conntrack->tuplehash[NF_CT_DIR_ORIGINAL];
 }
 
 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
@@ -1041,48 +937,42 @@
 						   orig->dst.protonum));
 }
 
-static inline int resent_expect(const struct nf_conntrack_expect *i,
-				const struct nf_conntrack_tuple *tuple,
-				const struct nf_conntrack_tuple *mask)
-{
-	DEBUGP("resent_expect\n");
-	DEBUGP("   tuple:   "); NF_CT_DUMP_TUPLE(&i->tuple);
-	DEBUGP("ct_tuple:   "); NF_CT_DUMP_TUPLE(&i->ct_tuple);
-	DEBUGP("test tuple: "); NF_CT_DUMP_TUPLE(tuple);
-	return (((i->ct_tuple.dst.protonum == 0 && nf_ct_tuple_equal(&i->tuple, tuple))
-		 || (i->ct_tuple.dst.protonum && nf_ct_tuple_equal(&i->ct_tuple, tuple)))
-		&& nf_ct_tuple_equal(&i->mask, mask));
-}
-
 /* Would two expected things clash? */
-static inline int expect_clash(const struct nf_conntrack_expect *i,
-			       const struct nf_conntrack_tuple *tuple,
-			       const struct nf_conntrack_tuple *mask)
+static inline int expect_clash(const struct nf_conntrack_expect *a,
+			       const struct nf_conntrack_expect *b)
 {
 	/* Part covered by intersection of masks must be unequal,
 	   otherwise they clash */
 	struct nf_conntrack_tuple intersect_mask;
 	int count;
 
-	intersect_mask.src.l3num = i->mask.src.l3num & mask->src.l3num;
-	intersect_mask.src.u.all = i->mask.src.u.all & mask->src.u.all;
-	intersect_mask.dst.u.all = i->mask.dst.u.all & mask->dst.u.all;
-	intersect_mask.dst.protonum = i->mask.dst.protonum
-					& mask->dst.protonum;
+	intersect_mask.src.l3num = a->mask.src.l3num & b->mask->src.l3num;
+	intersect_mask.src.u.all = a->mask.src.u.all & b->mask->src.u.all;
+	intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask->dst.u.all;
+	intersect_mask.dst.protonum = a->mask.dst.protonum
+					& b->mask->dst.protonum;
 
 	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
 		intersect_mask.src.u3.all[count] =
-			i->mask.src.u3.all[count] & mask->src.u3.all[count];
+			a->mask.src.u3.all[count] & b->mask->src.u3.all[count];
 	}
 
 	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
 		intersect_mask.dst.u3.all[count] =
-			i->mask.dst.u3.all[count] & mask->dst.u3.all[count];
+			a->mask.dst.u3.all[count] & b->mask->dst.u3.all[count];
 	}
 
-	return nf_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
+	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
 }
 
+static inline int expect_matches(const struct nf_conntrack_expect *a,
+				 const struct nf_conntrack_expect *b)
+{
+	return a->master == b->master
+		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
+		&& ip_ct_tuple_equal(&a->mask, &b->mask);
+}
+
 inline void nf_conntrack_unexpect_related(struct nf_conntrack_expect *expect)
 {
 	WRITE_LOCK(&nf_conntrack_lock);
@@ -1090,18 +980,26 @@
 	WRITE_UNLOCK(&nf_conntrack_lock);
 }
 	
-static void expectation_timed_out(unsigned long ul_expect)
+/* Generally a bad idea to call this: could have matched already. */
+void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
 {
-	struct nf_conntrack_expect *expect = (void *) ul_expect;
+	struct nf_conntrack_expect *i;
 
-	DEBUGP("expectation %p timed out\n", expect);	
 	WRITE_LOCK(&nf_conntrack_lock);
-	__unexpect_related(expect);
+	/* choose the oldest expectation to evict */
+	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+		if (expect_matches(i, exp)
+		    && (!i->timeout.function || del_timer(&i->timeout))) {
+			unlink_expect(i);
+			WRITE_UNLOCK(&nf_conntrack_lock);
+			destroy_expect(i);
+			return;
+		}
+	}
 	WRITE_UNLOCK(&nf_conntrack_lock);
 }
 
-struct nf_conntrack_expect *
-nf_conntrack_expect_alloc(void)
+struct nf_conntrack_expect *nf_conntrack_expect_alloc(void)
 {
 	struct nf_conntrack_expect *new;
 
@@ -1110,137 +1008,96 @@
 		DEBUGP("expect_related: OOM allocating expect\n");
 		return NULL;
 	}
+	new->master = NULL;
+	return new;
+}
 
-	/* tuple_cmp compares whole union, we have to initialized cleanly */
-	memset(new, 0, sizeof(struct nf_conntrack_expect));
-	atomic_set(&new->use, 1);
+void nf_conntrack_expect_free(struct nf_conntrack_expect *expect)
+{
+	kmem_cache_free(nf_conntrack_expect_cachep, expect);
+}
 
-	return new;
+static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
+{
+	atomic_inc(&exp->master->ct_general.use);
+	exp->master->expecting++;
+	list_add(&exp->list, &nf_conntrack_expect_list);
+
+	if (exp->master->helper->timeout) {
+		init_timer(&exp->timeout);
+		exp->timeout.data = (unsigned long)exp;
+		exp->timeout.function = expectation_timed_out;
+		exp->timeout.expires
+			= jiffies + exp->master->helper->timeout * HZ;
+		add_timer(&exp->timeout);
+	} else
+		exp->timeout.function = NULL;
+
+	NF_CT_STAT_INC(expect_create);
 }
 
-static void
-nf_conntrack_expect_insert(struct nf_conntrack_expect *new,
-			   struct nf_conn *related_to)
+/* Race with expectations being used means we could have none to find; OK. */
+static void evict_oldest_expect(struct nf_conn *master)
 {
-	DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
-	new->expectant = related_to;
-	new->sibling = NULL;
+	struct nf_conntrack_expect *i;
 
-	/* add to expected list for this connection */
-	list_add_tail(&new->expected_list, &related_to->sibling_list);
-	/* add to global list of expectations */
-	list_prepend(&nf_conntrack_expect_list, &new->list);
-	/* add and start timer if required */
-	if (related_to->helper->timeout) {
-		init_timer(&new->timeout);
-		new->timeout.data = (unsigned long)new;
-		new->timeout.function = expectation_timed_out;
-		new->timeout.expires = jiffies +
-					related_to->helper->timeout * HZ;
-		add_timer(&new->timeout);
+	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
+		if (i->master == master) {
+			if (!i->timeout.function || del_timer(&i->timeout)) {
+				unlink_expect(i);
+				destroy_expect(i);
+			}
+			break;
+		}
 	}
-	related_to->expecting++;
 }
 
-/* Add a related connection. */
-int nf_conntrack_expect_related(struct nf_conntrack_expect *expect,
-				struct nf_conn *related_to)
+static inline int refresh_timer(struct nf_conntrack_expect *i)
 {
-	struct nf_conntrack_expect *old;
-	int ret = 0;
+	if (!i->timeout.function)
+		return 1;
 
-	WRITE_LOCK(&nf_conntrack_lock);
-	/* Because of the write lock, no reader can walk the lists,
-	 * so there is no need to use the tuple lock too */
+	if (!del_timer(&i->timeout))
+		return 0;
 
+	i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
+	add_timer(&i->timeout);
+	return 1;
+}
+
+int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
+{
+	struct nf_conntrack_expect *i;
+	int ret;
+
 	DEBUGP("nf_conntrack_expect_related %p\n", related_to);
 	DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
 	DEBUGP("mask:  "); NF_CT_DUMP_TUPLE(&expect->mask);
 
-	old = LIST_FIND(&nf_conntrack_expect_list, resent_expect,
-			struct nf_conntrack_expect *, &expect->tuple, 
-			&expect->mask);
-	if (old && old->expectant == related_to) {
-		/* Helper private data may contain offsets but no pointers
-		   pointing into the payload - otherwise we should have to copy 
-		   the data filled out by the helper over the old one */
-		DEBUGP("expect_related: resent packet\n");
-		if (related_to->helper->timeout) {
-			if (!del_timer(&old->timeout)) {
-				/* expectation is dying. Fall through */
+	WRITE_LOCK(&nf_conntrack_lock);
+	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+		if (expect_matches(i, expect)) {
+			/* Refresh timer: if it's dying, ignore.. */
+			if (refresh_timer(i)) {
+				ret = 0;
+				/* We don't need the one they've given us. */
+				nf_conntrack_expect_free(expect);
 				goto out;
-			} else {
-				old->timeout.expires = jiffies + 
-					related_to->helper->timeout * HZ;
-				add_timer(&old->timeout);
 			}
+		} else if (expect_clash(i, expect)) {
+			ret = -EBUSY;
+			goto out;
 		}
-
-		WRITE_UNLOCK(&nf_conntrack_lock);
-		nf_conntrack_expect_put(expect);
-		return -EEXIST;
-
-	} else if (related_to->helper->max_expected && 
-		   related_to->expecting >= related_to->helper->max_expected) {
-		/* old == NULL */
-		if (!(related_to->helper->flags & 
-		      NF_CT_HELPER_F_REUSE_EXPECT)) {
-			WRITE_UNLOCK(&nf_conntrack_lock);
-		    	if (net_ratelimit())
-			    	printk(KERN_WARNING
-				       "nf_conntrack: max number of expected "
-				       "connections %i of %s reached for "
-				       "%u %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x"
-				       "->%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-				       related_to->helper->max_expected,
-				       related_to->helper->name,
-				       related_to->tuplehash[NF_CT_DIR_ORIGINAL].tuple.src.l3num,
-		    	       	       NIP6(*(struct in6_addr *)related_to->tuplehash[NF_CT_DIR_ORIGINAL].tuple.src.u3.all),
-		    	       	       NIP6(*(struct in6_addr *)related_to->tuplehash[NF_CT_DIR_ORIGINAL].tuple.dst.u3.all));
-			nf_conntrack_expect_put(expect);
-			return -EPERM;
-		}
-		DEBUGP("nf_conntrack: max number of expected "
-		       "connections %i of %s reached for "
-		       "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x"
-		       "->%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x, reusing\n",
-		       related_to->helper->max_expected,
-		       related_to->helper->name,
-		       NIP6(*(struct in6_addr *)related_to->tuplehash[NF_CT_DIR_ORIGINAL].tuple.src.u3.all),
-		       NIP6(*(struct in6_addr *)related_to->tuplehash[NF_CT_DIR_ORIGINAL].tuple.dst.u3.all));
-
-		/* choose the the oldest expectation to evict */
-		list_for_each_entry(old, &related_to->sibling_list,
-				    expected_list)
-			if (old->sibling == NULL)
-				break;
-
-		/* We cannot fail since related_to->expecting is the number
-		 * of unconfirmed expectations */
-		NF_CT_ASSERT(old && old->sibling == NULL);
-
-		/* newnat14 does not reuse the real allocated memory
-		 * structures but rather unexpects the old and
-		 * allocates a new.  unexpect_related will decrement
-		 * related_to->expecting. 
-		 */
-		unexpect_related(old);
-		ret = -EPERM;
-	} else if (LIST_FIND(&nf_conntrack_expect_list, expect_clash,
-			     struct nf_conntrack_expect *, &expect->tuple, 
-			     &expect->mask)) {
-		WRITE_UNLOCK(&nf_conntrack_lock);
-		DEBUGP("expect_related: busy!\n");
-
-		nf_conntrack_expect_put(expect);
-		return -EBUSY;
 	}
+	/* Will be over limit? */
+	if (expect->master->helper->max_expected && 
+	    expect->master->expecting >= expect->master->helper->max_expected)
+		evict_oldest_expect(expect->master);
 
-out:	nf_conntrack_expect_insert(expect, related_to);
-
+	nf_conntrack_expect_insert(expect);
+	ret = 0;
+out:
 	WRITE_UNLOCK(&nf_conntrack_lock);
-	NF_CT_STAT_INC(expect_create);
-
 	return ret;
 }
 
@@ -1257,7 +1114,7 @@
 	NF_CT_DUMP_TUPLE(newreply);
 
 	conntrack->tuplehash[NF_CT_DIR_REPLY].tuple = *newreply;
-	if (!conntrack->master && list_empty(&conntrack->sibling_list))
+	if (!conntrack->master && conntrack->expecting == 0)
 		conntrack->helper = nf_ct_find_helper(newreply);
 	WRITE_UNLOCK(&nf_conntrack_lock);
 }
@@ -1284,23 +1141,31 @@
 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 			 const struct nf_conntrack_helper *me)
 {
-	if (i->ctrack->helper == me) {
-		/* Get rid of any expected. */
-		remove_expectations(i->ctrack, 0);
-		/* And *then* set helper to NULL */
+	if (i->ctrack->helper == me)
 		i->ctrack->helper = NULL;
-	}
 	return 0;
 }
 
 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 {
 	unsigned int i;
+	struct nf_conntrack_expect *exp, *tmp;
 
 	/* Need write lock here, to delete helper. */
 	WRITE_LOCK(&nf_conntrack_lock);
 	LIST_DELETE(&helpers, me);
 
+	/* Get rid of expectations */
+	list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
+		if (exp->master->helper == me) {
+			if (!exp->timeout.function
+			    || del_timer(&exp->timeout)) {
+				unlink_expect(exp);
+				destroy_expect(exp);
+			}
+		}
+	}
+
 	/* Get rid of expecteds, set helpers to NULL. */
 	LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
 	for (i = 0; i < nf_conntrack_htable_size; i++)

Modified: trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_ftp.c
===================================================================
--- trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_ftp.c	2005-02-15 02:38:48 UTC (rev 3707)
+++ trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_ftp.c	2005-02-15 03:17:44 UTC (rev 3708)
@@ -49,12 +49,11 @@
 module_param(loose, int, 0600);
 
 unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
-				struct ip_conntrack *ct,
-				enum ip_conntrack_info ctinfo,
+				enum nf_conntrack_info ctinfo,
 				enum ip_ct_ftp_type type,
 				unsigned int matchoff,
 				unsigned int matchlen,
-				struct ip_conntrack_expect *exp,
+				struct nf_conntrack_expect *exp,
 				u32 *seq);
 EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
 
@@ -476,7 +475,7 @@
 	/* Look up to see if we're just after a \n. */
 	if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
 		/* Now if this ends in \n, update ftp info. */
-		DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
+		DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
 		       ct_ftp_info->seq_aft_nl[0][dir] 
 		       old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
 		ret = NF_ACCEPT;
@@ -558,7 +557,7 @@
 		   networks, or the packet filter itself). */
 		if (!loose) {
 			ret = NF_ACCEPT;
-			nf_conntrack_expect_put(exp);
+			nf_conntrack_expect_free(exp);
 			goto out_update_nl;
 		}
 	}
@@ -590,17 +589,19 @@
 	exp->mask.dst.protonum = 0xFFFF;
 
 	exp->expectfn = NULL;
+	exp->master = ct;
 
 	/* Now, NAT might want to mangle the packet, and register the
 	 * (possibly changed) expectation itself. */
 	if (nf_nat_ftp_hook)
-		ret = nf_nat_ftp_hook(pskb, ct, ctinfo, search[i].ftptype,
+		ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
 				      matchoff, matchlen, exp, &seq);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
-		if (ip_conntrack_expect_related(exp, ct) != 0)
+		if (nf_conntrack_expect_related(exp) != 0) {
+			nf_conntrack_expect_free(exp);
 			ret = NF_DROP;
-		else
+		} else
 			ret = NF_ACCEPT;
 	}
 
@@ -652,7 +653,6 @@
 			ftp[i][j].mask.dst.protonum = 0xFFFF;
 			ftp[i][j].max_expected = 1;
 			ftp[i][j].timeout = 0;
-			ftp[i][j].flags = NF_CT_HELPER_F_REUSE_EXPECT;
 			ftp[i][j].me = nf_conntrack_ftp;
 			ftp[i][j].help = help;
 			tmpname = &ftp_names[i][j][0];

Modified: trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_standalone.c
===================================================================
--- trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_standalone.c	2005-02-15 02:38:48 UTC (rev 3707)
+++ trunk/patch-o-matic-ng/nf_conntrack/linux-2.6/net/netfilter/nf_conntrack_standalone.c	2005-02-15 03:17:44 UTC (rev 3708)
@@ -225,7 +225,6 @@
 	/* strange seq_file api calls stop even if we fail,
 	 * thus we need to grab lock since stop unlocks */
 	READ_LOCK(&nf_conntrack_lock);
-	READ_LOCK(&nf_conntrack_expect_tuple_lock);
 
 	if (list_empty(e))
 		return NULL;
@@ -252,7 +251,6 @@
 
 static void exp_seq_stop(struct seq_file *s, void *v)
 {
-	READ_UNLOCK(&nf_conntrack_expect_tuple_lock);
 	READ_UNLOCK(&nf_conntrack_lock);
 }
 
@@ -260,13 +258,12 @@
 {
 	struct nf_conntrack_expect *expect = v;
 
-	if (expect->expectant->helper->timeout)
+	if (expect->timeout.function)
 		seq_printf(s, "%lu ", timer_pending(&expect->timeout)
 			   ? (expect->timeout.expires - jiffies)/HZ : 0);
 	else
 		seq_printf(s, "- ");
-	seq_printf(s, "use=%u l3proto = %u proto=%u ",
-		   atomic_read(&expect->use),
+	seq_printf(s, "l3proto = %u proto=%u ",
 		   expect->tuple.src.l3num,
 		   expect->tuple.dst.protonum);
 	print_tuple(s, &expect->tuple,
@@ -804,10 +801,9 @@
 EXPORT_SYMBOL(nf_ct_l3protos);
 EXPORT_SYMBOL(nf_ct_find_helper);
 EXPORT_SYMBOL(nf_conntrack_expect_alloc);
+EXPORT_SYMBOL(nf_conntrack_expect_free);
 EXPORT_SYMBOL(nf_conntrack_expect_related);
 EXPORT_SYMBOL(nf_conntrack_unexpect_related);
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
 EXPORT_SYMBOL(nf_conntrack_tuple_taken);
 EXPORT_SYMBOL(nf_conntrack_htable_size);
 EXPORT_SYMBOL(nf_conntrack_expect_list);




More information about the netfilter-cvslog mailing list