[PATCH] aggressive early_drop and reserved conntrack entries

Jozsef Kadlecsik kadlec at blackhole.kfki.hu
Thu Dec 9 09:34:34 CET 2004


Hi,

The included patch addresses the following issues:

- When the conntrack table is full, we search only a single hash
  bucket for an entry to drop. We are in trouble anyway, so let's search
  harder for droppable entries: the patch extends the search to at most
  one third of all the buckets.
- If the conntrack table is full, the remote management of the machine
  becomes a little bit complicated :-). The patch adds the ability to
  reserve a given number of entries to be used for management connections.
  The following proc entries are added to /proc/sys/net/ipv4/netfilter:

  ip_conntrack_reserved		the number of reserved connections

  ip_conntrack_reserved_mark	mark value to match

  ip_conntrack_reserved_mask	mask applied to the packet mark before matching

  Example:

  # Let's reserve 3 conntrack entries
  echo 3 > /proc/sys/net/ipv4/netfilter/ip_conntrack_reserved
  # Set mark value; mask is left at the default, 0xffffffff
  echo 1 > /proc/sys/net/ipv4/netfilter/ip_conntrack_reserved_mark
  # Mark connection-initiating packets in raw table to use the reserved
  # entries when the table is full, i.e. ip_conntrack_count >=
  # ip_conntrack_max - ip_conntrack_reserved
  iptables -t raw -A PREROUTING -s <management station> -d <firewall> \
	  	  -p tcp --dport 22 -m mark --mark 1 -j ACCEPT

Best regards,
Jozsef
-
E-mail  : kadlec at blackhole.kfki.hu, kadlec at sunserv.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
          H-1525 Budapest 114, POB. 49, Hungary
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/include/linux/sysctl.h linux-2.6.9-early_drop/include/linux/sysctl.h
--- linux-2.6.9-orig/include/linux/sysctl.h	2004-10-18 23:54:31.000000000 +0200
+++ linux-2.6.9-early_drop/include/linux/sysctl.h	2004-12-09 05:27:02.000000000 +0100
@@ -426,6 +426,9 @@
  	NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25,
  	NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26,
 	NET_IPV4_NF_CONNTRACK_COUNT=27,
+	NET_IPV4_NF_CONNTRACK_RESERVED=28,
+	NET_IPV4_NF_CONNTRACK_RESERVED_MARK=29,
+	NET_IPV4_NF_CONNTRACK_RESERVED_MASK=30,
 };

 /* /proc/sys/net/ipv6 */
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_core.c	2004-10-18 23:53:05.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_core.c	2004-12-09 05:42:19.000000000 +0100
@@ -76,6 +76,11 @@
 struct ip_conntrack ip_conntrack_untracked;
 unsigned int ip_ct_log_invalid;

+/* Number of reserved conntrack entries; mark value; mask */
+int ip_ct_reserved = 0;
+unsigned long ip_ct_reserved_mark = 0;
+unsigned long ip_ct_reserved_mask = 0xffffffff;
+
 DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

 inline void
@@ -468,16 +473,32 @@
 	return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
 }

-static int early_drop(struct list_head *chain)
+static int early_drop(struct sk_buff *skb, unsigned int hash)
 {
-	/* Traverse backwards: gives us oldest, which is roughly LRU */
-	struct ip_conntrack_tuple_hash *h;
-	int dropped = 0;
+	struct ip_conntrack_tuple_hash *h = NULL;
+	int dropped = 1;
+	unsigned int i, bucket;
+
+	if (ip_ct_reserved == 0
+	    || atomic_read(&ip_conntrack_count) >= ip_conntrack_max)
+		goto not_reserved;

+	/* Let through reserved connections */
+	if ((skb->nfmark & ip_ct_reserved_mask) == ip_ct_reserved_mark)
+		return dropped;
+
+    not_reserved:
+    	dropped = 0;
 	READ_LOCK(&ip_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
-	if (h)
-		atomic_inc(&h->ctrack->ct_general.use);
+	/* Try hard but not fully: magic number is three */
+	for (i = 0; h == NULL && i < ip_conntrack_htable_size/3; i++) {
+		bucket = (i + hash) % ip_conntrack_htable_size;
+		/* Traverse backwards: gives us oldest, which is roughly LRU */
+		h = LIST_FIND_B(&ip_conntrack_hash[bucket], unreplied,
+				struct ip_conntrack_tuple_hash *);
+		if (h)
+			atomic_inc(&h->ctrack->ct_general.use);
+	}
 	READ_UNLOCK(&ip_conntrack_lock);

 	if (!h)
@@ -525,9 +546,10 @@
 	hash = hash_conntrack(tuple);

 	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
-		/* Try dropping from this hash chain. */
-		if (!early_drop(&ip_conntrack_hash[hash])) {
+	    && atomic_read(&ip_conntrack_count)
+	       >= (ip_conntrack_max - ip_ct_reserved)) {
+		/* Try dropping starting from this hash chain. */
+		if (!early_drop(skb, hash)) {
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "ip_conntrack: table full, dropping"
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_standalone.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_standalone.c	2004-10-18 23:54:07.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_standalone.c	2004-12-09 05:47:04.000000000 +0100
@@ -481,6 +481,14 @@
 /* From ip_conntrack_core.c */
 extern int ip_conntrack_max;
 extern unsigned int ip_conntrack_htable_size;
+extern int ip_ct_reserved;
+extern unsigned long ip_ct_reserved_mask;
+extern unsigned long ip_ct_reserved_mark;
+
+/* Static boundaries */
+static int reserved_min = 0;
+static int conntrack_max = INT_MAX;
+

 /* From ip_conntrack_proto_tcp.c */
 extern unsigned long ip_ct_tcp_timeout_syn_sent;
@@ -519,7 +527,9 @@
 		.data		= &ip_conntrack_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &ip_ct_reserved,
+		.extra2		= &conntrack_max,
 	},
 	{
 		.ctl_name	= NET_IPV4_NF_CONNTRACK_COUNT,
@@ -676,6 +686,32 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_RESERVED,
+		.procname	= "ip_conntrack_reserved",
+		.data		= &ip_ct_reserved,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &reserved_min,
+		.extra2		= &ip_conntrack_max,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_RESERVED_MARK,
+		.procname	= "ip_conntrack_reserved_mark",
+		.data		= &ip_ct_reserved_mark,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
+	{
+		.ctl_name	= NET_IPV4_NF_CONNTRACK_RESERVED_MASK,
+		.procname	= "ip_conntrack_reserved_mask",
+		.data		= &ip_ct_reserved_mask,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+	},
 	{ .ctl_name = 0 }
 };




More information about the netfilter-devel mailing list