[PoM PATCH 2/3] new connlimit for 2.6.14

Damon Gray dgray at speakeasy.org
Fri Nov 11 08:44:58 CET 2005


[PoM PATCH 2/3] new connlimit for 2.6.14

patched patch-o-matic-ng-20051109

This makes all the changes to ipt_connlimit.{c,h} and basically copies
the other PoM helper files to this directory. See first message for new
features.

Signed-off-by: Damon Gray <dgray at speakeasy.org>
-------------- next part --------------
diff -uprN patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/include/linux/netfilter_ipv4/ipt_connlimit.h patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/include/linux/netfilter_ipv4/ipt_connlimit.h
--- patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/include/linux/netfilter_ipv4/ipt_connlimit.h	Wed Dec 31 19:00:00 1969
+++ patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/include/linux/netfilter_ipv4/ipt_connlimit.h	Thu Nov 10 00:33:40 2005
@@ -0,0 +1,17 @@
+#ifndef _IPT_CONNLIMIT_H
+#define _IPT_CONNLIMIT_H
+
+struct ipt_connlimit_data;
+
+struct ipt_connlimit_info {
+	int limit;	/* connection count the match compares against */
+	int inverse;	/* non-zero: match when count <= limit instead of count > limit */
+	u_int32_t mask;	/* ANDed with the source address before hashing/counting; 0 = single bucket */
+	unsigned int hash_size; /* will be converted to next highest power of 2, 0 defaults to 256 */
+	struct ipt_connlimit_data *data;	/* kernel-side state, assigned by checkentry */
+	unsigned int timeout;   /* In seconds */
+	unsigned long connmark;	/* CONNMARK when timed out, 0=timeout */
+	unsigned long connmark_mask;
+	unsigned int shared_id; /* 0 means unused */
+};
+#endif /* _IPT_CONNLIMIT_H */
diff -uprN patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Kconfig.ladd patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Kconfig.ladd
--- patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Kconfig.ladd	Wed Dec 31 19:00:00 1969
+++ patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Kconfig.ladd	Thu May  6 08:52:37 2004
@@ -0,0 +1,9 @@
+config IP_NF_MATCH_CONNLIMIT
+	tristate  'Connections/IP limit match support'
+	depends on IP_NF_IPTABLES
+	help
+	  This match allows you to restrict the number of parallel TCP
+	  connections to a server per client IP address (or address block).
+	
+	  If you want to compile it as a module, say M here and read
+	  Documentation/modules.txt.  If unsure, say `N'.
diff -uprN patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Makefile.ladd patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Makefile.ladd
--- patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Makefile.ladd	Wed Dec 31 19:00:00 1969
+++ patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/Makefile.ladd	Thu May  6 08:52:37 2004
@@ -0,0 +1,2 @@
+obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
+obj-$(CONFIG_IP_NF_MATCH_CONNLIMIT) += ipt_connlimit.o
diff -uprN patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/ipt_connlimit.c patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/ipt_connlimit.c
--- patch-o-matic-ng-20051109/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/ipt_connlimit.c	Wed Dec 31 19:00:00 1969
+++ patch-o-matic-ng-changed/patchlets/connlimit/linux-2.6.14/net/ipv4/netfilter/ipt_connlimit.c	Thu Nov 10 00:32:57 2005
@@ -0,0 +1,757 @@
+/*
+ * netfilter module to limit the number of parallel tcp
+ * connections per IP address.
+ *   (c) 2000 Gerd Knorr <kraxel at bytesex.org>
+ *   Nov 2002: Martin Bene <martin.bene at icomedias.com>:
+ *		only ignore TIME_WAIT or gone connections
+ *
+ * based on ...
+ *
+ * Kernel module to match connection tracking information.
+ * GPL (C) 1999  Rusty Russell (rusty at rustcorp.com.au).
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+
+#define VERSION "2.2"
+
+#define DEBUG 0
+
+#if DEBUG
+#define DEBUGP(format, args...) printk("ipt_connlimit(%s): " format, \
+                                        __FUNCTION__ , ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+
+#if DEBUG
+#include "ipt_connlimit.h"
+const static char *tcp_state_names[] = { "none", "established", "syn_sent", "syn_recv",
+			     "fin_wait", "time_wait", "close", "close_wait",
+			     "last_ack", "listen" };
+#else
+#include <linux/netfilter_ipv4/ipt_connlimit.h>
+#endif
+
+static unsigned short gctimeout = 10;
+module_param(gctimeout, ushort, 0644);
+MODULE_PARM_DESC(gctimeout, "How often the connection garbage collector runs, in seconds.");
+
+// make sure gctimeout isn't 0, if it is then it means 1
+#define GC_TIMEOUT ((gctimeout ? gctimeout : 1) * HZ)
+
+MODULE_LICENSE("GPL");
+
+/* we'll save the tuples of all connections we care about */
+struct ipt_connlimit_conn {
+	struct list_head list;
+	//struct ip_conntrack_tuple tuple;
+	struct ip_conntrack *ct;
+};
+
+struct idle_ct {
+	unsigned long idle_time;
+	struct ipt_connlimit_conn *conn;
+	//struct list_head *conn_list;
+	/* Note that this ct ptr is reference counted while in
+	 * this list, so before freeing this entry you need to ip_conntrack_put(ct) */
+	//struct ip_conntrack *ct;
+	struct rb_node rb_node;
+};
+
+struct hash_stats {
+	unsigned int count;
+};
+
+struct ipt_connlimit_data {
+	spinlock_t lock;
+	unsigned int hash_size;
+	struct hash_stats *stats;
+	struct list_head *iphash;
+	struct timer_list gctimer;
+};
+
+struct ipt_connlimit_shared {
+	unsigned int id;
+	/* first iptables rule sets mask, each additional rule must
+	 * be the same mask otherwise the data would be skewed for each rule */
+	u_int32_t mask;
+	struct kref kref;
+	struct list_head list;
+	struct ipt_connlimit_data *data;
+};
+
+static spinlock_t ipt_connlimit_shared_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(ipt_connlimit_shared_list);
+
+/* all ipt_connlimit_conn structs come from this cache */
+static kmem_cache_t *conn_cache = NULL;
+/* all idle_ct structs come from this cache */
+static kmem_cache_t *idle_ct_cache = NULL;
+
+// The connection garbage collector, this runs every GC_TIMEOUT but
+// gets reset on every new connection because every new connection
+// also garbage collects in the same way
+void connection_gc(unsigned long data_addr) {
+	int i = 0;
+	unsigned int global_count = 0;
+	struct list_head *p, *n;
+	struct hash_stats *stats;
+	struct ipt_connlimit_conn *conn;
+	struct ipt_connlimit_data *data = (struct ipt_connlimit_data *)data_addr;
+	DEBUGP("hash_size=%u gctimeout=%u   %p\n", data->hash_size, gctimeout, data);
+	spin_lock_bh(&data->lock);
+	for (i = 0; i < data->hash_size; i++) {
+		stats = &data->stats[i];
+		// Only check this list if it has connections
+		if(stats->count == 0)
+			continue;
+
+		list_for_each_safe(p, n, &data->iphash[i]) {
+			conn = list_entry(p, struct ipt_connlimit_conn, list);
+			if(conn->ct == NULL ||
+				conn->ct->proto.tcp.state >= TCP_CONNTRACK_TIME_WAIT ||
+				atomic_read(&conn->ct->ct_general.use) == 1) {
+				DEBUGP("[%d]: delete use=%u src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d %s\n",
+					i, atomic_read(&conn->ct->ct_general.use),
+					NIPQUAD(conn->ct->tuplehash[0].tuple.src.ip), ntohs(conn->ct->tuplehash[0].tuple.src.u.tcp.port),
+					NIPQUAD(conn->ct->tuplehash[0].tuple.dst.ip), ntohs(conn->ct->tuplehash[0].tuple.dst.u.tcp.port),
+					tcp_state_names[conn->ct->proto.tcp.state]);
+				list_del(p);
+				stats->count--;
+				if(conn->ct)
+					ip_conntrack_put(conn->ct);
+				kmem_cache_free(conn_cache, conn);
+			}
+		}
+		global_count += stats->count;
+	}
+	// Only re-enable the gctimer if we have some connections
+	if(global_count > 0)
+		mod_timer(&data->gctimer, jiffies + GC_TIMEOUT);
+	spin_unlock_bh(&data->lock);
+}
+
+static struct ipt_connlimit_data *alloc_data(unsigned int hash_size,
+	int kmalloc_flags)
+{
+	struct ipt_connlimit_data *data = NULL;
+	int i;
+
+	data = kmalloc(sizeof(struct ipt_connlimit_data), kmalloc_flags);
+	if (data == NULL)
+		return NULL;
+	
+	data->stats = kmalloc(sizeof(struct hash_stats) * hash_size, kmalloc_flags);
+	if (data->stats == NULL) {
+		kfree(data);
+		return NULL;
+	}
+	data->iphash = kmalloc(sizeof(struct list_head) * hash_size, kmalloc_flags);
+	if (data->iphash == NULL) {
+		kfree(data->stats);
+		kfree(data);
+		return NULL;
+	}
+
+	data->hash_size = hash_size;
+
+	spin_lock_init(&data->lock);
+	init_timer(&data->gctimer);
+	for (i = 0; i < hash_size; i++) {
+		INIT_LIST_HEAD(&data->iphash[i]);
+		data->stats[i].count=0;
+	}
+
+	// The gctimer gets activated in count_them and then
+	// also inside the connection_gc function
+	data->gctimer.expires  = jiffies + GC_TIMEOUT;
+	data->gctimer.function = connection_gc;
+	data->gctimer.data     = (unsigned long)data;
+
+	return data;
+}
+
+void free_data(struct ipt_connlimit_data *data)
+{
+	/* Stop the gc timer, release every tracked connection, then free all of data. */
+	struct ipt_connlimit_conn *conn;
+	struct list_head *p, *n;
+	int i;
+
+	if (data == NULL)
+		return;
+
+	/* unconditional: timer_pending() is false while the handler is still running */
+	del_timer_sync(&data->gctimer);
+	spin_lock_bh(&data->lock);
+	for (i = 0; i < data->hash_size; i++) {
+		list_for_each_safe(p, n, &data->iphash[i]) {
+			conn = list_entry(p, struct ipt_connlimit_conn, list);
+			list_del(p);
+			if(conn->ct)
+				ip_conntrack_put(conn->ct);
+			kmem_cache_free(conn_cache, conn);
+		}
+	}
+	spin_unlock_bh(&data->lock);
+	kfree(data->stats);	/* both arrays are kmalloc'ed by alloc_data() */
+	kfree(data->iphash);
+	kfree(data);
+}
+
+static struct ipt_connlimit_shared *alloc_shared(
+	unsigned int hash_size,
+	int kmalloc_flags)
+{
+	struct ipt_connlimit_shared *shared = NULL;
+
+	shared = kmalloc(sizeof(struct ipt_connlimit_shared),kmalloc_flags);
+	if (shared == NULL)
+		return NULL;
+
+	shared->data=alloc_data(hash_size, kmalloc_flags);
+	if (shared->data == NULL) {
+		kfree(shared);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&shared->list);
+	/* reminder: kref_init sets refcount = 1 */
+	kref_init(&shared->kref);
+	return shared;
+}
+
+/*
+ * ipt_connlimit_shared_lock must be locked going into this
+ * and nobody should have a ref to shared->data
+ * this is really only called by kref_put which is called
+ * by put_shared_id
+ */
+void shared_release(struct kref *ref)
+{
+	struct ipt_connlimit_shared *shared = NULL;
+	shared = container_of(ref, struct ipt_connlimit_shared, kref);
+
+	DEBUGP("destroying shared id %u\n", shared->id);
+
+	free_data(shared->data);
+	list_del(&shared->list);
+	kfree(shared);
+}
+
+/*
+ * Drop one reference on the shared entry whose id matches; unknown ids are
+ * ignored.  Always returns 0 (TODO since 2.6.12: return what kref_put returns).
+ */
+int put_shared_id(unsigned int id)
+{
+	struct ipt_connlimit_shared *shared, *found = NULL;
+	struct list_head *lh;
+
+	if(!id)
+		return 0;
+
+	spin_lock_bh(&ipt_connlimit_shared_lock);
+	/* track the hit separately: after a full pass without a match the
+	 * cursor still points at the last entry, which must not be released */
+	list_for_each(lh,&ipt_connlimit_shared_list) {
+		shared=list_entry(lh,struct ipt_connlimit_shared, list);
+		if(shared->id == id) {
+			found = shared;
+			break;
+		}
+	}
+
+	if(found) {
+		DEBUGP("found shared id %u in put_shared_id refcount=%d\n", found->id,atomic_read(&found->kref.refcount));
+		kref_put(&found->kref,shared_release);
+	}
+	spin_unlock_bh(&ipt_connlimit_shared_lock);
+	return 0;
+}
+
+int put_shared(struct ipt_connlimit_shared *shared)
+{
+	if(shared == NULL)
+		return 0;
+
+	spin_lock_bh(&ipt_connlimit_shared_lock);
+	kref_put(&shared->kref,shared_release);
+	spin_unlock_bh(&ipt_connlimit_shared_lock);
+	return 0;
+}
+
+/*
+ * this returns a new or current shared data based on the given id
+ * if the returned shared data was not found and was created in
+ * this call and isnew isn't NULL then it will set *isnew = 1
+ */
+static struct ipt_connlimit_shared *get_shared_id(unsigned int id,
+	unsigned int hash_size,
+	int kmalloc_flags,
+	int *isnew)
+{
+	struct ipt_connlimit_shared *shared, *fresh;
+	struct list_head *lh;
+
+	if (!id)
+		return NULL;
+
+	/* kmalloc_flags may permit sleeping (check() passes GFP_KERNEL), so
+	 * allocate the candidate before taking the bh spinlock */
+	fresh = alloc_shared(hash_size, kmalloc_flags);
+	spin_lock_bh(&ipt_connlimit_shared_lock);
+	list_for_each(lh,&ipt_connlimit_shared_list) {
+		shared=list_entry(lh,struct ipt_connlimit_shared, list);
+		if(shared->id == id) {
+			kref_get(&shared->kref);
+			goto out;
+		}
+	}
+	shared = fresh;		/* no existing entry: publish ours (NULL if alloc failed) */
+	fresh = NULL;
+	if (shared != NULL) {
+		shared->id=id;
+		list_add(&shared->list, &ipt_connlimit_shared_list);
+		if (isnew != NULL)
+			*isnew=1;
+	}
+out:
+	spin_unlock_bh(&ipt_connlimit_shared_lock);
+	if (fresh != NULL)	/* id already existed: release the unused candidate */
+		put_shared(fresh);
+	return shared;
+}
+
+void idle_ct_rbinsert(struct rb_root *root, struct idle_ct *newict) {
+	struct rb_node **p=&root->rb_node;
+	struct rb_node *parent = NULL;
+	struct idle_ct *tmpict;
+
+	while(*p) {
+		parent = *p;
+		tmpict = rb_entry(parent, struct idle_ct, rb_node);
+		if(newict->idle_time <= tmpict->idle_time)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&newict->rb_node,parent, p);
+	rb_insert_color(&newict->rb_node,root);
+}
+
+/* this will timeout the longest idle first */
+int timeout_cts(const struct ipt_connlimit_info *info, struct rb_root *root, int count)
+{
+	struct rb_node *tmpnode;
+	struct idle_ct *idle_ct;
+	//struct ipt_connlimit_conn *conn;
+	int i=0;
+
+	if(count <= 0)
+		return 0;
+
+	tmpnode=rb_last(root);
+	do {
+		idle_ct=rb_entry(tmpnode, struct idle_ct, rb_node);
+
+		if(info->connmark) {
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+			unsigned long newmark = (idle_ct->conn->ct->mark & ~info->connmark_mask) | info->connmark;
+			if(newmark != idle_ct->conn->ct->mark)
+				idle_ct->conn->ct->mark=newmark;
+#endif
+		} else {
+			if (del_timer(&idle_ct->conn->ct->timeout))
+				idle_ct->conn->ct->timeout.function((unsigned long)idle_ct->conn->ct);
+		}
+
+		//conn = list_entry(idle_ct->conn_list, struct ipt_connlimit_conn, list);
+		list_del(&idle_ct->conn->list);
+		ip_conntrack_put(idle_ct->conn->ct);
+		kmem_cache_free(conn_cache, idle_ct->conn);
+
+		i++;
+		DEBUGP("timed out connection with idle_time=%lu\n", idle_ct->idle_time);
+#if DEBUG
+		if(info->connmark)
+			DEBUGP("set mark to %lu\n", info->connmark);
+#endif
+	} while( i < count && (tmpnode=rb_prev(tmpnode)));
+
+	return i;
+}
+
+void clear_idle_ct_rbtree(struct rb_root *root) {
+	struct rb_node *tmp,*prev;
+	struct idle_ct *ict;
+
+	tmp=rb_last(root);
+
+	while(tmp) {
+		ict=rb_entry(tmp, struct idle_ct, rb_node);
+		prev=rb_prev(tmp);
+		rb_erase(tmp,root);
+		//ip_conntrack_put(ict->ct);
+		kmem_cache_free(idle_ct_cache,ict);
+		tmp=prev;
+	}
+}
+
+/* XOR addr with itself shifted right by 8, 16 and 24 bits, keep the bits selected by hash_size - 1; hash_size must be a power of 2 */
+static inline unsigned ipt_iphash(const unsigned addr, const unsigned int hash_size)
+{
+	return ((addr ^ (addr >> 8) ^ (addr >> 16) ^ (addr >> 24)) & (hash_size - 1) );
+}
+/* Prune dead entries in addr's bucket, optionally time out idle ones, add ct; returns the match count or -1 on allocation failure. */
+static int count_them(const struct ipt_connlimit_info *info,
+	u_int32_t addr,
+	struct ip_conntrack *ct)
+{
+#if DEBUG
+	int i=0;
+#endif
+	int addit = 1, matches = 0;
+	struct ip_conntrack_tuple tuple;
+	//struct ip_conntrack_tuple_hash *found;
+	struct ipt_connlimit_conn *conn;
+	struct list_head *hash,*lh;
+	/* idle_cts_root holds a rbtree of struct idle_ct which
+	 * are conntrack entries that have been idle >= timeout */
+	struct rb_root idle_cts_root = RB_ROOT;
+	unsigned int idle_count=0;
+	unsigned long timeout = info->timeout * HZ;
+	unsigned long ct_tcp_estab_timeout =
+		*ip_ct_tcp_timeouts[TCP_CONNTRACK_ESTABLISHED];
+	unsigned int hash_ind=0;
+	struct hash_stats *stats;
+
+
+	spin_lock_bh(&info->data->lock);
+	tuple = ct->tuplehash[0].tuple;
+
+	if(info->mask != 0) {
+		hash_ind = ipt_iphash(addr & info->mask, info->data->hash_size);
+	}
+
+	hash = &info->data->iphash[hash_ind];
+	stats = &info->data->stats[hash_ind];
+
+	/* Don't bother doing timeouts if there isn't a chance
+	 * we are over limit */
+	if(stats->count < info->limit) {
+		DEBUGP("[%d] no timeout needed count=%u limit=%d\n", hash_ind, stats->count, info->limit);
+		timeout = 0;
+	}
+	
+	/* check the saved connections */
+	for (lh = hash->next; lh != hash; lh = lh->next) {
+		
+		struct ip_conntrack *found_ct = NULL;
+		conn = list_entry(lh, struct ipt_connlimit_conn, list);
+		found_ct = conn->ct;
+		// found = ip_conntrack_find_get(&conn->tuple, ct);
+		// if (found != NULL 
+		//     && (found_ct = tuplehash_to_ctrack(found)) != NULL
+		//     && 0 == memcmp(&conn->tuple,&tuple,sizeof(tuple)) 
+		//     && (found_ct->proto.tcp.state < TCP_CONNTRACK_TIME_WAIT)) {
+		//	// Just to be sure we have it only once in the list.
+		//	// We should'nt see tuples twice unless someone hooks this
+		//	// into a table without "-p tcp --syn"
+		//	addit = 0;
+		//	DEBUGP("[%d:%d]: in the special case\n",hash_ind,i);
+		//}
+
+		DEBUGP("[%d:%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d %s\n",
+		       hash_ind,i,
+		       NIPQUAD(found_ct->tuplehash[0].tuple.src.ip), ntohs(found_ct->tuplehash[0].tuple.src.u.tcp.port),
+		       NIPQUAD(found_ct->tuplehash[0].tuple.dst.ip), ntohs(found_ct->tuplehash[0].tuple.dst.u.tcp.port),
+		       (NULL != found_ct) ? tcp_state_names[found_ct->proto.tcp.state] : "gone");
+
+		if (NULL == found_ct ||
+			found_ct->proto.tcp.state >= TCP_CONNTRACK_TIME_WAIT ||
+			atomic_read(&found_ct->ct_general.use) == 1) {
+			/* we don't care about connections which are
+			   closed already or if it wasn't found -> ditch it */
+			DEBUGP("[%d:%d]: deleting entry\n", hash_ind,i++);
+			lh = lh->prev;
+			list_del(lh->next);
+			stats->count--;
+			kmem_cache_free(conn_cache, conn);
+			if(found_ct)
+				ip_conntrack_put(found_ct);
+			/* lets see if we can stop adding timeoutable entries */
+			if(stats->count < info->limit)
+				timeout = 0;
+			continue;
+		}
+
+		if (!info->mask || (addr & info->mask) == (found_ct->tuplehash[0].tuple.src.ip & info->mask)) {
+			/* same source IP address -> be counted! */
+			matches++;
+
+			if (timeout &&
+				(found_ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) &&
+				timer_pending(&found_ct->timeout)) {
+				/* TODO deal with possible jiffies wraparound */
+				unsigned long idle_time=ct_tcp_estab_timeout - (found_ct->timeout.expires - jiffies);
+
+				DEBUGP("[%d:%d] timeout=%lu idle_time=%lu\n", hash_ind, i, timeout, idle_time);
+
+				if(idle_time >= timeout) {
+					struct idle_ct *ict= kmem_cache_alloc(idle_ct_cache,GFP_ATOMIC);
+					if(ict == NULL) {
+						clear_idle_ct_rbtree(&idle_cts_root); /* free nodes queued so far */
+						spin_unlock_bh(&info->data->lock);
+						return -1;
+					}
+					ict->idle_time=idle_time;
+					ict->conn = conn;
+					//ict->conn_list=lh;
+					//ict->ct=found_ct;
+
+					idle_ct_rbinsert(&idle_cts_root, ict);
+
+					idle_count++;
+					DEBUGP("[%d:%d] inserting into rbtree idle_count=%d\n",
+						hash_ind, i++, idle_count);
+					/* we can't call ip_conntrack_put, so continue */
+					continue;
+				}
+			}
+		}
+		//DEBUGP("[%d:%d] ip_conntrack_put\n", hash_ind, i++);
+		//ip_conntrack_put(found_ct);
+	}
+	if (timeout && idle_count){
+		if(addit && matches >= info->limit) {
+			/* +1 because we need to add this new conn */
+			int num2timeout=(matches - info->limit) + 1;
+			/* we can only do this if we have enough idle conns */
+			if(num2timeout <= idle_count) {
+				DEBUGP("timing out %d idle conns idle_count=%d\n",
+					num2timeout, idle_count);
+				timeout_cts(info, &idle_cts_root, num2timeout);
+
+				/* assume num2timeout connections went away */
+				matches -= num2timeout;
+				stats->count -= num2timeout;
+			}
+		}
+	}
+
+	if(idle_count)
+		clear_idle_ct_rbtree(&idle_cts_root);
+
+	if (addit) {
+		// We are going to keep a copy, so lets up the use count because
+		// ip_conntrack_get doesn't do this for us
+		/* save the new connection in our list */
+		DEBUGP("addit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d new\n",
+		       hash_ind,
+		       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
+		       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
+
+		conn = kmem_cache_alloc(conn_cache,GFP_ATOMIC);
+		if (NULL == conn) {
+			spin_unlock_bh(&info->data->lock);
+			return -1;
+		}
+		memset(conn, 0, sizeof(*conn));
+		INIT_LIST_HEAD(&conn->list);
+		// Only increment use when we add a conntrack entry
+		atomic_inc(&ct->ct_general.use);
+		conn->ct = ct;
+		//conn->tuple = tuple;
+		list_add(&conn->list,hash);
+		stats->count++;
+		matches++;
+	}
+	// Lets make the garbage collector run at a later time now
+	mod_timer(&info->data->gctimer, jiffies + GC_TIMEOUT);
+	spin_unlock_bh(&info->data->lock);
+	return matches;
+}
+
+static int match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      int *hotdrop)
+{
+	const struct ipt_connlimit_info *info = matchinfo;
+	int connections, match;
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+
+
+	/* Note: ip_conntrack_get doesn't up the use count */
+	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+	if (NULL == ct) {
+		printk("ipt_connlimit: Oops: invalid ct state ?\n");
+		*hotdrop = 1;
+		return 0;
+	}
+	DEBUGP("-----------------------\n");
+	connections = count_them(info,skb->nh.iph->saddr,ct);
+	if (-1 == connections) {
+		printk("ipt_connlimit: Hmm, kmem_cache_alloc failed :-(\n");
+		*hotdrop = 1; /* let's free some memory :-) */
+		return 0;
+	}
+	match = (info->inverse) ? (connections <= info->limit) : (connections > info->limit);
+	DEBUGP("src=%u.%u.%u.%u mask=%u.%u.%u.%u "
+	       "connections=%d limit=%d match=%s\n",
+	       NIPQUAD(skb->nh.iph->saddr), NIPQUAD(info->mask),
+	       connections, info->limit, match ? "yes" : "no");
+
+	return match;
+}
+/* checkentry hook: validate the rule, normalise hash_size, attach private or shared data; returns 1 to accept, 0 to reject. */
+static int check(const char *tablename,
+		 const struct ipt_ip *ip,
+		 void *matchinfo,
+		 unsigned int matchsize,
+		 unsigned int hook_mask)
+{
+	struct ipt_connlimit_info *info = matchinfo;
+
+	/* verify size */
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connlimit_info))) {
+		DEBUGP(KERN_ERR "ipt_connlimit: matchsize (%u) != sizeof(struct ipt_connlimit_info) (%u)\n",
+			matchsize,IPT_ALIGN(sizeof(struct ipt_connlimit_info)));
+		return 0;
+	}
+
+	/* refuse anything but tcp */
+	if (ip->proto != IPPROTO_TCP) {
+		printk(KERN_ERR "ipt_connlimit: only -p tcp rules are allowed\n");
+		return 0;
+	}
+
+#if ! defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if(info->connmark) {
+		printk(KERN_ERR "ipt_connlimit: connmarks are not supported in this kernel\n");
+		return 0;
+	}
+#endif
+
+	if(info->hash_size == 0)
+		info->hash_size = 256;
+
+	if(info->mask == 0)
+		info->hash_size = 1;
+	else
+		info->hash_size = roundup_pow_of_two(info->hash_size);
+
+	/* init private data */
+	if(info->shared_id) {
+		int isnew=0;
+		struct ipt_connlimit_shared *shared =
+			get_shared_id(info->shared_id, info->hash_size, GFP_KERNEL, &isnew);
+		if(shared == NULL) {
+			printk(KERN_ERR "ipt_connlimit: unable to alloc shared id %u\n",
+				info->shared_id);
+			return 0;
+		}
+
+		DEBUGP("got shared id %u refcount=%d\n", shared->id,
+			atomic_read(&shared->kref.refcount));
+
+		if(isnew) {
+			shared->mask = info->mask;
+		} else if(shared->mask != info->mask) {
+			printk(KERN_ERR "ipt_connlimit: mask mismatch (%u != %u)"
+					" for existing shared id %u\n",
+					shared->mask, info->mask, info->shared_id);
+			put_shared(shared);
+			return 0;
+		}
+		info->data = shared->data;
+		info->hash_size = info->data->hash_size;
+	} else {
+		info->data = alloc_data(info->hash_size, GFP_KERNEL);
+		if(info->data == NULL) {
+			printk(KERN_ERR "ipt_connlimit: unable to alloc data\n");
+			return 0;	/* reject: match() would dereference the NULL data */
+		}
+	}
+
+	DEBUGP("limit=%d inverse=%d timeout=%u connmark=%lu/%lu shared_id=%u mask=%u.%u.%u.%u hash_size=%u data=%p\n",
+		info->limit,info->inverse,info->timeout,info->connmark,info->connmark_mask, info->shared_id,NIPQUAD(info->mask),info->hash_size, info->data);
+
+	return 1;
+}
+
+static void destroy(void *matchinfo, unsigned int matchinfosize)
+{
+	struct ipt_connlimit_info *info = matchinfo;
+
+	DEBUGP("limit=%d inverse=%d timeout=%u connmark=%lu/%lu shared_id=%u mask=%u.%u.%u.%u hash_size=%u data=%p\n",
+		info->limit,info->inverse,info->timeout,info->connmark,info->connmark_mask, info->shared_id,NIPQUAD(info->mask),info->hash_size, info->data);
+
+	if(info->shared_id) {
+		put_shared_id(info->shared_id);
+		info->data=NULL;
+	} else {
+		free_data(info->data);
+	}
+}
+
+static struct ipt_match connlimit_match = { 
+	.name = "connlimit",
+	.match = &match,
+	.checkentry = &check,
+	.destroy = &destroy,
+	.me = THIS_MODULE
+};
+
+static int __init init(void)
+{
+	int ret = -ENOMEM;	/* module init: create both slab caches, then register the match */
+
+	printk("ipt_connlimit: init v" VERSION " gctimeout=%u....\n", gctimeout);
+	conn_cache=kmem_cache_create("ipt_connlimit_conn",
+		sizeof(struct ipt_connlimit_conn),0,0,NULL,NULL);
+	if(conn_cache == NULL)
+		return ret;
+	/* this cache hands out struct idle_ct objects, so it must be sized for that type */
+	idle_ct_cache=kmem_cache_create("ipt_connlimit_idle_ct",
+		sizeof(struct idle_ct),0,0,NULL,NULL);
+	if(idle_ct_cache != NULL && (ret = ipt_register_match(&connlimit_match)) == 0)
+		return 0;
+	if(idle_ct_cache != NULL)	/* registration failed: release both caches */
+		kmem_cache_destroy(idle_ct_cache);
+	kmem_cache_destroy(conn_cache);
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	int kmem_ret=0;
+
+	DEBUGP("fini....\n");
+	/* unregister first so no rule can still use the caches while they are destroyed */
+	ipt_unregister_match(&connlimit_match);
+	if((kmem_ret=kmem_cache_destroy(conn_cache)))
+		printk(KERN_ERR "ipt_connlimit: Error destroying connection cache: %d\n",kmem_ret);
+	if((kmem_ret=kmem_cache_destroy(idle_ct_cache)))
+		printk(KERN_ERR "ipt_connlimit: Error destroying idle ct cache: %d\n",kmem_ret);
+}
+
+module_init(init);
+module_exit(fini);


More information about the netfilter-devel mailing list