[PATCH] aggressive early_drop and reserved conntrack entries
Jozsef Kadlecsik
kadlec at blackhole.kfki.hu
Fri Dec 10 23:27:26 CET 2004
On Thu, 9 Dec 2004, Jozsef Kadlecsik wrote:
> On Thu, 9 Dec 2004, Patrick Schaaf wrote:
>
> > This suggests, to me, that we keep unreplied connections on a new,
> > additional list. They are put there at the HEAD upon creation,
> > they are removed from the list when they make their transition
> > to assured. And early_drop becomes a simple, O(1) operation:
> > reap the connection which is at the TAIL of this new list.
>
> But I like it! Hmm, expect some new code soon...
Attached (;-) is the new patch, which implements the list of unassured
connections. (Reserving conntracks is dropped completely as unnecessary.)
I tested it slightly and it seems to work fine. What do you think about it?
Best regards,
Jozsef
-
E-mail : kadlec at blackhole.kfki.hu, kadlec at sunserv.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
H-1525 Budapest 114, POB. 49, Hungary
-------------- next part --------------
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.9-early_drop/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.6.9-orig/include/linux/netfilter_ipv4/ip_conntrack.h 2004-10-18 23:55:21.000000000 +0200
+++ linux-2.6.9-early_drop/include/linux/netfilter_ipv4/ip_conntrack.h 2004-12-09 16:08:29.000000000 +0100
@@ -196,6 +196,9 @@
/* Helper, if any. */
struct ip_conntrack_helper *helper;
+ /* List of unassured connections */
+ struct list_head unassured;
+
/* Storage reserved for other modules: */
union ip_conntrack_proto proto;
@@ -260,7 +263,8 @@
extern void ip_ct_refresh_acct(struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
- unsigned long extra_jiffies);
+ unsigned long extra_jiffies,
+ int set_assured);
/* These are for NAT. Icky. */
/* Update TCP window tracking data when NAT mangles the packet */
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_core.c 2004-10-18 23:53:05.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_core.c 2004-12-10 21:32:46.000000000 +0100
@@ -66,6 +66,7 @@
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
+static LIST_HEAD(unassured_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size = 0;
@@ -313,6 +314,10 @@
}
kmem_cache_free(ip_conntrack_expect_cachep, ct->master);
}
+ /* Delete from the list of unassured conntracks */
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status)
+ && test_bit(IPS_CONFIRMED_BIT, &ct->status))
+ list_del(&ct->unassured);
WRITE_UNLOCK(&ip_conntrack_lock);
if (master)
@@ -436,6 +441,7 @@
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ list_add_tail(&ct->unassured, &unassured_list);
WRITE_UNLOCK(&ip_conntrack_lock);
CONNTRACK_STAT_INC(insert);
return NF_ACCEPT;
@@ -461,34 +467,36 @@
return h != NULL;
}
-/* There's a small race here where we may free a just-assured
- connection. Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
+static int early_drop(void)
{
- return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
-}
-
-static int early_drop(struct list_head *chain)
-{
- /* Traverse backwards: gives us oldest, which is roughly LRU */
- struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct = NULL;
+ struct list_head *entry;
int dropped = 0;
+ /* There's a small race here where we may free a just-assured
+ connection. Too bad: we're in trouble anyway. */
READ_LOCK(&ip_conntrack_lock);
- h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
- if (h)
- atomic_inc(&h->ctrack->ct_general.use);
+ __list_for_each(entry, &unassured_list) {
+ ct = list_entry(entry,
+ struct ip_conntrack, unassured);
+ atomic_inc(&ct->ct_general.use);
+ break;
+ }
READ_UNLOCK(&ip_conntrack_lock);
- if (!h)
+ if (!ct)
return dropped;
- if (del_timer(&h->ctrack->timeout)) {
- death_by_timeout((unsigned long)h->ctrack);
+ if (del_timer(&ct->timeout)) {
+ death_by_timeout((unsigned long)ct);
dropped = 1;
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "ip_conntrack: table full, dropping"
+ " unassured connection.\n");
CONNTRACK_STAT_INC(early_drop);
}
- ip_conntrack_put(h->ctrack);
+ ip_conntrack_put(ct);
return dropped;
}
@@ -526,8 +534,8 @@
if (ip_conntrack_max
&& atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
- /* Try dropping from this hash chain. */
- if (!early_drop(&ip_conntrack_hash[hash])) {
+ /* Try to drop unassured connection. */
+ if (!early_drop()) {
if (net_ratelimit())
printk(KERN_WARNING
"ip_conntrack: table full, dropping"
@@ -564,6 +572,7 @@
conntrack->timeout.function = death_by_timeout;
INIT_LIST_HEAD(&conntrack->sibling_list);
+ INIT_LIST_HEAD(&conntrack->unassured);
WRITE_LOCK(&ip_conntrack_lock);
/* Need finding and deleting of expected ONLY if we win race */
@@ -1091,7 +1100,8 @@
void ip_ct_refresh_acct(struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
- unsigned long extra_jiffies)
+ unsigned long extra_jiffies,
+ int set_assured)
{
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
@@ -1107,6 +1117,10 @@
add_timer(&ct->timeout);
}
ct_add_counters(ct, ctinfo, skb);
+ if (set_assured) {
+ set_bit(IPS_ASSURED_BIT, &ct->status);
+ list_del(&ct->unassured);
+ }
WRITE_UNLOCK(&ip_conntrack_lock);
}
}
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_generic.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2004-10-18 23:53:46.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2004-12-09 16:01:30.000000000 +0100
@@ -52,7 +52,8 @@
const struct sk_buff *skb,
enum ip_conntrack_info ctinfo)
{
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout,
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY);
return NF_ACCEPT;
}
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2004-10-18 23:53:06.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2004-12-09 15:34:32.000000000 +0100
@@ -102,7 +102,7 @@
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
- ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
+ ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout, 0);
}
return NF_ACCEPT;
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_sctp.c 2004-10-18 23:55:07.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_sctp.c 2004-12-09 15:37:52.000000000 +0100
@@ -319,6 +319,7 @@
sctp_chunkhdr_t _sch, *sch;
u_int32_t offset, count;
char map[256 / sizeof (char)] = {0};
+ int set_assured = 0;
DEBUGP(__FUNCTION__);
DEBUGP("\n");
@@ -408,14 +409,14 @@
WRITE_UNLOCK(&sctp_lock);
}
- ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
-
if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
DEBUGP("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ set_assured = 1;
}
+ ip_ct_refresh_acct(conntrack, ctinfo, skb,
+ *sctp_timeouts[newconntrack], set_assured);
return NF_ACCEPT;
}
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2004-10-18 23:55:29.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2004-12-09 15:36:36.000000000 +0100
@@ -834,6 +834,7 @@
struct tcphdr *th, _tcph;
unsigned long timeout;
unsigned int index;
+ int set_assured = 0;
th = skb_header_pointer(skb, iph->ihl * 4,
sizeof(_tcph), &_tcph);
@@ -962,9 +963,9 @@
/* Set ASSURED if we see see valid ack in ESTABLISHED
after SYN_RECV or a valid answer for a picked up
connection. */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ set_assured = 1;
}
- ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+ ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout, set_assured);
return NF_ACCEPT;
}
diff -urN --exclude-from=/usr/src/diff.exclude linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_udp.c
--- linux-2.6.9-orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2004-10-18 23:53:05.000000000 +0200
+++ linux-2.6.9-early_drop/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2004-12-09 15:35:16.000000000 +0100
@@ -70,12 +70,12 @@
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- ip_ct_refresh_acct(conntrack, ctinfo, skb,
- ip_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ ip_ct_refresh_acct(conntrack, ctinfo, skb,
+ ip_ct_udp_timeout_stream, 1);
} else
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
+ ip_ct_refresh_acct(conntrack, ctinfo, skb,
+ ip_ct_udp_timeout, 0);
return NF_ACCEPT;
}
More information about the netfilter-devel
mailing list