[PATCH 1/4] RFC: fast string matching infrastrure for netfilter

Patrick McHardy kaber at trash.net
Mon Jan 10 00:10:41 CET 2005


Pablo Neira wrote:

> Hi,
>
> I've finished a first usable version of a infrastructure to look for 
> matchings in a packet. Features:
>
> * A library consisting in three public functions: constructor, 
> destructor and searching.
> * Boyer-moore algorithm to perform fast matchings.
> * Brute force search on the edges of two fragments to look for 
> fragmented matches, that is O(m) searchs where m is the size of the 
> pattern, it's not that bad for small pattern I think. It's fragment 
> aware by means of rusty's skb_iter functions.

Looks good. A problem is that it's only in-tree user is part of 
ip_conntrack.
I have actually given up keeping nf_conntrack up to date currently, but
I hope we can now really put ip_conntrack in maintenance mode and 
concentrate
on nf_conntrack. Any chance you want to base this on the nf_conntrack 
patch ?

> Comments welcome.

See below.

Regards
Patrick

>
> -- 
> Pablo
>
>------------------------------------------------------------------------
>
>--- /dev/null	2004-09-23 01:18:13.000000000 +0200
>+++ linux-2.5/net/ipv4/netfilter/nf_string_match.c	2005-01-09 21:54:04.000000000 +0100
>@@ -0,0 +1,264 @@
>+/* Efficient string matching fragment aware infrastrure for netfilter
>+ * Copyright (C) 2004 Pablo Neira Ayuso <pablo at eurodev.net>
>+ *
>+ *      This program is free software; you can redistribute it and/or
>+ *      modify it under the terms of the GNU General Public License
>+ *      as published by the Free Software Foundation; either version
>+ *      2 of the License, or (at your option) any later version.
>+ *
>+ * Idea:
>+ * 	
>+ * 	This algorithm used Boyer-Moore fast matching algorithm to look
>+ * 	for a pattern in the payload of a packet. Since it is fragment
>+ * 	aware, it looks for partial matches on the edges of two fragments.
>+ *
>+ * Literature:
>+ * 
>+ *      A Fast String Searching Algorithm, R.S. Boyer and Moore.
>+ *      Communications of the Association for Computing Machinery, 
>+ *      20(10), 1977, pp. 762-772.
>+ *	http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
>+ *
>+ * 	Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
>+ * 	http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
>+ * 
>+ * Implementation:
>+ * 
>+ * 	Boyer Moore implementation heavily based on Gianni Tedesco's.
>+ */
>+
>+#include <linux/module.h>
>+#include <linux/netfilter_ipv4/nf_string_match.h>
>+#include <linux/skbuff.h>
>+
>+#if 0
>+#define DEBUGP printk
>+#else
>+#define DEBUGP(format, args...)
>+#endif
>+
>+/* Compute constant structures needed by the boyer-moore algorithm,
>+ * this is done just ONCE. */
>+static void
>+sm_preprocess(struct nf_string_match *sm)
>+{
>+	int i, j, len[ASIZE], ended;
>+
>+	/* Compute the bad caracter shift array */
>+	for (i = 0; i < ASIZE; i++)
>+		sm->badshift[i] = sm->patlen;
>+	for (i = 0; i < sm->patlen - 1; i++)
>+		sm->badshift[(int) sm->pat[i]] = sm->patlen - 1 - i;
>+
>+	/* Compute the good shift array, used to match reocurrences 
>+	 * of a subpattern */
>+	for (i = 1; i < sm->patlen; i++) {
>+		for (j = 0; j < sm->patlen && sm->pat[sm->patlen -1 - j] 
>+				== sm->pat[sm->patlen - 1 - i - j]; j++);
>+		len[i] = j;
>+	}  
>+
>+	sm->goodshift[0] = 1;
>+	for (i = 1; i < sm->patlen; i++)
>+		sm->goodshift[i] = sm->patlen;
>+	for (i = sm->patlen - 1; i > 0; i--)
>+		sm->goodshift[len[i]] = i;
>+
>+	ended = 0;
>+	for (i = 0; i < sm->patlen; i++) {
>+		if (len[i] == sm->patlen - 1 - i)
>+			ended = i;
>+		if (ended)
>+			sm->goodshift[i] = ended;
>+	}
>+}
>+
>+struct nf_string_match *
>+nf_string_match_create(const char *pat, int patlen)
>+{
>+	struct nf_string_match *sm;
>+
>+	sm = kmalloc(sizeof(struct nf_string_match), GFP_KERNEL);
>+	if (!sm)
>+		return NULL;
>+	memset(sm, 0, sizeof(struct nf_string_match));
>+
>+	sm->goodshift = (int *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>+	if (!sm->goodshift)
>  
>
^ leaks sm

>+		return NULL;
>+	memset(sm->goodshift, 0, sizeof(int) * patlen);
>+	
>+	sm->pat = (char *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>+	if (!sm->pat) {
>  
>
^ leaks sm

>+		kfree(sm->goodshift);
>+		return NULL;
>+	}
>+	memset(sm->pat, patlen, sizeof(char) * patlen);
>+	
>+	strcpy(sm->pat, pat);
>+	sm->patlen = patlen;
>+
>+	/* Preprocess the pattern */
>+	sm_preprocess(sm);
>+
>+	return sm;
>+}
>+
>+/* release the structure */
>+void nf_string_match_destroy(struct nf_string_match *sm)
>+{
>+	kfree(sm->goodshift);
>+	kfree(sm->pat);
>+	kfree(sm);
>+}
>+
>+#define MAX(a,b) a>b?a:b
>+
>+/* this functions returns the offset where a matching was found */
>+static unsigned int
>+sm_search(struct nf_string_match *sm, unsigned char *data, int size, 
>+	  unsigned int *offset)
>+{
>+	int right_end = sm->patlen, sk, sh, i;
>+
>+	while (right_end < size) {
>+		DEBUGP("Searching in position %d (%c)\n", 
>+		       right_end, data[right_end]);
>+		for (i = 0; i < sm->patlen && data[right_end - i] 
>+				== sm->pat[sm->patlen - 1 - i]; i++);
>+		if (i == sm->patlen) {
>+			/* London calling... */
>+			DEBUGP("found!\n");
>+			*offset += (right_end - (sm->patlen - 1));
>+			return 1;
>+		}
>+		
>+		sk = sm->badshift[data[right_end - i]];  
>+		sh = sm->goodshift[i];
>+		DEBUGP("bad shift: %d good shift: %d\n", sk, sh);
>+
>+		/* Now jumping to... */
>+		right_end = MAX(right_end - i + sk, right_end + sh);
>+	}
>+	*offset += size;
>+	return 0;
>+}
>+
>+/* Brute force: look for a partial matching at the end of the packet.
>+ * A match never hits here, it's impossible. */
>+static void
>+sm_frag_search_end(struct nf_string_match *sm, void *ptr, int size, 
>+		   int *partial)
>+{
>+	int i;
>+	unsigned char *data = ptr + size - sm->patlen;
>+
>+	for (i=0; i < sm->patlen; i++) {
>+		DEBUGP("end: %c == %c (%d)\n", data[i], 
>+			sm->pat[*partial], *partial);
>+		
>+		if (data[i] == sm->pat[*partial])
>+			(*partial)++;
>+		else {
>+			if (data[i] == sm->pat[0])
>+				*partial = 1;
>+			else 
>+				*partial = 0;
>+		}
>+	}
>+	DEBUGP("sm->partial:%d\n", *partial);
>+}
>+
>+/* Brute force: Look for a matching at the beginning. Yes, here
>+ * we can hit a full match */
>+static int
>+sm_frag_search_begin(struct nf_string_match *sm, void *ptr, int size, 
>+		     unsigned int *offset, int *partial)
>+{
>+	int i, j = sm->patlen - *partial;
>+	unsigned char *data = (unsigned char *) ptr;
>+
>+	for (i=0; i<j; i++) {
>+		DEBUGP("begin: %c == %c (%d)\n", data[i],
>+			sm->pat[*partial], *partial);
>+
>+		if (data[i] == sm->pat[*partial])
>+			(*partial)++;
>+		else
>+			*partial = 0;
>+	}
>+
>+	if (*partial == sm->patlen) {
>+		DEBUGP("Fragmented match!\n");
>+		*offset += i;
>+		return 1;
>+	}
>+
>+	/* No matching founds, so reset counter */
>+	*partial = 0;
>+
>+	return 0;
>+}
>+
>+static inline int
>+frag_search(void *data, int size, struct nf_string_match *sm, 
>+	    unsigned int *offset, int *partial)
>+{
>+	/* There was a partial match before */
>+	if (sm_frag_search_begin(sm, data, size, offset, partial))
>+		return 1;
>+
>+	if (sm_search(sm, data, size, offset))
>+		return 1;
>+
>+	sm_frag_search_end(sm, data, size, partial);
>+
>+	return 0;
>+}
>+
>+int
>+nf_string_match_search(const struct sk_buff *skb, struct nf_string_match *sm,
>+		       unsigned int *offset)
>+{
>+	/* make sure offset is always set to zero */
>+	*offset = 0;
>+
>+	if (!skb_is_nonlinear(skb)) {
>+		DEBUGP("linear searching\n");
>+		/* General case: look for pattern in linear packet */
>+		if (sm_search(sm, skb->data, skb->len, offset))
>+			return 1;
>+	} else {
>+		/* The packet is fragmented */
>+		struct skb_iter i;
>+		int partial = 0;
>+
>+		DEBUGP("non linear searching\n");
>+
>+		/* Look for the pattern in first fragment */
>+		skb_iter_first(skb, &i);
>+		if (frag_search(i.data, i.len, sm, offset, &partial))
>+			return 1;
>+
>+		/* Now, the rest of fragments ... */
>+		while (skb_iter_next(skb, &i)) {
>+			DEBUGP("%p (size:%d)\n", i.data, i.len);
>+			if (frag_search(i.data, i.len, sm, offset, &partial))
>+				return 1;
>+		}
>+	}
>+
>+	return 0;
>+}
>+
>+static int __init init(void) { return 1; }
>+static void __exit fini(void) {}
>+
>+module_init(init);
>+module_exit(fini);
>  
>
What is this for ?

>+
>+MODULE_LICENSE("GPL");
>+
>+EXPORT_SYMBOL_GPL(nf_string_match_create);
>+EXPORT_SYMBOL_GPL(nf_string_match_search);
>+EXPORT_SYMBOL_GPL(nf_string_match_destroy);
>--- /dev/null	2004-09-23 01:18:13.000000000 +0200
>+++ linux-2.5/include/linux/netfilter_ipv4/nf_string_match.h	2005-01-09 19:28:25.000000000 +0100
>@@ -0,0 +1,24 @@
>+#ifndef _STRING_MATCHING_H
>+#define _STRING_MATCHING_H
>+
>+#include <linux/skbuff.h>
>+
>+/* Alphabet size */
>+#define ASIZE 256
>+
>+struct nf_string_match {
>+	char 		*pat;
>+	int 		patlen;
>+
>+	int 		badshift[ASIZE];
>+	int 		*goodshift;
>+};
>+
>+extern struct nf_string_match *
>+nf_string_match_create(const char *pat, int patlen);
>+extern int nf_string_match_search(const struct sk_buff *skb, 
>+				  struct nf_string_match *sm,
>+				  unsigned int *offset);
>+extern void nf_string_match_destroy(struct nf_string_match *sm);
>+
>+#endif
>  
>




More information about the netfilter-devel mailing list