[PATCH 1/4] RFC: fast string matching infrastrure for netfilter
Patrick McHardy
kaber at trash.net
Mon Jan 10 00:10:41 CET 2005
Pablo Neira wrote:
> Hi,
>
> I've finished a first usable version of a infrastructure to look for
> matchings in a packet. Features:
>
> * A library consisting in three public functions: constructor,
> destructor and searching.
> * Boyer-moore algorithm to perform fast matchings.
> * Brute force search on the edges of two fragments to look for
> fragmented matches, that is O(m) searchs where m is the size of the
> pattern, it's not that bad for small pattern I think. It's fragment
> aware by means of rusty's skb_iter functions.
Looks good. A problem is that it's only in-tree user is part of
ip_conntrack.
I have actually given up keeping nf_conntrack up to date currently, but
I hope we can now really put ip_conntrack in maintenance mode and
concentrate
on nf_conntrack. Any chance you want to base this on the nf_conntrack
patch ?
> Comments welcome.
See below.
Regards
Patrick
>
> --
> Pablo
>
>------------------------------------------------------------------------
>
>--- /dev/null 2004-09-23 01:18:13.000000000 +0200
>+++ linux-2.5/net/ipv4/netfilter/nf_string_match.c 2005-01-09 21:54:04.000000000 +0100
>@@ -0,0 +1,264 @@
>+/* Efficient string matching fragment aware infrastrure for netfilter
>+ * Copyright (C) 2004 Pablo Neira Ayuso <pablo at eurodev.net>
>+ *
>+ * This program is free software; you can redistribute it and/or
>+ * modify it under the terms of the GNU General Public License
>+ * as published by the Free Software Foundation; either version
>+ * 2 of the License, or (at your option) any later version.
>+ *
>+ * Idea:
>+ *
>+ * This algorithm used Boyer-Moore fast matching algorithm to look
>+ * for a pattern in the payload of a packet. Since it is fragment
>+ * aware, it looks for partial matches on the edges of two fragments.
>+ *
>+ * Literature:
>+ *
>+ * A Fast String Searching Algorithm, R.S. Boyer and Moore.
>+ * Communications of the Association for Computing Machinery,
>+ * 20(10), 1977, pp. 762-772.
>+ * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
>+ *
>+ * Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
>+ * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
>+ *
>+ * Implementation:
>+ *
>+ * Boyer Moore implementation heavily based on Gianni Tedesco's.
>+ */
>+
>+#include <linux/module.h>
>+#include <linux/netfilter_ipv4/nf_string_match.h>
>+#include <linux/skbuff.h>
>+
>+#if 0
>+#define DEBUGP printk
>+#else
>+#define DEBUGP(format, args...)
>+#endif
>+
>+/* Compute constant structures needed by the boyer-moore algorithm,
>+ * this is done just ONCE. */
>+static void
>+sm_preprocess(struct nf_string_match *sm)
>+{
>+ int i, j, len[ASIZE], ended;
>+
>+ /* Compute the bad caracter shift array */
>+ for (i = 0; i < ASIZE; i++)
>+ sm->badshift[i] = sm->patlen;
>+ for (i = 0; i < sm->patlen - 1; i++)
>+ sm->badshift[(int) sm->pat[i]] = sm->patlen - 1 - i;
>+
>+ /* Compute the good shift array, used to match reocurrences
>+ * of a subpattern */
>+ for (i = 1; i < sm->patlen; i++) {
>+ for (j = 0; j < sm->patlen && sm->pat[sm->patlen -1 - j]
>+ == sm->pat[sm->patlen - 1 - i - j]; j++);
>+ len[i] = j;
>+ }
>+
>+ sm->goodshift[0] = 1;
>+ for (i = 1; i < sm->patlen; i++)
>+ sm->goodshift[i] = sm->patlen;
>+ for (i = sm->patlen - 1; i > 0; i--)
>+ sm->goodshift[len[i]] = i;
>+
>+ ended = 0;
>+ for (i = 0; i < sm->patlen; i++) {
>+ if (len[i] == sm->patlen - 1 - i)
>+ ended = i;
>+ if (ended)
>+ sm->goodshift[i] = ended;
>+ }
>+}
>+
>+struct nf_string_match *
>+nf_string_match_create(const char *pat, int patlen)
>+{
>+ struct nf_string_match *sm;
>+
>+ sm = kmalloc(sizeof(struct nf_string_match), GFP_KERNEL);
>+ if (!sm)
>+ return NULL;
>+ memset(sm, 0, sizeof(struct nf_string_match));
>+
>+ sm->goodshift = (int *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>+ if (!sm->goodshift)
>
>
^ leaks sm
>+ return NULL;
>+ memset(sm->goodshift, 0, sizeof(int) * patlen);
>+
>+ sm->pat = (char *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>+ if (!sm->pat) {
>
>
^ leaks sm
>+ kfree(sm->goodshift);
>+ return NULL;
>+ }
>+ memset(sm->pat, patlen, sizeof(char) * patlen);
>+
>+ strcpy(sm->pat, pat);
>+ sm->patlen = patlen;
>+
>+ /* Preprocess the pattern */
>+ sm_preprocess(sm);
>+
>+ return sm;
>+}
>+
>+/* release the structure */
>+void nf_string_match_destroy(struct nf_string_match *sm)
>+{
>+ kfree(sm->goodshift);
>+ kfree(sm->pat);
>+ kfree(sm);
>+}
>+
>+#define MAX(a,b) a>b?a:b
>+
>+/* this functions returns the offset where a matching was found */
>+static unsigned int
>+sm_search(struct nf_string_match *sm, unsigned char *data, int size,
>+ unsigned int *offset)
>+{
>+ int right_end = sm->patlen, sk, sh, i;
>+
>+ while (right_end < size) {
>+ DEBUGP("Searching in position %d (%c)\n",
>+ right_end, data[right_end]);
>+ for (i = 0; i < sm->patlen && data[right_end - i]
>+ == sm->pat[sm->patlen - 1 - i]; i++);
>+ if (i == sm->patlen) {
>+ /* London calling... */
>+ DEBUGP("found!\n");
>+ *offset += (right_end - (sm->patlen - 1));
>+ return 1;
>+ }
>+
>+ sk = sm->badshift[data[right_end - i]];
>+ sh = sm->goodshift[i];
>+ DEBUGP("bad shift: %d good shift: %d\n", sk, sh);
>+
>+ /* Now jumping to... */
>+ right_end = MAX(right_end - i + sk, right_end + sh);
>+ }
>+ *offset += size;
>+ return 0;
>+}
>+
>+/* Brute force: look for a partial matching at the end of the packet.
>+ * A match never hits here, it's impossible. */
>+static void
>+sm_frag_search_end(struct nf_string_match *sm, void *ptr, int size,
>+ int *partial)
>+{
>+ int i;
>+ unsigned char *data = ptr + size - sm->patlen;
>+
>+ for (i=0; i < sm->patlen; i++) {
>+ DEBUGP("end: %c == %c (%d)\n", data[i],
>+ sm->pat[*partial], *partial);
>+
>+ if (data[i] == sm->pat[*partial])
>+ (*partial)++;
>+ else {
>+ if (data[i] == sm->pat[0])
>+ *partial = 1;
>+ else
>+ *partial = 0;
>+ }
>+ }
>+ DEBUGP("sm->partial:%d\n", *partial);
>+}
>+
>+/* Brute force: Look for a matching at the beginning. Yes, here
>+ * we can hit a full match */
>+static int
>+sm_frag_search_begin(struct nf_string_match *sm, void *ptr, int size,
>+ unsigned int *offset, int *partial)
>+{
>+ int i, j = sm->patlen - *partial;
>+ unsigned char *data = (unsigned char *) ptr;
>+
>+ for (i=0; i<j; i++) {
>+ DEBUGP("begin: %c == %c (%d)\n", data[i],
>+ sm->pat[*partial], *partial);
>+
>+ if (data[i] == sm->pat[*partial])
>+ (*partial)++;
>+ else
>+ *partial = 0;
>+ }
>+
>+ if (*partial == sm->patlen) {
>+ DEBUGP("Fragmented match!\n");
>+ *offset += i;
>+ return 1;
>+ }
>+
>+ /* No matching founds, so reset counter */
>+ *partial = 0;
>+
>+ return 0;
>+}
>+
>+static inline int
>+frag_search(void *data, int size, struct nf_string_match *sm,
>+ unsigned int *offset, int *partial)
>+{
>+ /* There was a partial match before */
>+ if (sm_frag_search_begin(sm, data, size, offset, partial))
>+ return 1;
>+
>+ if (sm_search(sm, data, size, offset))
>+ return 1;
>+
>+ sm_frag_search_end(sm, data, size, partial);
>+
>+ return 0;
>+}
>+
>+int
>+nf_string_match_search(const struct sk_buff *skb, struct nf_string_match *sm,
>+ unsigned int *offset)
>+{
>+ /* make sure offset is always set to zero */
>+ *offset = 0;
>+
>+ if (!skb_is_nonlinear(skb)) {
>+ DEBUGP("linear searching\n");
>+ /* General case: look for pattern in linear packet */
>+ if (sm_search(sm, skb->data, skb->len, offset))
>+ return 1;
>+ } else {
>+ /* The packet is fragmented */
>+ struct skb_iter i;
>+ int partial = 0;
>+
>+ DEBUGP("non linear searching\n");
>+
>+ /* Look for the pattern in first fragment */
>+ skb_iter_first(skb, &i);
>+ if (frag_search(i.data, i.len, sm, offset, &partial))
>+ return 1;
>+
>+ /* Now, the rest of fragments ... */
>+ while (skb_iter_next(skb, &i)) {
>+ DEBUGP("%p (size:%d)\n", i.data, i.len);
>+ if (frag_search(i.data, i.len, sm, offset, &partial))
>+ return 1;
>+ }
>+ }
>+
>+ return 0;
>+}
>+
>+static int __init init(void) { return 1; }
>+static void __exit fini(void) {}
>+
>+module_init(init);
>+module_exit(fini);
>
>
What is this for ?
>+
>+MODULE_LICENSE("GPL");
>+
>+EXPORT_SYMBOL_GPL(nf_string_match_create);
>+EXPORT_SYMBOL_GPL(nf_string_match_search);
>+EXPORT_SYMBOL_GPL(nf_string_match_destroy);
>--- /dev/null 2004-09-23 01:18:13.000000000 +0200
>+++ linux-2.5/include/linux/netfilter_ipv4/nf_string_match.h 2005-01-09 19:28:25.000000000 +0100
>@@ -0,0 +1,24 @@
>+#ifndef _STRING_MATCHING_H
>+#define _STRING_MATCHING_H
>+
>+#include <linux/skbuff.h>
>+
>+/* Alphabet size */
>+#define ASIZE 256
>+
>+struct nf_string_match {
>+ char *pat;
>+ int patlen;
>+
>+ int badshift[ASIZE];
>+ int *goodshift;
>+};
>+
>+extern struct nf_string_match *
>+nf_string_match_create(const char *pat, int patlen);
>+extern int nf_string_match_search(const struct sk_buff *skb,
>+ struct nf_string_match *sm,
>+ unsigned int *offset);
>+extern void nf_string_match_destroy(struct nf_string_match *sm);
>+
>+#endif
>
>
More information about the netfilter-devel
mailing list