PATCH: new HTTP Request url matching

Arie Grapa arie@cs.stanford.edu
Wed, 31 Oct 2001 01:53:50 -0800


--r5Pyd7+fXNt84Ff3
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I just wrote an HTTP request matching module.
It does HTTP Request URL matching. I know some of you believe this should
be done in userspace, but I like it more this way.
I have tested it quite a bit, but if you still find problems with it, please
let me know.

Example usage:
iptables -t filter -N urlrules
iptables -A urlrules -p tcp --dport 80 -m url --url cmd.exe -j REJECT \
--reject-with tcp-reset
iptables -A urlrules -p tcp --dport 80 -m url --url root.exe -j REJECT \
--reject-with tcp-reset
iptables -A FORWARD -j urlrules

I am including both the kernel patch (tested 2.4.12,2.4.13) and
the userspace patch (tested 1.2.4).

I hope you find it useful.
Sincerely,
Arie Grapa (arie@cs.stanford.edu)

--r5Pyd7+fXNt84Ff3
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="patch.URL.iptables-1.2.4"

diff -urN iptables-1.2.4/extensions/Makefile iptables-1.2.4.URL/extensions/Makefile
--- iptables-1.2.4/extensions/Makefile	Wed Oct 31 01:28:21 2001
+++ iptables-1.2.4.URL/extensions/Makefile	Wed Oct 24 02:33:37 2001
@@ -4,7 +4,7 @@
 PF6_EXT_SLIB:=tcp udp icmpv6 standard MARK mark
 
 # The following may not be present, but compile them anyway.
-PF_EXT_SLIB+=FTOS TCPMSS TTL ULOG ah esp iplimit tcpmss ttl
+PF_EXT_SLIB+=FTOS TCPMSS TTL ULOG ah esp iplimit tcpmss ttl url
 
 # Optionals
 PF_EXT_SLIB_OPTS:=$(foreach T,$(wildcard extensions/.*-test),$(shell KERNEL_DIR=$(KERNEL_DIR) $(T)))
diff -urN iptables-1.2.4/extensions/libipt_url.c iptables-1.2.4.URL/extensions/libipt_url.c
--- iptables-1.2.4/extensions/libipt_url.c	Wed Dec 31 16:00:00 1969
+++ iptables-1.2.4.URL/extensions/libipt_url.c	Wed Oct 31 01:23:16 2001
@@ -0,0 +1,128 @@
+/* Shared library add-on to iptables to add http URL matching support. */
+#include <stdio.h>
+#include <netdb.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+#include <iptables.h>
+#include <linux/netfilter_ipv4/ipt_url.h>
+
+/* Print the usage message for the url match to stdout. */
+static void
+help(void)
+{
+  printf("url match v%s options:\n"
+	 "[!] --url [!] string	Match string in HTTP URL.\n",
+	 NETFILTER_VERSION);
+}
+
+static struct option opts[] = {
+	{ "url", 1, 0, '1' },	/* --url takes an argument; parse() sees it as '1' */
+	{0}	/* end-of-table sentinel */
+};
+
+/* Initialize the match: only ORs NFC_UNKNOWN into *nfcache; m is unused. */
+static void
+init(struct ipt_entry_match *m, unsigned int *nfcache)
+{
+	*nfcache |= NFC_UNKNOWN;
+}
+
+/* Copy url into info->matchstr, or exit if it cannot fit with its NUL. */
+static void
+parse_url(const unsigned char *url, struct ipt_url_match_info *info)
+{
+        if (strlen(url) < URL_MAX_LEN) strcpy(info->matchstr, url);
+        else exit_error(PARAMETER_PROBLEM, "URL too long `%s'", url);
+}
+
+/* Parse one command-line option: returns 1 if c was `--url' and has been
+   consumed, 0 if the option is not one of ours. */
+static int
+parse(int c, char **argv, int invert, unsigned int *flags,
+      const struct ipt_entry *entry,
+      unsigned int *nfcache,
+      struct ipt_entry_match **match)
+{
+	struct ipt_url_match_info *urlinfo =
+		(struct ipt_url_match_info *)(*match)->data;
+	/* '1' is the value opts[] assigns to --url. */
+	switch (c) {
+	case '1':
+		if (*flags)
+			exit_error(PARAMETER_PROBLEM,
+				   "Only one `--url' allowed");
+		if (check_inverse(optarg, &invert))
+			optind++;
+		parse_url(argv[optind-1],urlinfo);
+		if (invert)
+			urlinfo->invert = 1;
+		*flags = 1;
+		break;
+	default:
+		return 0;
+	}
+	return 1;
+}
+
+static void
+print_url(const char * url, int invert)
+{	/* Prints "URL:<url>" to stdout, preceded by "! " when invert is set. */
+	if (invert)
+		printf("! ");
+	printf("URL:%s", url);
+}
+
+/* Final check: exit with an error unless parse() saw --url (flags != 0). */
+static void
+final_check(unsigned int flags)
+{
+	if (!flags)
+		exit_error(PARAMETER_PROBLEM,
+			   "URL matching module said: You must specify `--url'");
+}
+
+/* Print the match data via print_url(); ip and numeric are not used. */
+static void
+print(const struct ipt_ip *ip,
+      const struct ipt_entry_match *match,
+      int numeric)
+{
+	const struct ipt_url_match_info *urlinfo =
+		(const struct ipt_url_match_info *)match->data;
+
+	print_url(urlinfo->matchstr, urlinfo->invert);
+}
+
+/* Write the match to stdout in a form parse() can read back: `--url [! ]string '. */
+static void
+save(const struct ipt_ip *ip, const struct ipt_entry_match *match)
+{
+	const struct ipt_url_match_info *urlinfo =
+		(const struct ipt_url_match_info *)match->data;
+
+	/* No "URL:" label: it would become part of the string when re-parsed. */
+	printf("--url %s%s ", urlinfo->invert ? "! " : "", urlinfo->matchstr);
+}
+
+static
+struct iptables_match url
+= { NULL,
+    "url",	/* match name; the kernel module registers the same string */
+    NETFILTER_VERSION,
+    IPT_ALIGN(sizeof(struct ipt_url_match_info)),	/* presumably the match data size -- confirm in iptables.h */
+    IPT_ALIGN(sizeof(struct ipt_url_match_info)),
+    &help,
+    &init,
+    &parse,
+    &final_check,
+    &print,
+    &save,
+    opts	/* the --url option table defined above */
+};
+
+void _init(void)
+{	/* Register the url match description with iptables. */
+	register_match(&url);
+}
Binary files iptables-1.2.4/ip6tables and iptables-1.2.4.URL/ip6tables differ
Binary files iptables-1.2.4/ip6tables.o and iptables-1.2.4.URL/ip6tables.o differ
Binary files iptables-1.2.4/iptables and iptables-1.2.4.URL/iptables differ
Binary files iptables-1.2.4/iptables-restore and iptables-1.2.4.URL/iptables-restore differ
Binary files iptables-1.2.4/iptables-save and iptables-1.2.4.URL/iptables-save differ
Binary files iptables-1.2.4/iptables.o and iptables-1.2.4.URL/iptables.o differ
Binary files iptables-1.2.4/libipq/libipq.a and iptables-1.2.4.URL/libipq/libipq.a differ
Binary files iptables-1.2.4/libiptc/libiptc.a and iptables-1.2.4.URL/libiptc/libiptc.a differ
Binary files iptables-1.2.4/libipulog/libipulog.a and iptables-1.2.4.URL/libipulog/libipulog.a differ

--r5Pyd7+fXNt84Ff3
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="patch.URL.linux"

diff -urN linux/Documentation/Configure.help linux-2.4.12.new/Documentation/Configure.help
--- linux/Documentation/Configure.help	Tue Oct  9 15:13:03 2001
+++ linux-2.4.12.new/Documentation/Configure.help	Wed Oct 31 01:00:51 2001
@@ -2149,6 +2149,23 @@
   If you want to compile it as a module, say M here and read
   Documentation/modules.txt.  If unsure, say `N'.
 
+HTTP request match support
+CONFIG_IP_NF_MATCH_URL
+  url matching allows you to match packets that
+  contain HTTP requests based on the requested URL.
+  I suggest you do not just DROP or REJECT the packet, as TCP
+  will cheerfully keep trying.
+  Use `REJECT --reject-with tcp-reset' instead.
+
+  For example, you can protect your winbugs with
+  something like:
+
+  iptables -A FORWARD -p tcp -m url --url cmd.exe \
+  -j REJECT --reject-with tcp-reset
+
+  If you want to compile it as a module, say M here and read
+  Documentation/modules.txt.  If unsure, say `N'.
+
 LOG target support
 CONFIG_IP_NF_TARGET_LOG
   This option adds a `LOG' target, which allows you to create rules in
diff -urN linux/include/linux/netfilter_ipv4/ipt_url.h linux-2.4.12.new/include/linux/netfilter_ipv4/ipt_url.h
--- linux/include/linux/netfilter_ipv4/ipt_url.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.12.new/include/linux/netfilter_ipv4/ipt_url.h	Wed Oct 31 00:55:35 2001
@@ -0,0 +1,11 @@
+#ifndef _IPT_URL_MATCH_H
+#define _IPT_URL_MATCH_H
+#define URL_MAX_LEN 128	/* size of matchstr[] in bytes */
+
+struct ipt_url_match_info {
+    unsigned char matchstr[URL_MAX_LEN];	/* string to look for in the request URL */
+    unsigned int urllen;	/* length of matchstr; set by the kernel's checkentry() */
+    u_int8_t invert;	/* non-zero when the rule was given with `!' */
+};
+
+#endif /*_IPT_URL_MATCH_H*/
diff -urN linux/net/ipv4/netfilter/Config.in linux-2.4.12.new/net/ipv4/netfilter/Config.in
--- linux/net/ipv4/netfilter/Config.in	Tue Mar  6 22:44:16 2001
+++ linux-2.4.12.new/net/ipv4/netfilter/Config.in	Wed Oct 31 00:52:53 2001
@@ -21,6 +21,7 @@
   dep_tristate '  Multiple port match support' CONFIG_IP_NF_MATCH_MULTIPORT $CONFIG_IP_NF_IPTABLES
   dep_tristate '  TOS match support' CONFIG_IP_NF_MATCH_TOS $CONFIG_IP_NF_IPTABLES
   dep_tristate '  tcpmss match support' CONFIG_IP_NF_MATCH_TCPMSS $CONFIG_IP_NF_IPTABLES
+  dep_tristate '  URL match support' CONFIG_IP_NF_MATCH_URL $CONFIG_IP_NF_IPTABLES
   if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
     dep_tristate '  Connection state match support' CONFIG_IP_NF_MATCH_STATE $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES 
   fi
diff -urN linux/net/ipv4/netfilter/Makefile linux-2.4.12.new/net/ipv4/netfilter/Makefile
--- linux/net/ipv4/netfilter/Makefile	Wed Apr 25 15:00:28 2001
+++ linux-2.4.12.new/net/ipv4/netfilter/Makefile	Wed Oct 31 00:54:11 2001
@@ -55,6 +55,7 @@
 obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
 obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o
 obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
+obj-$(CONFIG_IP_NF_MATCH_URL) += ipt_url.o
 
 # targets
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
diff -urN linux/net/ipv4/netfilter/ipt_url.c linux-2.4.12.new/net/ipv4/netfilter/ipt_url.c
--- linux/net/ipv4/netfilter/ipt_url.c	Wed Dec 31 16:00:00 1969
+++ linux-2.4.12.new/net/ipv4/netfilter/ipt_url.c	Wed Oct 31 00:51:48 2001
@@ -0,0 +1,175 @@
+/* Kernel module to match HTTP GET&POST REQUESTS WITH URL string values. 
+ *
+ * Copyright (C) 2001 Arie Grapa (arie@cs.stanford.edu)
+ *
+ * ChangeLog
+ *   31.10.2001: First Version
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ipt_url.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#define MAXREQUEST 200
+
+/* Linear memcmp() search: returns the first occurrence of needle in
+ * haystack (offset 0 included), or NULL; both lengths are in bytes. */
+char *search_linear (const char *needle, const char *haystack,
+		     int needle_len, int haystack_len)
+{
+	const char *t, *last;
+	if (needle_len < 0 || haystack_len < needle_len) return NULL;
+	last = haystack + (haystack_len - needle_len); /* last valid start */
+	for (t = haystack; t <= last; t++)
+		if (memcmp(t, needle, needle_len) == 0) return (char *)t;
+	return NULL;
+}
+
+/* Match the target of an HTTP GET/POST request against a URL substring.
+ *
+ * url, urllen:  bytes to look for and their count
+ * tcp, datalen: TCP header and the number of packet bytes from it onward
+ * invert:       value returned for "no match"; a match returns !invert
+ * hotdrop:      set to 1 when the TCP header is truncated or malformed
+ *
+ * Only a request that starts the TCP payload is examined, and only the
+ * bytes between the method and the following ' ' or '\r' are searched. */
+static inline int
+url_match(const char * url,
+	unsigned int urllen,
+	const struct tcphdr *tcp,
+	u_int16_t datalen,
+	int invert,
+	int *hotdrop)
+{
+	const char *data, *reqstart, *reqend, *sp;
+	unsigned int tcphdrlen, skip;
+
+	/* The fixed header must be present before its fields are read. */
+	if (datalen < sizeof(struct tcphdr)) {
+		*hotdrop = 1; // this would immediately drop the packet
+		return invert; // no match
+	}
+	// do not match SYN or RST packets
+	if (tcp->syn || tcp->rst)
+		return invert; // no match
+
+	/* If we don't have the whole header, drop packet. */
+	tcphdrlen = tcp->doff * 4;
+	if (tcphdrlen < sizeof(struct tcphdr) || tcphdrlen > datalen) {
+		*hotdrop = 1;
+		return invert; // no match
+	}
+	data = (const char *) tcp + tcphdrlen;
+	datalen -= tcphdrlen;
+	// do not match undersized packets
+	if (datalen < 6)
+		return invert; // no match
+
+	if (memcmp(data, "GET ", 4) == 0)
+		skip = 4;
+	else if (memcmp(data, "POST ", 5) == 0)
+		skip = 5;
+	else
+		return invert; // not an HTTP request: no match
+	reqstart = data + skip;
+
+	/* The target ends at the first ' ' or '\r'.  memchr() returns NULL
+	 * when the byte is absent; the rest of the segment is used then, so
+	 * reqend never precedes reqstart and the length below is >= 0. */
+	reqend = memchr(reqstart, '\r', datalen - skip);
+	if (!reqend)
+		reqend = data + datalen;
+	sp = memchr(reqstart, ' ', reqend - reqstart);
+	if (sp)
+		reqend = sp;
+
+	/* Search the request target in place, inside the packet data. */
+	if (search_linear(url, reqstart, urllen, reqend - reqstart))
+		return !invert; // match
+	return invert; // no match
+}
+
+/* Match hook: applies url_match() to the packet's protocol header and data. */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in, const struct net_device *out,
+      const void *matchinfo, // my struct ipt_url_match_info
+      int offset, // non-zero means a non-head fragment
+      const void *hdr, // pointer to protocol header
+      u_int16_t datalen, // length of data (packet length - IP header length)
+      int *hotdrop)  // should drop immediately?
+{
+	const struct ipt_url_match_info *info = matchinfo;
+	/* A non-head fragment has no TCP header to parse: report "no match". */
+	if (offset) return info->invert;
+	return url_match(info->matchstr, info->urllen,
+			 hdr, datalen, info->invert, hotdrop);
+}
+
+/* Validate a rule that uses this match: returns 1 to accept it, 0 (after a
+ * printk) otherwise.  Requires match data of the expected size, a
+ * non-inverted -p tcp, and a non-empty string terminated inside matchstr[].
+ * On success urllen is set to the string length for use by match(). */
+static int
+checkentry(const char *tablename,
+           const struct ipt_ip *ip,
+           void *matchinfo, // my struct ipt_url_match_info
+           unsigned int matchsize,
+           unsigned int hook_mask)
+{
+	struct ipt_url_match_info *info = matchinfo;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_url_match_info))) {
+		printk("ipt_url: size mismatch\n");
+		return 0;
+	}
+
+	/* matchstr comes from userspace: require a NUL inside the array. */
+	if (!memchr(info->matchstr, 0, URL_MAX_LEN)) {
+		printk("ipt_url: String is too long\n");
+		return 0;
+	}
+
+	/* Must specify -p tcp */
+	if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
+		printk("ipt_url: Only works on TCP packets\n");
+		return 0;
+	}
+
+	// is match string 0 sized?
+	if (info->matchstr[0] == 0) {
+		printk("ipt_url: match string is empty\n");
+		return 0;
+	}
+	info->urllen = strlen((const char *)info->matchstr);
+	return 1;
+}
+
+// first is the list linkage, left as { NULL, NULL } here
+// second is the name of the match, as referred to by userspace
+// third is the match function, fourth the checkentry function
+static struct ipt_match my_url_match
+= { { NULL, NULL }, "url", &match, &checkentry, NULL, THIS_MODULE };
+
+static int __init init(void)
+{
+  /* Register the match; the registration result is the init status. */
+  return ipt_register_match(&my_url_match);
+}
+
+static void __exit fini(void)
+{
+  /* Withdraw the match that init() registered. */
+  ipt_unregister_match(&my_url_match);
+}
+
+module_init(init);	/* init() is the module's load-time entry point */
+module_exit(fini);	/* fini() is the module's unload-time exit point */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arie Grapa <arie@cs.stanford.edu>");
+MODULE_DESCRIPTION("HTTP Request String matching");
+

--r5Pyd7+fXNt84Ff3--