[netfilter-cvslog] r4420 - in trunk/nfsim: core tools

rusty at netfilter.org rusty at netfilter.org
Wed Nov 2 01:49:20 CET 2005


Author: rusty at netfilter.org
Date: 2005-11-02 01:49:12 +0100 (Wed, 02 Nov 2005)
New Revision: 4420

Modified:
   trunk/nfsim/core/fakesockopt.c
   trunk/nfsim/core/message.c
   trunk/nfsim/core/message.h
   trunk/nfsim/core/nfsockopt.h
   trunk/nfsim/tools/iptables.c
Log:
Significant rework for fork() when attached to iptables.
(Solves hang in --failtest on 36ipt_multiport-invert.sim)
Use completely separate stdio and msg fd for child.
Have parent write exit status out to child's msg fd after it dies.
Always drain both file descriptors completely, so as not to miss stdout messages.


Modified: trunk/nfsim/core/fakesockopt.c
===================================================================
--- trunk/nfsim/core/fakesockopt.c	2005-11-02 00:44:45 UTC (rev 4419)
+++ trunk/nfsim/core/fakesockopt.c	2005-11-02 00:49:12 UTC (rev 4420)
@@ -106,7 +106,7 @@
 	dlclose(handle);
 
 	if (!(fdstr = getenv("NFSIM_FAKESOCK_FD"))) {
-		fprintf(stderr, "The facksockopt.so library can only be used  "
+		fprintf(stderr, "The fakesockopt.so library can only be used  "
 				"from within nfsim");
 		exit(EXIT_FAILURE);
 	}
@@ -117,7 +117,6 @@
 		proc_prefix = prefix;
 
 	return;
-
 }
 
 /* Called on unload */
@@ -157,10 +156,53 @@
 	return *((int*)CMSG_DATA(cmsg));
 }
 
+static void do_fork(void)
+{
+	int pid, out_fd, msg_fd;
+
+	out_fd = recv_fd(sd);
+	msg_fd = recv_fd(sd);
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pid == 0) {
+		/* Child: get own fd for output. */
+		dup2(out_fd, STDOUT_FILENO);
+		dup2(out_fd, STDERR_FILENO);
+		close(sd);
+		sd = msg_fd;
+	} else {
+		struct nf_userspace_message msg;
+		int status;
+
+		/* Wait for kid.  We write its exit status as a
+		 * message through its pipe when it's done.*/
+		waitpid(pid, &status, 0);
+
+		fprintf(stderr, "Child exited %i\n", status);
+		msg.type = UM_SYSCALL;
+		msg.opcode = SYS_EXIT;
+		msg.args[0] = status;
+		msg.args[1] = 0;
+		msg.args[2] = 0;
+		msg.args[3] = 0;
+		msg.len = 0;
+		msg.retval = 0;
+
+		write(msg_fd, &msg, sizeof(msg));
+		close(msg_fd);
+		close(out_fd);
+	}
+}
+
 static void handle_kernelop(int fd, struct nf_userspace_message *msg)
 {
 	struct nf_userspace_message *reply;
-	int pid, m_offset = sizeof(struct nf_userspace_message);
+	int m_offset = sizeof(struct nf_userspace_message);
 
 	assert(msg->type = UM_KERNELOP);
 
@@ -182,19 +224,8 @@
 
 		break;
 	case KOP_FORK:
-		pid = fork();
-		if (pid < 0) {
-			perror("fork");
-			exit(EXIT_FAILURE);
-		}
-		reply = msg;
-		if (pid != 0) {
-			/* Retval for parent is status of child. */
-			waitpid(pid, &msg->retval, 0);
-			break;
-		}
-		reply->retval = getpid();
-		break;
+		do_fork();
+		return;
 
 	default:
 		fprintf(stderr, "Invalid kernelop opcode\n");
@@ -209,14 +240,6 @@
 
 	if (reply != msg)
 		free(reply);
-
-	if (msg->opcode == KOP_FORK && pid == 0) {
-		/* Child: get own fd for output. */
-		int newfd = recv_fd(sd);
-		dup2(newfd, STDOUT_FILENO);
-		dup2(newfd, STDERR_FILENO);
-		close(newfd);
-	}
 }
 
 /* send a syscall and wait for a response, processing any kernelops
@@ -327,7 +350,6 @@
 		return __socket(domain, type, protocol);
 }
 
-
 static int file_is_proc(const char *pathname)
 {
 	if (pathname && strlen(pathname) >= 6

Modified: trunk/nfsim/core/message.c
===================================================================
--- trunk/nfsim/core/message.c	2005-11-02 00:44:45 UTC (rev 4419)
+++ trunk/nfsim/core/message.c	2005-11-02 00:49:12 UTC (rev 4420)
@@ -31,8 +31,9 @@
 #include <sys/wait.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-static int sock[2]; /* socket for messages. 0 = parent, 1 = child */
-static bool forked;
+static int msg_fd; /* socket for messages. */
+static int io_fd; /* pipe to read child's stdout/stderr */
+static int pid = -1; /* pid of child if we forked it ourselves */
 
 static void send_userspace_message(struct nf_userspace_message *msg);
 
@@ -41,7 +42,7 @@
 	struct iovec iov[1];
 	struct cmsghdr *cmsg;
 	struct msghdr msg;
-	char buf[1];
+	char buf[1] = { '\0' };
 
 	iov[0].iov_base = buf;
 	iov[0].iov_len = sizeof(buf);
@@ -60,8 +61,6 @@
 	cmsg->cmsg_type = SCM_RIGHTS;
 	*(int*)CMSG_DATA(cmsg) = fd;
 
-	msg.msg_controllen = CMSG_SPACE(sizeof(fd));
-
 	if (sendmsg(dest_fd, &msg, 0) < 0)
 		barf_perror("sendfd failed");
 }
@@ -69,20 +68,7 @@
 void message_init(void)
 {
 	sigset_t ss;
-	char *fdstr;
-	int fdflags;
 
-	if (socketpair(PF_UNIX, SOCK_STREAM, PF_UNSPEC, sock))
-		barf_perror("socket");
-
-	fdflags = fcntl(sock[0], F_GETFD);
-	if (fcntl(sock[0], F_SETFD, fdflags & ~FD_CLOEXEC))
-		barf_perror("fcntl");
-
-	fdstr = talloc_asprintf(NULL, "%d", sock[1]);
-	setenv("NFSIM_FAKESOCK_FD", fdstr, 1);
-	talloc_free(fdstr);
-
 	sigemptyset(&ss);
 	sigaddset(&ss, SIGPIPE);
 	sigprocmask(SIG_BLOCK, &ss, NULL);
@@ -92,80 +78,49 @@
 {
 	sigset_t ss;
 
-	close(sock[0]);
-	close(sock[1]);
-
 	sigemptyset(&ss);
 	sigaddset(&ss, SIGPIPE);
 	sigprocmask(SIG_UNBLOCK, &ss, NULL);
 }
 
-struct program
-{
-	pid_t pid;
-	int status;
-	int fd;
-};
-static struct program *currprog;
-
 void start_program(const char *name, int argc, char *argv[])
 {
-	int childfd[2];
+	int iofds[2];
+	int msgfds[2];
+	char *fdstr;
 
-	assert(!currprog);
-	currprog = talloc(NULL, struct program);
-
-	if (pipe(childfd) != 0)
+	if (pipe(iofds) != 0)
 		barf_perror("%s pipe", name);
 
+	if (socketpair(PF_UNIX, SOCK_STREAM, PF_UNSPEC, msgfds))
+		barf_perror("socket");
+
 	fflush(stdout);
-	currprog->pid = fork();
-	switch (currprog->pid) {
+	pid = fork();
+	switch (pid) {
 	case -1:
 		barf_perror("iptables fork");
 	case 0:
-		dup2(childfd[1], STDOUT_FILENO);
-		dup2(childfd[1], STDERR_FILENO);
+		dup2(iofds[1], STDOUT_FILENO);
+		dup2(iofds[1], STDERR_FILENO);
+		close(iofds[0]);
+		close(msgfds[0]);
 		if (setenv("LD_PRELOAD", "fakesockopt.so.1.0", 1))
 			barf("putenv failed");
+		fdstr = talloc_asprintf(NULL, "%d", msgfds[1]);
+		setenv("NFSIM_FAKESOCK_FD", fdstr, 1);
+		talloc_free(fdstr);
 		execvp(name, argv);
 		fprintf(stderr, "Could not exec %s!\n", name);
 		exit(EXIT_FAILURE);
 	}
 
-	close(childfd[1]);
-	currprog->fd = childfd[0];
-	currprog->status = -1;
+	close(iofds[1]);
+	close(msgfds[1]);
+	io_fd = iofds[0];
+	msg_fd = msgfds[0];
 }
 
-int end_program(const char *name)
-{
-	int status;
-
-	if (forked) {
-		/* Damn UNIX!  Not our child. */
-
-		/* Might have been read already. */
-		status = currprog->status;
-		if (status == -1) {
-			struct nf_userspace_message msg;
-			if (read(sock[0], &msg, sizeof(msg)) != sizeof(msg))
-				barf_perror("Reading child status");
-			status = msg.retval;
-		}
-	} else {
-		while (waitpid(currprog->pid, &status, 0) <= 0)
-			if (errno != EINTR)
-				barf_perror("Waiting for child %s", name);
-	}
-
-	close(currprog->fd);
-	talloc_free(currprog);
-	currprog = NULL;
-
-	return status;
-}
-
 static const char *protofamily(int pf)
 {
 	switch (pf) {
@@ -246,15 +201,15 @@
 	return errstr;
 }
 
-static void handle_userspace_message(void)
+static bool handle_userspace_message(int *status)
 {
 	int len;
 	struct nf_userspace_message msg;
 
 	/* FIXME: msg.len?  Presumable always 0? --RR */
-	len = read(sock[0], &msg, sizeof(msg));
+	len = read(msg_fd, &msg, sizeof(msg));
 	if (len != sizeof(msg))
-		barf_perror("reading userspace message");
+		return false;
 
 	if (msg.type == UM_SYSCALL) {
 		switch (msg.opcode) {
@@ -275,14 +230,14 @@
 				nfsim_log(LOG_USERSPACE,
 					  "    getsockopt -> %s (len %i)",
 					  err(msg.retval), msg.args[3]);
-			write(sock[0], &msg, sizeof(msg));
+			write(msg_fd, &msg, sizeof(msg));
 			break;
 		case SYS_SETSOCKOPT:
 			if (strace)
 				nfsim_log(LOG_USERSPACE,
 						  "strace: setsockopt(%s, %s,"
 						  " %p, %u)",
-						  protofamily(msg.args[0]),
+					  protofamily(msg.args[0]),
 						  sockopt(msg.args[1], false),
 						  (char *)msg.args[2],
 						  msg.args[3]);
@@ -292,8 +247,11 @@
 			if (strace)
 				nfsim_log(LOG_USERSPACE, "    setsockopt -> %s",
 					  err(msg.retval));
-			write(sock[0], &msg, sizeof(msg));
+			write(msg_fd, &msg, sizeof(msg));
 			break;
+		case SYS_EXIT:
+			*status = msg.args[0];
+			break;
 		default:
 			barf("Invalid syscall opcode %d\n", msg.opcode);
 		}
@@ -301,7 +259,7 @@
 		switch (msg.opcode) {
 
 		case KOP_COPY_FROM_USER:
-			if (complete_read(sock[0], (char *)msg.args[0],
+			if (complete_read(msg_fd, (char *)msg.args[0],
 					msg.args[2]) !=	msg.args[2])
 				barf_perror("read");
 			break;
@@ -309,26 +267,21 @@
 			/* copying has been done */
 			break;
 
-		case KOP_FORK:
-			if (!currprog->pid)
-				currprog->pid = msg.retval;
-			else
-				currprog->status = msg.retval;
-			break;
-
 		default:
 			barf("Invalid kernelop opcode %d\n", msg.opcode);
 		}
 	} else
 		barf("Unknown message type %d\n", msg.type);
+
+	return true;
 }
 
-void send_userspace_message(struct nf_userspace_message *msg)
+static void send_userspace_message(struct nf_userspace_message *msg)
 {
-	write(sock[0], msg, sizeof(struct nf_userspace_message) + msg->len);
+	write(msg_fd, msg, sizeof(struct nf_userspace_message) + msg->len);
 
 	/* expect a reply */
-	handle_userspace_message();
+	handle_userspace_message(NULL);
 }
 
 int copy_to_user(void *to, const void *from, unsigned long n)
@@ -358,7 +311,8 @@
 	msg->retval = 0;
 
 	if (strace)
-		nfsim_log(LOG_USERSPACE, "        copy_to_user(%p,%i)", to, n);
+		nfsim_log(LOG_USERSPACE, "        copy_to_user(%p,%i)",
+			  to, n);
 	memcpy((char *)msg + sizeof(struct nf_userspace_message), from, n);
 
 	send_userspace_message(msg);
@@ -394,7 +348,8 @@
 
 	if (strace)
 		nfsim_log(LOG_USERSPACE,
-				"        copy_from_user(%p,%i)", to, n);
+			  "        copy_from_user(%p,%i)",
+			  to, n);
 	send_userspace_message(&msg);
 
 	return 0;
@@ -404,55 +359,56 @@
 void fork_other_program(void)
 {
 	struct nf_userspace_message msg;
-	int newsock[2];
+	int iofds[2], msgfds[2];
 
-	if (!currprog)
+	/* Nothing to do if no program attached. */
+	if (pid == -1)
 		return;
 
-	if (socketpair(PF_UNIX, SOCK_STREAM, PF_UNSPEC, newsock))
+	if (socketpair(PF_UNIX, SOCK_STREAM, PF_UNSPEC, msgfds))
 		barf_perror("socket");
 
-	forked = true;
+	if (pipe(iofds) != 0)
+		barf_perror("pipe");
 
 	memset(&msg, 0, sizeof(msg));
 	msg.type = UM_KERNELOP;
 	msg.opcode = KOP_FORK;
 	if (strace)
-		nfsim_log(LOG_USERSPACE, "        fork()");
+		nfsim_log(LOG_USERSPACE, "        %i: fork() = %i",
+			  getppid(), getpid());
 
-	/* Child will tell us pid. */
-	currprog->pid = 0;
-	currprog->status = -1;
-	send_userspace_message(&msg);
+	/* Send fork message (no response) */
+	write(msg_fd, &msg, sizeof(struct nf_userspace_message));
 
-	/* Now child expects us to supply new fd for stdout/stderr.
-	 * We use this to tell when it died. */
-	send_fd(sock[0], newsock[1]);
+	/* Child reads these. */
+	send_fd(msg_fd, iofds[1]);
+	send_fd(msg_fd, msgfds[1]);
+	close(msgfds[1]);
+	close(iofds[1]);
 
-	/* And use the new socket pair. */
-	close(currprog->fd);
-	close(newsock[1]);
-	currprog->fd = newsock[0];
+	/* Use the new socket pair. */
+	msg_fd = msgfds[0];
+	io_fd = iofds[0];
 }
 
 /* Loop accepting messages from fakesockopt.  If child talks, return. */
-char *wait_for_output(void)
+char *wait_for_output(int *status)
 {
 	fd_set fdset;
 	int retry = 0;
 
-	for (;;) {
+	/* We need to keep going until both fds drained. */
+	while (io_fd != -1 || msg_fd != -1) {
 		int maxfd, sret;
 
 		FD_ZERO(&fdset);
-		FD_SET(sock[0], &fdset);
+		if (msg_fd != -1)
+			FD_SET(msg_fd, &fdset);
+		if (io_fd != -1)
+			FD_SET(io_fd, &fdset);
 
-		maxfd = sock[0];
-		if (currprog) {
-			FD_SET(currprog->fd, &fdset);
-			if (currprog->fd > maxfd)
-				maxfd = currprog->fd;
-		}
+		maxfd = max(msg_fd, io_fd);
 		sret = select(maxfd + 1, &fdset, NULL, NULL, NULL);
 		if (sret < 0) {
 			if ((errno == EINTR || errno == EAGAIN)
@@ -461,18 +417,30 @@
 			barf_perror("select");
 		}
 
-		if (FD_ISSET(sock[0], &fdset))
-			handle_userspace_message();
-
-		if (currprog && FD_ISSET(currprog->fd, &fdset)) {
+		if (io_fd != -1 && FD_ISSET(io_fd, &fdset)) {
 			char *output = talloc_size(NULL, 1024);
-			int ret = read(currprog->fd, output, 1023);
+			int ret = read(io_fd, output, 1023);
 			if (ret > 0) {
 				output[ret] = '\0';
 				return output;
 			}
 			talloc_free(output);
-			return NULL;
+			close(io_fd);
+			io_fd = -1;
 		}
+
+		if (msg_fd != -1 && FD_ISSET(msg_fd, &fdset)) {
+			if (!handle_userspace_message(status)) {
+				close(msg_fd);
+				msg_fd = -1;
+			}
+		}
 	}
+
+	/* If other end didn't tell us status, maybe direct child? */
+	if (*status == -1)
+		if (waitpid(pid, status, 0) == -1)
+			*status = -1;
+	pid = -1;
+	return NULL;
 }

Modified: trunk/nfsim/core/message.h
===================================================================
--- trunk/nfsim/core/message.h	2005-11-02 00:44:45 UTC (rev 4419)
+++ trunk/nfsim/core/message.h	2005-11-02 00:49:12 UTC (rev 4420)
@@ -32,14 +32,13 @@
 int copy_from_user(void *to, const void *from, unsigned long n);
 
 /* Returns talloced output of child (if running). */
-char *wait_for_output(void);
+char *wait_for_output(int *status);
 
 /* We want to fork: split other program */
 void fork_other_program(void);
 
 /* Start a program*/
 void start_program(const char *name, int argc, char *argv[]);
-int end_program(const char *name);
 
 /* Trace system calls? */
 extern bool strace;

Modified: trunk/nfsim/core/nfsockopt.h
===================================================================
--- trunk/nfsim/core/nfsockopt.h	2005-11-02 00:44:45 UTC (rev 4419)
+++ trunk/nfsim/core/nfsockopt.h	2005-11-02 00:49:12 UTC (rev 4420)
@@ -25,6 +25,7 @@
 /* TODO: use the proper values insetad */
 #define SYS_GETSOCKOPT 1
 #define SYS_SETSOCKOPT 2
+#define SYS_EXIT 3
 
 #define KOP_COPY_TO_USER   1
 #define KOP_COPY_FROM_USER 2

Modified: trunk/nfsim/tools/iptables.c
===================================================================
--- trunk/nfsim/tools/iptables.c	2005-11-02 00:44:45 UTC (rev 4419)
+++ trunk/nfsim/tools/iptables.c	2005-11-02 00:49:12 UTC (rev 4420)
@@ -26,14 +26,13 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <sys/types.h>
-#include <sys/wait.h>
 #include <signal.h>
 #include "utils.h"
 #include "message.h"
 
 static bool run_command(int argc, char **argv)
 {
-	int status;
+	int status = -1;
 	char *prefix, *name, *output;
 	char buf[4096] = { 0 };
 
@@ -45,18 +44,19 @@
 
 	start_program(name, argc, argv);
 
-	while ((output = wait_for_output()) != NULL) {
+	while ((output = wait_for_output(&status)) != NULL) {
 		/* Take output from iptables, and feed through logging. */
 		nfsim_log_partial(LOG_USERSPACE, buf, sizeof(buf), "%s",
 				  output);
 		talloc_free(output);
 	}
-	status = end_program(name);
 	talloc_free(name);
 
-	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
-		return true;
-	return false;
+	/* If parent died without returning status, this can happen. */
+	if (status == -1)
+		return false;
+
+	return (WIFEXITED(status) && WEXITSTATUS(status) == 0);
 }
 
 static void run_command_help(int argc, char **argv)




More information about the netfilter-cvslog mailing list