summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoy Marples <roy@marples.name>2021-01-28 17:26:20 +0000
committerRoy Marples <roy@marples.name>2021-01-28 17:26:20 +0000
commit701a9d031e0f97d7024639a9fbeaa35afb1bbb83 (patch)
tree197e6e249f3d9cfee771fddecf807e1f0b71a827
parent07af15970a386e49db4b4e5c443e702fbca2f275 (diff)
downloaddhcpcd-701a9d031e0f97d7024639a9fbeaa35afb1bbb83.tar.xz
Linux: Implement epoll(7) for eloop (again)
eloop allows for O(1) processing of active fd's. The problems with the prior implementation have now been fixed.
-rwxr-xr-xconfigure20
-rw-r--r--src/eloop.c297
-rw-r--r--src/privsep-linux.c9
-rw-r--r--src/privsep.c2
4 files changed, 247 insertions, 81 deletions
diff --git a/configure b/configure
index c8f15749..091815ab 100755
--- a/configure
+++ b/configure
@@ -1260,6 +1260,23 @@ EOF
fi
if [ -z "$POLL" ]; then
+ printf "Testing for epoll ... "
+ cat <<EOF >_epoll.c
+#include <sys/epoll.h>
+int main(void) {
+ return epoll_create1(EPOLL_CLOEXEC);
+}
+EOF
+ if $XCC _epoll.c -o _epoll 2>&3; then
+ POLL=epoll
+ echo "yes"
+ else
+ echo "no"
+ fi
+ rm -f _epoll.c _epoll
+fi
+
+if [ -z "$POLL" ]; then
printf "Testing for ppoll ... "
cat <<EOF >_ppoll.c
#include <poll.h>
@@ -1322,6 +1339,9 @@ kqueue1)
kqueue)
echo "#define HAVE_KQUEUE" >>$CONFIG_H
;;
+epoll)
+ echo "#define HAVE_EPOLL" >>$CONFIG_H
+ ;;
ppoll)
echo "#define HAVE_PPOLL" >>$CONFIG_H
;;
diff --git a/src/eloop.c b/src/eloop.c
index 031d7507..8fd31307 100644
--- a/src/eloop.c
+++ b/src/eloop.c
@@ -48,7 +48,7 @@
#include "config.h"
#endif
-#if defined(HAVE_KQUEUE) || defined(HAVE_PPOLL)
+#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL) || defined(HAVE_PPOLL)
#elif defined(HAVE_POLLTS)
#define ppoll pollts
#elif !defined(HAVE_PSELECT)
@@ -66,6 +66,9 @@
#define _kevent kevent
#endif
#define NFD 2
+#elif defined(HAVE_EPOLL)
+#include <sys/epoll.h>
+#define NFD 1
#else
#include <poll.h>
#define USE_POLL
@@ -169,9 +172,13 @@ struct eloop {
void (*signal_cb)(int, void *);
void *signal_cb_ctx;
-#ifdef HAVE_KQUEUE
+#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
int fd;
+#endif
+#if defined(HAVE_KQUEUE)
struct kevent *fds;
+#elif defined(HAVE_EPOLL)
+ struct epoll_event *fds;
#else
struct pollfd *fds;
#endif
@@ -312,9 +319,12 @@ static int
eloop_event_setup_fds(struct eloop *eloop)
{
struct eloop_event *e, *ne;
-#ifdef HAVE_KQUEUE
+#if defined(HAVE_KQUEUE)
struct kevent *pfd;
size_t nfds = eloop->nsignals;
+#elif defined(HAVE_EPOLL)
+ struct epoll_event *pfd;
+ size_t nfds = 0;
#else
struct pollfd *pfd;
size_t nfds = 0;
@@ -373,9 +383,12 @@ eloop_event_add_rw(struct eloop *eloop, int fd,
{
struct eloop_event *e;
bool added;
-#ifdef HAVE_KQUEUE
+#if defined(HAVE_KQUEUE)
struct kevent ke[2];
size_t n;
+#elif defined(HAVE_EPOLL)
+ struct epoll_event epe;
+ int op;
#endif
assert(eloop != NULL);
@@ -422,7 +435,7 @@ eloop_event_add_rw(struct eloop *eloop, int fd,
}
setup:
-#ifdef HAVE_KQUEUE
+#if defined(HAVE_KQUEUE)
EV_SET(&ke[0], (uintptr_t)fd, EVFILT_READ, EV_ADD, 0, 0, e);
if (e->write_cb != NULL) {
EV_SET(&ke[1], (uintptr_t)fd, EVFILT_WRITE, EV_ADD, 0, 0, e);
@@ -436,6 +449,22 @@ setup:
}
return -1;
}
+#elif defined(HAVE_EPOLL)
+ memset(&epe, 0, sizeof(epe));
+ epe.data.ptr = e;
+ if (e->read_cb != NULL)
+ epe.events |= EPOLLIN;
+ if (e->write_cb != NULL)
+ epe.events |= EPOLLOUT;
+
+ op = added ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+ if (epoll_ctl(eloop->fd, op, fd, &epe) == -1) {
+ if (added) {
+ TAILQ_REMOVE(&eloop->events, e, next);
+ TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
+ }
+ return -1;
+ }
#else
e->pollfd = NULL;
UNUSED(added);
@@ -465,7 +494,6 @@ eloop_event_delete_write(struct eloop *eloop, int fd, int write_only)
{
struct eloop_event *e;
#ifdef HAVE_KQUEUE
- struct kevent ke;
#endif
assert(eloop != NULL);
@@ -484,13 +512,28 @@ eloop_event_delete_write(struct eloop *eloop, int fd, int write_only)
}
if (write_only) {
-#ifdef HAVE_KQUEUE
+#if defined(HAVE_KQUEUE)
if (e->write_cb != NULL) {
+ struct kevent ke;
+
EV_SET(&ke, (uintptr_t)e->fd,
EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
if (_kevent(eloop->fd, &ke, 1, NULL, 0, NULL) == -1)
return -1;
}
+#elif defined(HAVE_EPOLL)
+ if (e->write_cb != NULL) {
+ struct epoll_event epe;
+
+ memset(&epe, 0, sizeof(epe));
+ epe.data.ptr = e;
+ if (e->read_cb != NULL)
+ epe.events |= EPOLLIN;
+ if (epoll_ctl(eloop->fd,
+ e->read_cb != NULL ? EPOLL_CTL_MOD : EPOLL_CTL_DEL,
+ e->fd, &epe) == -1)
+ return -1;
+ }
#else
if (e->pollfd != NULL) {
e->pollfd->events &= ~POLLOUT;
@@ -655,19 +698,26 @@ eloop_enter(struct eloop *eloop)
int
eloop_forked(struct eloop *eloop)
{
-#ifdef HAVE_KQUEUE
+#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
struct eloop_event *e;
- size_t i;
+#if defined(HAVE_KQUEUE)
struct kevent *pfds, *pfd;
+ size_t i;
int error;
+#elif defined(HAVE_EPOLL)
+ struct epoll_event epe = { .events = 0 };
+#endif
assert(eloop != NULL);
+#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
if (eloop->fd != -1)
close(eloop->fd);
if (eloop_open(eloop) == -1)
return -1;
+#endif
- pfds = malloc((eloop->nsignals + (eloop->nevents * 2)) * sizeof(*pfds));
+#ifdef HAVE_KQUEUE
+ pfds = malloc((eloop->nsignals + (eloop->nevents * NFD)) * sizeof(*pfds));
pfd = pfds;
if (eloop->signal_cb != NULL) {
@@ -677,24 +727,41 @@ eloop_forked(struct eloop *eloop)
}
} else
i = 0;
+#endif
TAILQ_FOREACH(e, &eloop->events, next) {
if (e->fd == -1)
continue;
- EV_SET(pfd++, (uintptr_t)e->fd, EVFILT_READ, EV_ADD, 0, 0, e);
- i++;
+#if defined(HAVE_KQUEUE)
+ if (e->read_cb != NULL) {
+ EV_SET(pfd++, (uintptr_t)e->fd,
+ EVFILT_READ, EV_ADD, 0, 0, e);
+ i++;
+ }
if (e->write_cb != NULL) {
EV_SET(pfd++, (uintptr_t)e->fd,
EVFILT_WRITE, EV_ADD, 0, 0, e);
i++;
}
+#elif defined(HAVE_EPOLL)
+ memset(&epe, 0, sizeof(epe));
+ epe.data.ptr = e;
+ if (e->read_cb != NULL)
+ epe.events |= EPOLLIN;
+ if (e->write_cb != NULL)
+ epe.events |= EPOLLOUT;
+ if (epoll_ctl(eloop->fd, EPOLL_CTL_ADD, e->fd, &epe) == -1)
+ return -1;
+#endif
}
+#if defined(HAVE_KQUEUE)
if (i == 0)
return 0;
error = _kevent(eloop->fd, pfds, i, NULL, 0, NULL);
- free(pfds);
- return error;
+#else
+ return 0;
+#endif
#else
UNUSED(eloop);
return 0;
@@ -706,6 +773,7 @@ eloop_open(struct eloop *eloop)
{
int fd;
+ assert(eloop != NULL);
#if defined(HAVE_KQUEUE1)
fd = kqueue1(O_CLOEXEC);
#elif defined(HAVE_KQUEUE)
@@ -719,13 +787,13 @@ eloop_open(struct eloop *eloop)
close(fd);
return -1;
}
+#elif defined(HAVE_EPOLL)
+ fd = epoll_create1(EPOLL_CLOEXEC);
#else
fd = 0;
#endif
-#ifdef USE_POLL
- UNUSED(eloop);
-#else
+#ifndef USE_POLL
eloop->fd = fd;
#endif
@@ -854,12 +922,10 @@ eloop_new(void)
TAILQ_INIT(&eloop->free_timeouts);
eloop->exitcode = EXIT_FAILURE;
-#ifdef HAVE_KQUEUE
if (eloop_open(eloop) == -1) {
eloop_free(eloop);
return NULL;
}
-#endif
return eloop;
}
@@ -920,27 +986,142 @@ void
eloop_free(struct eloop *eloop)
{
-#ifdef HAVE_KQUEUE
+ eloop_clear(eloop, -1);
+#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
if (eloop != NULL && eloop->fd != -1)
close(eloop->fd);
#endif
- eloop_clear(eloop, -1);
free(eloop);
}
+#if defined(HAVE_KQUEUE)
+static int
+eloop_run_kqueue(struct eloop *eloop, struct timespec *ts)
+{
+ int n, nn;
+ struct kevent *ke;
+ struct eloop_event *e;
+
+ n = _kevent(eloop->fd, NULL, 0, eloop->fds, eloop->nevents, ts);
+ if (n == -1)
+ return -1;
+
+ for (nn = n, ke = eloop->fds; nn != 0; nn--, ke++) {
+ if (eloop->cleared)
+ break;
+ e = (struct eloop_event *)ke->udata;
+#if 0
+ /* What to do with this?
+ * Currently we behave like ppoll and just try the
+ * socket and get the error there. */
+ if (ke->flags & EV_ERROR)
+ errno = (int)ke->data;
+#endif
+ switch (ke->filter) {
+ case EVFILT_SIGNAL:
+ eloop->signal_cb((int)ke->ident,
+ eloop->signal_cb_ctx);
+ break;
+ case EVFILT_WRITE:
+ e->write_cb(e->write_cb_arg);
+ break;
+ case EVFILT_READ:
+ e->read_cb(e->read_cb_arg);
+ break;
+ }
+ }
+ return n;
+}
+
+#elif defined(HAVE_EPOLL)
+
+static int
+eloop_run_epoll(struct eloop *eloop, struct timespec *ts, sigset_t *signals)
+{
+ int timeout, n, nn;
+ struct epoll_event *epe;
+ struct eloop_event *e;
+
+ if (ts != NULL) {
+ if (ts->tv_sec > INT_MAX / 1000 ||
+ (ts->tv_sec == INT_MAX / 1000 &&
+ ((ts->tv_nsec + 999999) / 1000000 > INT_MAX % 1000000)))
+ timeout = INT_MAX;
+ else
+ timeout = (int)(ts->tv_sec * 1000 +
+ (ts->tv_nsec + 999999) / 1000000);
+ } else
+ timeout = -1;
+
+ if (signals != NULL)
+ n = epoll_pwait(eloop->fd, eloop->fds,
+ (int)eloop->nevents, timeout, signals);
+ else
+ n = epoll_wait(eloop->fd, eloop->fds,
+ (int)eloop->nevents, timeout);
+ if (n == -1)
+ return -1;
+
+ for (nn = n, epe = eloop->fds; nn != 0; nn--, epe++) {
+ if (eloop->cleared)
+ break;
+ e = (struct eloop_event *)epe->data.ptr;
+ if (epe->events & EPOLLOUT &&
+ e->fd != -1 && e->write_cb != NULL)
+ e->write_cb(e->write_cb_arg);
+ if (epe->events && (EPOLLIN | EPOLLERR | EPOLLHUP) &&
+ e->fd != -1 && e->read_cb != NULL)
+ e->read_cb(e->read_cb_arg);
+ }
+ return n;
+}
+
+#else
+
+static int
+eloop_run_ppoll(struct eloop *eloop, struct timespec *ts, sigset_t *signals)
+{
+ int n, nn;
+ struct eloop_event *e;
+
+ n = ppoll(eloop->fds, (nfds_t)eloop->nevents, ts, signals);
+ if (n == -1 || n == 0)
+ return n;
+
+ nn = n;
+ TAILQ_FOREACH(e, &eloop->events, next) {
+ if (eloop->cleared)
+ break;
+ /* Skip freshly added events */
+ if (e->pollfd == NULL)
+ continue;
+ if (e->pollfd->revents)
+ nn--;
+ if (e->fd != -1 && e->pollfd->revents & POLLOUT &&
+ e->write_cb != NULL)
+ e->write_cb(e->write_cb_arg);
+ if (e->fd != -1 &&
+ e->pollfd != NULL && e->pollfd->revents &&
+ e->read_cb != NULL)
+ e->read_cb(e->read_cb_arg);
+ if (nn == 0)
+ break;
+ }
+ return n;
+}
+#endif
+
int
eloop_start(struct eloop *eloop, sigset_t *signals)
{
- int n;
- struct eloop_event *e;
+ int error;
struct eloop_timeout *t;
struct timespec ts, *tsp;
-#ifdef HAVE_KQUEUE
- struct kevent *ke;
- UNUSED(signals);
-#endif
assert(eloop != NULL);
+#if defined(HAVE_KQUEUE)
+ UNUSED(signals);
+#endif
for (;;) {
if (eloop->exitnow)
@@ -948,7 +1129,8 @@ eloop_start(struct eloop *eloop, sigset_t *signals)
#ifndef HAVE_KQUEUE
if (_eloop_nsig != 0) {
- n = _eloop_sig[--_eloop_nsig];
+ int n = _eloop_sig[--_eloop_nsig];
+
if (eloop->signal_cb != NULL)
eloop->signal_cb(n, eloop->signal_cb_ctx);
continue;
@@ -985,64 +1167,19 @@ eloop_start(struct eloop *eloop, sigset_t *signals)
if (eloop->events_need_setup)
eloop_event_setup_fds(eloop);
-#ifdef HAVE_KQUEUE
- n = _kevent(eloop->fd, NULL, 0, eloop->fds, eloop->nevents,tsp);
+#if defined(HAVE_KQUEUE)
+ UNUSED(signals);
+ error = eloop_run_kqueue(eloop, tsp);
+#elif defined(HAVE_EPOLL)
+ error = eloop_run_epoll(eloop, tsp, signals);
#else
- n = ppoll(eloop->fds, (nfds_t)eloop->nevents, tsp, signals);
+ error = eloop_run_ppoll(eloop, tsp, signals);
#endif
- if (n == -1) {
+ if (error == -1) {
if (errno == EINTR)
continue;
return -errno;
}
-
-#ifdef HAVE_KQUEUE
- for (ke = eloop->fds; n != 0; n--, ke++) {
- if (eloop->cleared)
- break;
- e = (struct eloop_event *)ke->udata;
-#if 0
- /* What to do with this?
- * Currently we behave like ppoll and just try the
- * socket and get the error there. */
- if (ke->flags & EV_ERROR)
- errno = (int)ke->data;
-#endif
- switch (ke->filter) {
- case EVFILT_SIGNAL:
- eloop->signal_cb((int)ke->ident,
- eloop->signal_cb_ctx);
- break;
- case EVFILT_WRITE:
- e->write_cb(e->write_cb_arg);
- break;
- case EVFILT_READ:
- e->read_cb(e->read_cb_arg);
- break;
- }
- }
-#else
- if (n == 0)
- continue;
- TAILQ_FOREACH(e, &eloop->events, next) {
- if (eloop->cleared)
- break;
- /* Skip freshly added events */
- if (e->pollfd == NULL)
- continue;
- if (e->pollfd->revents)
- n--;
- if (e->fd != -1 && e->pollfd->revents & POLLOUT &&
- e->write_cb != NULL)
- e->write_cb(e->write_cb_arg);
- if (e->fd != -1 &&
- e->pollfd != NULL && e->pollfd->revents &&
- e->read_cb != NULL)
- e->read_cb(e->read_cb_arg);
- if (n == 0)
- break;
- }
-#endif
}
return eloop->exitcode;
diff --git a/src/privsep-linux.c b/src/privsep-linux.c
index d31d720d..2226659b 100644
--- a/src/privsep-linux.c
+++ b/src/privsep-linux.c
@@ -233,6 +233,15 @@ static struct sock_filter ps_seccomp_filter[] = {
#ifdef __NR_close
SECCOMP_ALLOW(__NR_close),
#endif
+#ifdef __NR_epoll_ctl
+ SECCOMP_ALLOW(__NR_epoll_ctl),
+#endif
+#ifdef __NR_epoll_wait
+ SECCOMP_ALLOW(__NR_epoll_wait),
+#endif
+#ifdef __NR_epoll_pwait
+ SECCOMP_ALLOW(__NR_epoll_pwait),
+#endif
#ifdef __NR_exit_group
SECCOMP_ALLOW(__NR_exit_group),
#endif
diff --git a/src/privsep.c b/src/privsep.c
index 60ef649a..077c0b49 100644
--- a/src/privsep.c
+++ b/src/privsep.c
@@ -138,7 +138,7 @@ ps_dropprivs(struct dhcpcd_ctx *ctx)
if (ctx->ps_control_pid != getpid()) {
/* Prohibit new files, sockets, etc */
#if (defined(__linux__) || defined(__sun) || defined(__OpenBSD__)) && \
- !defined(HAVE_KQUEUE)
+ !defined(HAVE_KQUEUE) && !defined(HAVE_EPOLL)
/*
* If poll(2) is called with nfds > RLIMIT_NOFILE
* then it returns EINVAL.