Created
March 21, 2016 08:07
-
-
Save macdice/6ec9173a1130c8a9a738 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/configure b/configure | |
index 24655dc..e239641 100755 | |
--- a/configure | |
+++ b/configure | |
@@ -10193,7 +10193,7 @@ fi | |
## Header files | |
## | |
-for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h | |
+for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/event.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h | |
do : | |
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` | |
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" | |
@@ -12425,7 +12425,7 @@ fi | |
LIBS_including_readline="$LIBS" | |
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` | |
-for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l | |
+for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l | |
do : | |
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` | |
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" | |
diff --git a/configure.in b/configure.in | |
index c564a76..ef9d450 100644 | |
--- a/configure.in | |
+++ b/configure.in | |
@@ -1183,7 +1183,7 @@ AC_SUBST(UUID_LIBS) | |
## | |
dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES | |
-AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h]) | |
+AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/event.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h]) | |
# On BSD, test for net/if.h will fail unless sys/socket.h | |
# is included first. | |
@@ -1432,7 +1432,7 @@ PGAC_FUNC_WCSTOMBS_L | |
LIBS_including_readline="$LIBS" | |
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` | |
-AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l]) | |
+AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l]) | |
AC_REPLACE_FUNCS(fseeko) | |
case $host_os in | |
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c | |
index fd9a7bc..03acffe 100644 | |
--- a/src/backend/storage/ipc/latch.c | |
+++ b/src/backend/storage/ipc/latch.c | |
@@ -44,6 +44,9 @@ | |
#ifdef HAVE_SYS_EPOLL_H | |
#include <sys/epoll.h> | |
#endif | |
+#ifdef HAVE_SYS_EVENT_H | |
+#include <sys/event.h> | |
+#endif | |
#ifdef HAVE_POLL_H | |
#include <poll.h> | |
#endif | |
@@ -68,10 +71,12 @@ | |
* useful to manually specify the used primitive. If desired, just add a | |
* define somewhere before this block. | |
*/ | |
-#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32) | |
+#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_KQUEUE) || defined(WAIT_USE_POLL) || defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32) | |
/* don't overwrite manual choice */ | |
#elif defined(HAVE_SYS_EPOLL_H) | |
#define WAIT_USE_EPOLL | |
+#elif defined(HAVE_KQUEUE) | |
+#define WAIT_USE_KQUEUE | |
#elif defined(HAVE_POLL) | |
#define WAIT_USE_POLL | |
#elif HAVE_SYS_SELECT_H | |
@@ -107,6 +112,10 @@ struct WaitEventSet | |
int epoll_fd; | |
/* epoll_wait returns events in a user provided arrays, allocate once */ | |
struct epoll_event *epoll_ret_events; | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ int kqueue_fd; | |
+ /* kevent returns events in a user provided arrays, allocate once */ | |
+ struct kevent *kqueue_ret_events; | |
#elif defined(WAIT_USE_POLL) | |
/* poll expects events to be waited on every poll() call, prepare once */ | |
struct pollfd *pollfds; | |
@@ -135,6 +144,8 @@ static void drainSelfPipe(void); | |
#if defined(WAIT_USE_EPOLL) | |
static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action); | |
+#elif defined(WAIT_USE_KQUEUE) | |
+static void WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int action); | |
#elif defined(WAIT_USE_POLL) | |
static void WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event); | |
#elif defined(WAIT_USE_WIN32) | |
@@ -491,6 +502,8 @@ CreateWaitEventSet(MemoryContext context, int nevents) | |
#if defined(WAIT_USE_EPOLL) | |
sz += sizeof(struct epoll_event) * nevents; | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ sz += sizeof(struct kevent) * nevents; | |
#elif defined(WAIT_USE_POLL) | |
sz += sizeof(struct pollfd) * nevents; | |
#elif defined(WAIT_USE_WIN32) | |
@@ -509,6 +522,9 @@ CreateWaitEventSet(MemoryContext context, int nevents) | |
#if defined(WAIT_USE_EPOLL) | |
set->epoll_ret_events = (struct epoll_event *) data; | |
data += sizeof(struct epoll_event) * nevents; | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ set->kqueue_ret_events = (struct kevent *) data; | |
+ data += sizeof(struct kevent) * nevents; | |
#elif defined(WAIT_USE_POLL) | |
set->pollfds = (struct pollfd *) data; | |
data += sizeof(struct pollfd) * nevents; | |
@@ -524,6 +540,10 @@ CreateWaitEventSet(MemoryContext context, int nevents) | |
set->epoll_fd = epoll_create(nevents); | |
if (set->epoll_fd < 0) | |
elog(ERROR, "epoll_create failed: %m"); | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ set->kqueue_fd = kqueue(); | |
+ if (set->kqueue_fd < 0) | |
+ elog(ERROR, "kqueue failed: %m"); | |
#elif defined(WAIT_USE_WIN32) | |
/* | |
@@ -549,6 +569,8 @@ FreeWaitEventSet(WaitEventSet *set) | |
{ | |
#if defined(WAIT_USE_EPOLL) | |
close(set->epoll_fd); | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ close(set->kqueue_fd); | |
#elif defined(WAIT_USE_WIN32) | |
WaitEvent *cur_event; | |
@@ -638,6 +660,8 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch) | |
/* perform wait primitive specific initialization, if needed */ | |
#if defined(WAIT_USE_EPOLL) | |
WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD); | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ WaitEventAdjustKqueue(set, event, EV_ADD); | |
#elif defined(WAIT_USE_POLL) | |
WaitEventAdjustPoll(set, event); | |
#elif defined(WAIT_USE_SELECT) | |
@@ -696,6 +720,8 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch) | |
#if defined(WAIT_USE_EPOLL) | |
WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD); | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ WaitEventAdjustKqueue(set, event, EV_ADD); | |
#elif defined(WAIT_USE_POLL) | |
WaitEventAdjustPoll(set, event); | |
#elif defined(WAIT_USE_SELECT) | |
@@ -897,6 +923,56 @@ WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event) | |
} | |
#endif | |
+#if defined(WAIT_USE_KQUEUE) | |
+ | |
+/* | |
+ * Register or update the kqueue registration for one WaitEvent. | |
+ * | |
+ * action can be EV_ADD or EV_DELETE. EV_ADD is used for both adding and | |
+ * modifying, and EV_DELETE is not used yet. The WaitEvent pointer is stored | |
+ * in the kevent's udata so a returned kevent can be mapped back to it. | |
+ * Reports ERROR if kevent() fails. | |
+ */ | |
+static void | |
+WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int action) | |
+{ | |
+    int rc; | |
+    int filter; | |
+    struct kevent k_ev; | |
+ | |
+    Assert(event->fd >= 0); | |
+ | |
+    if (event->events == WL_LATCH_SET) | |
+    { | |
+        Assert(set->latch != NULL); | |
+        filter = EVFILT_READ; | |
+    } | |
+    else if (event->events == WL_POSTMASTER_DEATH) | |
+        filter = EVFILT_READ; | |
+    else | |
+    { | |
+        Assert(event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)); | |
+        /* TODO: We need to register two kevents if both bits are set! */ | |
+        /* TODO: But then how will we merge the resulting received events? */ | |
+        if (event->events & WL_SOCKET_READABLE) | |
+            filter = EVFILT_READ; | |
+        else | |
+            filter = EVFILT_WRITE; | |
+    } | |
+ | |
+    /* EV_SET initializes every member, including any platform-specific ones. */ | |
+    /* NOTE(review): EV_CLEAR asks for edge-triggered reporting -- confirm. */ | |
+    EV_SET(&k_ev, event->fd, filter, action | EV_CLEAR, 0, 0, event); | |
+ | |
+    rc = kevent(set->kqueue_fd, &k_ev, 1, NULL, 0, NULL); | |
+    if (rc < 0) | |
+        ereport(ERROR, | |
+                (errcode_for_socket_access(), | |
+                 errmsg("kevent() failed %d %d: %m", set->kqueue_fd, event->fd))); | |
+} | |
+ | |
+#endif | |
+ | |
#if defined(WAIT_USE_WIN32) | |
static void | |
WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event) | |
@@ -1060,6 +1136,140 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, | |
return returned_events; | |
} | |
+#elif defined(WAIT_USE_KQUEUE) | |
+ | |
+/* | |
+ * Wait using kqueue(2)/kevent(2), as found on FreeBSD and other BSD-family | |
+ * systems including macOS. | |
+ * | |
+ * This is the preferable wait method for systems that have it, as several | |
+ * readiness notifications are delivered, without having to iterate through | |
+ * all of set->events. | |
+ * | |
+ * cur_timeout is in milliseconds; a negative value blocks indefinitely. | |
+ * Returns the number of entries stored in occurred_events (0 after EINTR or | |
+ * when nothing reportable happened), or -1 if the timeout was exceeded. | |
+ */ | |
+static int | |
+WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, | |
+                      WaitEvent *occurred_events, int nevents) | |
+{ | |
+    int returned_events = 0; | |
+    int rc; | |
+    WaitEvent *cur_event; | |
+    struct kevent *cur_kqueue_event; | |
+    struct timespec timeout; | |
+    struct timespec *timeout_p; | |
+ | |
+    if (cur_timeout < 0) | |
+        timeout_p = NULL; | |
+    else | |
+    { | |
+        timeout.tv_sec = cur_timeout / 1000; | |
+        timeout.tv_nsec = (cur_timeout % 1000) * 1000000; | |
+        timeout_p = &timeout; | |
+    } | |
+ | |
+    /* Sleep. NOTE(review): kqueue_ret_events was sized when the set was */ | |
+    /* created; presumably nevents never exceeds that capacity -- confirm. */ | |
+    rc = kevent(set->kqueue_fd, NULL, 0, | |
+                set->kqueue_ret_events, nevents, timeout_p); | |
+ | |
+    /* Check return code */ | |
+    if (rc < 0) | |
+    { | |
+        /* EINTR is okay, otherwise complain */ | |
+        if (errno != EINTR) | |
+        { | |
+            waiting = false; | |
+            ereport(ERROR, | |
+                    (errcode_for_socket_access(), | |
+                     errmsg("kevent() failed while trying to wait: %m"))); | |
+        } | |
+        return 0; | |
+    } | |
+    else if (rc == 0) | |
+    { | |
+        /* timeout exceeded */ | |
+        return -1; | |
+    } | |
+ | |
+    /* | |
+     * At least one event occurred, iterate over the returned kqueue events | |
+     * until they're either all processed, or we've returned all the events | |
+     * the caller desired. Each kevent names its filter in ->filter; EVFILT_* | |
+     * values are not flag bits, so they must not be tested against ->flags. | |
+     */ | |
+    for (cur_kqueue_event = set->kqueue_ret_events; | |
+         cur_kqueue_event < (set->kqueue_ret_events + rc) && | |
+         returned_events < nevents; | |
+         cur_kqueue_event++) | |
+    { | |
+        /* kqueue's udata pointer is set to the associated WaitEvent */ | |
+        cur_event = (WaitEvent *) cur_kqueue_event->udata; | |
+ | |
+        occurred_events->pos = cur_event->pos; | |
+        occurred_events->events = 0; | |
+ | |
+        if (cur_event->events == WL_LATCH_SET && | |
+            cur_kqueue_event->filter == EVFILT_READ) | |
+        { | |
+            /* There's data in the self-pipe, clear it. */ | |
+            drainSelfPipe(); | |
+ | |
+            if (set->latch->is_set) | |
+            { | |
+                occurred_events->fd = PGINVALID_SOCKET; | |
+                occurred_events->events = WL_LATCH_SET; | |
+                occurred_events++; | |
+                returned_events++; | |
+            } | |
+        } | |
+        else if (cur_event->events == WL_POSTMASTER_DEATH && | |
+                 cur_kqueue_event->filter == EVFILT_READ) | |
+        { | |
+            /* | |
+             * We expect an EV_EOF when the remote end is closed, but | |
+             * because we don't expect the pipe to become readable or to have | |
+             * any errors either, treat those cases as postmaster death, too. | |
+             * | |
+             * As explained in the WAIT_USE_SELECT implementation, select(2) | |
+             * may spuriously return. Be paranoid about that here too, a | |
+             * spurious WL_POSTMASTER_DEATH would be painful. | |
+             */ | |
+            if (!PostmasterIsAlive()) | |
+            { | |
+                occurred_events->fd = PGINVALID_SOCKET; | |
+                occurred_events->events = WL_POSTMASTER_DEATH; | |
+                occurred_events++; | |
+                returned_events++; | |
+            } | |
+        } | |
+        else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) | |
+        { | |
+            Assert(cur_event->fd >= 0); | |
+ | |
+            /* A delivered read/write kevent also covers EOF (EV_EOF flag). */ | |
+            if ((cur_event->events & WL_SOCKET_READABLE) && | |
+                cur_kqueue_event->filter == EVFILT_READ) | |
+                occurred_events->events |= WL_SOCKET_READABLE; | |
+ | |
+            if ((cur_event->events & WL_SOCKET_WRITEABLE) && | |
+                cur_kqueue_event->filter == EVFILT_WRITE) | |
+                occurred_events->events |= WL_SOCKET_WRITEABLE; | |
+ | |
+            if (occurred_events->events != 0) | |
+            { | |
+                occurred_events->fd = cur_event->fd; | |
+                occurred_events++; | |
+                returned_events++; | |
+            } | |
+        } | |
+    } | |
+ | |
+    return returned_events; | |
+} | |
+ | |
#elif defined(WAIT_USE_POLL) | |
/* | |
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in | |
index c72635c..e319f1d 100644 | |
--- a/src/include/pg_config.h.in | |
+++ b/src/include/pg_config.h.in | |
@@ -279,6 +279,9 @@ | |
/* Define to 1 if you have isinf(). */ | |
#undef HAVE_ISINF | |
+/* Define to 1 if you have the `kqueue' function. */ | |
+#undef HAVE_KQUEUE | |
+ | |
/* Define to 1 if you have the <langinfo.h> header file. */ | |
#undef HAVE_LANGINFO_H | |
@@ -533,6 +536,9 @@ | |
/* Define to 1 if you have the <sys/epoll.h> header file. */ | |
#undef HAVE_SYS_EPOLL_H | |
+/* Define to 1 if you have the <sys/event.h> header file. */ | |
+#undef HAVE_SYS_EVENT_H | |
+ | |
/* Define to 1 if you have the <sys/ioctl.h> header file. */ | |
#undef HAVE_SYS_IOCTL_H | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment