|
From: Thorsten K. <ku...@su...> - 2015-07-16 13:37:30
|
Hi,
this patch replaces the two other patches regarding the poll() interface.
TI-RPC is using select() in svc_run(), while glibc and Solaris
are using poll(). This has two drawbacks: poll() is much more efficient
then select(), and with select() we are limited to 1024 open filehandles.
And applications replacing svc_run on Linux don't work, since they expect
svc_pollfd and svc_max_pollfd.
Attached patch changes this. It's full backward compatible, API and ABI,
so e.g. current rpcbind will continue to work and compile. The only
problem could arrive, that we can have more than 1024 open filehandles
and applications ignoring higher ones.
Signed-off-by: Thorsten Kukuk <ku...@th...>
---
src/libtirpc.map | 4 +-
src/rpc_commondata.c | 3 ++
src/rpc_dtablesize.c | 2 -
src/svc.c | 102 ++++++++++++++++++++++++++++++++-------------------
src/svc_run.c | 78 +++++++++++++++++++++++++--------------
src/svc_vc.c | 66 ++++++++++-----------------------
tirpc/rpc/svc.h | 6 +--
7 files changed, 142 insertions(+), 119 deletions(-)
diff --git a/src/libtirpc.map b/src/libtirpc.map
index 7d7f40f..f385de5 100644
--- a/src/libtirpc.map
+++ b/src/libtirpc.map
@@ -322,7 +322,9 @@ TIRPC_0.3.3 {
__key_encryptsession_pk_LOCAL;
__key_gendes_LOCAL;
xdr_sizeof;
- authdes_pk_create;
+ authdes_pk_create;
+ svc_pollfd;
+ svc_max_pollfd;
} TIRPC_0.3.2;
TIRPC_PRIVATE {
diff --git a/src/rpc_commondata.c b/src/rpc_commondata.c
index 5392306..918c1aa 100644
--- a/src/rpc_commondata.c
+++ b/src/rpc_commondata.c
@@ -36,3 +36,6 @@
struct opaque_auth _null_auth;
fd_set svc_fdset;
int svc_maxfd = -1;
+struct pollfd *svc_pollfd;
+int svc_max_pollfd;
+
diff --git a/src/rpc_dtablesize.c b/src/rpc_dtablesize.c
index 5c6033e..13d320c 100644
--- a/src/rpc_dtablesize.c
+++ b/src/rpc_dtablesize.c
@@ -50,8 +50,6 @@ _rpc_dtablesize(void)
if (size == 0) {
size = getdtablesize();
- if (size > FD_SETSIZE)
- size = FD_SETSIZE;
}
return (size);
}
diff --git a/src/svc.c b/src/svc.c
index 32c84f1..2dac85a 100644
--- a/src/svc.c
+++ b/src/svc.c
@@ -100,16 +100,43 @@ xprt_register (xprt)
rwlock_wrlock (&svc_fd_lock);
if (__svc_xports == NULL)
{
- __svc_xports = (SVCXPRT **) mem_alloc (FD_SETSIZE * sizeof (SVCXPRT *));
+ __svc_xports = (SVCXPRT **) calloc (_rpc_dtablesize(), sizeof (SVCXPRT *));
if (__svc_xports == NULL)
return;
- memset (__svc_xports, '\0', FD_SETSIZE * sizeof (SVCXPRT *));
}
- if (sock < FD_SETSIZE)
+ if (sock < _rpc_dtablesize())
{
+ int i;
+ struct pollfd *new_svc_pollfd;
+
__svc_xports[sock] = xprt;
- FD_SET (sock, &svc_fdset);
- svc_maxfd = max (svc_maxfd, sock);
+ if (sock < FD_SETSIZE)
+ {
+ FD_SET (sock, &svc_fdset);
+ svc_maxfd = max (svc_maxfd, sock);
+ }
+
+ /* Check if we have an empty slot */
+ for (i = 0; i < svc_max_pollfd; ++i)
+ if (svc_pollfd[i].fd == -1)
+ {
+ svc_pollfd[i].fd = sock;
+ svc_pollfd[i].events = (POLLIN | POLLPRI |
+ POLLRDNORM | POLLRDBAND);
+ return;
+ }
+
+ new_svc_pollfd = (struct pollfd *) realloc (svc_pollfd,
+ sizeof (struct pollfd)
+ * (svc_max_pollfd + 1));
+ if (new_svc_pollfd == NULL) /* Out of memory */
+ return;
+ svc_pollfd = new_svc_pollfd;
+ ++svc_max_pollfd;
+
+ svc_pollfd[svc_max_pollfd - 1].fd = sock;
+ svc_pollfd[svc_max_pollfd - 1].events = (POLLIN | POLLPRI |
+ POLLRDNORM | POLLRDBAND);
}
rwlock_unlock (&svc_fd_lock);
}
@@ -142,16 +169,25 @@ __xprt_do_unregister (xprt, dolock)
if (dolock)
rwlock_wrlock (&svc_fd_lock);
- if ((sock < FD_SETSIZE) && (__svc_xports[sock] == xprt))
+ if ((sock < _rpc_dtablesize() ) && (__svc_xports[sock] == xprt))
{
+ int i;
+
__svc_xports[sock] = NULL;
- FD_CLR (sock, &svc_fdset);
- if (sock >= svc_maxfd)
+ if (sock < FD_SETSIZE)
{
- for (svc_maxfd--; svc_maxfd >= 0; svc_maxfd--)
- if (__svc_xports[svc_maxfd])
- break;
+ FD_CLR (sock, &svc_fdset);
+ if (sock >= svc_maxfd)
+ {
+ for (svc_maxfd--; svc_maxfd >= 0; svc_maxfd--)
+ if (__svc_xports[svc_maxfd])
+ break;
+ }
}
+
+ for (i = 0; i < svc_max_pollfd; ++i)
+ if (svc_pollfd[i].fd == sock)
+ svc_pollfd[i].fd = -1;
}
if (dolock)
rwlock_unlock (&svc_fd_lock);
@@ -606,11 +642,15 @@ svc_getreqset (readfds)
int bit, fd;
fd_mask mask, *maskp;
int sock;
+ int setsize;
assert (readfds != NULL);
+ setsize = _rpc_dtablesize ();
+ if (setsize > FD_SETSIZE)
+ setsize = FD_SETSIZE;
maskp = readfds->fds_bits;
- for (sock = 0; sock < FD_SETSIZE; sock += NFDBITS)
+ for (sock = 0; sock < setsize; sock += NFDBITS)
{
for (mask = *maskp++; (bit = ffsl(mask)) != 0; mask ^= (1L << (bit - 1)))
{
@@ -733,36 +773,22 @@ svc_getreq_poll (pfdp, pollretval)
struct pollfd *pfdp;
int pollretval;
{
- int i;
- int fds_found;
+ int fds_found, i;
- for (i = fds_found = 0; fds_found < pollretval; i++)
+ for (i = fds_found = 0; i < svc_max_pollfd; ++i)
{
struct pollfd *p = &pfdp[i];
- if (p->revents)
+ if (p->fd != -1 && p->revents)
{
- /* fd has input waiting */
- fds_found++;
- /*
- * We assume that this function is only called
- * via someone _select()ing from svc_fdset or
- * _poll()ing from svc_pollset[]. Thus it's safe
- * to handle the POLLNVAL event by simply turning
- * the corresponding bit off in svc_fdset. The
- * svc_pollset[] array is derived from svc_fdset
- * and so will also be updated eventually.
- *
- * XXX Should we do an xprt_unregister() instead?
- */
- if (p->revents & POLLNVAL)
- {
- rwlock_wrlock (&svc_fd_lock);
- FD_CLR (p->fd, &svc_fdset);
- rwlock_unlock (&svc_fd_lock);
- }
- else
- svc_getreq_common (p->fd);
+ /* fd has input waiting */
+ if (p->revents & POLLNVAL)
+ xprt_unregister (__svc_xports[p->fd]);
+ else
+ svc_getreq_common (p->fd);
+
+ if (++fds_found >= pollretval)
+ break;
}
}
}
diff --git a/src/svc_run.c b/src/svc_run.c
index 783b1dc..f40314b 100644
--- a/src/svc_run.c
+++ b/src/svc_run.c
@@ -34,10 +34,11 @@
#include <reentrant.h>
#include <err.h>
#include <errno.h>
-#include <rpc/rpc.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
+#include <sys/poll.h>
+
#include <rpc/rpc.h>
#include "rpc_com.h"
@@ -46,33 +47,54 @@
void
svc_run()
{
- fd_set readfds, cleanfds;
- struct timeval timeout;
- extern rwlock_t svc_fd_lock;
+ int i;
+ struct pollfd *my_pollfd = NULL;
+ int last_max_pollfd = 0;
+
+ for (;;) {
+ int max_pollfd = svc_max_pollfd;
+ if (max_pollfd == 0 && svc_pollfd == NULL)
+ break;
+
+ if (last_max_pollfd != max_pollfd)
+ {
+ struct pollfd *new_pollfd
+ = realloc (my_pollfd, sizeof (struct pollfd) * max_pollfd);
+
+ if (new_pollfd == NULL)
+ {
+ warn ("svc_run: - out of memory");
+ break;
+ }
+
+ my_pollfd = new_pollfd;
+ last_max_pollfd = max_pollfd;
+ }
+
+ for (i = 0; i < max_pollfd; ++i)
+ {
+ my_pollfd[i].fd = svc_pollfd[i].fd;
+ my_pollfd[i].events = svc_pollfd[i].events;
+ my_pollfd[i].revents = 0;
+ }
+ switch (i = poll (my_pollfd, max_pollfd, -1))
+ {
+ case -1:
+ if (errno == EINTR)
+ continue;
+ warn ("svc_run: - poll failed");
+ break;
+ case 0:
+ continue;
+ default:
+ svc_getreq_poll (my_pollfd, i);
+ continue;
+ }
+ break;
+ }
- for (;;) {
- rwlock_rdlock(&svc_fd_lock);
- readfds = svc_fdset;
- cleanfds = svc_fdset;
- rwlock_unlock(&svc_fd_lock);
- timeout.tv_sec = 30;
- timeout.tv_usec = 0;
- switch (select(svc_maxfd+1, &readfds, NULL, NULL, &timeout)) {
- case -1:
- FD_ZERO(&readfds);
- if (errno == EINTR) {
- continue;
- }
- warn("svc_run: - select failed");
- return;
- case 0:
- __svc_clean_idle(&cleanfds, 30, FALSE);
- continue;
- default:
- svc_getreqset(&readfds);
- }
- }
+ free (my_pollfd);
}
/*
@@ -85,6 +107,8 @@ svc_exit()
extern rwlock_t svc_fd_lock;
rwlock_wrlock(&svc_fd_lock);
- FD_ZERO(&svc_fdset);
+ free (svc_pollfd);
+ svc_pollfd = NULL;
+ svc_max_pollfd = 0;
rwlock_unlock(&svc_fd_lock);
}
diff --git a/src/svc_vc.c b/src/svc_vc.c
index 3cddcbc..4bafbcf 100644
--- a/src/svc_vc.c
+++ b/src/svc_vc.c
@@ -309,7 +309,6 @@ rendezvous_request(xprt, msg)
socklen_t len;
struct __rpc_sockinfo si;
SVCXPRT *newxprt;
- fd_set cleanfds;
assert(xprt != NULL);
assert(msg != NULL);
@@ -321,13 +320,16 @@ again:
&len)) < 0) {
if (errno == EINTR)
goto again;
- /*
- * Clean out the most idle file descriptor when we're
- * running out.
- */
+
if (errno == EMFILE || errno == ENFILE) {
- cleanfds = svc_fdset;
- __svc_clean_idle(&cleanfds, 0, FALSE);
+ /* If there are no file descriptors available, then accept will fail.
+ We want to delay here so the connection request can be dequeued;
+ otherwise we can bounce between polling and accepting, never
+ giving the request a chance to dequeue and eating an enormous
+ amount of cpu time in svc_run if we're polling on many file
+ descriptors. */
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 50000000 };
+ nanosleep (&ts, NULL);
goto again;
}
return (FALSE);
@@ -794,47 +796,17 @@ __rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
* rpcbind are known to call this function. Do not alter or remove this
* API without changing the library's sonum.
*/
+/* Since this is an exported interface used by rpcbind, we cannot
+ remove it. But since poll() can handle much more and much higher
+ file descriptors, this code doesn't really work anymore, too.
+ So for now, keep it as dummy function and do nothing to not break
+ existing binaries. If we have ported rpcbind to the poll() interface
+ and find out, that we really need this cleanup stuff (but nobody
+ besides FreeBSD has this), we need to re-implement it using poll().
+ But this means a new function name with different parameters. For
+ ABI/API compatibility, we cannot reuse this one. */
bool_t
__svc_clean_idle(fd_set *fds, int timeout, bool_t cleanblock)
{
- int i, ncleaned;
- SVCXPRT *xprt, *least_active;
- struct timeval tv, tdiff, tmax;
- struct cf_conn *cd;
-
- gettimeofday(&tv, NULL);
- tmax.tv_sec = tmax.tv_usec = 0;
- least_active = NULL;
- rwlock_wrlock(&svc_fd_lock);
- for (i = ncleaned = 0; i <= svc_maxfd; i++) {
- if (FD_ISSET(i, fds)) {
- xprt = __svc_xports[i];
- if (xprt == NULL || xprt->xp_ops == NULL ||
- xprt->xp_ops->xp_recv != svc_vc_recv)
- continue;
- cd = (struct cf_conn *)xprt->xp_p1;
- if (!cleanblock && !cd->nonblock)
- continue;
- if (timeout == 0) {
- timersub(&tv, &cd->last_recv_time, &tdiff);
- if (timercmp(&tdiff, &tmax, >)) {
- tmax = tdiff;
- least_active = xprt;
- }
- continue;
- }
- if (tv.tv_sec - cd->last_recv_time.tv_sec > timeout) {
- __xprt_unregister_unlocked(xprt);
- __svc_vc_dodestroy(xprt);
- ncleaned++;
- }
- }
- }
- if (timeout == 0 && least_active != NULL) {
- __xprt_unregister_unlocked(least_active);
- __svc_vc_dodestroy(least_active);
- ncleaned++;
- }
- rwlock_unlock(&svc_fd_lock);
- return ncleaned > 0 ? TRUE : FALSE;
+ return FALSE;
}
diff --git a/tirpc/rpc/svc.h b/tirpc/rpc/svc.h
index f647095..1ab6527 100644
--- a/tirpc/rpc/svc.h
+++ b/tirpc/rpc/svc.h
@@ -315,12 +315,10 @@ extern int rpc_reg(rpcprog_t, rpcvers_t, rpcproc_t,
* dynamic; must be inspected before each call to select
*/
extern int svc_maxfd;
-#ifdef FD_SETSIZE
extern fd_set svc_fdset;
#define svc_fds svc_fdset.fds_bits[0] /* compatibility */
-#else
-extern int svc_fds;
-#endif /* def FD_SETSIZE */
+extern struct pollfd *svc_pollfd;
+extern int svc_max_pollfd;
/*
* a small program implemented by the svc_rpc implementation itself;
--
1.8.5.6
--
Thorsten Kukuk, Senior Architect SLES & Common Code Base
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nuernberg, Germany
GF: Felix Imendörffer, Jane Smithard, Dilip Upmanyu, Graham Norton, HRB 21284 (AG Nürnberg)
|