* Release 3.77.92.

* Complete implementation of new jobserver algorithm.
* A few minor fixups.
This commit is contained in:
Paul Smith 1999-08-01 08:12:06 +00:00
parent 09f1e4cf83
commit e34540f79b
8 changed files with 117 additions and 72 deletions

View File

@ -1,6 +1,43 @@
1999-08-01 Paul D. Smith <psmith@gnu.org>
New jobserver algorithm to avoid a possible hole where we could
miss SIGCHLDs and get into a deadlock. The original algorithm was
suggested by Roland McGrath with a nice refinement by Paul Eggert.
Many thanks as well to Tim Magill and Howard Chu, who also
provided many viable ideas and critiques. We all had a fun week
dreaming up interesting ways to use and abuse UNIX syscalls :).
Previously we could miss a SIGCHLD if it happened after we reaped
the children but before we re-entered the blocking read. If this
happened to all makes and/or all children, make would never wake
up.
We avoid this by having the SIGCHLD handler reset the blocking bit
on the jobserver pipe read FD (normally read does block in this
algorithm). Now if the handler is called between the time we reap
and the time we read(), and there are no tokens available, the
read will merely return with EAGAIN instead of blocking.
* main.c (main): Set the blocking bit explicitly here.
* job.c (child_handler): If we have a jobserver pipe, set the
non-blocking bit for it.
(start_waiting_job): Move the token stuff back to new_job; if we
do it here then we're not controlling the number of remote jobs
started!
(new_job): Move the check for job slots to _after_ we've created a
child structure. If the read returns without getting a token, set
the blocking bit then try to reap_children.
* make.h (EINTR_SET): Define to test errno if EINTR is available,
or 0 otherwise. Just some code cleanup.
* arscan.c (ar_member_touch): Use it.
* function.c (func_shell): Use it.
* job.c (reap_children): Use it.
* remake.c (touch_file): Use it.
1999-07-28 Paul D. Smith <psmith@gnu.org>
* make.h: Define _() and N_() macros as a passthrough to initiate
* make.h: Define _() and N_() macros as passthrough to initiate
NLS support.
* <all>: Add _()/N_() around translatable strings.

View File

@ -775,11 +775,8 @@ ar_member_touch (arname, memname)
if (AR_HDR_SIZE != write (fd, (char *) &ar_hdr, AR_HDR_SIZE))
goto lose;
/* The file's mtime is the time we we want. */
#ifdef EINTR
while (fstat (fd, &statbuf) < 0 && errno == EINTR);
#else
fstat (fd, &statbuf);
#endif
while (fstat (fd, &statbuf) < 0 && EINTR_SET)
;
#if defined(ARFMAG) || defined(ARFZMAG) || defined(AIAMAG) || defined(WINDOWS32)
/* Advance member's time to that time */
for (i = 0; i < sizeof ar_hdr.ar_date; i++)

View File

@ -3,7 +3,7 @@ AC_REVISION([$Id$])
AC_PREREQ(2.13)dnl dnl Minimum Autoconf version required.
AC_INIT(vpath.c)dnl dnl A distinctive file to look for in srcdir.
AM_INIT_AUTOMAKE(make, 3.77.91)
AM_INIT_AUTOMAKE(make, 3.77.92)
AM_CONFIG_HEADER(config.h)
dnl Regular configure stuff

View File

@ -1381,11 +1381,7 @@ func_shell (o, argv, funcname)
if (cc > 0)
i += cc;
}
#ifdef EINTR
while (cc > 0 || errno == EINTR);
#else
while (cc > 0);
#endif
while (cc > 0 || EINTR_SET);
/* Close the read side of the pipe. */
#ifdef __MSDOS__

109
job.c
View File

@ -145,6 +145,17 @@ extern int wait ();
#endif /* Don't have `union wait'. */
/* How to set close-on-exec for a file descriptor. */
#if !defined F_SETFD
# define CLOSE_ON_EXEC(_d)
#else
# ifndef FD_CLOEXEC
# define FD_CLOEXEC 1
# endif
# define CLOSE_ON_EXEC(_d) (void) fcntl ((_d), F_SETFD, FD_CLOEXEC)
#endif
#ifdef VMS
static int vms_jobsefnmask = 0;
#endif /* !VMS */
@ -218,9 +229,6 @@ int w32_kill(int pid, int sig)
#endif /* WINDOWS32 */
#ifndef MAKE_JOBSERVER
# define free_job_token(c)
#else
static void
free_job_token (child)
struct child *child;
@ -248,7 +256,6 @@ free_job_token (child)
child->job_token = '-';
}
#endif
/* Write an error message describing the exit status given in
@ -302,14 +309,13 @@ vmsWaitForChildren(int *status)
/* Handle a dead child. This handler may or may not ever be installed.
If we're using the jobserver blocking read, we need it. First, installing
it ensures the read will interrupt on SIGCHLD. Second, we close the dup'd
read side of the pipe to ensure we don't enter another blocking read
without reaping all the dead children. In this case we don't need the
dead_children count.
it ensures the read will interrupt on SIGCHLD. Second, we reset the
blocking bit on the read side of the pipe to ensure we don't enter another
blocking read without reaping all the dead children. In this case we
don't need the dead_children count.
If we don't have either waitpid or wait3, then make is unreliable, but we
use the dead_children count to reap children as best we can. In this case
job_rfd will never be >= 0. */
use the dead_children count to reap children as best we can. */
static unsigned int dead_children = 0;
@ -319,9 +325,15 @@ child_handler (sig)
{
++dead_children;
if (job_rfd >= 0)
close (job_rfd);
job_rfd = -1;
#ifdef HAVE_JOBSERVER
if (job_fds[0] >= 0)
{
int fl = fcntl(job_fds[0], F_GETFL, 0);
if (fl >= 0)
fcntl(job_fds[0], F_SETFL, fl | O_NONBLOCK);
}
#endif
if (debug_flag)
printf (_("Got a SIGCHLD; %u unreaped children.\n"), dead_children);
@ -330,11 +342,12 @@ child_handler (sig)
extern int shell_function_pid, shell_function_completed;
/* Reap dead children, storing the returned status and the new command
/* Reap all dead children, storing the returned status and the new command
state (`cs_finished') in the `file' member of the `struct child' for the
dead child, and removing the child from the chain. If BLOCK nonzero,
reap at least one child, waiting for it to die if necessary. If ERR is
nonzero, print an error message first. */
dead child, and removing the child from the chain. In addition, if BLOCK
nonzero, we block in this function until we've reaped at least one
complete child, waiting for it to die if necessary. If ERR is nonzero,
print an error message first. */
void
reap_children (block, err)
@ -415,19 +428,18 @@ reap_children (block, err)
{
/* A remote status command failed miserably. Punt. */
remote_status_lose:
#ifdef EINTR
if (errno == EINTR)
if (EINTR_SET)
continue;
#endif
pfatal_with_name ("remote_status");
}
else
{
/* No remote children. Check for local children. */
#if !defined(__MSDOS__) && !defined(_AMIGA) && !defined(WINDOWS32)
if (any_local)
{
local_wait:
#ifdef VMS
vmsWaitForChildren (&status);
pid = c->pid;
@ -446,15 +458,14 @@ reap_children (block, err)
if (pid < 0)
{
/* The wait*() failed miserably. Punt. */
#ifdef EINTR
if (errno == EINTR)
continue;
#endif
if (EINTR_SET)
goto local_wait;
pfatal_with_name ("wait");
}
else if (pid > 0)
{
/* We got one; chop the status word up. */
/* We got a child exit; chop the status word up. */
exit_code = WEXITSTATUS (status);
exit_sig = WIFSIGNALED (status) ? WTERMSIG (status) : 0;
coredump = WCOREDUMP (status);
@ -1171,20 +1182,16 @@ start_waiting_job (c)
c->remote = start_remote_job_p (1);
/* If not, start it locally. */
if (!c->remote)
/* If we are running at least one job already and the load average
is too high, make this one wait. */
if (!c->remote && job_slots_used > 0 && load_too_high ())
{
/* If we are running at least one job already and the load average
is too high, make this one wait. */
if (job_slots_used > 0 && load_too_high ())
{
/* Put this child on the chain of children waiting
for the load average to go down. */
set_command_state (f, cs_running);
c->next = waiting_jobs;
waiting_jobs = c;
return 0;
}
/* Put this child on the chain of children waiting for the load average
to go down. */
set_command_state (f, cs_running);
c->next = waiting_jobs;
waiting_jobs = c;
return 0;
}
/* Start the first command; reap_children will run later command lines. */
@ -1379,19 +1386,25 @@ new_job (file)
}
/* Read a token. As long as there's no token available we'll block.
If we get a SIGCHLD we'll return with EINTR. If one happened
before we got here we'll return with EBADF. */
else if (read (job_rfd, &c->job_token, 1) < 1)
before we got here we'll return immediately with EAGAIN because
the signal handler unsets the blocking bit. */
else if (read (job_fds[0], &c->job_token, 1) < 1)
{
if (errno == EINTR)
;
int fl;
/* If EBADF, we got a SIGCHLD before. Otherwise, who knows? */
else if (errno != EBADF)
#if !defined(EAGAIN)
# define EAGAIN EWOULDBLOCK
#endif
if (errno != EINTR && errno != EAGAIN)
pfatal_with_name (_("read jobs pipe"));
/* Something's done; reap it. We don't force a block here; if
something strange happened and nothing's ready, just come back
and wait some more. */
/* Set the blocking bit on the read FD again, just in case. */
fl = fcntl(job_fds[0], F_GETFL, 0);
if (fl >= 0)
fcntl(job_fds[0], F_SETFL, fl & ~O_NONBLOCK);
/* Something's done. We don't want to block for a whole child,
just reap whatever's there. */
reap_children (0, 0);
}

11
main.c
View File

@ -205,7 +205,6 @@ static unsigned int inf_jobs = 0;
/* File descriptors for the jobs pipe. */
int job_fds[2] = { -1, -1 };
int job_rfd = -1;
/* Maximum load average at which multiple jobs will be run.
Negative values mean unlimited, while zero means limit to
@ -1353,15 +1352,13 @@ int main (int argc, char ** argv)
job_slots_str = xstrdup(buf);
}
/* If we have a jobserver pipe, dup(2) the read end. We'll use that in
the child handler to note a child has died. See job.c. */
/* Be sure the blocking bit on the read FD is set to start with. */
if (job_fds[0] >= 0)
{
job_rfds = dup (job_fds[0]);
CLOSE_ON_EXEC (job_rfds);
int fl = fcntl(job_fds[0], F_GETFL, 0);
if (fl >= 0)
fcntl(job_fds[0], F_SETFL, fl & ~O_NONBLOCK);
}
#endif
/* Set up MAKEFLAGS and MFLAGS again, so they will be right. */

8
make.h
View File

@ -77,6 +77,14 @@ Boston, MA 02111-1307, USA. */
extern int errno;
#endif
/* A shortcut for EINTR checking. Note you should never negate this! That
very likely doesn't mean what you want if EINTR is not available. */
#ifdef EINTR
# define EINTR_SET (errno == EINTR)
#else
# define EINTR_SET (0)
#endif
#ifndef isblank
# define isblank(c) ((c) == ' ' || (c) == '\t')
#endif

View File

@ -918,13 +918,10 @@ touch_file (file)
char buf;
int status;
#ifdef EINTR
do
#endif
status = fstat (fd, &statbuf);
#ifdef EINTR
while (status < 0 && errno == EINTR);
#endif
while (status < 0 && EINTR_SET);
if (status < 0)
TOUCH_ERROR ("touch: fstat: ");
/* Rewrite character 0 same as it already is. */