Waitpid equivalent with timeout?

Solution 1:

Don't mix alarm() with wait(). You can lose error information that way.

Use the self-pipe trick. This turns any signal into a select()able event:

/* Self-pipe descriptors: [0] is the read end, [1] is the write end. */
int selfpipe[2];

/*
 * Signal handler: pushes one NUL byte into the write end of the self-pipe.
 * errno is captured on entry and put back on exit, so whatever write() does
 * to it is invisible to the code that was interrupted. The signal number n
 * is not used.
 */
void selfpipe_sigh(int n)
{
    static const char wake_byte = '\0';
    const int errno_on_entry = errno;
    ssize_t ignored;

    (void)n;

    /* The result is deliberately discarded. */
    ignored = write(selfpipe[1], &wake_byte, 1);
    (void)ignored;

    errno = errno_on_entry;
}
/*
 * Creates the self-pipe, switches both ends to non-blocking mode and installs
 * selfpipe_sigh as the SIGCHLD handler.
 *
 * Any failure aborts the process, matching the existing policy for a failed
 * pipe(): a half-initialised self-pipe would either block inside the signal
 * handler or never report a child's death.
 */
void selfpipe_setup(void)
{
    struct sigaction act;
    int i;

    if (pipe(selfpipe) == -1) { abort(); }

    /* Non-blocking on both ends: the handler must never block on a full
     * pipe, and the reader drains until read() stops returning data. */
    for (i = 0; i < 2; i++) {
        int flags = fcntl(selfpipe[i], F_GETFL);

        if (flags == -1) { abort(); }
        if (fcntl(selfpipe[i], F_SETFL, flags | O_NONBLOCK) == -1) { abort(); }
    }

    memset(&act, 0, sizeof(act));
    act.sa_handler = selfpipe_sigh;
    if (sigaction(SIGCHLD, &act, NULL) == -1) { abort(); }
}

Then, your waitpid-like function looks like this:

/*
 * Waits up to 5 seconds for the self-pipe to become readable, then drains it
 * and reaps every child that has already terminated.
 *
 * Returns the number of children reaped during this call (0 on timeout). The
 * call may also return early with 0 if select() is interrupted by a signal
 * and no child has terminated.
 */
int selfpipe_waitpid(void)
{
    char drain[4096];
    fd_set rfds;
    struct timeval tv;
    int died = 0;
    int ready;

    tv.tv_sec = 5;
    tv.tv_usec = 0;
    FD_ZERO(&rfds);
    FD_SET(selfpipe[0], &rfds);

    ready = select(selfpipe[0] + 1, &rfds, NULL, NULL, &tv);

    /* A signal delivered while select() is sleeping makes it fail with
     * EINTR instead of reporting the pipe as readable, so that case must
     * be handled like a wake-up. Both loops below are non-blocking, so
     * running them after an unrelated signal is harmless. */
    if (ready > 0 || (ready == -1 && errno == EINTR)) {
        /* Empty the pipe: several signals may have queued several bytes. */
        while (read(selfpipe[0], drain, sizeof(drain)) > 0) {
        }

        /* With WNOHANG, waitpid() returns 0 while children exist but none
         * has terminated; only a positive return is a reaped child.
         * Testing for "!= -1" would spin forever in that situation. */
        while (waitpid(-1, NULL, WNOHANG) > 0) {
            died++;
        }
    }
    return died;
}

You can see in selfpipe_waitpid() how you can control the timeout and even mix with other select()-based IO.

Solution 2:

Fork an intermediate child, which forks both the real child and a timeout process and then waits for whichever of the two exits first. When one exits, it kills the other one, collects it, and exits.

// Run do_work() in a grandchild with a time limit of timeout_time seconds.
// An intermediate child owns exactly two children (worker and timer), so its
// wait(NULL) calls can only ever return one of those two.
pid_t intermediate_pid = fork();
if (intermediate_pid == 0) {
    pid_t worker_pid = fork();
    if (worker_pid == -1) {
        _exit(1); // Could not start the worker
    }
    if (worker_pid == 0) {
        do_work();
        _exit(0);
    }

    pid_t timeout_pid = fork();
    if (timeout_pid == -1) {
        // No timer could be started. Bail out here: a pid of -1 must never
        // reach kill(), where it means "every process we may signal".
        kill(worker_pid, SIGKILL);
        wait(NULL);
        _exit(1);
    }
    if (timeout_pid == 0) {
        sleep(timeout_time);
        _exit(0);
    }

    pid_t exited_pid = wait(NULL);
    if (exited_pid == worker_pid) {
        kill(timeout_pid, SIGKILL);
    } else {
        kill(worker_pid, SIGKILL); // Or something less violent if you prefer
    }
    wait(NULL); // Collect the other process
    _exit(0); // Or some more informative status
}
if (intermediate_pid > 0) {
    waitpid(intermediate_pid, NULL, 0);
}
// If intermediate_pid is -1 the first fork() failed and nothing was started;
// waitpid(-1, ...) is skipped because it would wait for any unrelated child.

Surprisingly simple :)

You can even leave out the intermediate child if you're sure no other module in the program is spawning child processes of its own.

Solution 3:

This is an interesting question. I found sigtimedwait can do it.

EDIT 2016/08/29: Thanks to Mark Edington for the suggestion. I've tested your example on Ubuntu 16.04, and it works as expected.

Note: this only works for child processes. Unfortunately, Linux/Unix seems to have no equivalent of Windows' WaitForSingleObject(unrelated_process_handle, timeout) for being notified of an unrelated process's termination within a timeout.

OK, Mark Edington's sample code is here:

/* The program creates a child process and waits for it to finish. If a timeout
 * elapses the child is killed. Waiting is done using sigtimedwait(). Race
 * condition is avoided by blocking the SIGCHLD signal before fork().
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/* Forks a child that prints a message, sleeps for 10 seconds, prints a second
 * message and exits with status 0.
 *
 * Returns the child's pid in the parent. If fork() fails, the error is
 * reported with perror() and the process exits with status 1.
 */
static pid_t fork_child (void)
{
    /* fork() returns pid_t; keep that type instead of narrowing to int. */
    pid_t p = fork ();

    if (p == -1) {
        perror ("fork");
        exit (1);
    }

    if (p == 0) {
        /* Child: never returns to the caller. */
        puts ("child: sleeping...");
        sleep (10);
        puts ("child: exiting");
        exit (0);
    }

    return p;
}

/* Starts the child and waits at most 5 seconds for SIGCHLD using
 * sigtimedwait(). If the deadline passes first, the child is killed. In both
 * cases the child is then reaped with waitpid().
 *
 * Returns 0 on success, 1 if a system call fails.
 */
int main (int argc, char *argv[])
{
    sigset_t mask;
    sigset_t orig_mask;
    struct timespec deadline;
    pid_t pid;

    (void) argc;
    (void) argv;

    sigemptyset (&mask);
    sigaddset (&mask, SIGCHLD);

    /* Block SIGCHLD before fork() so that a child exiting early leaves the
     * signal pending for sigtimedwait() instead of it being lost. */
    if (sigprocmask(SIG_BLOCK, &mask, &orig_mask) < 0) {
        perror ("sigprocmask");
        return 1;
    }

    /* Fix an absolute deadline once. Passing the full 5 seconds again after
     * every EINTR would let unrelated signals extend the wait indefinitely. */
    if (clock_gettime (CLOCK_MONOTONIC, &deadline) < 0) {
        perror ("clock_gettime");
        return 1;
    }
    deadline.tv_sec += 5;

    pid = fork_child ();

    for (;;) {
        struct timespec now;
        struct timespec remaining;

        if (clock_gettime (CLOCK_MONOTONIC, &now) < 0) {
            perror ("clock_gettime");
            return 1;
        }

        /* remaining = deadline - now, clamped at zero. */
        remaining.tv_sec = deadline.tv_sec - now.tv_sec;
        remaining.tv_nsec = deadline.tv_nsec - now.tv_nsec;
        if (remaining.tv_nsec < 0) {
            remaining.tv_sec -= 1;
            remaining.tv_nsec += 1000000000L;
        }
        if (remaining.tv_sec < 0) {
            remaining.tv_sec = 0;
            remaining.tv_nsec = 0;
        }

        if (sigtimedwait(&mask, NULL, &remaining) >= 0) {
            /* SIGCHLD was received. */
            break;
        }

        if (errno == EINTR) {
            /* Interrupted by a signal other than SIGCHLD: wait again for
             * whatever time is left. */
            continue;
        }

        if (errno == EAGAIN) {
            printf ("Timeout, killing child\n");
            kill (pid, SIGKILL);
            break;
        }

        perror ("sigtimedwait");
        return 1;
    }

    if (waitpid(pid, NULL, 0) < 0) {
        perror ("waitpid");
        return 1;
    }

    return 0;
}