How to add poll function to the kernel module code?

Solution 1:

You can find some good examples in the kernel itself. Take a look at the following files:

  • drivers/rtc/dev.c, drivers/rtc/interface.c
  • kernel/printk/printk.c
  • drivers/char/random.c

To add a poll() function to your code, follow these steps.

  1. Include needed headers:

     #include <linux/wait.h>
     #include <linux/poll.h>
    
  2. Declare waitqueue variable:

     static DECLARE_WAIT_QUEUE_HEAD(fortune_wait);
    
  3. Add a fortune_poll() function and register it (as the .poll callback) in your file operations structure:

     /*
      * .poll callback: registers the caller on fortune_wait and reports
      * whether there is data to read right now.
      */
     static unsigned int fortune_poll(struct file *file, poll_table *wait)
     {
         /* Register on the wait queue that gets woken when new data arrives. */
         poll_wait(file, &fortune_wait, wait);
         /* "new-data-is-ready" is a placeholder for your driver's own readiness check. */
         if (new-data-is-ready)
             return POLLIN | POLLRDNORM;
         /* Nothing to read yet. */
         return 0;
     }
    
     /* File operations table; "...." stands for the callbacks you already have. */
     static const struct file_operations proc_test_fops = {
         ....
         .poll = fortune_poll,
     };
    

    Note that you should return POLLIN | POLLRDNORM if there is new data to read, and 0 if there is none. Returning 0 does not by itself mean a timeout: the caller then sleeps on the waitqueue until it is woken up or its own timeout expires. See man 2 poll for details.

  4. Notify your waitqueue once you have new data:

     wake_up_interruptible(&fortune_wait);
    

That's the basic stuff about implementing the poll() operation. Depending on your task, you may need to use some waitqueue API in your .read function (like wait_event_interruptible()).


See also related question: Implementing poll in a Linux kernel module.

Solution 2:

Minimal runnable example

GitHub upstream with QEMU + Buildroot boilerplate:

  • poll.ko kernel module
  • poll.out userland test

In this simplified example, we generate poll events from a separate thread. In real life, poll events will likely be triggered by interrupts, when the hardware has finished some job, and new data became available for userland to read.

The main point to remember is that if poll returns zero, the kernel calls it again after the wait queue passed to poll_wait is woken up: Why do we need to call poll_wait in poll?

poll.ko

#include <linux/debugfs.h>
#include <linux/delay.h> /* usleep_range */
#include <linux/err.h> /* IS_ERR, PTR_ERR */
#include <linux/errno.h> /* EFAULT */
#include <linux/fs.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* min */
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/printk.h> /* printk */
#include <linux/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/wait.h> /* wait_queue_head_t, wait_event_interruptible, wake_up_interruptible  */
#include <uapi/linux/stat.h> /* S_IRUSR */

/* Module parameter: when non-zero, the poll file operation always returns 0. */
static int ret0 = 0;
module_param(ret0, int, S_IRUSR | S_IWUSR);
/* The description must name the parameter it documents (ret0). */
MODULE_PARM_DESC(ret0, "if 1, always return 0 from poll");

/* Data returned by the next read; written by the kernel thread. */
static char readbuf[1024];
/* Number of pending bytes in readbuf; 0 means there is nothing to read. */
static size_t readbuflen;
/* debugfs entry created in myinit and removed in myexit. */
static struct dentry *debugfs_file;
/* Thread running kthread_func, which refreshes readbuf and wakes the wait queue. */
static struct task_struct *kthread;
/* Wait queue that poll registers on and that kthread_func wakes up. */
static wait_queue_head_t waitqueue;

/*
 * read file operation: hand the pending contents of readbuf to userland.
 *
 * At most len bytes are copied, so a user buffer shorter than the pending
 * data is never overrun. The pending data is drained on every call, even
 * when the copy fails or the user buffer was too short to hold all of it.
 *
 * Returns the number of bytes copied, or -EFAULT if the copy to the user
 * buffer failed.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
    ssize_t ret;
    /* Never write more than the caller asked for. */
    size_t count = min(len, readbuflen);

    if (copy_to_user(buf, readbuf, count)) {
        ret = -EFAULT;
    } else {
        ret = count;
    }
    /* This is normal pipe behaviour: data gets drained once a reader reads from it. */
    /* https://stackoverflow.com/questions/1634580/named-pipes-fifos-on-unix-with-multiple-readers */
    readbuflen = 0;
    return ret;
}

/*
 * poll file operation.
 *
 * Registers the caller on waitqueue and reports POLLIN when readbuf holds
 * pending data and the ret0 module parameter is not set.
 *
 * If 0 is returned, the kernel sleeps until an event happens in the queue
 * and then calls this function again, because of the call to poll_wait.
 */
unsigned int poll(struct file *filp, struct poll_table_struct *wait)
{
    pr_info("poll\n");
    /* No sleeping happens here: this only arranges for poll to be called again if we return 0. */
    poll_wait(filp, &waitqueue, wait);

    /* Nothing to report: either forced by ret0 or no data is pending. */
    if (ret0 || !readbuflen) {
        pr_info("return 0\n");
        return 0;
    }

    pr_info("return POLLIN\n");
    return POLLIN;
}

/*
 * Producer thread: about once per second, store the current jiffies value
 * (as decimal text) in readbuf and wake up anyone waiting on waitqueue.
 *
 * The data is published BEFORE the wake-up, so a poller that is woken up
 * always finds the new data instead of racing against the producer.
 *
 * Runs until kthread_should_stop() reports true; always returns 0.
 */
static int kthread_func(void *data)
{
    while (!kthread_should_stop()) {
        /* Produce first... */
        readbuflen = snprintf(
            readbuf,
            sizeof(readbuf),
            "%llu",
            (unsigned long long)jiffies
        );
        /* ...then announce it... */
        pr_info("wake_up\n");
        wake_up(&waitqueue);
        /* ...and only then wait for the next period. */
        usleep_range(1000000, 1000001);
    }
    return 0;
}

/* File operations of the debugfs entry: only read and poll are provided. */
static const struct file_operations fops = {
    .owner = THIS_MODULE,
    .poll = poll,
    .read = read,
};

/*
 * Module init: set up the wait queue, publish the debugfs file and start
 * the producer thread.
 *
 * Returns 0 on success, or the negative error code from thread creation,
 * in which case the debugfs file is removed again.
 */
static int myinit(void)
{
    /* Must be initialized before the file is visible: poll uses it on the first call. */
    init_waitqueue_head(&waitqueue);
    debugfs_file = debugfs_create_file(
        "lkmc_poll", S_IRUSR | S_IWUSR, NULL, NULL, &fops);
    /* kthread_run creates the thread and wakes it only if creation succeeded. */
    kthread = kthread_run(kthread_func, NULL, "mykthread");
    if (IS_ERR(kthread)) {
        debugfs_remove(debugfs_file);
        return PTR_ERR(kthread);
    }
    return 0;
}

/* Module exit: stop the producer thread, then remove the debugfs file. */
static void myexit(void)
{
    /* Makes the kthread_should_stop() check in kthread_func end its loop. */
    kthread_stop(kthread);
    debugfs_remove(debugfs_file);
}

/* Register myinit/myexit as the module's entry and exit points and declare its license. */
module_init(myinit)
module_exit(myexit)
MODULE_LICENSE("GPL");

poll.out userland:

#define _XOPEN_SOURCE 700
#include <assert.h>
#include <fcntl.h> /* creat, O_CREAT */
#include <poll.h> /* poll */
#include <stdio.h> /* printf, puts, snprintf */
#include <stdlib.h> /* EXIT_FAILURE, EXIT_SUCCESS */
#include <unistd.h> /* read */

/*
 * Userland test: poll(2) the file given as argv[1] forever and print
 * whatever becomes readable.
 *
 * Exits with EXIT_FAILURE on a usage error, or if open(2) or read(2) fails.
 * A poll(2) failure is reported with perror() and then aborts via assert.
 */
int main(int argc, char **argv) {
    char buf[1024];
    int fd, i;
    ssize_t n;
    short revents;
    struct pollfd pfd;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <poll-device>\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    fd = open(argv[1], O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }
    pfd.fd = fd;
    pfd.events = POLLIN;
    pfd.revents = 0;
    while (1) {
        puts("poll");
        i = poll(&pfd, 1, -1);
        if (i == -1) {
            perror("poll");
            assert(0);
        }
        revents = pfd.revents;
        printf("revents = %d\n", revents);
        if (revents & POLLIN) {
            n = read(pfd.fd, buf, sizeof(buf));
            if (n == -1) {
                /*
                 * A negative length must never reach "%.*s": a negative
                 * precision is treated as absent, so printf would read the
                 * unterminated buffer as a C string.
                 */
                perror("read");
                exit(EXIT_FAILURE);
            }
            printf("POLLIN n=%d buf=%.*s\n", (int)n, (int)n, buf);
        }
    }
}

Usage:

insmod poll.ko
mount -t debugfs none /sys/kernel/debug
./kernel_modules/poll.out /sys/kernel/debug/lkmc_poll

Outcome: jiffies gets printed to stdout every second from userland, e.g.:

poll
<6>[    4.275305] poll
<6>[    4.275580] return POLLIN
revents = 1
POLLIN n=10 buf=4294893337
poll
<6>[    4.276627] poll
<6>[    4.276911] return 0
<6>[    5.271193] wake_up
<6>[    5.272326] poll
<6>[    5.273207] return POLLIN
revents = 1
POLLIN n=10 buf=4294893588
poll
<6>[    5.276367] poll
<6>[    5.276618] return 0
<6>[    6.275178] wake_up
<6>[    6.276370] poll
<6>[    6.277269] return POLLIN
revents = 1
POLLIN n=10 buf=4294893839

Force the poll file_operation to return 0 to see what happens more clearly:

insmod poll.ko ret0=1

Sample output:

poll
<6>[   85.674801] poll
<6>[   85.675788] return 0
<6>[   86.675182] wake_up
<6>[   86.676431] poll
<6>[   86.677373] return 0
<6>[   87.679198] wake_up
<6>[   87.680515] poll
<6>[   87.681564] return 0
<6>[   88.683198] wake_up

From this we see that control is not returned to userland: each time the wait queue is woken up, the kernel calls the poll file_operation again, and since it keeps returning 0 the userland poll() call stays blocked.

Tested on Linux 5.4.3.