Skip to content

Commit

Permalink
prov/util: Narrow uffd pagefault handler for non-backed writes only.
Browse files Browse the repository at this point in the history
Page faults come in 3 flavors: reads, writes and writes to protected
pages.  The only ones we can handle are writes to non-backed pages.

Signed-off-by: Mike Uttormark <mike.uttormark@hpe.com>
  • Loading branch information
muttormark authored and iziemba committed Oct 9, 2024
1 parent db2a6e8 commit dbd0c9e
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 78 deletions.
33 changes: 32 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ dnl
dnl Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2019-2021 Intel, Inc. All rights reserved.
dnl Copyright (c) 2019-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
dnl (C) Copyright 2020 Hewlett Packard Enterprise Development LP
dnl (C) Copyright 2020,2024 Hewlett Packard Enterprise Development LP
dnl Copyright (c) 2022 DataDirect Networks, Inc. All rights reserved.
dnl Copyright (c) 2023 Tactical Computing Labs, LLC. All rights reserved.
dnl
Expand Down Expand Up @@ -557,6 +557,37 @@ AS_IF([test $have_uffd -eq 1],
AC_DEFINE_UNQUOTED([HAVE_UFFD_UNMAP], [$have_uffd],
[Define to 1 if platform supports userfault fd unmap])

dnl Check uffd thread id support.
dnl This probe is attempted only when $have_uffd is 1; otherwise
dnl have_uffd_thread_id keeps its default of 0.  It is a compile-only test
dnl (AC_COMPILE_IFELSE): it passes when <linux/userfaultfd.h> provides
dnl UFFD_FEATURE_THREAD_ID alongside the UNMAP/REMOVE/REMAP event feature
dnl flags.  The test program is never executed, so the result reflects what
dnl the headers declare, not what the running kernel accepts.
have_uffd_thread_id=0
AS_IF([test $have_uffd -eq 1],
[AC_MSG_CHECKING([for userfaultfd thread id support])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <sys/types.h>
#include <linux/userfaultfd.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/ioctl.h>
]],
[[
int fd;
struct uffdio_api api_obj;
api_obj.api = UFFD_API;
api_obj.features = UFFD_FEATURE_THREAD_ID |
UFFD_FEATURE_EVENT_UNMAP |
UFFD_FEATURE_EVENT_REMOVE |
UFFD_FEATURE_EVENT_REMAP;
fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
return ioctl(fd, UFFDIO_API, &api_obj);
]])
],
[AC_MSG_RESULT([yes])
have_uffd_thread_id=1],
[AC_MSG_RESULT([no])])])

dnl HAVE_UFFD_THREAD_ID is defined to the probe result (0 or 1) in both
dnl outcomes, so C sources can test it with a plain #if.
AC_DEFINE_UNQUOTED([HAVE_UFFD_THREAD_ID], [$have_uffd_thread_id],
[Define to 1 if platform supports userfault fd thread id])

dnl restricted DL open
restricted_dl=0
AC_ARG_ENABLE([restricted_dl],
Expand Down
189 changes: 112 additions & 77 deletions prov/util/src/util_mem_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2017-2021 Intel Inc. All rights reserved.
* Copyright (c) 2019-2021 Amazon.com, Inc. or its affiliates.
* All rights reserved.
* (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* (C) Copyright 2024 Hewlett Packard Enterprise Development LP
* Copyright (C) 2024 Cornelis Networks. All rights reserved.
*
* This software is available to you under a choice of one of two
Expand Down Expand Up @@ -555,6 +555,8 @@ void ofi_monitor_unsubscribe_no_op(struct ofi_mem_monitor *notifier,
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

static void ofi_uffd_pagefault_handler(struct uffd_msg *msg);

/* The userfault fd monitor requires for events that could
* trigger it to be handled outside of the monitor functions
* itself. When a fault occurs on a monitored region, the
Expand All @@ -567,12 +569,8 @@ void ofi_monitor_unsubscribe_no_op(struct ofi_mem_monitor *notifier,
static void *ofi_uffd_handler(void *arg)
{
struct uffd_msg msg;
struct uffdio_zeropage zp;
struct pollfd fds;
int ret;
int i;
void *address;
bool found;

fds.fd = uffd.fd;
fds.events = POLLIN;
Expand Down Expand Up @@ -613,78 +611,7 @@ static void *ofi_uffd_handler(void *arg)
(size_t) msg.arg.remap.len);
break;
case UFFD_EVENT_PAGEFAULT:

/* The event tells us the address of the fault
* (which can be anywhere on the page). It does not
* tell us the size of the page so we have to guess
* from the list of known page_sizes.
*
* We employ the standard resolution: install a zeroed page.
*/

address = (void *) (uintptr_t) msg.arg.pagefault.address;
found = false;

for (i = 0; i < num_page_sizes; ) {
/* setup a zeropage reqest for this pagesize */
zp.range.start = (uint64_t) (uintptr_t)
ofi_get_page_start(address, page_sizes[i]);
zp.range.len = (uint64_t) page_sizes[i];
zp.mode = 0;
zp.zeropage = 0;

ret = ioctl(uffd.fd, UFFDIO_ZEROPAGE, &zp);

if (0 == ret) { /* success */
found = true;
break;
}

/* Note: the documentation (man ioctl_userfaultfd) says
* that the ioctl() returns -1 on error and errno is set
* to indicate the error. It also says that the zeropage
* member of struct uffdio_zeropage is set to the negated
* error. The unit tests for uffd say
* real retval in uffdio_zeropage.zeropage
* so that's what we use here.
*/

if (-EAGAIN == zp.zeropage) {
/* This is a tough case. If the memory map is
* changing, the kernel returns EAGAIN before
* servicing the zeropage request. So the page
* fault has not been rectified. If we don't try
* again, the application will crash. If we add
* a maximum retry count we could still end up
* with an unresolved page fault.
*
* It's likely a kernel bug if it returns EAGAIN
* forever. So we retry until we get a return
* value from the ioctl that is not EAGAIN.
*/
continue;
}
i++;

if (-EINVAL == zp.zeropage) /* wrong page size */
continue;

/* If we get here we failed to install the zeroed
* page for this pagesize and it wasn't a size error.
* We could either stop trying or go on to the
* next pagesize. We choose to print a warning and try
* another pagesize.
*/

FI_DBG(&core_prov, FI_LOG_MR,
"Unable to install zeroed page of size %lu to rectify page fault."
" address = %p zeropage = %lld errno = %d\n",
page_sizes[i], address, zp.zeropage, errno);
}
if (!found)
FI_WARN(&core_prov, FI_LOG_MR,
"Unable to handle event UFFD_EVENT_PAGEFAULT for address %p.\n",
address);
ofi_uffd_pagefault_handler(&msg);
break;
default:
FI_WARN(&core_prov, FI_LOG_MR,
Expand All @@ -697,6 +624,114 @@ static void *ofi_uffd_handler(void *arg)
return NULL;
}

static void ofi_uffd_pagefault_handler(struct uffd_msg *msg)
{
struct uffdio_zeropage zp;
int i;
int ret;
void * const address = (void *) (uintptr_t) msg->arg.pagefault.address;
uint64_t const flags = (uint64_t) msg->arg.pagefault.flags;
#if HAVE_UFFD_THREAD_ID
uint32_t const ptid = (uint32_t) msg->arg.pagefault.feat.ptid;
#endif
/* ofi_uffd_register sets the mode to
* UFFDIO_REGISTER_MODE_MISSING. As a result, we can
* get read, write or write-protect notifications via
* UFFD_EVENT_PAGEFAULT. The only ones we can sensibly
* handle are writes to non-backed pages.
* (Read and write-protect nofications are likely
* application bugs.)
*/

if (UFFD_PAGEFAULT_FLAG_WRITE != flags) {
#if HAVE_UFFD_THREAD_ID
FI_WARN(&core_prov, FI_LOG_MR,
"UFFD pagefault with unrecognized flags: %lu, address %p, thread %u\n",
flags, address, ptid);
#else
FI_WARN(&core_prov, FI_LOG_MR,
"UFFD pagefault with unrecognized flags: %lu, address %p\n",
flags, address);
#endif
/* The faulting thread is halted at this point. In
* theory we could wake it up with UFFDIO_WAKE. In
* practice that requires the address range of the
* fault, information we don't have from the
* pagefault event.
*/

return;
}

/* The event tells us the address of the fault
* (which can be anywhere on the page). It does not
* tell us the size of the page so we have to guess
* from the list of known page_sizes.
*
* We employ the standard resolution: install a zeroed page.
*/

for (i = 0; i < num_page_sizes; ) {
/* setup a zeropage reqest for this pagesize */
zp.range.start = (uint64_t) (uintptr_t)
ofi_get_page_start(address, page_sizes[i]);
zp.range.len = (uint64_t) page_sizes[i];
zp.mode = 0;
zp.zeropage = 0;

ret = ioctl(uffd.fd, UFFDIO_ZEROPAGE, &zp);

if (0 == ret) /* success */
return;

/* Note: the documentation (man ioctl_userfaultfd) says
* that the ioctl() returns -1 on error and errno is set
* to indicate the error. It also says that the zeropage
* member of struct uffdio_zeropage is set to the negated
* error. The unit tests for uffd say
* real retval in uffdio_zeropage.zeropage
* so that's what we use here.
*/

if (-EAGAIN == zp.zeropage)
/* This is a tough case. If the memory map is
* changing, the kernel returns EAGAIN before
* installing the zeroed page. So the page
* fault has not been rectified. If we don't try
* again, the application will crash. If we add
* a maximum retry count we could still end up
* with an unresolved page fault.
*
* It's likely a kernel bug or (something else
* bad like OOM) if it returns EAGAIN forever.
* So we retry until we get something besides
* EAGAIN.
*/
continue; /* retry this page size */

i++; /* try next page size */

if (-EINVAL == zp.zeropage) /* wrong page size */
continue;

/* If we get here we failed to install the zeroed
* page for this page size and it wasn't a size error.
* We could either stop trying or go on to the
* next pagesize. We choose to print a message and try
* another page size.
*/

FI_DBG(&core_prov, FI_LOG_MR,
"Unable to install zeroed page of size %zu to handle page fault."
" address = %p zeropage = %lld errno = %d\n",
page_sizes[i], address, zp.zeropage, errno);
}

FI_WARN(&core_prov, FI_LOG_MR,
"Unable to handle event UFFD_EVENT_PAGEFAULT for address %p.\n",
address);
}

static int ofi_uffd_register(const void *addr, size_t len, size_t page_size)
{
struct uffdio_register reg;
Expand Down

0 comments on commit dbd0c9e

Please sign in to comment.