Skip to content

Commit

Permalink
Merge pull request openucx#546 from jakirkham/typ_get_buf_dat_nbytes
Browse files Browse the repository at this point in the history
Optimize buffer utility functions
  • Loading branch information
jakirkham authored Aug 12, 2020
2 parents c44a451 + a701d64 commit 181764a
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 65 deletions.
4 changes: 4 additions & 0 deletions ucp/_libs/__init__.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3
5 changes: 3 additions & 2 deletions ucp/_libs/topological_distance.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3

import pynvml
from topological_distance_dep cimport *
from .topological_distance_dep cimport *


cdef class TopologicalDistance:
Expand Down
22 changes: 9 additions & 13 deletions ucp/_libs/ucx_api.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3

import logging
import socket
import weakref
Expand All @@ -12,7 +14,9 @@ from libc.stdint cimport uintptr_t
from libc.stdio cimport FILE, fclose, fflush
from libc.stdlib cimport free
from libc.string cimport memset
from ucx_api_dep cimport *

from .ucx_api_dep cimport *
from .utils cimport get_buffer_data

from ..exceptions import (
UCXCanceled,
Expand All @@ -22,7 +26,6 @@ from ..exceptions import (
log_errors,
)
from ..utils import nvtx_annotate
from .utils import get_buffer_data


# Struct used as requests by UCX
Expand Down Expand Up @@ -722,9 +725,7 @@ def tag_send_nb(
name: str, optional
Descriptive name of the operation
"""
cdef void *data = <void*><uintptr_t>(
get_buffer_data(buffer, check_writable=False)
)
cdef void *data = <void*>get_buffer_data(buffer, check_writable=False)
cdef ucp_send_callback_t _send_cb = <ucp_send_callback_t>_send_callback
cdef ucs_status_ptr_t status = ucp_tag_send_nb(
ep._handle,
Expand Down Expand Up @@ -836,9 +837,7 @@ def tag_recv_nb(
when the `worker` closes.
"""

cdef void *data = <void*><uintptr_t>(
get_buffer_data(buffer, check_writable=True)
)
cdef void *data = <void*>get_buffer_data(buffer, check_writable=True)
cdef ucp_tag_recv_callback_t _tag_recv_cb = (
<ucp_tag_recv_callback_t>_tag_recv_callback
)
Expand Down Expand Up @@ -904,8 +903,7 @@ def stream_send_nb(
name: str, optional
Descriptive name of the operation
"""
cdef void *data = <void*><uintptr_t>(get_buffer_data(buffer,
check_writable=False))
cdef void *data = <void*>get_buffer_data(buffer, check_writable=False)
cdef ucp_send_callback_t _send_cb = <ucp_send_callback_t>_send_callback
cdef ucs_status_ptr_t status = ucp_stream_send_nb(
ep._handle,
Expand Down Expand Up @@ -999,9 +997,7 @@ def stream_recv_nb(
Descriptive name of the operation
"""

cdef void *data = <void*><uintptr_t>(
get_buffer_data(buffer, check_writable=True)
)
cdef void *data = <void*>get_buffer_data(buffer, check_writable=True)
cdef size_t length
cdef ucp_stream_recv_callback_t _stream_recv_cb = (
<ucp_stream_recv_callback_t>_stream_recv_callback
Expand Down
11 changes: 11 additions & 0 deletions ucp/_libs/utils.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3


from libc.stdint cimport uintptr_t


cpdef uintptr_t get_buffer_data(buffer, bint check_writable=*) except *
cpdef Py_ssize_t get_buffer_nbytes(buffer, check_min_size, bint cuda_support) except *
102 changes: 52 additions & 50 deletions ucp/_libs/utils.pyx
Original file line number Diff line number Diff line change
@@ -1,38 +1,30 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

# cython: language_level=3
import asyncio
import operator
from functools import reduce


from cpython.memoryview cimport PyMemoryView_GET_BUFFER
from cython cimport boundscheck, wraparound
from libc.stdint cimport uintptr_t

from ..exceptions import UCXCloseError, UCXError


def get_buffer_data(buffer, check_writable=False):
cpdef uintptr_t get_buffer_data(buffer, bint check_writable=False) except *:
"""
Returns data pointer of the buffer. Raising ValueError if the buffer
is read only and check_writable=True is set.
"""
iface = None
if hasattr(buffer, "__cuda_array_interface__"):
iface = buffer.__cuda_array_interface__
elif hasattr(buffer, "__array_interface__"):
iface = buffer.__array_interface__

cdef dict iface = getattr(buffer, "__cuda_array_interface__", None)

cdef uintptr_t data_ptr
cdef bint data_readonly
if iface is not None:
data_ptr, data_readonly = iface['data']
data_ptr, data_readonly = iface["data"]
else:
mview = memoryview(buffer)
data_ptr = int(<uintptr_t>PyMemoryView_GET_BUFFER(mview).buf)
data_readonly = mview.readonly

# Workaround for numba giving None, rather than an 0.
# https://github.com/cupy/cupy/issues/2104 for more info.
if data_ptr is None:
data_ptr = 0
data_ptr = <uintptr_t>PyMemoryView_GET_BUFFER(mview).buf
data_readonly = <bint>PyMemoryView_GET_BUFFER(mview).readonly

if data_ptr == 0:
raise NotImplementedError("zero-sized buffers isn't supported")
Expand All @@ -43,51 +35,61 @@ def get_buffer_data(buffer, check_writable=False):
return data_ptr


def get_buffer_nbytes(buffer, check_min_size, cuda_support):
@boundscheck(False)
@wraparound(False)
cpdef Py_ssize_t get_buffer_nbytes(buffer, check_min_size, bint cuda_support) except *:
"""
Returns the size of the buffer in bytes. Returns ValueError
if `check_min_size` is greater than the size of the buffer
"""

iface = None
if hasattr(buffer, "__cuda_array_interface__"):
iface = buffer.__cuda_array_interface__
if not cuda_support:
msg = "UCX is not configured with CUDA support, please add " \
"`cuda_copy` and/or `cuda_ipc` to the UCX_TLS environment" \
"variable and that the ucx-proc=*=gpu package is " \
"installed. See " \
"https://ucx-py.readthedocs.io/en/latest/install.html for " \
"more information."
raise ValueError(msg)
elif hasattr(buffer, "__array_interface__"):
iface = buffer.__array_interface__
cdef dict iface = getattr(buffer, "__cuda_array_interface__", None)
if not cuda_support and iface is not None:
raise ValueError(
"UCX is not configured with CUDA support, please add "
"`cuda_copy` and/or `cuda_ipc` to the UCX_TLS environment"
"variable and that the ucx-proc=*=gpu package is "
"installed. See "
"https://ucx-py.readthedocs.io/en/latest/install.html for "
"more information."
)

cdef tuple shape, strides
cdef Py_ssize_t i, s, itemsize, ndim, nbytes
if iface is not None:
import numpy
itemsize = int(numpy.dtype(iface['typestr']).itemsize)
itemsize = numpy.dtype(iface["typestr"]).itemsize
# Making sure that the elements in shape is integers
shape = [int(s) for s in iface['shape']]
nbytes = reduce(operator.mul, shape, 1) * itemsize
shape = iface["shape"]
ndim = len(shape)
nbytes = itemsize
for i in range(ndim):
nbytes *= <Py_ssize_t>shape[i]
# Check that data is contiguous
if len(shape) > 0 and iface.get("strides", None) is not None:
strides = [int(s) for s in iface['strides']]
if len(strides) != len(shape):
msg = "The length of shape and strides must be equal"
raise ValueError(msg)
strides = iface.get("strides")
if strides is not None and ndim > 0:
if len(strides) != ndim:
raise ValueError(
"The length of shape and strides must be equal"
)
s = itemsize
for i in reversed(range(len(shape))):
if s != strides[i]:
for i from ndim > i >= 0 by 1:
if s != <Py_ssize_t>strides[i]:
raise ValueError("Array must be contiguous")
s *= shape[i]
if iface.get("mask", None) is not None:
s *= <Py_ssize_t>shape[i]
if iface.get("mask") is not None:
raise NotImplementedError("mask attribute not supported")
else:
mview = memoryview(buffer)
nbytes = mview.nbytes
if not mview.contiguous:
raise ValueError("buffer must be contiguous")
if not mview.c_contiguous:
raise ValueError("buffer must be C-contiguous")

if check_min_size is not None and nbytes < check_min_size:
raise ValueError("the nbytes is greater than the size of the buffer!")
cdef Py_ssize_t min_size
if check_min_size is not None:
min_size = check_min_size
if nbytes < min_size:
raise ValueError(
"the nbytes is greater than the size of the buffer!"
)
return nbytes

0 comments on commit 181764a

Please sign in to comment.