diff --git a/examples/ucp_hello_world.c b/examples/ucp_hello_world.c index 3b506deb5bc..bb0ff042f65 100644 --- a/examples/ucp_hello_world.c +++ b/examples/ucp_hello_world.c @@ -350,17 +350,12 @@ static int run_ucx_client(ucp_worker_h ucp_worker) return ret; } -static void flush_callback(void *request, ucs_status_t status, void *user_data) -{ -} - static ucs_status_t flush_ep(ucp_worker_h worker, ucp_ep_h ep) { ucp_request_param_t param; void *request; - param.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; - param.cb.send = flush_callback; + param.op_attr_mask = 0; request = ucp_ep_flush_nbx(ep, &param); if (request == NULL) { return UCS_OK; diff --git a/src/ucp/api/ucp.h b/src/ucp/api/ucp.h index d827e8b864c..0bbba0b4663 100644 --- a/src/ucp/api/ucp.h +++ b/src/ucp/api/ucp.h @@ -610,42 +610,6 @@ enum ucp_cb_param_flags { }; -/** - * @ingroup UCP_COMM - * @brief Atomic operation requested for ucp_atomic_post - * - * This enumeration defines which atomic memory operation should be - * performed by the ucp_atomic_post family of functions. All of these are - * non-fetching atomics and will not result in a request handle. - */ -typedef enum { - UCP_ATOMIC_POST_OP_ADD, /**< Atomic add */ - UCP_ATOMIC_POST_OP_AND, /**< Atomic and */ - UCP_ATOMIC_POST_OP_OR, /**< Atomic or */ - UCP_ATOMIC_POST_OP_XOR, /**< Atomic xor */ - UCP_ATOMIC_POST_OP_LAST -} ucp_atomic_post_op_t; - - -/** - * @ingroup UCP_COMM - * @brief Atomic operation requested for ucp_atomic_fetch - * - * This enumeration defines which atomic memory operation should be performed - * by the ucp_atomic_fetch family of functions. All of these functions - * will fetch data from the remote node. 
- */ -typedef enum { - UCP_ATOMIC_FETCH_OP_FADD, /**< Atomic Fetch and add */ - UCP_ATOMIC_FETCH_OP_SWAP, /**< Atomic swap */ - UCP_ATOMIC_FETCH_OP_CSWAP, /**< Atomic conditional swap */ - UCP_ATOMIC_FETCH_OP_FAND, /**< Atomic Fetch and and */ - UCP_ATOMIC_FETCH_OP_FOR, /**< Atomic Fetch and or */ - UCP_ATOMIC_FETCH_OP_FXOR, /**< Atomic Fetch and xor */ - UCP_ATOMIC_FETCH_OP_LAST -} ucp_atomic_fetch_op_t; - - /** * @ingroup UCP_COMM * @brief Atomic operation requested for ucp_atomic_op_nbx @@ -2124,27 +2088,6 @@ ucs_status_t ucp_worker_query(ucp_worker_h worker, void ucp_worker_print_info(ucp_worker_h worker, FILE *stream); -/** - * @ingroup UCP_WORKER - * @brief Get the address of the worker object. - * - * This routine returns the address of the worker object. This address can be - * passed to remote instances of the UCP library in order to connect to this - * worker. The memory for the address handle is allocated by this function, and - * must be released by using @ref ucp_worker_release_address - * "ucp_worker_release_address()" routine. - * - * @param [in] worker Worker object whose address to return. - * @param [out] address_p A pointer to the worker address. - * @param [out] address_length_p The size in bytes of the address. - * - * @return Error code as defined by @ref ucs_status_t - */ -ucs_status_t ucp_worker_get_address(ucp_worker_h worker, - ucp_address_t **address_p, - size_t *address_length_p); - - /** * @ingroup UCP_WORKER * @brief Release an address of the worker object. @@ -2533,35 +2476,6 @@ ucs_status_t ucp_ep_create(ucp_worker_h worker, const ucp_ep_params_t *params, ucp_ep_h *ep_p); -/** - * @ingroup UCP_ENDPOINT - * - * @brief Non-blocking @ref ucp_ep_h "endpoint" closure. - * - * This routine releases the @ref ucp_ep_h "endpoint". The endpoint closure - * process depends on the selected @a mode. - * - * @param [in] ep Handle to the endpoint to close. - * @param [in] mode One from @ref ucp_ep_close_mode value. 
- * - * @return UCS_OK - The endpoint is closed successfully. - * @return UCS_PTR_IS_ERR(_ptr) - The closure failed and an error code indicates - * the transport level status. However, resources - * are released and the @a endpoint can no longer - * be used. - * @return otherwise - The closure process is started, and can be - * completed at any point in time. A request handle - * is returned to the application in order to track - * progress of the endpoint closure. The application - * is responsible for releasing the handle using the - * @ref ucp_request_free routine. - * - * @note @ref ucp_ep_close_nb replaces deprecated @ref ucp_disconnect_nb and - * @ref ucp_ep_destroy - */ -ucs_status_ptr_t ucp_ep_close_nb(ucp_ep_h ep, unsigned mode); - - /** * @ingroup UCP_ENDPOINT * @@ -2633,32 +2547,26 @@ void ucp_ep_print_info(ucp_ep_h ep, FILE *stream); * @ref ucp_ep_h "endpoint" when this call returns. * * @param [in] ep UCP endpoint. - * @param [in] flags Flags for flush operation. Reserved for future use. - * @param [in] cb Callback which will be called when the flush operation - * completes. + * @param [in] param Operation parameters, see @ref ucp_request_param_t. * - * @return NULL - The flush operation was completed immediately. + * @return NULL - The flush operation was completed immediately. * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. - * @return otherwise - Flush operation was scheduled and can be completed - * in any point in time. The request handle is returned - * to the application in order to track progress. The - * application is responsible for releasing the handle - * using @ref ucp_request_free "ucp_request_free()" - * routine. + * @return otherwise - Flush operation was scheduled and can be + * completed in any point in time. The request + * handle is returned to the application in + * order to track progress. 
* * * The following example demonstrates how blocking flush can be implemented * using non-blocking flush: * @code {.c} - * void empty_function(void *request, ucs_status_t status) - * { - * } - * * ucs_status_t blocking_ep_flush(ucp_ep_h ep, ucp_worker_h worker) * { + * ucp_request_param_t param; * void *request; * - * request = ucp_ep_flush_nb(ep, 0, empty_function); + * param.op_attr_mask = 0; + * request = ucp_ep_flush_nbx(ep, &param); * if (request == NULL) { * return UCS_OK; * } else if (UCS_PTR_IS_ERR(request)) { @@ -2673,31 +2581,7 @@ void ucp_ep_print_info(ucp_ep_h ep, FILE *stream); * return status; * } * } - * @endcode */ -ucs_status_ptr_t ucp_ep_flush_nb(ucp_ep_h ep, unsigned flags, - ucp_send_callback_t cb); - - -/** - * @ingroup UCP_ENDPOINT - * - * @brief Non-blocking flush of outstanding AMO and RMA operations on the - * @ref ucp_ep_h "endpoint". - * - * This routine flushes all outstanding AMO and RMA communications on the - * @ref ucp_ep_h "endpoint". All the AMO and RMA operations issued on the - * @a ep prior to this call are completed both at the origin and at the target - * @ref ucp_ep_h "endpoint" when this call returns. - * - * @param [in] ep UCP endpoint. - * @param [in] param Operation parameters, see @ref ucp_request_param_t. - * - * @return NULL - The flush operation was completed immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. - * @return otherwise - Flush operation was scheduled and can be - * completed in any point in time. The request - * handle is returned to the application in - * order to track progress. + * @endcode */ ucs_status_ptr_t ucp_ep_flush_nbx(ucp_ep_h ep, const ucp_request_param_t *param); @@ -3056,36 +2940,6 @@ ucs_status_t ucp_rkey_ptr(ucp_rkey_h rkey, uint64_t raddr, void **addr_p); void ucp_rkey_destroy(ucp_rkey_h rkey); -/** - * @ingroup UCP_WORKER - * @brief Add user defined callback for Active Message. 
- * - * This routine installs a user defined callback to handle incoming Active - * Messages with a specific id. This callback is called whenever an Active - * Message that was sent from the remote peer by @ref ucp_am_send_nb is - * received on this worker. - * - * @param [in] worker UCP worker on which to set the Active Message - * handler. - * @param [in] id Active Message id. - * @param [in] cb Active Message callback. NULL to clear. - * @param [in] arg Active Message argument, which will be passed - * in to every invocation of the callback as the - * arg argument. - * @param [in] flags Dictates how an Active Message is handled on the - * remote endpoint. Currently only - * UCP_AM_FLAG_WHOLE_MSG is supported, which - * indicates the callback will not be invoked - * until all data has arrived. - * - * @return error code if the worker does not support Active Messages or - * requested callback flags. - */ -ucs_status_t ucp_worker_set_am_handler(ucp_worker_h worker, uint16_t id, - ucp_am_callback_t cb, void *arg, - uint32_t flags); - - /** * @ingroup UCP_WORKER * @brief Add user defined callback for Active Message. @@ -3110,35 +2964,6 @@ ucs_status_t ucp_worker_set_am_recv_handler(ucp_worker_h worker, const ucp_am_handler_param_t *param); -/** - * @ingroup UCP_COMM - * @brief Send Active Message. - * - * This routine sends an Active Message to an ep. It does not support - * CUDA memory. - * - * @param [in] ep UCP endpoint where the Active Message will be run. - * @param [in] id Active Message id. Specifies which registered - * callback to run. - * @param [in] buffer Pointer to the data to be sent to the target node - * of the Active Message. - * @param [in] count Number of elements to send. - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] cb Callback that is invoked upon completion of the - * data transfer if it is not completed immediately. - * @param [in] flags Operation flags as defined by @ref ucp_send_am_flags. 
- * - * @return NULL Active Message was sent immediately. - * @return UCS_PTR_IS_ERR(_ptr) Error sending Active Message. - * @return otherwise Pointer to request, and Active Message is known - * to be completed after cb is run. - */ -ucs_status_ptr_t ucp_am_send_nb(ucp_ep_h ep, uint16_t id, - const void *buffer, size_t count, - ucp_datatype_t datatype, - ucp_send_callback_t cb, unsigned flags); - - /** * @ingroup UCP_COMM * @brief Send Active Message. @@ -3258,49 +3083,6 @@ ucs_status_ptr_t ucp_am_recv_data_nbx(ucp_worker_h worker, void *data_desc, void ucp_am_data_release(ucp_worker_h worker, void *data); -/** - * @ingroup UCP_COMM - * @brief Non-blocking stream send operation. - * - * This routine sends data that is described by the local address @a buffer, - * size @a count, and @a datatype object to the destination endpoint @a ep. - * The routine is non-blocking and therefore returns immediately, however - * the actual send operation may be delayed. The send operation is considered - * completed when it is safe to reuse the source @e buffer. If the send - * operation is completed immediately the routine returns UCS_OK and the - * callback function @a cb is @b not invoked. If the operation is - * @b not completed immediately and no error reported, then the UCP library will - * schedule invocation of the callback @a cb upon completion of the send - * operation. In other words, the completion of the operation will be signaled - * either by the return code or by the callback. - * - * @note The user should not modify any part of the @a buffer after this - * operation is called, until the operation completes. - * - * @param [in] ep Destination endpoint handle. - * @param [in] buffer Pointer to the message buffer (payload). - * @param [in] count Number of elements to send. - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] cb Callback function that is invoked whenever the - * send operation is completed. 
It is important to note - * that the callback is only invoked in the event that - * the operation cannot be completed in place. - * @param [in] flags Reserved for future use. - * - * @return NULL - The send operation was completed immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. - * @return otherwise - Operation was scheduled for send and can be - * completed in any point in time. The request handle - * is returned to the application in order to track - * progress of the message. The application is - * responsible for releasing the handle using - * @ref ucp_request_free routine. - */ -ucs_status_ptr_t ucp_stream_send_nb(ucp_ep_h ep, const void *buffer, size_t count, - ucp_datatype_t datatype, ucp_send_callback_t cb, - unsigned flags); - - /** * @ingroup UCP_COMM * @brief Non-blocking stream send operation. @@ -3331,167 +3113,6 @@ ucs_status_ptr_t ucp_stream_send_nbx(ucp_ep_h ep, const void *buffer, size_t cou const ucp_request_param_t *param); -/** - * @ingroup UCP_COMM - * @brief Non-blocking tagged-send operations - * - * This routine sends a messages that is described by the local address @a - * buffer, size @a count, and @a datatype object to the destination endpoint - * @a ep. Each message is associated with a @a tag value that is used for - * message matching on the @ref ucp_tag_recv_nb "receiver". The routine is - * non-blocking and therefore returns immediately, however the actual send - * operation may be delayed. The send operation is considered completed when - * it is safe to reuse the source @e buffer. If the send operation is - * completed immediately the routine return UCS_OK and the call-back function - * @a cb is @b not invoked. If the operation is @b not completed immediately - * and no error reported then the UCP library will schedule to invoke the - * call-back @a cb whenever the send operation will be completed. In other - * words, the completion of a message can be signaled by the return code or - * the call-back. 
- * - * @note The user should not modify any part of the @a buffer after this - * operation is called, until the operation completes. - * - * @param [in] ep Destination endpoint handle. - * @param [in] buffer Pointer to the message buffer (payload). - * @param [in] count Number of elements to send - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] tag Message tag. - * @param [in] cb Callback function that is invoked whenever the - * send operation is completed. It is important to note - * that the call-back is only invoked in a case when - * the operation cannot be completed in place. - * - * @return NULL - The send operation was completed immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. - * @return otherwise - Operation was scheduled for send and can be - * completed in any point in time. The request handle - * is returned to the application in order to track - * progress of the message. The application is - * responsible for releasing the handle using - * @ref ucp_request_free "ucp_request_free()" routine. - */ -ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count, - ucp_datatype_t datatype, ucp_tag_t tag, - ucp_send_callback_t cb); - -/** - * @ingroup UCP_COMM - * @brief Non-blocking tagged-send operations with user provided request - * - * This routine provides a convenient and efficient way to implement a - * blocking send pattern. It also completes requests faster than - * @ref ucp_tag_send_nb() because: - * @li it always uses eager protocol to send data up to the - * rendezvous threshold. - * @li its rendezvous threshold is higher than the one used by - * the @ref ucp_tag_send_nb(). The threshold is controlled by - * the @b UCX_SEND_NBR_RNDV_THRESH environment variable. - * @li its request handling is simpler. There is no callback and no need - * to allocate and free requests. In fact request can be allocated by - * caller on the stack. 
- * - * This routine sends a messages that is described by the local address @a - * buffer, size @a count, and @a datatype object to the destination endpoint - * @a ep. Each message is associated with a @a tag value that is used for - * message matching on the @ref ucp_tag_recv_nbr "receiver". - * - * The routine is non-blocking and therefore returns immediately, however - * the actual send operation may be delayed. The send operation is considered - * completed when it is safe to reuse the source @e buffer. If the send - * operation is completed immediately the routine returns UCS_OK. - * - * If the operation is @b not completed immediately and no error reported - * then the UCP library will fill a user provided @a req and - * return UCS_INPROGRESS status. In order to monitor completion of the - * operation @ref ucp_request_check_status() should be used. - * - * Following pseudo code implements a blocking send function: - * @code - * MPI_send(...) - * { - * char *request; - * ucs_status_t status; - * - * // allocate request on the stack - * // ucp_context_query() was used to get ucp_request_size - * request = alloca(ucp_request_size); - * - * // note: make sure that there is enough memory before the - * // request handle - * status = ucp_tag_send_nbr(ep, ..., request + ucp_request_size); - * if (status != UCS_INPROGRESS) { - * return status; - * } - * - * do { - * ucp_worker_progress(worker); - * status = ucp_request_check_status(request + ucp_request_size); - * } while (status == UCS_INPROGRESS); - * - * return status; - * } - * @endcode - * - * @note The user should not modify any part of the @a buffer after this - * operation is called, until the operation completes. - * - * - * @param [in] ep Destination endpoint handle. - * @param [in] buffer Pointer to the message buffer (payload). - * @param [in] count Number of elements to send - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] tag Message tag. 
- * @param [in] req Request handle allocated by the user. There should - * be at least UCP request size bytes of available - * space before the @a req. The size of UCP request - * can be obtained by @ref ucp_context_query function. - * - * @return UCS_OK - The send operation was completed immediately. - * @return UCS_INPROGRESS - The send was not completed and is in progress. - * @ref ucp_request_check_status() should be used to - * monitor @a req status. - * @return Error code as defined by @ref ucs_status_t - */ -ucs_status_t ucp_tag_send_nbr(ucp_ep_h ep, const void *buffer, size_t count, - ucp_datatype_t datatype, ucp_tag_t tag, void *req); - -/** - * @ingroup UCP_COMM - * @brief Non-blocking synchronous tagged-send operation. - * - * Same as @ref ucp_tag_send_nb, except the request completes only after there - * is a remote tag match on the message (which does not always mean the remote - * receive has been completed). This function never completes "in-place", and - * always returns a request handle. - * - * @note The user should not modify any part of the @a buffer after this - * operation is called, until the operation completes. - * @note Returns @ref UCS_ERR_UNSUPPORTED if @ref UCP_ERR_HANDLING_MODE_PEER is - * enabled. This is a temporary implementation-related constraint that - * will be addressed in future releases. - * - * @param [in] ep Destination endpoint handle. - * @param [in] buffer Pointer to the message buffer (payload). - * @param [in] count Number of elements to send - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] tag Message tag. - * @param [in] cb Callback function that is invoked whenever the - * send operation is completed. - * - * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. - * @return otherwise - Operation was scheduled for send and can be - * completed in any point in time. The request handle - * is returned to the application in order to track - * progress of the message. 
The application is - * responsible for releasing the handle using - * @ref ucp_request_free "ucp_request_free()" routine. - */ -ucs_status_ptr_t ucp_tag_send_sync_nb(ucp_ep_h ep, const void *buffer, size_t count, - ucp_datatype_t datatype, ucp_tag_t tag, - ucp_send_callback_t cb); - - /** * @ingroup UCP_COMM * @brief Non-blocking tagged-send operation @@ -3567,53 +3188,6 @@ ucs_status_ptr_t ucp_tag_send_sync_nbx(ucp_ep_h ep, const void *buffer, const ucp_request_param_t *param); -/** - * @ingroup UCP_COMM - * @brief Non-blocking stream receive operation of structured data into a - * user-supplied buffer. - * - * This routine receives data that is described by the local address @a buffer, - * size @a count, and @a datatype object on the endpoint @a ep. The routine is - * non-blocking and therefore returns immediately. The receive operation is - * considered complete when the message is delivered to the buffer. If data is - * not immediately available, the operation will be scheduled for receive and - * a request handle will be returned. In order to notify the application about - * completion of a scheduled receive operation, the UCP library will invoke - * the call-back @a cb when data is in the receive buffer and ready for - * application access. If the receive operation cannot be started, the routine - * returns an error. - * - * @param [in] ep UCP endpoint that is used for the receive operation. - * @param [in] buffer Pointer to the buffer to receive the data. - * @param [in] count Number of elements to receive into @a buffer. - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] cb Callback function that is invoked whenever the - * receive operation is completed and the data is ready - * in the receive @a buffer. It is important to note - * that the call-back is only invoked in a case when - * the operation cannot be completed immediately. - * @param [out] length Size of the received data in bytes. 
The value is - * valid only if return code is UCS_OK. - * @note The amount of data received, in bytes, is always an - * integral multiple of the @a datatype size. - * @param [in] flags Flags defined in @ref ucp_stream_recv_flags_t. - * - * @return NULL - The receive operation was completed - * immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. - * @return otherwise - Operation was scheduled for receive. A request - * handle is returned to the application in order - * to track progress of the operation. - * The application is responsible for releasing - * the handle by calling the - * @ref ucp_request_free routine. - */ -ucs_status_ptr_t ucp_stream_recv_nb(ucp_ep_h ep, void *buffer, size_t count, - ucp_datatype_t datatype, - ucp_stream_recv_callback_t cb, - size_t *length, unsigned flags); - - /** * @ingroup UCP_COMM * @brief Non-blocking stream receive operation of structured data into a @@ -3689,83 +3263,6 @@ ucs_status_ptr_t ucp_stream_recv_nbx(ucp_ep_h ep, void *buffer, size_t count, ucs_status_ptr_t ucp_stream_recv_data_nb(ucp_ep_h ep, size_t *length); -/** - * @ingroup UCP_COMM - * @brief Non-blocking tagged-receive operation. - * - * This routine receives a message that is described by the local address @a - * buffer, size @a count, and @a datatype object on the @a worker. The tag - * value of the receive message has to match the @a tag and @a tag_mask values, - * where the @a tag_mask indicates which bits of the tag have to be matched. The - * routine is non-blocking and therefore returns immediately. The receive - * operation is considered completed when the message is delivered to the @a - * buffer. In order to notify the application about completion of the receive - * operation the UCP library will invoke the call-back @a cb when the received - * message is in the receive buffer and ready for application access. If the - * receive operation cannot be stated the routine returns an error. 
- * - * @note This routine cannot return UCS_OK. It always returns a request - * handle or an error. - * - * @param [in] worker UCP worker that is used for the receive operation. - * @param [in] buffer Pointer to the buffer to receive the data. - * @param [in] count Number of elements to receive - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] tag Message tag to expect. - * @param [in] tag_mask Bit mask that indicates the bits that are used for - * the matching of the incoming tag - * against the expected tag. - * @param [in] cb Callback function that is invoked whenever the - * receive operation is completed and the data is ready - * in the receive @a buffer. - * - * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. - * @return otherwise - Operation was scheduled for receive. The request - * handle is returned to the application in order - * to track progress of the operation. The - * application is responsible for releasing the - * handle using @ref ucp_request_free - * "ucp_request_free()" routine. - */ -ucs_status_ptr_t ucp_tag_recv_nb(ucp_worker_h worker, void *buffer, size_t count, - ucp_datatype_t datatype, ucp_tag_t tag, - ucp_tag_t tag_mask, ucp_tag_recv_callback_t cb); - - -/** - * @ingroup UCP_COMM - * @brief Non-blocking tagged-receive operation. - * - * This routine receives a message that is described by the local address @a - * buffer, size @a count, and @a datatype object on the @a worker. The tag - * value of the receive message has to match the @a tag and @a tag_mask values, - * where the @a tag_mask indicates which bits of the tag have to be matched. The - * routine is non-blocking and therefore returns immediately. The receive - * operation is considered completed when the message is delivered to the @a - * buffer. In order to monitor completion of the operation - * @ref ucp_request_check_status or @ref ucp_tag_recv_request_test should be - * used. 
- * - * @param [in] worker UCP worker that is used for the receive operation. - * @param [in] buffer Pointer to the buffer to receive the data. - * @param [in] count Number of elements to receive - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] tag Message tag to expect. - * @param [in] tag_mask Bit mask that indicates the bits that are used for - * the matching of the incoming tag - * against the expected tag. - * @param [in] req Request handle allocated by the user. There should - * be at least UCP request size bytes of available - * space before the @a req. The size of UCP request - * can be obtained by @ref ucp_context_query function. - * - * @return Error code as defined by @ref ucs_status_t - */ -ucs_status_t ucp_tag_recv_nbr(ucp_worker_h worker, void *buffer, size_t count, - ucp_datatype_t datatype, ucp_tag_t tag, - ucp_tag_t tag_mask, void *req); - - /** * @ingroup UCP_COMM * @brief Non-blocking tagged-receive operation. @@ -3853,45 +3350,6 @@ ucp_tag_message_h ucp_tag_probe_nb(ucp_worker_h worker, ucp_tag_t tag, ucp_tag_recv_info_t *info); -/** - * @ingroup UCP_COMM - * @brief Non-blocking receive operation for a probed message. - * - * This routine receives a message that is described by the local address @a - * buffer, size @a count, @a message handle, and @a datatype object on the @a - * worker. The @a message handle can be obtained by calling the @ref - * ucp_tag_probe_nb "ucp_tag_probe_nb()" routine. The @ref ucp_tag_msg_recv_nb - * "ucp_tag_msg_recv_nb()" routine is non-blocking and therefore returns - * immediately. The receive operation is considered completed when the message - * is delivered to the @a buffer. In order to notify the application about - * completion of the receive operation the UCP library will invoke the - * call-back @a cb when the received message is in the receive buffer and ready - * for application access. If the receive operation cannot be started the - * routine returns an error. 
- * - * @param [in] worker UCP worker that is used for the receive operation. - * @param [in] buffer Pointer to the buffer that will receive the data. - * @param [in] count Number of elements to receive - * @param [in] datatype Datatype descriptor for the elements in the buffer. - * @param [in] message Message handle. - * @param [in] cb Callback function that is invoked whenever the - * receive operation is completed and the data is ready - * in the receive @a buffer. - * - * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. - * @return otherwise - Operation was scheduled for receive. The request - * handle is returned to the application in order - * to track progress of the operation. The - * application is responsible for releasing the - * handle using @ref ucp_request_free - * "ucp_request_free()" routine. - */ -ucs_status_ptr_t ucp_tag_msg_recv_nb(ucp_worker_h worker, void *buffer, - size_t count, ucp_datatype_t datatype, - ucp_tag_message_h message, - ucp_tag_recv_callback_t cb); - - /** * @ingroup UCP_COMM * @brief Non-blocking receive operation for a probed message. @@ -3926,82 +3384,6 @@ ucs_status_ptr_t ucp_tag_msg_recv_nbx(ucp_worker_h worker, void *buffer, size_t count, ucp_tag_message_h message, const ucp_request_param_t *param); -/** - * @ingroup UCP_COMM - * @brief Non-blocking implicit remote memory put operation. - * - * This routine initiates a storage of contiguous block of data that is - * described by the local address @a buffer in the remote contiguous memory - * region described by @a remote_addr address and the @ref ucp_rkey_h "memory - * handle" @a rkey. The routine returns immediately and @b does @b not - * guarantee re-usability of the source address @e buffer. If the operation is - * completed immediately the routine return UCS_OK, otherwise UCS_INPROGRESS - * or an error is returned to user. 
- * - * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" - * in order to guarantee re-usability of the source address @e buffer. - * - * @param [in] ep Remote endpoint handle. - * @param [in] buffer Pointer to the local source address. - * @param [in] length Length of the data (in bytes) stored under the - * source address. - * @param [in] remote_addr Pointer to the destination remote memory address - * to write to. - * @param [in] rkey Remote memory key associated with the - * remote memory address. - * - * @return Error code as defined by @ref ucs_status_t - */ -ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length, - uint64_t remote_addr, ucp_rkey_h rkey); - -/** - * @ingroup UCP_COMM - * @brief Non-blocking remote memory put operation. - * - * This routine initiates a storage of contiguous block of data that is - * described by the local address @a buffer in the remote contiguous memory - * region described by @a remote_addr address and the @ref ucp_rkey_h "memory - * handle" @a rkey. The routine returns immediately and @b does @b not - * guarantee re-usability of the source address @e buffer. If the operation is - * completed immediately the routine return UCS_OK, otherwise UCS_INPROGRESS - * or an error is returned to user. If the put operation completes immediately, - * the routine returns UCS_OK and the call-back routine @a cb is @b not - * invoked. If the operation is @b not completed immediately and no error is - * reported, then the UCP library will schedule invocation of the call-back - * routine @a cb upon completion of the put operation. In other words, the - * completion of a put operation can be signaled by the return code or - * execution of the call-back. - * - * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" - * in order to guarantee re-usability of the source address @e buffer. - * - * @param [in] ep Remote endpoint handle. - * @param [in] buffer Pointer to the local source address. 
- * @param [in] length Length of the data (in bytes) stored under the - * source address. - * @param [in] remote_addr Pointer to the destination remote memory address - * to write to. - * @param [in] rkey Remote memory key associated with the - * remote memory address. - * @param [in] cb Call-back function that is invoked whenever the - * put operation is completed and the local buffer - * can be modified. Does not guarantee remote - * completion. - * - * @return NULL - The operation was completed immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. - * @return otherwise - Operation was scheduled and can be - * completed at any point in time. The request handle - * is returned to the application in order to track - * progress of the operation. The application is - * responsible for releasing the handle using - * @ref ucp_request_free "ucp_request_free()" routine. - */ -ucs_status_ptr_t ucp_put_nb(ucp_ep_h ep, const void *buffer, size_t length, - uint64_t remote_addr, ucp_rkey_h rkey, - ucp_send_callback_t cb); - /** * @ingroup UCP_COMM @@ -4058,81 +3440,6 @@ ucs_status_ptr_t ucp_put_nbx(ucp_ep_h ep, const void *buffer, size_t count, const ucp_request_param_t *param); -/** - * @ingroup UCP_COMM - * @brief Non-blocking implicit remote memory get operation. - * - * This routine initiate a load of contiguous block of data that is described - * by the remote memory address @a remote_addr and the @ref ucp_rkey_h "memory handle" - * @a rkey in the local contiguous memory region described by @a buffer - * address. The routine returns immediately and @b does @b not guarantee that - * remote data is loaded and stored under the local address @e buffer. - * - * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" in order - * guarantee that remote data is loaded and stored under the local address - * @e buffer. - * - * @param [in] ep Remote endpoint handle. - * @param [in] buffer Pointer to the local destination address. 
- * @param [in] length Length of the data (in bytes) stored under the - * destination address. - * @param [in] remote_addr Pointer to the source remote memory address - * to read from. - * @param [in] rkey Remote memory key associated with the - * remote memory address. - * - * @return Error code as defined by @ref ucs_status_t - */ -ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length, - uint64_t remote_addr, ucp_rkey_h rkey); - -/** - * @ingroup UCP_COMM - * @brief Non-blocking remote memory get operation. - * - * This routine initiates a load of a contiguous block of data that is - * described by the remote memory address @a remote_addr and the @ref ucp_rkey_h - * "memory handle" @a rkey in the local contiguous memory region described - * by @a buffer address. The routine returns immediately and @b does @b not - * guarantee that remote data is loaded and stored under the local address @e - * buffer. If the operation is completed immediately the routine return UCS_OK, - * otherwise UCS_INPROGRESS or an error is returned to user. If the get - * operation completes immediately, the routine returns UCS_OK and the - * call-back routine @a cb is @b not invoked. If the operation is @b not - * completed immediately and no error is reported, then the UCP library will - * schedule invocation of the call-back routine @a cb upon completion of the - * get operation. In other words, the completion of a get operation can be - * signaled by the return code or execution of the call-back. - * - * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" - * in order to guarantee re-usability of the source address @e buffer. - * - * @param [in] ep Remote endpoint handle. - * @param [in] buffer Pointer to the local destination address. - * @param [in] length Length of the data (in bytes) stored under the - * destination address. - * @param [in] remote_addr Pointer to the source remote memory address - * to read from. 
- * @param [in] rkey Remote memory key associated with the - * remote memory address. - * @param [in] cb Call-back function that is invoked whenever the - * get operation is completed and the data is - * visible to the local process. - * - * @return NULL - The operation was completed immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. - * @return otherwise - Operation was scheduled and can be - * completed at any point in time. The request handle - * is returned to the application in order to track - * progress of the operation. The application is - * responsible for releasing the handle using - * @ref ucp_request_free "ucp_request_free()" routine. - */ -ucs_status_ptr_t ucp_get_nb(ucp_ep_h ep, void *buffer, size_t length, - uint64_t remote_addr, ucp_rkey_h rkey, - ucp_send_callback_t cb); - - /** * @ingroup UCP_COMM * @brief Non-blocking remote memory get operation. @@ -4184,84 +3491,6 @@ ucs_status_ptr_t ucp_get_nbx(ucp_ep_h ep, void *buffer, size_t count, const ucp_request_param_t *param); -/** - * @ingroup UCP_COMM - * @brief Post an atomic memory operation. - * - * This routine posts an atomic memory operation to a remote value. - * The remote value is described by the combination of the remote - * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" - * @a rkey. - * Return from the function does not guarantee completion. A user must - * call @ref ucp_ep_flush_nb or @ref ucp_worker_flush_nb to guarantee that the - * remote value has been updated. - * - * @param [in] ep UCP endpoint. - * @param [in] opcode One of @ref ucp_atomic_post_op_t. - * @param [in] value Source operand for the atomic operation. - * @param [in] op_size Size of value in bytes - * @param [in] remote_addr Remote address to operate on. - * @param [in] rkey Remote key handle for the remote memory address. 
- * - * @return Error code as defined by @ref ucs_status_t - */ -ucs_status_t ucp_atomic_post(ucp_ep_h ep, ucp_atomic_post_op_t opcode, uint64_t value, - size_t op_size, uint64_t remote_addr, ucp_rkey_h rkey); - - -/** - * @ingroup UCP_COMM - * @brief Post an atomic fetch operation. - * - * This routine will post an atomic fetch operation to remote memory. - * The remote value is described by the combination of the remote - * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" - * @a rkey. - * The routine is non-blocking and therefore returns immediately. However the - * actual atomic operation may be delayed. The atomic operation is not considered complete - * until the values in remote and local memory are completed. If the atomic operation - * completes immediately, the routine returns UCS_OK and the call-back routine - * @a cb is @b not invoked. If the operation is @b not completed immediately and no - * error is reported, then the UCP library will schedule invocation of the call-back - * routine @a cb upon completion of the atomic operation. In other words, the completion - * of an atomic operation can be signaled by the return code or execution of the call-back. - * - * @note The user should not modify any part of the @a result after this - * operation is called, until the operation completes. - * - * @param [in] ep UCP endpoint. - * @param [in] opcode One of @ref ucp_atomic_fetch_op_t. - * @param [in] value Source operand for atomic operation. In the case of CSWAP - * this is the conditional for the swap. For SWAP this is - * the value to be placed in remote memory. - * @param [inout] result Local memory address to store resulting fetch to. - * In the case of CSWAP the value in result will be - * swapped into the @a remote_addr if the condition - * is true. - * @param [in] op_size Size of value in bytes and pointer type for result - * @param [in] remote_addr Remote address to operate on. 
- * @param [in] rkey Remote key handle for the remote memory address. - * @param [in] cb Call-back function that is invoked whenever the - * send operation is completed. It is important to note - * that the call-back function is only invoked in a case when - * the operation cannot be completed in place. - * - * @return NULL - The operation was completed immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. - * @return otherwise - Operation was scheduled and can be - * completed at any point in time. The request handle - * is returned to the application in order to track - * progress of the operation. The application is - * responsible for releasing the handle using - * @ref ucp_request_free "ucp_request_free()" routine. - */ -ucs_status_ptr_t -ucp_atomic_fetch_nb(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode, - uint64_t value, void *result, size_t op_size, - uint64_t remote_addr, ucp_rkey_h rkey, - ucp_send_callback_t cb); - - /** * @ingroup UCP_COMM * @brief Post an atomic memory operation. @@ -4387,6 +3616,7 @@ ucs_status_t ucp_tag_recv_request_test(void *request, ucp_tag_recv_info_t *info) */ ucs_status_t ucp_stream_recv_request_test(void *request, size_t *length_p); + /** * @ingroup UCP_COMM * @brief Cancel an outstanding communications request. @@ -4523,39 +3753,6 @@ void ucp_dt_destroy(ucp_datatype_t datatype); ucs_status_t ucp_worker_fence(ucp_worker_h worker); -/** - * @ingroup UCP_WORKER - * - * @brief Flush outstanding AMO and RMA operations on the @ref ucp_worker_h - * "worker" - * - * This routine flushes all outstanding AMO and RMA communications on the - * @ref ucp_worker_h "worker". All the AMO and RMA operations issued on the - * @a worker prior to this call are completed both at the origin and at the - * target when this call returns. 
- * - * @note For description of the differences between @ref ucp_worker_flush_nb - * "flush" and @ref ucp_worker_fence "fence" operations please see - * @ref ucp_worker_fence "ucp_worker_fence()" - * - * @param [in] worker UCP worker. - * @param [in] flags Flags for flush operation. Reserved for future use. - * @param [in] cb Callback which will be called when the flush operation - * completes. - * - * @return NULL - The flush operation was completed immediately. - * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. - * @return otherwise - Flush operation was scheduled and can be completed - * in any point in time. The request handle is returned - * to the application in order to track progress. The - * application is responsible for releasing the handle - * using @ref ucp_request_free "ucp_request_free()" - * routine. - */ -ucs_status_ptr_t ucp_worker_flush_nb(ucp_worker_h worker, unsigned flags, - ucp_send_callback_t cb); - - /** * @ingroup UCP_WORKER * @@ -4639,8 +3836,8 @@ typedef struct ucp_ep_attr { struct sockaddr_storage remote_sockaddr; /** - * Structure defining an array containing transport and device names used - * by this endpoint. The caller is responsible for allocation and + * Structure defining an array containing transport and device names used + * by this endpoint. The caller is responsible for allocation and * deallocation of this array. */ ucp_transports_t transports; diff --git a/src/ucp/api/ucp_compat.h b/src/ucp/api/ucp_compat.h index 3c543cac322..c991cc56771 100644 --- a/src/ucp/api/ucp_compat.h +++ b/src/ucp/api/ucp_compat.h @@ -499,6 +499,868 @@ ucs_status_t ucp_atomic_cswap64(ucp_ep_h ep, uint64_t compare, uint64_t swap, ucs_status_ptr_t ucp_ep_modify_nb(ucp_ep_h ep, const ucp_ep_params_t *params); +/** + * @ingroup UCP_WORKER + * @brief Get the address of the worker object. + * + * @deprecated Use @ref ucp_worker_query with the flag + * @ref UCP_WORKER_ATTR_FIELD_ADDRESS in order to obtain the worker + * address. 
+ * + * This routine returns the address of the worker object. This address can be + * passed to remote instances of the UCP library in order to connect to this + * worker. The memory for the address handle is allocated by this function, and + * must be released by using @ref ucp_worker_release_address + * "ucp_worker_release_address()" routine. + * + * @param [in] worker Worker object whose address to return. + * @param [out] address_p A pointer to the worker address. + * @param [out] address_length_p The size in bytes of the address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_get_address(ucp_worker_h worker, + ucp_address_t **address_p, + size_t *address_length_p); + + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking @ref ucp_ep_h "endpoint" closure. + * + * @deprecated Use @ref ucp_ep_close_nbx instead. + * + * This routine releases the @ref ucp_ep_h "endpoint". The endpoint closure + * process depends on the selected @a mode. + * + * @param [in] ep Handle to the endpoint to close. + * @param [in] mode One from @ref ucp_ep_close_mode value. + * + * @return UCS_OK - The endpoint is closed successfully. + * @return UCS_PTR_IS_ERR(_ptr) - The closure failed and an error code indicates + * the transport level status. However, resources + * are released and the @a endpoint can no longer + * be used. + * @return otherwise - The closure process is started, and can be + * completed at any point in time. A request handle + * is returned to the application in order to track + * progress of the endpoint closure. The application + * is responsible for releasing the handle using the + * @ref ucp_request_free routine. 
+ * + * @note @ref ucp_ep_close_nb replaces deprecated @ref ucp_disconnect_nb and + * @ref ucp_ep_destroy + */ +ucs_status_ptr_t ucp_ep_close_nb(ucp_ep_h ep, unsigned mode); + + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking flush of outstanding AMO and RMA operations on the + * @ref ucp_ep_h "endpoint". + * + * @deprecated Use @ref ucp_ep_flush_nbx instead. + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_ep_h "endpoint". All the AMO and RMA operations issued on the + * @a ep prior to this call are completed both at the origin and at the target + * @ref ucp_ep_h "endpoint" when this call returns. + * + * @param [in] ep UCP endpoint. + * @param [in] flags Flags for flush operation. Reserved for future use. + * @param [in] cb Callback which will be called when the flush operation + * completes. + * + * @return NULL - The flush operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be completed + * in any point in time. The request handle is returned + * to the application in order to track progress. The + * application is responsible for releasing the handle + * using @ref ucp_request_free "ucp_request_free()" + * routine. + */ +ucs_status_ptr_t ucp_ep_flush_nb(ucp_ep_h ep, unsigned flags, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_WORKER + * @brief Add user defined callback for Active Message. + * + * @deprecated Use @ref ucp_worker_set_am_recv_handler instead. + * + * This routine installs a user defined callback to handle incoming Active + * Messages with a specific id. This callback is called whenever an Active + * Message that was sent from the remote peer by @ref ucp_am_send_nb is + * received on this worker. + * + * @param [in] worker UCP worker on which to set the Active Message + * handler. + * @param [in] id Active Message id. + * @param [in] cb Active Message callback. NULL to clear. 
+ * @param [in] arg Active Message argument, which will be passed + * in to every invocation of the callback as the + * arg argument. + * @param [in] flags Dictates how an Active Message is handled on the + * remote endpoint. Currently only + * UCP_AM_FLAG_WHOLE_MSG is supported, which + * indicates the callback will not be invoked + * until all data has arrived. + * + * @return error code if the worker does not support Active Messages or + * requested callback flags. + */ +ucs_status_t ucp_worker_set_am_handler(ucp_worker_h worker, uint16_t id, + ucp_am_callback_t cb, void *arg, + uint32_t flags); + + +/** + * @ingroup UCP_COMM + * @brief Send Active Message. + * + * @deprecated Use @ref ucp_am_send_nbx instead. + * + * This routine sends an Active Message to an ep. It does not support + * CUDA memory. + * + * @param [in] ep UCP endpoint where the Active Message will be run. + * @param [in] id Active Message id. Specifies which registered + * callback to run. + * @param [in] buffer Pointer to the data to be sent to the target node + * of the Active Message. + * @param [in] count Number of elements to send. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback that is invoked upon completion of the + * data transfer if it is not completed immediately. + * @param [in] flags Operation flags as defined by @ref ucp_send_am_flags. + * + * @return NULL Active Message was sent immediately. + * @return UCS_PTR_IS_ERR(_ptr) Error sending Active Message. + * @return otherwise Pointer to request, and Active Message is known + * to be completed after cb is run. + */ +ucs_status_ptr_t ucp_am_send_nb(ucp_ep_h ep, uint16_t id, + const void *buffer, size_t count, + ucp_datatype_t datatype, + ucp_send_callback_t cb, unsigned flags); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream send operation. + * + * @deprecated Use @ref ucp_stream_send_nbx instead. 
+ * + * This routine sends data that is described by the local address @a buffer, + * size @a count, and @a datatype object to the destination endpoint @a ep. + * The routine is non-blocking and therefore returns immediately, however + * the actual send operation may be delayed. The send operation is considered + * completed when it is safe to reuse the source @e buffer. If the send + * operation is completed immediately the routine returns UCS_OK and the + * callback function @a cb is @b not invoked. If the operation is + * @b not completed immediately and no error reported, then the UCP library will + * schedule invocation of the callback @a cb upon completion of the send + * operation. In other words, the completion of the operation will be signaled + * either by the return code or by the callback. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. It is important to note + * that the callback is only invoked in the event that + * the operation cannot be completed in place. + * @param [in] flags Reserved for future use. + * + * @return NULL - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. The application is + * responsible for releasing the handle using + * @ref ucp_request_free routine. 
+ */ +ucs_status_ptr_t ucp_stream_send_nb(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_send_callback_t cb, + unsigned flags); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream receive operation of structured data into a + * user-supplied buffer. + * + * @deprecated Use @ref ucp_stream_recv_nbx instead. + * + * This routine receives data that is described by the local address @a buffer, + * size @a count, and @a datatype object on the endpoint @a ep. The routine is + * non-blocking and therefore returns immediately. The receive operation is + * considered complete when the message is delivered to the buffer. If data is + * not immediately available, the operation will be scheduled for receive and + * a request handle will be returned. In order to notify the application about + * completion of a scheduled receive operation, the UCP library will invoke + * the call-back @a cb when data is in the receive buffer and ready for + * application access. If the receive operation cannot be started, the routine + * returns an error. + * + * @param [in] ep UCP endpoint that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive into @a buffer. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. It is important to note + * that the call-back is only invoked in a case when + * the operation cannot be completed immediately. + * @param [out] length Size of the received data in bytes. The value is + * valid only if return code is UCS_OK. + * @note The amount of data received, in bytes, is always an + * integral multiple of the @a datatype size. + * @param [in] flags Flags defined in @ref ucp_stream_recv_flags_t. 
+ * + * @return NULL - The receive operation was completed + * immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. A request + * handle is returned to the application in order + * to track progress of the operation. + * The application is responsible for releasing + * the handle by calling the + * @ref ucp_request_free routine. + */ +ucs_status_ptr_t ucp_stream_recv_nb(ucp_ep_h ep, void *buffer, size_t count, + ucp_datatype_t datatype, + ucp_stream_recv_callback_t cb, + size_t *length, unsigned flags); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-send operations + * + * @deprecated Use @ref ucp_tag_send_nbx instead. + * + * This routine sends a message that is described by the local address @a + * buffer, size @a count, and @a datatype object to the destination endpoint + * @a ep. Each message is associated with a @a tag value that is used for + * message matching on the @ref ucp_tag_recv_nb "receiver". The routine is + * non-blocking and therefore returns immediately, however the actual send + * operation may be delayed. The send operation is considered completed when + * it is safe to reuse the source @e buffer. If the send operation is + * completed immediately the routine returns UCS_OK and the call-back function + * @a cb is @b not invoked. If the operation is @b not completed immediately + * and no error reported then the UCP library will schedule to invoke the + * call-back @a cb whenever the send operation will be completed. In other + * words, the completion of a message can be signaled by the return code or + * the call-back. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload).
+ * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. It is important to note + * that the call-back is only invoked in a case when + * the operation cannot be completed in place. + * + * @return NULL - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-send operations with user provided request + * + * @deprecated Use @ref ucp_tag_send_nbx with the flag + * @ref UCP_OP_ATTR_FIELD_REQUEST instead. + * + * This routine provides a convenient and efficient way to implement a + * blocking send pattern. It also completes requests faster than + * @ref ucp_tag_send_nb() because: + * @li it always uses eager protocol to send data up to the + * rendezvous threshold. + * @li its rendezvous threshold is higher than the one used by + * the @ref ucp_tag_send_nb(). The threshold is controlled by + * the @b UCX_SEND_NBR_RNDV_THRESH environment variable. + * @li its request handling is simpler. There is no callback and no need + * to allocate and free requests. In fact request can be allocated by + * caller on the stack. + * + * This routine sends a message that is described by the local address @a + * buffer, size @a count, and @a datatype object to the destination endpoint + * @a ep.
Each message is associated with a @a tag value that is used for + * message matching on the @ref ucp_tag_recv_nbr "receiver". + * + * The routine is non-blocking and therefore returns immediately, however + * the actual send operation may be delayed. The send operation is considered + * completed when it is safe to reuse the source @e buffer. If the send + * operation is completed immediately the routine returns UCS_OK. + * + * If the operation is @b not completed immediately and no error reported + * then the UCP library will fill a user provided @a req and + * return UCS_INPROGRESS status. In order to monitor completion of the + * operation @ref ucp_request_check_status() should be used. + * + * Following pseudo code implements a blocking send function: + * @code + * MPI_send(...) + * { + * char *request; + * ucs_status_t status; + * + * // allocate request on the stack + * // ucp_context_query() was used to get ucp_request_size + * request = alloca(ucp_request_size); + * + * // note: make sure that there is enough memory before the + * // request handle + * status = ucp_tag_send_nbr(ep, ..., request + ucp_request_size); + * if (status != UCS_INPROGRESS) { + * return status; + * } + * + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request + ucp_request_size); + * } while (status == UCS_INPROGRESS); + * + * return status; + * } + * @endcode + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] req Request handle allocated by the user. There should + * be at least UCP request size bytes of available + * space before the @a req. 
The size of UCP request + * can be obtained by @ref ucp_context_query function. + * + * @return UCS_OK - The send operation was completed immediately. + * @return UCS_INPROGRESS - The send was not completed and is in progress. + * @ref ucp_request_check_status() should be used to + * monitor @a req status. + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_tag_send_nbr(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, void *req); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking synchronous tagged-send operation. + * + * @deprecated Use @ref ucp_tag_send_sync_nbx instead. + * + * Same as @ref ucp_tag_send_nb, except the request completes only after there + * is a remote tag match on the message (which does not always mean the remote + * receive has been completed). This function never completes "in-place", and + * always returns a request handle. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * @note Returns @ref UCS_ERR_UNSUPPORTED if @ref UCP_ERR_HANDLING_MODE_PEER is + * enabled. This is a temporary implementation-related constraint that + * will be addressed in future releases. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. + * + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. 
The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_tag_send_sync_nb(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-receive operation. + * + * @deprecated Use @ref ucp_tag_recv_nbx instead. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a datatype object on the @a worker. The tag + * value of the receive message has to match the @a tag and @a tag_mask values, + * where the @a tag_mask indicates which bits of the tag have to be matched. The + * routine is non-blocking and therefore returns immediately. The receive + * operation is considered completed when the message is delivered to the @a + * buffer. In order to notify the application about completion of the receive + * operation the UCP library will invoke the call-back @a cb when the received + * message is in the receive buffer and ready for application access. If the + * receive operation cannot be started the routine returns an error. + * + * @note This routine cannot return UCS_OK. It always returns a request + * handle or an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag to expect. + * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. + * + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed.
+ * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_tag_recv_nb(ucp_worker_h worker, void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_tag_t tag_mask, ucp_tag_recv_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-receive operation. + * + * @deprecated Use @ref ucp_tag_recv_nbx with the flag + * @ref UCP_OP_ATTR_FIELD_REQUEST instead. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a datatype object on the @a worker. The tag + * value of the receive message has to match the @a tag and @a tag_mask values, + * where the @a tag_mask indicates which bits of the tag have to be matched. The + * routine is non-blocking and therefore returns immediately. The receive + * operation is considered completed when the message is delivered to the @a + * buffer. In order to monitor completion of the operation + * @ref ucp_request_check_status or @ref ucp_tag_recv_request_test should be + * used. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag to expect. + * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] req Request handle allocated by the user. There should + * be at least UCP request size bytes of available + * space before the @a req. The size of UCP request + * can be obtained by @ref ucp_context_query function. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_tag_recv_nbr(ucp_worker_h worker, void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_tag_t tag_mask, void *req); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking receive operation for a probed message. + * + * @deprecated Use @ref ucp_tag_recv_nbx instead. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, @a message handle, and @a datatype object on the @a + * worker. The @a message handle can be obtained by calling the @ref + * ucp_tag_probe_nb "ucp_tag_probe_nb()" routine. The @ref ucp_tag_msg_recv_nb + * "ucp_tag_msg_recv_nb()" routine is non-blocking and therefore returns + * immediately. The receive operation is considered completed when the message + * is delivered to the @a buffer. In order to notify the application about + * completion of the receive operation the UCP library will invoke the + * call-back @a cb when the received message is in the receive buffer and ready + * for application access. If the receive operation cannot be started the + * routine returns an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer that will receive the data. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] message Message handle. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. + * + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. 
The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_tag_msg_recv_nb(ucp_worker_h worker, void *buffer, + size_t count, ucp_datatype_t datatype, + ucp_tag_message_h message, + ucp_tag_recv_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking implicit remote memory put operation. + * + * @deprecated Use @ref ucp_put_nbx without passing the flag + * @ref UCP_OP_ATTR_FIELD_CALLBACK instead. If a request pointer + * is returned, release it immediately by @ref ucp_request_free. + * + * This routine initiates a storage of contiguous block of data that is + * described by the local address @a buffer in the remote contiguous memory + * region described by @a remote_addr address and the @ref ucp_rkey_h "memory + * handle" @a rkey. The routine returns immediately and @b does @b not + * guarantee re-usability of the source address @e buffer. If the operation is + * completed immediately the routine return UCS_OK, otherwise UCS_INPROGRESS + * or an error is returned to user. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote memory address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory put operation. + * + * @deprecated Use @ref ucp_put_nbx instead. 
+ * + * This routine initiates a storage of contiguous block of data that is + * described by the local address @a buffer in the remote contiguous memory + * region described by @a remote_addr address and the @ref ucp_rkey_h "memory + * handle" @a rkey. The routine returns immediately and @b does @b not + * guarantee re-usability of the source address @e buffer. If the operation is + * completed immediately the routine return UCS_OK, otherwise UCS_INPROGRESS + * or an error is returned to user. If the put operation completes immediately, + * the routine returns UCS_OK and the call-back routine @a cb is @b not + * invoked. If the operation is @b not completed immediately and no error is + * reported, then the UCP library will schedule invocation of the call-back + * routine @a cb upon completion of the put operation. In other words, the + * completion of a put operation can be signaled by the return code or + * execution of the call-back. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote memory address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * @param [in] cb Call-back function that is invoked whenever the + * put operation is completed and the local buffer + * can be modified. Does not guarantee remote + * completion. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. 
The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_put_nb(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking implicit remote memory get operation. + * + * @deprecated Use @ref ucp_get_nbx without passing the flag + * @ref UCP_OP_ATTR_FIELD_CALLBACK instead. If a request pointer + * is returned, release it immediately by @ref ucp_request_free. + * + * This routine initiates a load of a contiguous block of data that is described + * by the remote memory address @a remote_addr and the @ref ucp_rkey_h "memory handle" + * @a rkey in the local contiguous memory region described by @a buffer + * address. The routine returns immediately and @b does @b not guarantee that + * remote data is loaded and stored under the local address @e buffer. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" in order + * to guarantee that remote data is loaded and stored under the local address + * @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) stored under the + * destination address. + * @param [in] remote_addr Pointer to the source remote memory address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory get operation. + * + * @deprecated Use @ref ucp_get_nbx instead.
+ * + * This routine initiates a load of a contiguous block of data that is + * described by the remote memory address @a remote_addr and the @ref ucp_rkey_h + * "memory handle" @a rkey in the local contiguous memory region described + * by @a buffer address. The routine returns immediately and @b does @b not + * guarantee that remote data is loaded and stored under the local address @e + * buffer. If the operation is completed immediately the routine returns UCS_OK, + * otherwise UCS_INPROGRESS or an error is returned to user. If the get + * operation completes immediately, the routine returns UCS_OK and the + * call-back routine @a cb is @b not invoked. If the operation is @b not + * completed immediately and no error is reported, then the UCP library will + * schedule invocation of the call-back routine @a cb upon completion of the + * get operation. In other words, the completion of a get operation can be + * signaled by the return code or execution of the call-back. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * to guarantee that remote data is loaded into the local @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) stored under the + * destination address. + * @param [in] remote_addr Pointer to the source remote memory address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * @param [in] cb Call-back function that is invoked whenever the + * get operation is completed and the data is + * visible to the local process. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation.
The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_get_nb(ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Atomic operation requested for ucp_atomic_post + * + * @deprecated Use @ref ucp_atomic_op_nbx and @ref ucp_atomic_op_t instead. + * + * This enumeration defines which atomic memory operation should be + * performed by the ucp_atomic_post family of functions. All of these are + * non-fetching atomics and will not result in a request handle. + */ +typedef enum { + UCP_ATOMIC_POST_OP_ADD, /**< Atomic add */ + UCP_ATOMIC_POST_OP_AND, /**< Atomic and */ + UCP_ATOMIC_POST_OP_OR, /**< Atomic or */ + UCP_ATOMIC_POST_OP_XOR, /**< Atomic xor */ + UCP_ATOMIC_POST_OP_LAST +} ucp_atomic_post_op_t; + + +/** + * @ingroup UCP_COMM + * @brief Post an atomic memory operation. + * + * @deprecated Use @ref ucp_atomic_op_nbx without the flag + * @ref UCP_OP_ATTR_FIELD_REPLY_BUFFER instead. + * + * This routine posts an atomic memory operation to a remote value. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. + * Return from the function does not guarantee completion. A user must + * call @ref ucp_ep_flush_nb or @ref ucp_worker_flush_nb to guarantee that the + * remote value has been updated. + * + * @param [in] ep UCP endpoint. + * @param [in] opcode One of @ref ucp_atomic_post_op_t. + * @param [in] value Source operand for the atomic operation. + * @param [in] op_size Size of value in bytes + * @param [in] remote_addr Remote address to operate on. + * @param [in] rkey Remote key handle for the remote memory address. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_post(ucp_ep_h ep, ucp_atomic_post_op_t opcode, uint64_t value, + size_t op_size, uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Atomic operation requested for ucp_atomic_fetch + * + * @deprecated Use @ref ucp_atomic_op_nbx and @ref ucp_atomic_op_t instead. + * + * This enumeration defines which atomic memory operation should be performed + * by the ucp_atomic_fetch family of functions. All of these functions + * will fetch data from the remote node. + */ +typedef enum { + UCP_ATOMIC_FETCH_OP_FADD, /**< Atomic Fetch and add */ + UCP_ATOMIC_FETCH_OP_SWAP, /**< Atomic swap */ + UCP_ATOMIC_FETCH_OP_CSWAP, /**< Atomic conditional swap */ + UCP_ATOMIC_FETCH_OP_FAND, /**< Atomic Fetch and and */ + UCP_ATOMIC_FETCH_OP_FOR, /**< Atomic Fetch and or */ + UCP_ATOMIC_FETCH_OP_FXOR, /**< Atomic Fetch and xor */ + UCP_ATOMIC_FETCH_OP_LAST +} ucp_atomic_fetch_op_t; + + +/** + * @ingroup UCP_COMM + * @brief Post an atomic fetch operation. + * + * @deprecated Use @ref ucp_atomic_op_nbx with the flag + * @ref UCP_OP_ATTR_FIELD_REPLY_BUFFER instead. + * + * This routine will post an atomic fetch operation to remote memory. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. + * The routine is non-blocking and therefore returns immediately. However the + * actual atomic operation may be delayed. The atomic operation is not considered complete + * until the values in remote and local memory are completed. If the atomic operation + * completes immediately, the routine returns UCS_OK and the call-back routine + * @a cb is @b not invoked. If the operation is @b not completed immediately and no + * error is reported, then the UCP library will schedule invocation of the call-back + * routine @a cb upon completion of the atomic operation. 
In other words, the completion + * of an atomic operation can be signaled by the return code or execution of the call-back. + * + * @note The user should not modify any part of the @a result after this + * operation is called, until the operation completes. + * + * @param [in] ep UCP endpoint. + * @param [in] opcode One of @ref ucp_atomic_fetch_op_t. + * @param [in] value Source operand for atomic operation. In the case of CSWAP + * this is the conditional for the swap. For SWAP this is + * the value to be placed in remote memory. + * @param [inout] result Local memory address to store resulting fetch to. + * In the case of CSWAP the value in result will be + * swapped into the @a remote_addr if the condition + * is true. + * @param [in] op_size Size of value in bytes and pointer type for result + * @param [in] remote_addr Remote address to operate on. + * @param [in] rkey Remote key handle for the remote memory address. + * @param [in] cb Call-back function that is invoked whenever the + * send operation is completed. It is important to note + * that the call-back function is only invoked in a case when + * the operation cannot be completed in place. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_atomic_fetch_nb(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode, + uint64_t value, void *result, size_t op_size, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_WORKER + * + * @brief Flush outstanding AMO and RMA operations on the @ref ucp_worker_h + * "worker" + * + * @deprecated Use @ref ucp_worker_flush_nbx instead. 
+ * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_worker_h "worker". All the AMO and RMA operations issued on the + * @a worker prior to this call are completed both at the origin and at the + * target when this call returns. + * + * @note For description of the differences between @ref ucp_worker_flush_nb + * "flush" and @ref ucp_worker_fence "fence" operations please see + * @ref ucp_worker_fence "ucp_worker_fence()" + * + * @param [in] worker UCP worker. + * @param [in] flags Flags for flush operation. Reserved for future use. + * @param [in] cb Callback which will be called when the flush operation + * completes. + * + * @return NULL - The flush operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be completed + * in any point in time. The request handle is returned + * to the application in order to track progress. The + * application is responsible for releasing the handle + * using @ref ucp_request_free "ucp_request_free()" + * routine. 
+ */ +ucs_status_ptr_t ucp_worker_flush_nb(ucp_worker_h worker, unsigned flags, + ucp_send_callback_t cb); + + END_C_DECLS #endif diff --git a/src/ucp/core/ucp_am.c b/src/ucp/core/ucp_am.c index 7686467b55c..86294ec170a 100644 --- a/src/ucp/core/ucp_am.c +++ b/src/ucp/core/ucp_am.c @@ -1129,6 +1129,7 @@ UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_am_recv_data_nbx, req->recv.length = ucp_dt_length(datatype, count, buffer, &req->recv.state); req->recv.mem_type = mem_type; + req->recv.op_attr = param->op_attr_mask; req->recv.am.desc = desc; rts = data_desc; diff --git a/src/ucp/core/ucp_mm.c b/src/ucp/core/ucp_mm.c index 0be52678dec..a87515e4ab0 100644 --- a/src/ucp/core/ucp_mm.c +++ b/src/ucp/core/ucp_mm.c @@ -286,13 +286,18 @@ static inline int ucp_mem_map_is_allocate(const ucp_mem_map_params_t *params) (params->flags & UCP_MEM_MAP_ALLOCATE); } -void ucp_memh_dereg(ucp_context_h context, ucp_mem_h memh, ucp_md_map_t md_map) +static void ucp_memh_dereg(ucp_context_h context, ucp_mem_h memh, + ucp_md_map_t md_map) { ucp_md_index_t md_index; ucs_status_t status; /* Unregister from all memory domains */ ucs_for_each_bit(md_index, md_map) { + ucs_assertv(md_index != memh->alloc_md_index, + "memh %p: md_index %u alloc_md_index %u", memh, md_index, + memh->alloc_md_index); + ucs_trace("de-registering memh[%d]=%p", md_index, memh->uct[md_index]); ucs_assert(context->tl_mds[md_index].attr.cap.flags & UCT_MD_FLAG_REG); status = uct_md_mem_dereg(context->tl_mds[md_index].md, @@ -307,6 +312,33 @@ void ucp_memh_dereg(ucp_context_h context, ucp_mem_h memh, ucp_md_map_t md_map) } } +void ucp_memh_unmap(ucp_context_h context, ucp_mem_h memh, ucp_md_map_t md_map) +{ + uct_allocated_memory_t mem; + ucs_status_t status; + + mem.address = ucp_memh_address(memh); + mem.length = ucp_memh_length(memh); + mem.method = memh->alloc_method; + + if (mem.method == UCT_ALLOC_METHOD_MD) { + ucs_assert(memh->alloc_md_index != UCP_NULL_RESOURCE); + mem.md = context->tl_mds[memh->alloc_md_index].md; + 
mem.memh = memh->uct[memh->alloc_md_index]; + md_map &= ~UCS_BIT(memh->alloc_md_index); + } + + ucp_memh_dereg(context, memh, md_map); + + /* If the memory was also allocated, release it */ + if (memh->alloc_method != UCT_ALLOC_METHOD_LAST) { + status = uct_mem_free(&mem); + if (status != UCS_OK) { + ucs_warn("failed to free: %s", ucs_status_string(status)); + } + } +} + static ucs_status_t ucp_memh_register(ucp_context_h context, ucp_mem_h memh, ucp_md_map_t md_map, void *address, size_t length, unsigned uct_flags) @@ -950,32 +982,9 @@ static ucs_status_t ucp_mem_rcache_mem_reg_cb(void *context, ucs_rcache_t *rcach static void ucp_mem_rcache_mem_dereg_cb(void *ctx, ucs_rcache_t *rcache, ucs_rcache_region_t *rregion) { - ucp_mem_h memh = ucs_derived_of(rregion, ucp_mem_t); - ucp_md_map_t md_map = memh->md_map; - ucp_context_h context = ctx; - uct_allocated_memory_t mem; - ucs_status_t status; - - mem.address = ucp_memh_address(memh); - mem.length = ucp_memh_length(memh); - mem.method = memh->alloc_method; - - if (mem.method == UCT_ALLOC_METHOD_MD) { - ucs_assert(memh->alloc_md_index != UCP_NULL_RESOURCE); - mem.md = context->tl_mds[memh->alloc_md_index].md; - mem.memh = memh->uct[memh->alloc_md_index]; - md_map &= ~UCS_BIT(memh->alloc_md_index); - } - - ucp_memh_dereg(context, memh, md_map); + ucp_mem_h memh = ucs_derived_of(rregion, ucp_mem_t); - /* If the memory was also allocated, release it */ - if (memh->alloc_method != UCT_ALLOC_METHOD_LAST) { - status = uct_mem_free(&mem); - if (status != UCS_OK) { - ucs_warn("failed to free: %s", ucs_status_string(status)); - } - } + ucp_memh_unmap((ucp_context_h)ctx, memh, memh->md_map); } static void ucp_mem_rcache_dump_region_cb(void *rcontext, ucs_rcache_t *rcache, diff --git a/src/ucp/core/ucp_mm.h b/src/ucp/core/ucp_mm.h index 9e6f0340008..241061f9f48 100644 --- a/src/ucp/core/ucp_mm.h +++ b/src/ucp/core/ucp_mm.h @@ -134,7 +134,8 @@ ucs_status_t ucp_memh_get_slow(ucp_context_h context, void *address, ucp_md_map_t 
reg_md_map, unsigned uct_flags, ucp_mem_h *memh_p); -void ucp_memh_dereg(ucp_context_h context, ucp_mem_h memh, ucp_md_map_t md_map); +void ucp_memh_unmap(ucp_context_h context, ucp_mem_h memh, + ucp_md_map_t md_map); ucs_status_t ucp_mem_rcache_init(ucp_context_h context); diff --git a/src/ucp/core/ucp_mm.inl b/src/ucp/core/ucp_mm.inl index e2a7f02b708..c36e308853c 100644 --- a/src/ucp/core/ucp_mm.inl +++ b/src/ucp/core/ucp_mm.inl @@ -67,7 +67,7 @@ ucp_memh_put(ucp_context_h context, ucp_mem_h memh, int invalidate) } if (ucs_unlikely(context->rcache == NULL)) { - ucp_memh_dereg(context, memh, memh->md_map); + ucp_memh_unmap(context, memh, memh->md_map); ucs_free(memh); return; } diff --git a/src/ucp/core/ucp_request.h b/src/ucp/core/ucp_request.h index 7d779d9feb8..cc3d28cce82 100644 --- a/src/ucp/core/ucp_request.h +++ b/src/ucp/core/ucp_request.h @@ -378,6 +378,7 @@ struct ucp_request { ucp_datatype_t datatype; /* Receive type */ size_t length; /* Total length, in bytes */ ucs_memory_type_t mem_type; /* Memory type */ + uint32_t op_attr; /* Operation attributes */ ucp_dt_state_t state; ucp_worker_t *worker; uct_tag_context_t uct_ctx; /* Transport offload context */ diff --git a/src/ucp/rndv/proto_rndv.c b/src/ucp/rndv/proto_rndv.c index 08a151f3dfc..085dde1e7da 100644 --- a/src/ucp/rndv/proto_rndv.c +++ b/src/ucp/rndv/proto_rndv.c @@ -186,6 +186,7 @@ ucp_proto_rndv_ctrl_init(const ucp_proto_rndv_ctrl_init_params_t *params) ucp_memory_info_t mem_info; ucs_status_t status; double ctrl_latency; + uint16_t op_flags; ucs_assert(params->super.flags & UCP_PROTO_COMMON_INIT_FLAG_RESPONSE); ucs_assert(!(params->super.flags & UCP_PROTO_COMMON_INIT_FLAG_SINGLE_FRAG)); @@ -199,12 +200,16 @@ ucp_proto_rndv_ctrl_init(const ucp_proto_rndv_ctrl_init_params_t *params) return UCS_ERR_NO_ELEM; } + op_flags = UCP_PROTO_SELECT_OP_FLAG_INTERNAL | + (select_param->op_flags & + ucp_proto_select_op_attr_to_flags(UCP_OP_ATTR_FLAG_MULTI_SEND)); + /* Construct select parameter for the 
remote protocol */ if (params->super.super.rkey_config_key == NULL) { /* Remote buffer is unknown, assume same params as local */ remote_select_param = *select_param; remote_select_param.op_id = params->remote_op_id; - remote_select_param.op_flags = UCP_PROTO_SELECT_OP_FLAG_INTERNAL; + remote_select_param.op_flags = op_flags; } else { /* If we know the remote buffer parameters, these are actually the local * parameters for the remote protocol @@ -462,8 +467,9 @@ void ucp_proto_rndv_bulk_query(const ucp_proto_query_params_t *params, static ucs_status_t ucp_proto_rndv_send_reply(ucp_worker_h worker, ucp_request_t *req, ucp_operation_id_t op_id, uint32_t op_attr_mask, - size_t length, const void *rkey_buffer, - size_t rkey_length, uint8_t sg_count) + uint16_t op_flags, size_t length, + const void *rkey_buffer, size_t rkey_length, + uint8_t sg_count) { ucp_ep_h ep = req->send.ep; ucp_worker_cfg_index_t rkey_cfg_index; @@ -492,7 +498,7 @@ ucp_proto_rndv_send_reply(ucp_worker_h worker, ucp_request_t *req, rkey = NULL; } - ucp_proto_select_param_init(&sel_param, op_id, op_attr_mask, 0, + ucp_proto_select_param_init(&sel_param, op_id, op_attr_mask, op_flags, req->send.state.dt_iter.dt_class, &req->send.state.dt_iter.mem_info, sg_count); @@ -575,7 +581,8 @@ void ucp_proto_rndv_receive_start(ucp_worker_h worker, ucp_request_t *recv_req, &sg_count); } - status = ucp_proto_rndv_send_reply(worker, req, op_id, 0, rts->size, + status = ucp_proto_rndv_send_reply(worker, req, op_id, + recv_req->recv.op_attr, 0, rts->size, rkey_buffer, rkey_length, sg_count); if (status != UCS_OK) { ucp_datatype_iter_cleanup(&req->send.state.dt_iter, UCP_DT_MASK_ALL); @@ -592,8 +599,9 @@ void ucp_proto_rndv_receive_start(ucp_worker_h worker, ucp_request_t *recv_req, static ucs_status_t ucp_proto_rndv_send_start(ucp_worker_h worker, ucp_request_t *req, - uint32_t op_attr_mask, const ucp_rndv_rtr_hdr_t *rtr, - size_t header_length, uint8_t sg_count) + uint32_t op_attr_mask, uint32_t op_flags, + const 
ucp_rndv_rtr_hdr_t *rtr, size_t header_length, + uint8_t sg_count) { ucs_status_t status; size_t rkey_length; @@ -608,8 +616,8 @@ ucp_proto_rndv_send_start(ucp_worker_h worker, ucp_request_t *req, ucs_assert(rtr->size == req->send.state.dt_iter.length); status = ucp_proto_rndv_send_reply(worker, req, UCP_OP_ID_RNDV_SEND, - op_attr_mask, rtr->size, rtr + 1, - rkey_length, sg_count); + op_attr_mask, op_flags, rtr->size, + rtr + 1, rkey_length, sg_count); if (status != UCS_OK) { return status; } @@ -641,6 +649,7 @@ ucp_proto_rndv_handle_rtr(void *arg, void *data, size_t length, unsigned flags) const ucp_rndv_rtr_hdr_t *rtr = data; ucp_request_t *req, *freq; ucs_status_t status; + uint32_t op_flags; uint8_t sg_count; UCP_SEND_REQUEST_GET_BY_ID(&req, worker, rtr->sreq_id, 0, return UCS_OK, @@ -652,6 +661,8 @@ ucp_proto_rndv_handle_rtr(void *arg, void *data, size_t length, unsigned flags) /* RTR covers the whole send request - use the send request directly */ ucs_assert(req->flags & UCP_REQUEST_FLAG_PROTO_INITIALIZED); + op_flags = req->send.proto_config->select_param.op_flags; + if (rtr->size == req->send.state.dt_iter.length) { /* RTR covers the whole send request - use the send request directly */ ucs_assert(rtr->offset == 0); @@ -662,8 +673,8 @@ ucp_proto_rndv_handle_rtr(void *arg, void *data, size_t length, unsigned flags) req->flags &= ~UCP_REQUEST_FLAG_PROTO_INITIALIZED; sg_count = req->send.proto_config->select_param.sg_count; - status = ucp_proto_rndv_send_start(worker, req, 0, rtr, length, - sg_count); + status = ucp_proto_rndv_send_start(worker, req, 0, op_flags, rtr, + length, sg_count); if (status != UCS_OK) { goto err_request_fail; } @@ -688,8 +699,8 @@ ucp_proto_rndv_handle_rtr(void *arg, void *data, size_t length, unsigned flags) * TODO can rndv/ppln be selected here (and not just single frag)? 
*/ status = ucp_proto_rndv_send_start(worker, freq, - UCP_OP_ATTR_FLAG_MULTI_SEND, rtr, - length, sg_count); + UCP_OP_ATTR_FLAG_MULTI_SEND, + op_flags, rtr, length, sg_count); if (status != UCS_OK) { goto err_put_freq; } diff --git a/src/ucp/tag/tag_recv.c b/src/ucp/tag/tag_recv.c index fd933054d92..1086797858d 100644 --- a/src/ucp/tag/tag_recv.c +++ b/src/ucp/tag/tag_recv.c @@ -132,7 +132,7 @@ ucp_tag_recv_common(ucp_worker_h worker, void *buffer, size_t count, &req->recv.state); req->recv.mem_type = ucp_request_get_memory_type(worker->context, buffer, req->recv.length, param); - + req->recv.op_attr = param->op_attr_mask; req->recv.tag.tag = tag; req->recv.tag.tag_mask = tag_mask; if (param->op_attr_mask & UCP_OP_ATTR_FIELD_CALLBACK) { diff --git a/src/ucs/memory/rcache.c b/src/ucs/memory/rcache.c index c2fffb5fdbb..fd74d9a20a8 100644 --- a/src/ucs/memory/rcache.c +++ b/src/ucs/memory/rcache.c @@ -364,12 +364,10 @@ void ucs_mem_region_destroy_internal(ucs_rcache_t *rcache, if (region->flags & UCS_RCACHE_REGION_FLAG_REGISTERED) { UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_DEREGS, 1); - { - UCS_PROFILE_CODE("mem_dereg") { - rcache->params.ops->mem_dereg(rcache->params.context, rcache, - region); - } - } + UCS_PROFILE_NAMED_CALL_VOID_ALWAYS("mem_dereg", + rcache->params.ops->mem_dereg, + rcache->params.context, rcache, + region); } if (!(rcache->params.flags & UCS_RCACHE_FLAG_NO_PFN_CHECK) && @@ -733,7 +731,7 @@ ucs_rcache_check_overlap(ucs_rcache_t *rcache, ucs_pgt_addr_t *start, * TODO: currently rcache is optimized for the case where most of * the regions have same protection. 
*/ - mem_prot = UCS_PROFILE_CALL(ucs_get_mem_prot, *start, *end); + mem_prot = UCS_PROFILE_CALL_ALWAYS(ucs_get_mem_prot, *start, *end); if (!ucs_test_all_flags(mem_prot, *prot)) { ucs_rcache_region_trace(rcache, region, "do not merge "UCS_RCACHE_PROT_FMT @@ -897,10 +895,9 @@ ucs_rcache_create_region(ucs_rcache_t *rcache, void *address, size_t length, ++distribution_bin->count; distribution_bin->total_size += region_size; - region->status = status = - UCS_PROFILE_NAMED_CALL("mem_reg", rcache->params.ops->mem_reg, - rcache->params.context, rcache, arg, region, - merged ? UCS_RCACHE_MEM_REG_HIDE_ERRORS : 0); + region->status = status = UCS_PROFILE_NAMED_CALL_ALWAYS( + "mem_reg", rcache->params.ops->mem_reg, rcache->params.context, + rcache, arg, region, merged ? UCS_RCACHE_MEM_REG_HIDE_ERRORS : 0); if (status != UCS_OK) { if (merged) { /* failure may be due to merge, because memory of the merged diff --git a/src/ucs/profile/profile.c b/src/ucs/profile/profile.c index edc90fea7d4..3d7ab542a77 100644 --- a/src/ucs/profile/profile.c +++ b/src/ucs/profile/profile.c @@ -21,8 +21,8 @@ typedef struct ucs_profile_global_location { - ucs_profile_location_t super; /*< Location info */ - volatile int *loc_id_p; /*< Back-pointer to location index */ + ucs_profile_location_t super; /*< Location info */ + volatile ucs_profile_loc_id_t *loc_id_p; /*< Back-pointer to location index */ } ucs_profile_global_location_t; @@ -360,6 +360,20 @@ static void ucs_profile_thread_key_destr(void *data) ucs_profile_thread_finalize(ctx); } +static ucs_profile_loc_id_t +ucs_profile_location_id(ucs_profile_context_t *ctx, + ucs_profile_global_location_t *loc) +{ + size_t raw_loc_id = (loc - ctx->locations) + 1; /* Array index plus 1 */ + ucs_profile_loc_id_t loc_id; + + loc_id = raw_loc_id; + ucs_assertv_always(loc_id == raw_loc_id, + "profile location id overflow loc_id=%d raw_loc_id=%zu", + loc_id, raw_loc_id); + return loc_id; +} + /* * Register a profiling location - should be called once per
location in the * code, before the first record of each such location is made. @@ -378,7 +392,8 @@ static void ucs_profile_thread_key_destr(void *data) static UCS_F_NOINLINE int ucs_profile_get_location(ucs_profile_context_t *ctx, ucs_profile_type_t type, const char *name, const char *file, int line, - const char *function, volatile int *loc_id_p) + const char *function, + volatile ucs_profile_loc_id_t *loc_id_p) { ucs_profile_global_location_t *loc, *new_locations; int loc_id; @@ -393,12 +408,12 @@ ucs_profile_get_location(ucs_profile_context_t *ctx, ucs_profile_type_t type, /* Check if profiling is disabled */ if (!ctx->profile_mode) { - *loc_id_p = loc_id = 0; + *loc_id_p = loc_id = UCS_PROFILE_LOC_ID_DISABLED; goto out_unlock; } /* Location ID must be uninitialized */ - ucs_assert(*loc_id_p == -1); + ucs_assert(*loc_id_p == UCS_PROFILE_LOC_ID_UNKNOWN); ucs_profile_ctx_for_each_location(ctx, loc) { if ((type == loc->super.type) && (line == loc->super.line) && @@ -419,7 +434,7 @@ ucs_profile_get_location(ucs_profile_context_t *ctx, ucs_profile_type_t type, "profile_locations"); if (new_locations == NULL) { ucs_warn("failed to expand locations array"); - *loc_id_p = loc_id = 0; + *loc_id_p = loc_id = UCS_PROFILE_LOC_ID_DISABLED; goto out_unlock; } @@ -436,7 +451,7 @@ ucs_profile_get_location(ucs_profile_context_t *ctx, ucs_profile_type_t type, loc->loc_id_p = loc_id_p; out_found: - *loc_id_p = loc_id = (loc - ctx->locations) + 1; + *loc_id_p = loc_id = ucs_profile_location_id(ctx, loc); ucs_memory_cpu_store_fence(); out_unlock: pthread_mutex_unlock(&ctx->mutex); @@ -472,27 +487,27 @@ static void ucs_profile_thread_expand_locations(ucs_profile_context_t *ctx, void ucs_profile_record(ucs_profile_context_t *ctx, ucs_profile_type_t type, const char *name, uint32_t param32, uint64_t param64, const char *file, int line, const char *function, - volatile int *loc_id_p) + volatile ucs_profile_loc_id_t *loc_id_p) { ucs_profile_thread_location_t *loc; 
ucs_profile_thread_context_t *thread_ctx; + ucs_profile_loc_id_t loc_id; ucs_profile_record_t *rec; ucs_time_t current_time; - int loc_id; /* If the location id is -1 or 0, need to re-read it with lock held */ loc_id = *loc_id_p; if (ucs_unlikely(loc_id <= 0)) { loc_id = ucs_profile_get_location(ctx, type, name, file, line, function, loc_id_p); - if (loc_id == 0) { + if (loc_id == UCS_PROFILE_LOC_ID_DISABLED) { return; } } ucs_memory_cpu_load_fence(); - ucs_assert(*loc_id_p != 0); + ucs_assert(*loc_id_p != UCS_PROFILE_LOC_ID_DISABLED); ucs_assert(ctx->profile_mode != 0); /* Get thread-specific profiling context */ diff --git a/src/ucs/profile/profile_defs.h b/src/ucs/profile/profile_defs.h index 67f63b51072..257741700ea 100644 --- a/src/ucs/profile/profile_defs.h +++ b/src/ucs/profile/profile_defs.h @@ -14,10 +14,10 @@ BEGIN_C_DECLS -/** @file profile_defs.h */ - -#define UCS_PROFILE_STACK_MAX 64 -#define UCS_PROFILE_FILE_VERSION 2u +#define UCS_PROFILE_STACK_MAX 64 +#define UCS_PROFILE_FILE_VERSION 2u +#define UCS_PROFILE_LOC_ID_UNKNOWN -1 +#define UCS_PROFILE_LOC_ID_DISABLED 0 /** @@ -117,6 +117,7 @@ typedef struct ucs_profile_record { } UCS_S_PACKED ucs_profile_record_t; typedef struct ucs_profile_context ucs_profile_context_t; +typedef short ucs_profile_loc_id_t; extern const char *ucs_profile_mode_names[]; @@ -153,6 +154,254 @@ void ucs_profile_cleanup(ucs_profile_context_t *ctx); void ucs_profile_dump(ucs_profile_context_t *ctx); +/* + * Store a new record with the given data. + * SHOULD NOT be used directly - use UCS_PROFILE macros instead. + * + * @param [in] ctx Global profile context. + * @param [in] type Location type. + * @param [in] name Location name. + * @param [in] param32 custom 32-bit parameter. + * @param [in] param64 custom 64-bit parameter. + * @param [in] file Source file name. + * @param [in] line Source line number. + * @param [in] function Calling function name. + * @param [in,out] loc_id_p Variable used to maintain the location ID. 
+ */ +void ucs_profile_record(ucs_profile_context_t *ctx, ucs_profile_type_t type, + const char *name, uint32_t param32, uint64_t param64, + const char *file, int line, const char *function, + volatile ucs_profile_loc_id_t *loc_id_p); + + +/** + * Record a profiling event. + * + * @param _ctx Profiling context. + * @param _type Event type. + * @param _name Event name. + * @param _param32 Custom 32-bit parameter. + * @param _param64 Custom 64-bit parameter. + */ +#define UCS_PROFILE_CTX_RECORD_ALWAYS(_ctx, _type, _name, _param64, _param32) \ + { \ + static ucs_profile_loc_id_t loc_id = UCS_PROFILE_LOC_ID_UNKNOWN; \ + if (loc_id != UCS_PROFILE_LOC_ID_DISABLED) { \ + ucs_profile_record(_ctx, _type, _name, _param64, _param32, \ + __FILE__, __LINE__, __FUNCTION__, &loc_id); \ + } \ + } + + +/** + * Profile a block of code. + * + * @param _ctx Profiling context. + * @param _name Event name. + * @param _code Code block to run and profile. + */ +#define UCS_PROFILE_CTX_CODE_ALWAYS(_ctx, _name, _code) \ + { \ + UCS_PROFILE_CTX_RECORD_ALWAYS(_ctx, UCS_PROFILE_TYPE_SCOPE_BEGIN, "", \ + 0, 0); \ + ucs_compiler_fence(); \ + _code; \ + ucs_compiler_fence(); \ + UCS_PROFILE_CTX_RECORD_ALWAYS(_ctx, UCS_PROFILE_TYPE_SCOPE_END, _name, \ + 0, 0); \ + } + + +/** + * Create a profiled function. + * + * Usage: + * UCS_PROFILE_CTX_FUNC_ALWAYS(ctx, , , (a, b), int a, char b) + * + * @param _ctx Profiling context. + * @param _ret_type Function return type. + * @param _name Function name. + * @param _arglist List of argument *names* only. + * @param ... Argument declarations (with types). + */ +#define UCS_PROFILE_CTX_FUNC_ALWAYS(_ctx, _ret_type, _name, _arglist, ...) 
\ + static UCS_F_ALWAYS_INLINE _ret_type _name##_inner(__VA_ARGS__); \ + \ + _ret_type _name(__VA_ARGS__) \ + { \ + _ret_type _ret; \ + \ + UCS_PROFILE_CTX_CODE_ALWAYS(_ctx, #_name, \ + _ret = _name##_inner _arglist); \ + return _ret; \ + } \ + static UCS_F_ALWAYS_INLINE _ret_type _name##_inner(__VA_ARGS__) + + +/** + * Create a profiled function whose return type is void. + * + * Usage: + * UCS_PROFILE_CTX_FUNC_VOID_ALWAYS(ctx, , (a, b), int a, char b) + * + * @param _ctx Profiling context. + * @param _name Function name. + * @param _arglist List of argument *names* only. + * @param ... Argument declarations (with types). + */ +#define UCS_PROFILE_CTX_FUNC_VOID_ALWAYS(_ctx, _name, _arglist, ...) \ + static UCS_F_ALWAYS_INLINE void _name##_inner(__VA_ARGS__); \ + \ + void _name(__VA_ARGS__) \ + { \ + UCS_PROFILE_CTX_CODE_ALWAYS(_ctx, #_name, _name##_inner _arglist); \ + } \ + static UCS_F_ALWAYS_INLINE void _name##_inner(__VA_ARGS__) + + +/* + * Profile a function call, and specify explicit name string for the profile. + * Useful when calling a function by a pointer. Uses default profile context. + * + * Usage: + * UCS_PROFILE_CTX_NAMED_CALL(ctx, "name", function, arg1, arg2) + * + * @param _name Name string for the profile. + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_CTX_NAMED_CALL_ALWAYS(_ctx, _name, _func, ...) \ + ({ \ + ucs_typeof(_func(__VA_ARGS__)) retval; \ + \ + UCS_PROFILE_CTX_CODE_ALWAYS(_ctx, _name, retval = _func(__VA_ARGS__)); \ + retval; \ + }) + + +/** + * Record a profiling sample event. + * + * @param _name Event name. + */ +#define UCS_PROFILE_SAMPLE_ALWAYS(_name) \ + UCS_PROFILE_CTX_RECORD_ALWAYS(ucs_profile_default_ctx, \ + UCS_PROFILE_TYPE_SAMPLE, (_name), 0, 0) + + +/** + * Declare a profiled scope of code. + * + * Usage: + * UCS_PROFILE_CODE_ALWAYS(, ) + * + * @param _name Scope name. 
+ */ +#define UCS_PROFILE_CODE_ALWAYS(_name, _code) \ + UCS_PROFILE_CTX_CODE_ALWAYS(ucs_profile_default_ctx, _name, _code) + + +/** + * Create a profiled function. Uses default profile context. + * + * Usage: + * UCS_PROFILE_FUNC_ALWAYS(, , (a, b), int a, char b) + * + * @param _ret_type Function return type. + * @param _name Function name. + * @param _arglist List of argument *names* only. + * @param ... Argument declarations (with types). + */ +#define UCS_PROFILE_FUNC_ALWAYS(_ret_type, _name, _arglist, ...) \ + UCS_PROFILE_CTX_FUNC_ALWAYS(ucs_profile_default_ctx, _ret_type, _name, \ + _arglist, ##__VA_ARGS__) + + +/** + * Create a profiled function whose return type is void. Uses default profile + * context. + * + * Usage: + * UCS_PROFILE_FUNC_VOID_ALWAYS(, (a, b), int a, char b) + * + * @param _name Function name. + * @param _arglist List of argument *names* only. + * @param ... Argument declarations (with types). + */ +#define UCS_PROFILE_FUNC_VOID_ALWAYS(_name, _arglist, ...) \ + UCS_PROFILE_CTX_FUNC_VOID_ALWAYS(ucs_profile_default_ctx, _name, _arglist, \ + ##__VA_ARGS__) + + +/* + * Profile a function call, and specify explicit name string for the event. + * Useful when calling a function by a pointer. Uses default profile context. + * + * Usage: + * ret = UCS_PROFILE_NAMED_CALL_ALWAYS("name", function, arg1, arg2) + * + * @param _name Name string for the profile. + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_NAMED_CALL_ALWAYS(_name, _func, ...) \ + UCS_PROFILE_CTX_NAMED_CALL_ALWAYS(ucs_profile_default_ctx, _name, _func, \ + ##__VA_ARGS__) + + +/* + * Profile a function call. + * + * Usage: + * ret = UCS_PROFILE_CALL_ALWAYS(function, arg1, arg2) + * + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_CALL_ALWAYS(_func, ...) 
\ + UCS_PROFILE_NAMED_CALL_ALWAYS(#_func, _func, ##__VA_ARGS__) + + +/* + * Profile a void function call, and specify explicit name string for the event. + * Useful when calling a function by a pointer. Uses default profile context. + * + * Usage: + * UCS_PROFILE_NAMED_CALL_VOID_ALWAYS("name", function, arg1, arg2) + * + * @param _name Name string for the profile. + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_NAMED_CALL_VOID_ALWAYS(_name, _func, ...) \ + UCS_PROFILE_CODE_ALWAYS(_name, _func(__VA_ARGS__)) + + +/* + * Profile a void function call. + * + * Usage: + * UCS_PROFILE_CALL_VOID_ALWAYS(function, arg1, arg2) + * + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_CALL_VOID_ALWAYS(_func, ...) \ + UCS_PROFILE_NAMED_CALL_VOID_ALWAYS(#_func, _func, __VA_ARGS__) + + +/* + * Profile a request progress event. + * + * @param _req Request pointer. + * @param _name Event name. + * @param _param32 Custom 32-bit parameter. + */ +#define UCS_PROFILE_REQUEST_EVENT_ALWAYS(_req, _name, _param32) \ + UCS_PROFILE_CTX_RECORD_ALWAYS(ucs_profile_default_ctx, \ + UCS_PROFILE_TYPE_REQUEST_EVENT, (_name), \ + (_param32), (uintptr_t)(_req)); + END_C_DECLS #endif diff --git a/src/ucs/profile/profile_off.h b/src/ucs/profile/profile_off.h index 06df5a13765..6e703856cad 100644 --- a/src/ucs/profile/profile_off.h +++ b/src/ucs/profile/profile_off.h @@ -11,21 +11,17 @@ #include - -#define UCS_PROFILE(...) UCS_EMPTY_STATEMENT -#define UCS_PROFILE_SAMPLE(_name) UCS_EMPTY_STATEMENT -#define UCS_PROFILE_SCOPE_BEGIN() UCS_EMPTY_STATEMENT -#define UCS_PROFILE_SCOPE_END(_name) UCS_EMPTY_STATEMENT -#define UCS_PROFILE_CODE(_name) -#define UCS_PROFILE_FUNC(_ret_type, _name, _arglist, ...) _ret_type _name(__VA_ARGS__) -#define UCS_PROFILE_FUNC_VOID(_name, _arglist, ...) void _name(__VA_ARGS__) -#define UCS_PROFILE_NAMED_CALL(_name, _func, ...) _func(__VA_ARGS__) -#define UCS_PROFILE_CALL(_func, ...) 
_func(__VA_ARGS__) -#define UCS_PROFILE_NAMED_CALL_VOID(_name, _func, ...) _func(__VA_ARGS__) -#define UCS_PROFILE_CALL_VOID(_func, ...) _func(__VA_ARGS__) -#define UCS_PROFILE_REQUEST_NEW(...) UCS_EMPTY_STATEMENT -#define UCS_PROFILE_REQUEST_EVENT(...) UCS_EMPTY_STATEMENT -#define UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(...) UCS_EMPTY_STATEMENT -#define UCS_PROFILE_REQUEST_FREE(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_SAMPLE(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_CODE(_, _code) _code +#define UCS_PROFILE_FUNC(_ret_type, _name, _, ...) _ret_type _name(__VA_ARGS__) +#define UCS_PROFILE_FUNC_VOID(_name, _, ...) void _name(__VA_ARGS__) +#define UCS_PROFILE_NAMED_CALL(_name, _func, ...) _func(__VA_ARGS__) +#define UCS_PROFILE_CALL(_func, ...) _func(__VA_ARGS__) +#define UCS_PROFILE_NAMED_CALL_VOID UCS_PROFILE_NAMED_CALL +#define UCS_PROFILE_CALL_VOID UCS_PROFILE_CALL +#define UCS_PROFILE_REQUEST_NEW(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_REQUEST_EVENT(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_REQUEST_FREE(...) 
UCS_EMPTY_STATEMENT #endif diff --git a/src/ucs/profile/profile_on.h b/src/ucs/profile/profile_on.h index 062e3498366..4daba624884 100644 --- a/src/ucs/profile/profile_on.h +++ b/src/ucs/profile/profile_on.h @@ -11,277 +11,18 @@ #include #include -#include - BEGIN_C_DECLS -/** @file profile_on.h */ - -/* Helper macro */ -#define _UCS_PROFILE_CTX_RECORD(_ctx, _type, _name, _param64, _param32, _loc_id_p) \ - { \ - if (*(_loc_id_p) != 0) { \ - ucs_profile_record((_ctx), (_type), (_name), (_param64), \ - (_param32), __FILE__, __LINE__, __FUNCTION__, \ - (_loc_id_p)); \ - } \ - } - - -/* Helper macro */ -#define __UCS_PROFILE_CTX_CODE(_ctx, _name, _loop_var) \ - int _loop_var ; \ - for (({ UCS_PROFILE_CTX_SCOPE_BEGIN((_ctx)); _loop_var = 1;}); \ - _loop_var; \ - ({ UCS_PROFILE_CTX_SCOPE_END((_ctx), (_name)); _loop_var = 0;})) - - -/* Helper macro */ -#define _UCS_PROFILE_CTX_CODE(_ctx, _name, _var_suffix) \ - __UCS_PROFILE_CTX_CODE(_ctx, _name, UCS_PP_TOKENPASTE(loop, _var_suffix)) - - -/** - * Record a profiling event. - * - * @param _ctx Profiling context. - * @param _type Event type. - * @param _name Event name. - * @param _param32 Custom 32-bit parameter. - * @param _param64 Custom 64-bit parameter. - */ -#define UCS_PROFILE_CTX_RECORD(_ctx, _type, _name, _param32, _param64) \ - { \ - static int loc_id = -1; \ - _UCS_PROFILE_CTX_RECORD((_ctx), (_type), (_name), (_param32), \ - (_param64), &loc_id); \ - } - - -/** - * Record a profiling sample event. - * - * @param _name Event name. - */ -#define UCS_PROFILE_SAMPLE(_name) \ - UCS_PROFILE_CTX_RECORD(ucs_profile_default_ctx, UCS_PROFILE_TYPE_SAMPLE, \ - (_name), 0, 0) - - -/** - * Record a scope-begin profiling event. - * - * @param _ctx Profiling context. - */ -#define UCS_PROFILE_CTX_SCOPE_BEGIN(_ctx) \ - { \ - UCS_PROFILE_CTX_RECORD((_ctx), UCS_PROFILE_TYPE_SCOPE_BEGIN, "", 0, 0); \ - ucs_compiler_fence(); \ - } - - -/** - * Record a scope-end profiling event. - * - * @param _ctx Profiling context. 
- * @param _name Scope name. - */ -#define UCS_PROFILE_CTX_SCOPE_END(_ctx, _name) \ - { \ - ucs_compiler_fence(); \ - UCS_PROFILE_CTX_RECORD((_ctx), UCS_PROFILE_TYPE_SCOPE_END, _name, 0, 0); \ - } - - -/** - * Declare a profiled scope of code. - * - * Usage: - * UCS_PROFILE_CODE() { - * - * } - * - * @param _name Scope name. - */ -#define UCS_PROFILE_CODE(_name) \ - _UCS_PROFILE_CTX_CODE(ucs_profile_default_ctx, _name, UCS_PP_UNIQUE_ID) - - -/** - * Create a profiled function. - * - * Usage: - * _UCS_PROFILE_CTX_FUNC(ctx, , , (a, b), int a, char b) - * - * @param _ctx Profiling context. - * @param _ret_type Function return type. - * @param _name Function name. - * @param _arglist List of argument *names* only. - * @param ... Argument declarations (with types). - */ -#define _UCS_PROFILE_CTX_FUNC(_ctx, _ret_type, _name, _arglist, ...) \ - static UCS_F_ALWAYS_INLINE _ret_type _name##_inner(__VA_ARGS__); \ - \ - _ret_type _name(__VA_ARGS__) \ - { \ - _ret_type _ret; \ - UCS_PROFILE_CTX_SCOPE_BEGIN(_ctx); \ - _ret = _name##_inner _arglist; \ - UCS_PROFILE_CTX_SCOPE_END(_ctx, #_name); \ - return _ret; \ - } \ - static UCS_F_ALWAYS_INLINE _ret_type _name##_inner(__VA_ARGS__) - - -/** - * Create a profiled function. Uses default profile context. - * - * Usage: - * UCS_PROFILE_FUNC(, , (a, b), int a, char b) - * - * @param _ret_type Function return type. - * @param _name Function name. - * @param _arglist List of argument *names* only. - * @param ... Argument declarations (with types). - */ -#define UCS_PROFILE_FUNC(_ret_type, _name, _arglist, ...) \ - _UCS_PROFILE_CTX_FUNC(ucs_profile_default_ctx, _ret_type, _name, _arglist, ## __VA_ARGS__) - - -/** - * Create a profiled function whose return type is void. - * - * Usage: - * _UCS_PROFILE_CTX_FUNC_VOID(ctx, , (a, b), int a, char b) - * - * @param _ctx Profiling context. - * @param _name Function name. - * @param _arglist List of argument *names* only. - * @param ... Argument declarations (with types). 
- */ -#define _UCS_PROFILE_CTX_FUNC_VOID(_ctx, _name, _arglist, ...) \ - static UCS_F_ALWAYS_INLINE void _name##_inner(__VA_ARGS__); \ - \ - void _name(__VA_ARGS__) { \ - UCS_PROFILE_CTX_SCOPE_BEGIN((_ctx)); \ - _name##_inner _arglist; \ - UCS_PROFILE_CTX_SCOPE_END((_ctx), #_name); \ - } \ - static UCS_F_ALWAYS_INLINE void _name##_inner(__VA_ARGS__) - - -/** - * Create a profiled function whose return type is void. Uses default profile - * context. - * - * Usage: - * UCS_PROFILE_FUNC_VOID(, (a, b), int a, char b) - * - * @param _name Function name. - * @param _arglist List of argument *names* only. - * @param ... Argument declarations (with types). - */ -#define UCS_PROFILE_FUNC_VOID(_name, _arglist, ...) \ - _UCS_PROFILE_CTX_FUNC_VOID(ucs_profile_default_ctx, _name, _arglist, ## __VA_ARGS__) - - -/* - * Profile a function call, and specify explicit name string for the profile. - * Useful when calling a function by a pointer. Uses default profile context. - * - * Usage: - * _UCS_PROFILE_CTX_NAMED_CALL(ctx, "name", function, arg1, arg2) - * - * @param _name Name string for the profile. - * @param _func Function name. - * @param ... Function call arguments. - */ -#define _UCS_PROFILE_CTX_NAMED_CALL(_ctx, _name, _func, ...) \ - ({ \ - ucs_typeof(_func(__VA_ARGS__)) retval; \ - UCS_PROFILE_CTX_SCOPE_BEGIN((_ctx)); \ - retval = _func(__VA_ARGS__); \ - UCS_PROFILE_CTX_SCOPE_END((_ctx), _name); \ - retval; \ - }) - - -/* - * Profile a function call, and specify explicit name string for the profile. - * Useful when calling a function by a pointer. Uses default profile context. - * - * Usage: - * UCS_PROFILE_NAMED_CALL("name", function, arg1, arg2) - * - * @param _name Name string for the profile. - * @param _func Function name. - * @param ... Function call arguments. - */ -#define UCS_PROFILE_NAMED_CALL(_name, _func, ...) \ - _UCS_PROFILE_CTX_NAMED_CALL(ucs_profile_default_ctx, _name, _func, ## __VA_ARGS__) - - -/* - * Profile a function call. 
- * - * Usage: - * UCS_PROFILE_CALL(function, arg1, arg2) - * - * @param _func Function name. - * @param ... Function call arguments. - */ -#define UCS_PROFILE_CALL(_func, ...) \ - UCS_PROFILE_NAMED_CALL(#_func, _func, ## __VA_ARGS__) - - -/* - * Profile a function call which does not return a value, and specify explicit - * name string for the profile. Useful when calling a function by a pointer. - * - * Usage: - * _UCS_PROFILE_CTX_NAMED_CALL_VOID(ctx, "name", function, arg1, arg2) - * - * @param _ctx Profiling context. - * @param _name Name string for the profile. - * @param _func Function name. - * @param ... Function call arguments. - */ -#define _UCS_PROFILE_CTX_NAMED_CALL_VOID(_ctx, _name, _func, ...) \ - { \ - UCS_PROFILE_CTX_SCOPE_BEGIN((_ctx)); \ - _func(__VA_ARGS__); \ - UCS_PROFILE_CTX_SCOPE_END((_ctx), _name); \ - } - - -/* - * Profile a function call which does not return a value, and specify explicit - * name string for the profile. Useful when calling a function by a pointer. - * Uses default profile context. - * - * Usage: - * UCS_PROFILE_NAMED_CALL_VOID("name", function, arg1, arg2) - * - * @param _name Name string for the profile. - * @param _func Function name. - * @param ... Function call arguments. - */ -#define UCS_PROFILE_NAMED_CALL_VOID(_name, _func, ...) \ - _UCS_PROFILE_CTX_NAMED_CALL_VOID(ucs_profile_default_ctx, _name, _func, ## __VA_ARGS__) - - -/* - * Profile a function call which does not return a value. - * - * Usage: - * UCS_PROFILE_CALL_VOID(function, arg1, arg2) - * - * @param _func Function name. - * @param ... Function call arguments. - */ -#define UCS_PROFILE_CALL_VOID(_func, ...) 
\ - UCS_PROFILE_NAMED_CALL_VOID(#_func, _func, ## __VA_ARGS__) +#define UCS_PROFILE_SAMPLE UCS_PROFILE_SAMPLE_ALWAYS +#define UCS_PROFILE_CODE UCS_PROFILE_CODE_ALWAYS +#define UCS_PROFILE_FUNC UCS_PROFILE_FUNC_ALWAYS +#define UCS_PROFILE_FUNC_VOID UCS_PROFILE_FUNC_VOID_ALWAYS +#define UCS_PROFILE_NAMED_CALL UCS_PROFILE_NAMED_CALL_ALWAYS +#define UCS_PROFILE_CALL UCS_PROFILE_CALL_ALWAYS +#define UCS_PROFILE_NAMED_CALL_VOID UCS_PROFILE_NAMED_CALL_VOID_ALWAYS +#define UCS_PROFILE_CALL_VOID UCS_PROFILE_CALL_VOID_ALWAYS +#define UCS_PROFILE_REQUEST_EVENT UCS_PROFILE_REQUEST_EVENT_ALWAYS /* @@ -292,20 +33,9 @@ BEGIN_C_DECLS * @param _param32 Custom 32-bit parameter. */ #define UCS_PROFILE_REQUEST_NEW(_req, _name, _param32) \ - UCS_PROFILE_CTX_RECORD(ucs_profile_default_ctx, UCS_PROFILE_TYPE_REQUEST_NEW, \ - (_name), (_param32), (uintptr_t)(_req)); - - -/* - * Profile a request progress event. - * - * @param _req Request pointer. - * @param _name Event name. - * @param _param32 Custom 32-bit parameter. - */ -#define UCS_PROFILE_REQUEST_EVENT(_req, _name, _param32) \ - UCS_PROFILE_CTX_RECORD(ucs_profile_default_ctx, UCS_PROFILE_TYPE_REQUEST_EVENT, \ - (_name), (_param32), (uintptr_t)(_req)); + UCS_PROFILE_CTX_RECORD_ALWAYS(ucs_profile_default_ctx, \ + UCS_PROFILE_TYPE_REQUEST_NEW, (_name), \ + (_param32), (uintptr_t)(_req)); /* @@ -328,28 +58,9 @@ BEGIN_C_DECLS * @param _req Request pointer. */ #define UCS_PROFILE_REQUEST_FREE(_req) \ - UCS_PROFILE_CTX_RECORD(ucs_profile_default_ctx, UCS_PROFILE_TYPE_REQUEST_FREE, \ - "", 0, (uintptr_t)(_req)); - - -/* - * Store a new record with the given data. - * SHOULD NOT be used directly - use UCS_PROFILE macros instead. - * @param [in] ctx Global profile context. - * @param [in] type Location type. - * @param [in] name Location name. - * @param [in] param32 custom 32-bit parameter. - * @param [in] param64 custom 64-bit parameter. - * @param [in] file Source file name. - * @param [in] line Source line number. 
- * @param [in] function Calling function name. - * @param [in,out] loc_id_p Variable used to maintain the location ID. - */ -void ucs_profile_record(ucs_profile_context_t *ctx, ucs_profile_type_t type, - const char *name, uint32_t param32, uint64_t param64, - const char *file, int line, const char *function, - volatile int *loc_id_p); - + UCS_PROFILE_CTX_RECORD_ALWAYS(ucs_profile_default_ctx, \ + UCS_PROFILE_TYPE_REQUEST_FREE, "", 0, \ + (uintptr_t)(_req)); END_C_DECLS diff --git a/src/uct/base/uct_mem.c b/src/uct/base/uct_mem.c index 330ba76f419..d474421da10 100644 --- a/src/uct/base/uct_mem.c +++ b/src/uct/base/uct_mem.c @@ -440,8 +440,9 @@ static inline uct_iface_mp_priv_t* uct_iface_mp_priv(ucs_mpool_t *mp) return (uct_iface_mp_priv_t*)ucs_mpool_priv(mp); } -UCS_PROFILE_FUNC(ucs_status_t, uct_iface_mp_chunk_alloc, (mp, size_p, chunk_p), - ucs_mpool_t *mp, size_t *size_p, void **chunk_p) +UCS_PROFILE_FUNC_ALWAYS(ucs_status_t, uct_iface_mp_chunk_alloc, + (mp, size_p, chunk_p), ucs_mpool_t *mp, size_t *size_p, + void **chunk_p) { uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; uct_iface_mp_chunk_hdr_t *hdr; @@ -469,8 +470,8 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_iface_mp_chunk_alloc, (mp, size_p, chunk_p), return UCS_OK; } -UCS_PROFILE_FUNC_VOID(uct_iface_mp_chunk_release, (mp, chunk), - ucs_mpool_t *mp, void *chunk) +UCS_PROFILE_FUNC_VOID_ALWAYS(uct_iface_mp_chunk_release, (mp, chunk), + ucs_mpool_t *mp, void *chunk) { uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; uct_iface_mp_chunk_hdr_t *hdr; diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index ccc54ecf44d..66093f0c47b 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -962,9 +963,11 @@ ucs_status_t uct_ib_iface_create_qp(uct_ib_iface_t *iface, uct_ib_iface_fill_attr(iface, attr); #if HAVE_DECL_IBV_CREATE_QP_EX - qp = ibv_create_qp_ex(dev->ibv_context, &attr->ibv); + 
qp = UCS_PROFILE_CALL_ALWAYS(ibv_create_qp_ex, dev->ibv_context, + &attr->ibv); #else - qp = ibv_create_qp(uct_ib_iface_md(iface)->pd, &attr->ibv); + qp = UCS_PROFILE_CALL_ALWAYS(ibv_create_qp, uct_ib_iface_md(iface)->pd, + &attr->ibv); #endif if (qp == NULL) { ucs_error("iface=%p: failed to create %s QP " diff --git a/src/uct/ib/base/ib_md.c b/src/uct/ib/base/ib_md.c index 41ed6a6a37c..37a345b6c83 100644 --- a/src/uct/ib/base/ib_md.c +++ b/src/uct/ib/base/ib_md.c @@ -317,8 +317,9 @@ void *uct_ib_md_mem_handle_thread_func(void *arg) while (ctx->len) { size = ucs_min(ctx->len, ctx->chunk); if (ctx->access != UCT_IB_MEM_DEREG) { - ctx->mr[mr_idx] = UCS_PROFILE_CALL(ibv_reg_mr, ctx->pd, ctx->addr, - size, ctx->access); + ctx->mr[mr_idx] = UCS_PROFILE_CALL_ALWAYS(ibv_reg_mr, ctx->pd, + ctx->addr, size, + ctx->access); if (ctx->mr[mr_idx] == NULL) { uct_ib_md_print_mem_reg_err_msg(ctx->addr, size, ctx->access, errno, ctx->silent); @@ -451,11 +452,10 @@ static ucs_status_t uct_ib_md_reg_mr(uct_ib_md_t *md, void *address, ucs_status_t status; if (length >= md->config.min_mt_reg) { - UCS_PROFILE_CODE("reg ksm") { - status = md->ops->reg_multithreaded(md, address, length, - access_flags, memh, mr_type, - silent); - } + status = UCS_PROFILE_NAMED_CALL_ALWAYS("reg_multithreaded", + md->ops->reg_multithreaded, md, + address, length, access_flags, + memh, mr_type, silent); if (status != UCS_ERR_UNSUPPORTED) { if (status == UCS_OK) { @@ -476,7 +476,8 @@ ucs_status_t uct_ib_reg_mr(struct ibv_pd *pd, void *addr, size_t length, { ucs_time_t UCS_V_UNUSED start_time = ucs_get_time(); struct ibv_mr *mr; - mr = UCS_PROFILE_CALL(ibv_reg_mr, pd, addr, length, access_flags); + + mr = UCS_PROFILE_CALL_ALWAYS(ibv_reg_mr, pd, addr, length, access_flags); if (mr == NULL) { uct_ib_md_print_mem_reg_err_msg(addr, length, access_flags, errno, silent); @@ -886,9 +887,9 @@ uct_ib_mkey_pack(uct_md_h uct_md, uct_mem_h uct_memh, (memh != md->global_odp)) { /* create UMR on-demand */ - 
UCS_PROFILE_CODE("reg atomic key") { - status = md->ops->reg_atomic_key(md, memh); - } + status = UCS_PROFILE_NAMED_CALL_ALWAYS("reg atomic key", + md->ops->reg_atomic_key, md, + memh); if (status == UCS_OK) { memh->flags |= UCT_IB_MEM_FLAG_ATOMIC_MR; diff --git a/src/uct/ib/dc/dc_mlx5.c b/src/uct/ib/dc/dc_mlx5.c index d2cc7e7556e..aaff83c1725 100644 --- a/src/uct/ib/dc/dc_mlx5.c +++ b/src/uct/ib/dc/dc_mlx5.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -351,7 +352,8 @@ static ucs_status_t uct_dc_mlx5_iface_create_dci(uct_dc_mlx5_iface_t *iface, dv_attr.dc_init_attr.dct_access_key = UCT_IB_KEY; uct_rc_mlx5_common_fill_dv_qp_attr(&iface->super, &attr.super.ibv, &dv_attr, UCS_BIT(UCT_IB_DIR_TX)); - qp = mlx5dv_create_qp(dev->ibv_context, &attr.super.ibv, &dv_attr); + qp = UCS_PROFILE_CALL_ALWAYS(mlx5dv_create_qp, dev->ibv_context, + &attr.super.ibv, &dv_attr); if (qp == NULL) { ucs_error("mlx5dv_create_qp("UCT_IB_IFACE_FMT", DCI): failed: %m", UCT_IB_IFACE_ARG(ib_iface)); diff --git a/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c b/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c index d543a647370..d0ab248362b 100644 --- a/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c +++ b/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c @@ -193,9 +193,11 @@ static ucs_status_t uct_ib_mlx5_devx_reg_ksm(uct_ib_mlx5_md_t *md, UCT_IB_MLX5DV_SET64(mkc, mkc, len, length); UCT_IB_MLX5DV_SET(create_mkey_in, in, translations_octword_actual_size, list_size); - mr = mlx5dv_devx_obj_create(md->super.dev.ibv_context, in, - uct_ib_mlx5_calc_mkey_inlen(list_size), - out, sizeof(out)); + mr = UCS_PROFILE_NAMED_CALL_ALWAYS("devx_create_mkey", + mlx5dv_devx_obj_create, + md->super.dev.ibv_context, in, + uct_ib_mlx5_calc_mkey_inlen(list_size), + out, sizeof(out)); if (mr == NULL) { ucs_debug("mlx5dv_devx_obj_create(CREATE_MKEY, mode=KSM) failed, syndrome %x: %m", UCT_IB_MLX5DV_GET(create_mkey_out, out, syndrome)); @@ -246,14 +248,14 @@ uct_ib_mlx5_devx_reg_ksm_data_contig(uct_ib_mlx5_md_t *md, uct_ib_mlx5_mr_t 
*mr, off_t off, struct mlx5dv_devx_obj **mr_p, uint32_t *mkey) { - intptr_t addr = (intptr_t)mr->super.ib->addr & - ~(UCT_IB_MD_MAX_MR_SIZE - 1); + intptr_t addr = (intptr_t)mr->super.ib->addr & ~(UCT_IB_MD_MAX_MR_SIZE - 1); /* FW requires indirect atomic MR addr and length to be aligned * to max supported atomic argument size */ - size_t length = ucs_align_up(mr->super.ib->length + - (intptr_t)mr->super.ib->addr - addr, - md->super.dev.atomic_align); - int list_size = ucs_div_round_up(length, UCT_IB_MD_MAX_MR_SIZE); + size_t length = ucs_align_up(mr->super.ib->length + + (intptr_t)mr->super.ib->addr - addr, + md->super.dev.atomic_align); + /* add off to workaround CREATE_MKEY range check issue */ + int list_size = ucs_div_round_up(length + off, UCT_IB_MD_MAX_MR_SIZE); int i; char *in; void *klm; @@ -1169,7 +1171,7 @@ static ucs_status_t uct_ib_mlx5dv_check_dc(uct_ib_device_t *dev) dv_attr.dc_init_attr.dct_access_key = UCT_IB_KEY; /* create DCT qp successful means DC is supported */ - qp = mlx5dv_create_qp(ctx, &qp_attr, &dv_attr); + qp = UCS_PROFILE_CALL_ALWAYS(mlx5dv_create_qp, ctx, &qp_attr, &dv_attr); if (qp == NULL) { ucs_debug("%s: mlx5dv_create_qp(DCT) failed: %m", uct_ib_device_name(dev)); diff --git a/src/uct/ib/rc/accel/rc_mlx5_common.c b/src/uct/ib/rc/accel/rc_mlx5_common.c index d2ec19e28ea..dd11c7ab19c 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_common.c +++ b/src/uct/ib/rc/accel/rc_mlx5_common.c @@ -14,6 +14,7 @@ #include #include #include +#include ucs_config_field_t uct_rc_mlx5_common_config_table[] = { @@ -341,7 +342,7 @@ uct_rc_mlx5_verbs_create_cmd_qp(uct_rc_mlx5_iface_common_t *iface) qp_init_attr.srq = iface->rx.srq.verbs.srq; qp_init_attr.cap.max_send_wr = iface->tm.cmd_qp_len; - qp = ibv_create_qp(md->pd, &qp_init_attr); + qp = UCS_PROFILE_CALL_ALWAYS(ibv_create_qp, md->pd, &qp_init_attr); if (qp == NULL) { ucs_error("failed to create TM control QP: %m"); goto err_rd; diff --git a/src/uct/ib/rc/accel/rc_mlx5_common.h 
b/src/uct/ib/rc/accel/rc_mlx5_common.h index 2184ff098d9..6672cb67f51 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_common.h +++ b/src/uct/ib/rc/accel/rc_mlx5_common.h @@ -430,7 +430,7 @@ typedef struct uct_rc_mlx5_iface_common_config { size_t mp_num_strides; } tm; unsigned exp_backoff; - uint8_t log_ack_req_freq; + unsigned log_ack_req_freq; UCS_CONFIG_STRING_ARRAY_FIELD(types) srq_topo; } uct_rc_mlx5_iface_common_config_t; diff --git a/src/uct/ib/rc/accel/rc_mlx5_iface.c b/src/uct/ib/rc/accel/rc_mlx5_iface.c index f166a805788..ae5612e1126 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_iface.c +++ b/src/uct/ib/rc/accel/rc_mlx5_iface.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "rc_mlx5.inl" @@ -349,7 +350,8 @@ ucs_status_t uct_rc_mlx5_iface_create_qp(uct_rc_mlx5_iface_common_t *iface, uct_rc_mlx5_common_fill_dv_qp_attr(iface, &attr->super.ibv, &dv_attr, UCS_BIT(UCT_IB_DIR_TX) | UCS_BIT(UCT_IB_DIR_RX)); - qp->verbs.qp = mlx5dv_create_qp(dev->ibv_context, &attr->super.ibv, &dv_attr); + qp->verbs.qp = UCS_PROFILE_CALL_ALWAYS(mlx5dv_create_qp, dev->ibv_context, + &attr->super.ibv, &dv_attr); if (qp->verbs.qp == NULL) { ucs_error("mlx5dv_create_qp("UCT_IB_IFACE_FMT"): failed: %m", UCT_IB_IFACE_ARG(ib_iface)); @@ -760,7 +762,8 @@ UCS_CLASS_INIT_FUNC(uct_rc_mlx5_iface_common_t, uct_iface_ops_t *tl_ops, self->super.config.fence_mode = (uct_rc_fence_mode_t)rc_config->fence_mode; self->super.rx.srq.quota = self->rx.srq.mask + 1; self->super.config.exp_backoff = mlx5_config->exp_backoff; - self->config.log_ack_req_freq = mlx5_config->log_ack_req_freq; + self->config.log_ack_req_freq = ucs_min(mlx5_config->log_ack_req_freq, + UCT_RC_MLX5_MAX_LOG_ACK_REQ_FREQ); if ((rc_config->fence_mode == UCT_RC_FENCE_MODE_WEAK) || ((rc_config->fence_mode == UCT_RC_FENCE_MODE_AUTO) && @@ -876,8 +879,6 @@ UCS_CLASS_INIT_FUNC(uct_rc_mlx5_iface_t, self->super.super.config.tx_moderation = ucs_min(config->super.tx_cq_moderation, self->super.tx.bb_max / 4); - 
self->super.config.log_ack_req_freq = ucs_min(config->super.log_ack_req_freq, - UCT_RC_MLX5_MAX_LOG_ACK_REQ_FREQ); status = uct_rc_init_fc_thresh(&config->super, &self->super.super); if (status != UCS_OK) { diff --git a/src/uct/ib/rc/base/rc_iface.h b/src/uct/ib/rc/base/rc_iface.h index 5763cb4db02..95dcb562e7a 100644 --- a/src/uct/ib/rc/base/rc_iface.h +++ b/src/uct/ib/rc/base/rc_iface.h @@ -174,8 +174,6 @@ struct uct_rc_iface_config { unsigned tx_cq_moderation; /* How many TX messages are batched to one CQE */ unsigned tx_cq_len; - unsigned log_ack_req_freq; /* Log of requests ack - frequency on DevX */ }; diff --git a/src/uct/ib/rdmacm/rdmacm_cm_ep.c b/src/uct/ib/rdmacm/rdmacm_cm_ep.c index 2f8deb73b88..7e49562d3f6 100644 --- a/src/uct/ib/rdmacm/rdmacm_cm_ep.c +++ b/src/uct/ib/rdmacm/rdmacm_cm_ep.c @@ -13,6 +13,7 @@ #include #include #include +#include const char* uct_rdmacm_cm_ep_str(uct_rdmacm_cm_ep_t *cep, char *str, @@ -341,7 +342,7 @@ uct_rdmacm_cm_create_dummy_qp(uct_rdmacm_cm_device_context_t *ctx, qp_init_attr.cap.max_send_sge = 1; qp_init_attr.cap.max_recv_sge = 1; - qp = ibv_create_qp(cep->id->pd, &qp_init_attr); + qp = UCS_PROFILE_CALL_ALWAYS(ibv_create_qp, cep->id->pd, &qp_init_attr); if (qp == NULL) { ucs_error("failed to create a dummy ud qp. 
%m"); return UCS_ERR_IO_ERROR; diff --git a/src/uct/rocm/base/rocm_base.c b/src/uct/rocm/base/rocm_base.c index 578375eccc0..f2946562b12 100644 --- a/src/uct/rocm/base/rocm_base.c +++ b/src/uct/rocm/base/rocm_base.c @@ -185,8 +185,15 @@ ucs_status_t uct_rocm_base_detect_memory_type(uct_md_h md, const void *addr, info.size = sizeof(hsa_amd_pointer_info_t); status = hsa_amd_pointer_info((void*)addr, &info, NULL, NULL, NULL); if ((status == HSA_STATUS_SUCCESS) && - (info.type != HSA_EXT_POINTER_TYPE_UNKNOWN)) { - *mem_type_p = UCS_MEMORY_TYPE_ROCM; + (info.type == HSA_EXT_POINTER_TYPE_HSA)) { + hsa_device_type_t dev_type; + + status = hsa_agent_get_info(info.agentOwner, HSA_AGENT_INFO_DEVICE, &dev_type); + if ((status == HSA_STATUS_SUCCESS) && + (dev_type == HSA_DEVICE_TYPE_GPU)) { + *mem_type_p = UCS_MEMORY_TYPE_ROCM; + return UCS_OK; + } } return UCS_OK; diff --git a/src/uct/sm/self/self.c b/src/uct/sm/self/self.c index a1f0b08137f..36d0e900e4b 100644 --- a/src/uct/sm/self/self.c +++ b/src/uct/sm/self/self.c @@ -252,8 +252,13 @@ uct_self_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, } for (i = 0; i < self_md->num_devices; i++) { - ucs_snprintf_zero(devices[i].name, sizeof(devices->name), "%s%d", - UCT_SM_DEVICE_NAME, i); + if (self_md->num_devices > 1) { + ucs_snprintf_zero(devices[i].name, sizeof(devices->name), "%s%d", + UCT_SM_DEVICE_NAME, i); + } else { + ucs_strncpy_zero(devices[i].name, UCT_SM_DEVICE_NAME, + sizeof(devices->name)); + } devices[i].type = UCT_DEVICE_TYPE_SELF; devices[i].sys_device = UCS_SYS_DEVICE_ID_UNKNOWN; } diff --git a/test/apps/iodemo/Makefile.am b/test/apps/iodemo/Makefile.am index c1c631f780c..03c84bedb39 100644 --- a/test/apps/iodemo/Makefile.am +++ b/test/apps/iodemo/Makefile.am @@ -27,6 +27,7 @@ io_demo_CXXFLAGS = \ io_demo_CPPFLAGS = $(BASE_CPPFLAGS) $(io_demo_CUDA_CPPFLAGS) io_demo_LDADD = \ + $(top_builddir)/src/ucm/libucm.la \ $(top_builddir)/src/ucs/libucs.la \ $(top_builddir)/src/uct/libuct.la \ 
$(top_builddir)/src/ucp/libucp.la \ diff --git a/test/apps/profiling/ucx_profiling.c b/test/apps/profiling/ucx_profiling.c index 0606a6af957..d54089cbf65 100644 --- a/test/apps/profiling/ucx_profiling.c +++ b/test/apps/profiling/ucx_profiling.c @@ -17,16 +17,14 @@ UCS_PROFILE_FUNC(double, calc_pi, (count), int count) { pi_d_4 = 0.0; /* Profile a block of code */ - { - UCS_PROFILE_CODE("leibnitz") { - for (n = 0; n < count; ++n) { - pi_d_4 += pow(-1.0, n) / (2 * n + 1); - - /* create a timestamp for each step */ - UCS_PROFILE_SAMPLE("step"); - } + UCS_PROFILE_CODE("leibnitz", { + for (n = 0; n < count; ++n) { + pi_d_4 += pow(-1.0, n) / (2 * n + 1); + + /* create a timestamp for each step */ + UCS_PROFILE_SAMPLE("step"); } - } + }); return pi_d_4 * 4.0; } diff --git a/test/gtest/ucm/malloc_hook.cc b/test/gtest/ucm/malloc_hook.cc index 7c7c8c0c10a..6c8a25de4f3 100644 --- a/test/gtest/ucm/malloc_hook.cc +++ b/test/gtest/ucm/malloc_hook.cc @@ -302,8 +302,12 @@ class test_thread { typedef std::pair range; bool is_ptr_in_range(void *ptr, size_t size, const std::vector &ranges) { - for (std::vector::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) { - if ((ptr >= iter->first) && ((char*)ptr < iter->second)) { + uintptr_t p = (uintptr_t)ptr; + for (std::vector::const_iterator iter = ranges.begin(); + iter != ranges.end(); ++iter) { + uintptr_t begin = (uintptr_t)iter->first; + uintptr_t end = (uintptr_t)iter->second; + if ((p >= begin) && (p < end)) { return true; } } diff --git a/test/gtest/ucp/test_ucp_mmap.cc b/test/gtest/ucp/test_ucp_mmap.cc index 1a3f72605c1..0db5c2db3a5 100644 --- a/test/gtest/ucp/test_ucp_mmap.cc +++ b/test/gtest/ucp/test_ucp_mmap.cc @@ -21,7 +21,8 @@ class test_ucp_mmap : public ucp_test { enum { VARIANT_DEFAULT, VARIANT_MAP_NONBLOCK, - VARIANT_PROTO_ENABLE + VARIANT_PROTO_ENABLE, + VARIANT_NO_RCACHE }; static void @@ -32,6 +33,8 @@ class test_ucp_mmap : public ucp_test { "map_nb"); add_variant_with_value(variants, 
UCP_FEATURE_RMA, VARIANT_PROTO_ENABLE, "proto"); + add_variant_with_value(variants, UCP_FEATURE_RMA, VARIANT_NO_RCACHE, + "no_rcache"); } virtual void init() { @@ -44,6 +47,10 @@ class test_ucp_mmap : public ucp_test { if (!is_loopback()) { receiver().connect(&sender(), get_ep_params()); } + + if (get_variant_value() == VARIANT_NO_RCACHE) { + modify_config("RCACHE_ENABLE", "n"); + } } unsigned mem_map_flags() const { diff --git a/test/gtest/ucs/test_profile.cc b/test/gtest/ucs/test_profile.cc index 0042c0abc42..899640d3dd1 100644 --- a/test/gtest/ucs/test_profile.cc +++ b/test/gtest/ucs/test_profile.cc @@ -14,8 +14,6 @@ extern "C" { #include #include -#ifdef HAVE_PROFILING - class scoped_profile { public: scoped_profile(ucs::test_base& test, const std::string &file_name, @@ -109,23 +107,25 @@ const int test_profile::MIN_LINE = __LINE__; static void *test_request = &test_request; -UCS_PROFILE_FUNC_VOID(profile_test_func1, ()) +UCS_PROFILE_FUNC_VOID_ALWAYS(profile_test_func1, ()) { UCS_PROFILE_REQUEST_NEW(test_request, "allocate", 10); - UCS_PROFILE_REQUEST_EVENT(test_request, "work", 0); + UCS_PROFILE_REQUEST_EVENT_ALWAYS(test_request, "work", 0); UCS_PROFILE_REQUEST_FREE(test_request); - UCS_PROFILE_CODE("code") { - UCS_PROFILE_SAMPLE("sample"); - } + UCS_PROFILE_CODE_ALWAYS("code", { UCS_PROFILE_SAMPLE_ALWAYS("sample"); }); } -UCS_PROFILE_FUNC(int, profile_test_func2, (a, b), int a, int b) +UCS_PROFILE_FUNC_ALWAYS(int, profile_test_func2, (a, b), int a, int b) { - return UCS_PROFILE_CALL(sum, a, b); + return UCS_PROFILE_CALL_ALWAYS(sum, a, b); } const int test_profile::MAX_LINE = __LINE__; -const unsigned test_profile::NUM_LOCAITONS = 12u; +#ifdef HAVE_PROFILING +const unsigned test_profile::NUM_LOCAITONS = 12; /* With request alloc/free */ +#else +const unsigned test_profile::NUM_LOCAITONS = 10; /* Without request alloc/free */ +#endif const char* test_profile::PROFILE_FILENAME = "test.prof"; test_profile::test_profile() @@ -232,7 +232,9 @@ void 
test_profile::test_locations(const ucs_profile_location_t *locations, EXPECT_NE(loc_names.end(), loc_names.find("code")); EXPECT_NE(loc_names.end(), loc_names.find("sample")); EXPECT_NE(loc_names.end(), loc_names.find("sum")); +#ifdef HAVE_PROFILING EXPECT_NE(loc_names.end(), loc_names.find("allocate")); +#endif EXPECT_NE(loc_names.end(), loc_names.find("work")); *ptr = locations + num_locations; @@ -404,9 +406,7 @@ UCS_TEST_SKIP_COND_P(test_profile_perf, overhead, RUNNING_ON_VALGRIND) { t = ucs_get_time(); for (volatile int j = 0; j < COUNT;) { - UCS_PROFILE_CODE("test") { - ++j; - } + UCS_PROFILE_CODE_ALWAYS("test", ++j); } if (i > WARMUP_ITERS) { time_profile_on += ucs_get_time() - t; @@ -431,5 +431,3 @@ UCS_TEST_SKIP_COND_P(test_profile_perf, overhead, RUNNING_ON_VALGRIND) { } INSTANTIATE_TEST_SUITE_P(st, test_profile_perf, ::testing::Values(1)); - -#endif diff --git a/test/gtest/uct/ib/test_ib_md.cc b/test/gtest/uct/ib/test_ib_md.cc index c56e5488023..360c2d22b1f 100644 --- a/test/gtest/uct/ib/test_ib_md.cc +++ b/test/gtest/uct/ib/test_ib_md.cc @@ -18,26 +18,49 @@ class test_ib_md : public test_md { protected: - void ib_md_umr_check(void *rkey_buffer, - bool amo_access, - size_t size = 8192); + void init() override; + const uct_ib_md_t &ib_md() const; + void ib_md_umr_check(void *rkey_buffer, bool amo_access, + size_t size = 8192, bool aligned = false); bool has_ksm() const; - bool check_umr(uct_ib_md_t *ib_md) const; + bool check_umr() const; + +private: +#ifdef HAVE_MLX5_DV + uint32_t m_mlx5_flags = 0; +#endif }; +void test_ib_md::init() { + test_md::init(); + +#ifdef HAVE_MLX5_DV + /* Save mlx5 IB md flags because failed atomic registration will modify it */ + if (ib_md().dev.flags & UCT_IB_DEVICE_FLAG_MLX5_PRM) { + m_mlx5_flags = ucs_derived_of(md(), uct_ib_mlx5_md_t)->flags; + } +#endif +} + +const uct_ib_md_t &test_ib_md::ib_md() const { + return *ucs_derived_of(md(), uct_ib_md_t); +} /* * Test that ib md does not create umr region if * 
UCT_MD_MEM_ACCESS_REMOTE_ATOMIC is not set */ - -void test_ib_md::ib_md_umr_check(void *rkey_buffer, - bool amo_access, - size_t size) +void test_ib_md::ib_md_umr_check(void *rkey_buffer, bool amo_access, + size_t size, bool aligned) { ucs_status_t status; size_t alloc_size; void *buffer; + int ret; + + if (amo_access && (IBV_DEV_ATTR(&ib_md().dev, vendor_part_id) < 4123)) { /* relaxed_order) { + if ((amo_access && check_umr()) || ib_md().relaxed_order) { EXPECT_TRUE(ib_memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR); EXPECT_NE(UCT_IB_INVALID_MKEY, ib_memh->atomic_rkey); } else { @@ -92,27 +118,30 @@ void test_ib_md::ib_md_umr_check(void *rkey_buffer, status = uct_md_mem_dereg(md(), memh); EXPECT_UCS_OK(status); - ucs_mmap_free(buffer, alloc_size); + if (aligned) { + ucs_free(buffer); + } else { + ucs_mmap_free(buffer, alloc_size); + } } bool test_ib_md::has_ksm() const { #if HAVE_DEVX - return (ucs_derived_of(md(), uct_ib_md_t)->dev.flags & UCT_IB_DEVICE_FLAG_MLX5_PRM) && - (ucs_derived_of(md(), uct_ib_mlx5_md_t)->flags & UCT_IB_MLX5_MD_FLAG_KSM); + return m_mlx5_flags & UCT_IB_MLX5_MD_FLAG_KSM; #elif defined(HAVE_EXP_UMR_KSM) - return ucs_derived_of(md(), uct_ib_md_t)->dev.dev_attr.exp_device_cap_flags & + return ib_md().dev.dev_attr.exp_device_cap_flags & IBV_EXP_DEVICE_UMR_FIXED_SIZE; #else return false; #endif } -bool test_ib_md::check_umr(uct_ib_md_t *ib_md) const { +bool test_ib_md::check_umr() const { #if HAVE_DEVX return has_ksm(); #elif HAVE_EXP_UMR - if (ib_md->dev.flags & UCT_IB_DEVICE_FLAG_MLX5_PRM) { - uct_ib_mlx5_md_t *mlx5_md = ucs_derived_of(ib_md, uct_ib_mlx5_md_t); + if (ib_md().dev.flags & UCT_IB_DEVICE_FLAG_MLX5_PRM) { + uct_ib_mlx5_md_t *mlx5_md = ucs_derived_of(&ib_md(), uct_ib_mlx5_md_t); return mlx5_md->umr_qp != NULL; } return false; @@ -165,4 +194,9 @@ UCS_TEST_P(test_ib_md, umr_noninline_klm, "MAX_INLINE_KLM_LIST=1") { } #endif +UCS_TEST_P(test_ib_md, aligned) { + std::string rkey_buffer(md_attr().rkey_packed_size, '\0'); + 
ib_md_umr_check(&rkey_buffer[0], true, UCT_IB_MD_MAX_MR_SIZE, true); +} + _UCT_MD_INSTANTIATE_TEST_CASE(test_ib_md, ib)