From 3700a5b3e070fa55d594c56d39d3f34599092bc0 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Sun, 29 Sep 2024 22:00:14 -0700 Subject: [PATCH] collective: Add option to join collective with extra parameters Offload collective implementations want to know the intended collective operations to be performed on a specific collective group. With such knowledge the implementation can optimize the HW configuration for the collective ops wanted. On the other hand, the provider can tell the user what kind of collective operations the HW is configured for and thus allow the application to make an informed decision on whether to use offload or not. The patch adds a new call fi_join_collective_attr that allows passing a structured parameter for exchanging such information. It also provides space for a user defined "session context" which is different from the context for the call itself. The "session context" can be used to associate one or more collective groups with a conceptual "super group" that share some common properties. 
Signed-off-by: Jianxin Xiong --- include/rdma/fi_cm.h | 10 ++++++++++ include/rdma/fi_collective.h | 19 +++++++++++++++++++ man/fi_cm.3.md | 16 ++++++++++++++++ man/fi_collective.3.md | 31 ++++++++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 1 deletion(-) diff --git a/include/rdma/fi_cm.h b/include/rdma/fi_cm.h index c0faf1f377b..0624b725834 100644 --- a/include/rdma/fi_cm.h +++ b/include/rdma/fi_cm.h @@ -60,6 +60,8 @@ struct fi_ops_cm { int (*shutdown)(struct fid_ep *ep, uint64_t flags); int (*join)(struct fid_ep *ep, const void *addr, uint64_t flags, struct fid_mc **mc, void *context); + int (*join_attr)(struct fid_ep *ep, const void *addr, uint64_t flags, + void *attr, struct fid_mc **mc, void *context); }; @@ -123,6 +125,14 @@ static inline int fi_join(struct fid_ep *ep, const void *addr, uint64_t flags, ep->cm->join(ep, addr, flags, mc, context) : -FI_ENOSYS; } +static inline int fi_join_attr(struct fid_ep *ep, const void *addr, + uint64_t flags, void *attr, + struct fid_mc **mc, void *context) +{ + return FI_CHECK_OP(ep->cm, struct fi_ops_cm, join_attr) ? 
+ ep->cm->join_attr(ep, addr, flags, attr, mc, context) : -FI_ENOSYS; +} + static inline fi_addr_t fi_mc_addr(struct fid_mc *mc) { return mc->fi_addr; diff --git a/include/rdma/fi_collective.h b/include/rdma/fi_collective.h index 9e7ac629748..ae6a3161ecf 100644 --- a/include/rdma/fi_collective.h +++ b/include/rdma/fi_collective.h @@ -72,6 +72,12 @@ struct fi_collective_attr { uint64_t mode; }; +struct fi_collective_join_attr { + uint64_t bitmap_ops_wanted; /* 1 << FI_BARRIER, etc */ + uint64_t bitmap_ops_supported; /* 1 << FI_BARRIER, etc */ + void *s_context; /* session context */ +}; + struct fi_collective_addr { const struct fid_av_set *set; fi_addr_t coll_addr; @@ -201,6 +207,19 @@ fi_join_collective(struct fid_ep *ep, fi_addr_t coll_addr, return fi_join(ep, &addr, flags | FI_COLLECTIVE, mc, context); } +static inline int +fi_join_collective_attr(struct fid_ep *ep, fi_addr_t coll_addr, + const struct fid_av_set *set, + uint64_t flags, struct fi_collective_join_attr *attr, + struct fid_mc **mc, void *context) +{ + struct fi_collective_addr addr; + + addr.set = set; + addr.coll_addr = coll_addr; + return fi_join_attr(ep, &addr, flags | FI_COLLECTIVE, attr, mc, context); +} + static inline ssize_t fi_barrier(struct fid_ep *ep, fi_addr_t coll_addr, void *context) { diff --git a/man/fi_cm.3.md b/man/fi_cm.3.md index 9465f06b1bf..e23df69c737 100644 --- a/man/fi_cm.3.md +++ b/man/fi_cm.3.md @@ -44,6 +44,9 @@ int fi_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen); int fi_join(struct fid_ep *ep, const void *addr, uint64_t flags, struct fid_mc **mc, void *context); +int fi_join_attr(struct fid_ep *ep, const void *addr, uint64_t flags, + void *attr, struct fid_mc **mc, void *context); + int fi_close(struct fid *mc); fi_addr_t fi_mc_addr(struct fid_mc *mc); @@ -83,6 +86,9 @@ Active or passive endpoint to get/set address. *flags* : Additional flags for controlling connection operation. +*attr* +: Attribute for joining the multicast group. 
+ *context* : User context associated with the request. @@ -226,6 +232,16 @@ endpoint from the group. After a join operation has completed, the fi_mc_addr call may be used to retrieve the address associated with the multicast group. +## fi_join_attr + +This call is similar to fi_join, but allows the caller to specify additional +attributes for the join operation. The attr parameter is a provider-specific +structure that may be used to control the behavior of the join operation. +The attr parameter may be NULL if no additional attributes are required. + +When called with the FI_COLLECTIVE flag, the attr parameter must point to a +fi_collective_join_attr structure. + ## fi_mc_addr Returns the fi_addr_t address associated with a multicast group. This address diff --git a/man/fi_collective.3.md b/man/fi_collective.3.md index e8f8139e780..d62cf0834db 100644 --- a/man/fi_collective.3.md +++ b/man/fi_collective.3.md @@ -56,6 +56,11 @@ int fi_join_collective(struct fid_ep *ep, fi_addr_t coll_addr, const struct fid_av_set *set, uint64_t flags, struct fid_mc **mc, void *context); +int fi_join_collective_attr(struct fid_ep *ep, fi_addr_t coll_addr, + const struct fid_av_set *set, + uint64_t flags, struct fi_collective_join_attr *attr, + struct fid_mc **mc, void *context); + ssize_t fi_barrier(struct fid_ep *ep, fi_addr_t coll_addr, void *context); @@ -144,6 +149,9 @@ int fi_query_collective(struct fid_domain *domain, *flags* : Additional flags to apply for the atomic operation +*attr* +: Attribute for collective join operation + *context* : User specified pointer to associate with the operation. This parameter is ignored if the operation will not generate a successful completion, unless @@ -205,7 +213,7 @@ APIs differ from message and RMA interfaces in that the format of the data is known to the provider, and the collective may perform an operation on that data. This aligns collective operations closely with the atomic interfaces. 
-## Join Collective (fi_join_collective) +## Join Collective (fi_join_collective, fi_join_collective_attr) This call attaches an endpoint to a collective membership group. Libfabric treats collective members as a multicast group, and the fi_join_collective @@ -238,6 +246,27 @@ fi_join_collective call will create a new collective subgroup. If application managed memberships are used, coll_addr should be set to FI_ADDR_UNAVAIL. +The fi_join_collective_attr call is similar to fi_join_collective, but +provides additional attributes for the collective join operation. The +attributes are specified through the struct fi_collective_join_attr parameter. +The fi_collective_join_attr structure is defined as follows: + +{% highlight c %} +struct fi_collective_join_attr { + uint64_t bitmap_ops_wanted; + uint64_t bitmap_ops_supported; + void *s_context; +}; +{% endhighlight %} + +The field *bitmap_ops_wanted* is a bitmask of the collective operations +that the application would like to use with the collective group. The field +*bitmap_ops_supported* is a bitmask of the collective operations that the +provider supports. The field *s_context* is a user specified "session" +context that can be used to identify a "super group" that one or more +collective join operations can be associated with. How *s_context* is +used is provider specific. + Applications must call fi_close on the collective group to disconnect the endpoint from the group. After a join operation has completed, the fi_mc_addr call may be used to retrieve the address associated with the