Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

restoring more hwloc --cpu-set behavior from OMPI 3.x #6755

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 21 additions & 28 deletions opal/mca/hwloc/base/hwloc_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
/* cache this info */
sum->available = avail;

/* Treat --cpu-set analogously to how a cgroup is treated, eg make
* the loaded topolog only contain what is available. */
hwloc_topology_restrict(topo, avail, 0);

return OPAL_SUCCESS;
}

Expand Down Expand Up @@ -318,7 +322,7 @@ int opal_hwloc_base_get_topology(void)
0, (void*)addr, size, 0)) {
if (4 < opal_output_get_verbosity(opal_hwloc_base_framework.framework_output)) {
FILE *file = fopen("/proc/self/maps", "r");
if (file) {
if (NULL != file) {
char line[256];
opal_output(0, "Dumping /proc/self/maps");

Expand Down Expand Up @@ -727,17 +731,20 @@ static hwloc_obj_t df_search(hwloc_topology_t topo,
#if HWLOC_API_VERSION >= 0x20000
return NULL;
#else
if (cache_level != HWLOC_OBJ_CACHE)
if (cache_level != HWLOC_OBJ_CACHE) {
return NULL;
}
search_depth = hwloc_get_cache_type_depth(topo, cache_level, (hwloc_obj_cache_type_t) -1);
#endif
}
if (HWLOC_TYPE_DEPTH_UNKNOWN == search_depth)
if (HWLOC_TYPE_DEPTH_UNKNOWN == search_depth) {
return NULL;
}

if (OPAL_HWLOC_LOGICAL == rtype) {
if (num_objs)
if (NULL != num_objs) {
*num_objs = hwloc_get_nbobjs_by_depth(topo, search_depth);
}
return hwloc_get_obj_by_depth(topo, search_depth, nobj);
}
if (OPAL_HWLOC_PHYSICAL == rtype) {
Expand All @@ -754,45 +761,31 @@ static hwloc_obj_t df_search(hwloc_topology_t topo,
*/
hwloc_obj_t found = NULL;
obj = NULL;
if (num_objs)
if (NULL != num_objs) {
*num_objs = 0;
}
while ((obj = hwloc_get_next_obj_by_depth(topo, search_depth, obj)) != NULL) {
if (num_objs && obj->os_index > *num_objs)
if (num_objs && obj->os_index > *num_objs) {
*num_objs = obj->os_index;
if (obj->os_index == nobj)
}
if (obj->os_index == nobj) {
found = obj;
}
}
return found;
}
if (OPAL_HWLOC_AVAILABLE == rtype) {
// The previous (3.x) code included a check for
// available = opal_hwloc_base_get_available_cpus(topo, start)
// and skipped objs that had hwloc_bitmap_iszero(available)
hwloc_obj_t root;
opal_hwloc_topo_data_t *rdata;
root = hwloc_get_root_obj(topo);
rdata = (opal_hwloc_topo_data_t*)root->userdata;
hwloc_cpuset_t constrained_cpuset;

constrained_cpuset = hwloc_bitmap_alloc();
if (rdata && rdata->available) {
hwloc_bitmap_and(constrained_cpuset, start->cpuset, rdata->available);
} else {
hwloc_bitmap_copy(constrained_cpuset, start->cpuset);
}

unsigned idx = 0;
if (num_objs)
*num_objs = hwloc_get_nbobjs_inside_cpuset_by_depth(topo, constrained_cpuset, search_depth);
if (NULL != num_objs) {
*num_objs = hwloc_get_nbobjs_inside_cpuset_by_depth(topo, start->cpuset, search_depth);
}
obj = NULL;
while ((obj = hwloc_get_next_obj_inside_cpuset_by_depth(topo, constrained_cpuset, search_depth, obj)) != NULL) {
while ((obj = hwloc_get_next_obj_inside_cpuset_by_depth(topo, start->cpuset, search_depth, obj)) != NULL) {
if (idx == nobj) {
hwloc_bitmap_free(constrained_cpuset);
return obj;
}
idx++;
}
hwloc_bitmap_free(constrained_cpuset);
return NULL;
}
return NULL;
Expand Down
18 changes: 5 additions & 13 deletions orte/mca/rmaps/base/rmaps_base_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,19 +169,9 @@ static int bind_generic(orte_job_t *jdata,
trg_obj = NULL;
min_bound = UINT_MAX;
while (NULL != (tmp_obj = hwloc_get_next_obj_by_depth(node->topology->topo, target_depth, tmp_obj))) {
hwloc_obj_t root;
opal_hwloc_topo_data_t *rdata;
root = hwloc_get_root_obj(node->topology->topo);
rdata = (opal_hwloc_topo_data_t*)root->userdata;

if (!hwloc_bitmap_intersects(locale->cpuset, tmp_obj->cpuset))
continue;
// From the old 3.x code trg_obj was picked via a call to
// opal_hwloc_base_find_min_bound_target_under_obj() which
// skiped over unavailable objects (via opal_hwloc_base_get_npus).
if (rdata && rdata->available && !hwloc_bitmap_intersects(rdata->available, tmp_obj->cpuset))
if (!hwloc_bitmap_intersects(locale->cpuset, tmp_obj->cpuset)) {
continue;

}
data = (opal_hwloc_obj_data_t*)tmp_obj->userdata;
if (NULL == data) {
data = OBJ_NEW(opal_hwloc_obj_data_t);
Expand Down Expand Up @@ -223,7 +213,9 @@ static int bind_generic(orte_job_t *jdata,
data = OBJ_NEW(opal_hwloc_obj_data_t);
trg_obj->userdata = data;
}
data->num_bound++;
if (0 < ncpus) {
data->num_bound++;
}
/* error out if adding a proc would cause overload and that wasn't allowed,
* and it wasn't a default binding policy (i.e., the user requested it)
*/
Expand Down