Skip to content

Commit

Permalink
MEMTRACK: added memory limit
Browse files Browse the repository at this point in the history
- added memory limit to memtrack infra
  to allow terminating the process when
  memory usage is too high
- added jenkins test for memtrack limit

(cherry picked from commit a936280)
  • Loading branch information
Sergey Oblomov committed Jun 15, 2021
1 parent 97aa67d commit 529eb4f
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 64 deletions.
11 changes: 11 additions & 0 deletions contrib/test_jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1230,6 +1230,16 @@ test_memtrack() {
UCX_MEMTRACK_DEST=stdout ./test/gtest/gtest --gtest_filter=test_memtrack.sanity
}

# Rebuild with configure-devel, then run the test_memtrack_limit app twice
# with different UCX_MEMTRACK_LIMIT values and grep its output for the
# expected marker.
test_memtrack_limit() {
../contrib/configure-devel --prefix=$ucx_inst
make_clean
$MAKEP

echo "==== Running memtrack limit test ===="
# Limit of 512MB: the combined stdout/stderr of the app ('|&') must contain
# 'SUCCESS'.
UCX_MEMTRACK_DEST=stdout UCX_HANDLE_ERRORS=none UCX_MEMTRACK_LIMIT=512MB ./test/apps/test_memtrack_limit |& grep -C 100 'SUCCESS'
# Limit of 412MB: the combined output must contain 'reached'.
# NOTE(review): whether a grep miss fails the whole job depends on shell
# options (errexit/pipefail) set outside this function - confirm.
UCX_MEMTRACK_DEST=stdout UCX_HANDLE_ERRORS=none UCX_MEMTRACK_LIMIT=412MB ./test/apps/test_memtrack_limit |& grep -C 100 'reached'
}

test_unused_env_var() {
# We must create a UCP worker to get the warning about unused variables
echo "==== Running ucx_info env vars test ===="
Expand Down Expand Up @@ -1637,6 +1647,7 @@ run_tests() {
do_distributed_task 1 4 test_ucs_dlopen
do_distributed_task 3 4 test_ucs_load
do_distributed_task 3 4 test_memtrack
do_distributed_task 3 4 test_memtrack_limit
do_distributed_task 0 4 test_unused_env_var
do_distributed_task 2 4 test_env_var_aliases
do_distributed_task 1 3 test_malloc_hook
Expand Down
6 changes: 6 additions & 0 deletions src/ucs/config/global_opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ ucs_global_opts_t ucs_global_opts = {
.stats_dest = "",
.tuning_path = "",
.memtrack_dest = "",
.memtrack_limit = UCS_MEMUNITS_INF,
.stats_trigger = "exit",
.profile_mode = 0,
.profile_file = "",
Expand Down Expand Up @@ -191,6 +192,11 @@ static ucs_config_field_t ucs_global_opts_table[] = {
" stdout - print to standard output.\n"
" stderr - print to standard error.\n",
ucs_offsetof(ucs_global_opts_t, memtrack_dest), UCS_CONFIG_TYPE_STRING},

{"MEMTRACK_LIMIT", "inf",
"Memory limit allocated by memtrack. In case if limit is reached then\n"
"memtrack report is generated and process is terminated.\n",
ucs_offsetof(ucs_global_opts_t, memtrack_limit), UCS_CONFIG_TYPE_MEMUNITS},
#endif

{"PROFILE_MODE", "",
Expand Down
3 changes: 3 additions & 0 deletions src/ucs/config/global_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ typedef struct {
*/
char *memtrack_dest;

/* Memory limit enforced by memtrack; the application is aborted when it is reached */
size_t memtrack_limit;

/* Profiling mode */
unsigned profile_mode;

Expand Down
141 changes: 78 additions & 63 deletions src/ucs/debug/memtrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
#include <ucs/stats/stats.h>
#include <ucs/sys/sys.h>
#include <ucs/sys/math.h>
#include <ucs/sys/string.h>
#include <stdio.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif


#ifdef ENABLE_MEMTRACK
Expand Down Expand Up @@ -105,11 +109,75 @@ static void ucs_memtrack_entry_update(ucs_memtrack_entry_t *entry, ssize_t size)
entry->peak_size = ucs_max(entry->peak_size, entry->size);
}

/**
 * qsort() comparator that orders memtrack entries by descending peak size.
 *
 * @param ptr1  Pointer to an array element, which is itself a pointer to a
 *              ucs_memtrack_entry_t.
 * @param ptr2  Pointer to another array element of the same kind.
 *
 * @return Negative if the first entry has the larger peak_size, positive if
 *         it has the smaller one, 0 if both are equal.
 */
static int ucs_memtrack_cmp_entries(const void *ptr1, const void *ptr2)
{
    ucs_memtrack_entry_t * const *e1 = ptr1;
    ucs_memtrack_entry_t * const *e2 = ptr2;

    /* Compare instead of subtracting: narrowing a byte-count difference to
     * int can flip its sign (or yield 0) once the sizes differ by 2GB or
     * more, which would hand qsort() an inconsistent ordering. */
    return ((*e2)->peak_size > (*e1)->peak_size) -
           ((*e2)->peak_size < (*e1)->peak_size);
}

/**
 * Write the memtrack table to @a output_stream: a title line, the TOTAL row,
 * and one row per tracked entry in the order produced by
 * ucs_memtrack_cmp_entries. Returns without printing when memtrack is not
 * enabled. This function performs no locking of its own.
 *
 * @param output_stream  Stream that receives the report.
 */
static void ucs_memtrack_dump_internal(FILE* output_stream)
{
    ucs_memtrack_entry_t **sorted, *cur;
    unsigned count, idx;

    if (!ucs_memtrack_is_enabled()) {
        return;
    }

    /* gather a pointer to every tracked entry into a temporary array */
    count  = 0;
    sorted = ucs_alloca(kh_size(&ucs_memtrack_context.entries) *
                        sizeof(*sorted));
    kh_foreach_value(&ucs_memtrack_context.entries, cur, {
        sorted[count++] = cur;
    });
    ucs_assert(count <= kh_size(&ucs_memtrack_context.entries));

    /* order the collected entries with the peak-size comparator */
    qsort(sorted, count, sizeof(*sorted), ucs_memtrack_cmp_entries);

    /* title line, then the aggregate row */
    fprintf(output_stream, "%31s current / peak %16s current / peak\n", "", "");
    fprintf(output_stream, UCS_MEMTRACK_FORMAT_STRING, "TOTAL",
            ucs_memtrack_context.total.size,
            ucs_memtrack_context.total.peak_size,
            ucs_memtrack_context.total.count,
            ucs_memtrack_context.total.peak_count);

    /* one row per entry, in sorted order */
    for (idx = 0; idx < count; ++idx) {
        cur = sorted[idx];
        fprintf(output_stream, UCS_MEMTRACK_FORMAT_STRING, cur->name,
                cur->size, cur->peak_size, cur->count, cur->peak_count);
    }
}

/**
 * Open the destination named by ucs_global_opts.memtrack_dest and write the
 * memtrack dump to it. Nothing is written when the stream cannot be opened.
 */
static void ucs_memtrack_generate_report()
{
    const char *next_token; /* filled by the open call, not used afterwards */
    FILE *stream;
    int close_stream;
    ucs_status_t status;

    status = ucs_open_output_stream(ucs_global_opts.memtrack_dest,
                                    UCS_LOG_LEVEL_ERROR, &stream,
                                    &close_stream, &next_token);
    if (status == UCS_OK) {
        ucs_memtrack_dump_internal(stream);
        /* close only when the open call reported that the stream needs it */
        if (close_stream) {
            fclose(stream);
        }
    }
}

void ucs_memtrack_allocated(void *ptr, size_t size, const char *name)
{
ucs_memtrack_entry_t *entry;
khiter_t iter;
int ret;
char limit_str[256];

#ifdef UCX_ALLOC_ALIGN
UCS_STATIC_ASSERT(UCX_ALLOC_ALIGN >= 16);
Expand Down Expand Up @@ -138,6 +206,16 @@ void ucs_memtrack_allocated(void *ptr, size_t size, const char *name)
/* update specific and global entries */
ucs_memtrack_entry_update(entry, size);
ucs_memtrack_entry_update(&ucs_memtrack_context.total, size);
if (ucs_memtrack_context.total.size >= ucs_global_opts.memtrack_limit) {
ucs_memtrack_generate_report();
ucs_memunits_to_str(ucs_global_opts.memtrack_limit, limit_str,
sizeof(limit_str));
/* disable memtrack to prevent hang */
ucs_memtrack_context.enabled = 0;
/* unlock memtrack context to eliminate deadlock */
pthread_mutex_unlock(&ucs_memtrack_context.lock);
ucs_fatal("reached memtrack memory limit %s", limit_str);
}

UCS_STATS_UPDATE_COUNTER(ucs_memtrack_context.stats, UCS_MEMTRACK_STAT_ALLOCATION_COUNT, 1);
UCS_STATS_UPDATE_COUNTER(ucs_memtrack_context.stats, UCS_MEMTRACK_STAT_ALLOCATION_SIZE, size);
Expand Down Expand Up @@ -261,76 +339,13 @@ void ucs_memtrack_total(ucs_memtrack_entry_t* total)
pthread_mutex_unlock(&ucs_memtrack_context.lock);
}

/**
 * qsort() comparator that orders memtrack entries by descending peak size.
 *
 * @param ptr1  Pointer to an array element, which is itself a pointer to a
 *              ucs_memtrack_entry_t.
 * @param ptr2  Pointer to another array element of the same kind.
 *
 * @return Negative if the first entry has the larger peak_size, positive if
 *         it has the smaller one, 0 if both are equal.
 */
static int ucs_memtrack_cmp_entries(const void *ptr1, const void *ptr2)
{
    ucs_memtrack_entry_t * const *e1 = ptr1;
    ucs_memtrack_entry_t * const *e2 = ptr2;

    /* Compare instead of subtracting: narrowing a byte-count difference to
     * int can flip its sign (or yield 0) once the sizes differ by 2GB or
     * more, which would hand qsort() an inconsistent ordering. */
    return ((*e2)->peak_size > (*e1)->peak_size) -
           ((*e2)->peak_size < (*e1)->peak_size);
}

/**
 * Write the memtrack table to @a output_stream: a title line, the TOTAL row,
 * and one row per tracked entry in the order produced by
 * ucs_memtrack_cmp_entries. Returns without printing when memtrack is not
 * enabled. This function performs no locking of its own.
 *
 * @param output_stream  Stream that receives the report.
 */
static void ucs_memtrack_dump_internal(FILE* output_stream)
{
    ucs_memtrack_entry_t **sorted, *cur;
    unsigned count, idx;

    if (!ucs_memtrack_is_enabled()) {
        return;
    }

    /* gather a pointer to every tracked entry into a temporary array */
    count  = 0;
    sorted = ucs_alloca(kh_size(&ucs_memtrack_context.entries) *
                        sizeof(*sorted));
    kh_foreach_value(&ucs_memtrack_context.entries, cur, {
        sorted[count++] = cur;
    });
    ucs_assert(count <= kh_size(&ucs_memtrack_context.entries));

    /* order the collected entries with the peak-size comparator */
    qsort(sorted, count, sizeof(*sorted), ucs_memtrack_cmp_entries);

    /* title line, then the aggregate row */
    fprintf(output_stream, "%31s current / peak %16s current / peak\n", "", "");
    fprintf(output_stream, UCS_MEMTRACK_FORMAT_STRING, "TOTAL",
            ucs_memtrack_context.total.size,
            ucs_memtrack_context.total.peak_size,
            ucs_memtrack_context.total.count,
            ucs_memtrack_context.total.peak_count);

    /* one row per entry, in sorted order */
    for (idx = 0; idx < count; ++idx) {
        cur = sorted[idx];
        fprintf(output_stream, UCS_MEMTRACK_FORMAT_STRING, cur->name,
                cur->size, cur->peak_size, cur->count, cur->peak_count);
    }
}

/**
 * Dump the memtrack report to @a output_stream while holding the memtrack
 * context lock.
 *
 * @param output_stream  Stream passed on to ucs_memtrack_dump_internal().
 */
void ucs_memtrack_dump(FILE* output_stream)
{
/* hold the context lock for the duration of the dump */
pthread_mutex_lock(&ucs_memtrack_context.lock);
ucs_memtrack_dump_internal(output_stream);
pthread_mutex_unlock(&ucs_memtrack_context.lock);
}

/**
 * Open the destination named by ucs_global_opts.memtrack_dest and write the
 * memtrack dump to it. Nothing is written when the stream cannot be opened.
 */
static void ucs_memtrack_generate_report()
{
    const char *next_token; /* filled by the open call, not used afterwards */
    FILE *stream;
    int close_stream;
    ucs_status_t status;

    status = ucs_open_output_stream(ucs_global_opts.memtrack_dest,
                                    UCS_LOG_LEVEL_ERROR, &stream,
                                    &close_stream, &next_token);
    if (status == UCS_OK) {
        ucs_memtrack_dump_internal(stream);
        /* close only when the open call reported that the stream needs it */
        if (close_stream) {
            fclose(stream);
        }
    }
}

void ucs_memtrack_init()
{
ucs_status_t status;
Expand Down
8 changes: 7 additions & 1 deletion test/apps/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ noinst_PROGRAMS = \
test_ucs_dlopen \
test_link_map \
test_dlopen_cfg_print \
test_init_mt
test_init_mt \
test_memtrack_limit

objdir = $(shell sed -n -e 's/^objdir=\(.*\)$$/\1/p' $(LIBTOOL))

Expand All @@ -37,6 +38,11 @@ test_ucp_dlopen_CPPFLAGS = $(BASE_CPPFLAGS) \
test_ucp_dlopen_CFLAGS = $(BASE_CFLAGS)
test_ucp_dlopen_LDADD = -ldl

test_memtrack_limit_SOURCES = test_memtrack_limit.c
test_memtrack_limit_CPPFLAGS = $(BASE_CPPFLAGS)
test_memtrack_limit_CFLAGS = $(BASE_CFLAGS)
test_memtrack_limit_LDADD = $(top_builddir)/src/ucs/libucs.la

test_link_map_SOURCES = test_link_map.c
test_link_map_CPPFLAGS = $(BASE_CPPFLAGS)
test_link_map_CFLAGS = $(BASE_CFLAGS)
Expand Down
29 changes: 29 additions & 0 deletions test/apps/test_memtrack_limit.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2021. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <ucp/api/ucp.h>
#include <ucs/debug/memtrack.h>
#include <ucs/sys/math.h>

/**
 * Allocate 500 MB through ucs_malloc() under the name "test memtrack limit".
 * On success, print "SUCCESS", release the buffer and exit with EXIT_SUCCESS;
 * if the allocation returns NULL, report "No memory" on stderr and exit with
 * EXIT_FAILURE. The command-line arguments are not used.
 */
int main(int argc, char **argv)
{
    void *buffer = ucs_malloc(500 * UCS_MBYTE, "test memtrack limit");

    if (buffer != NULL) {
        printf("SUCCESS\n");
        ucs_free(buffer);
        return EXIT_SUCCESS;
    }

    fprintf(stderr, "No memory\n");
    return EXIT_FAILURE;
}

0 comments on commit 529eb4f

Please sign in to comment.