From 7e3a70c0a9f7ab47e839ebfbef637b21155a7261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=C2=A0Miros=C5=82aw?= Date: Fri, 23 Jun 2023 13:25:27 +0200 Subject: [PATCH] dump: Don't unfreeze tasks on dump failure with --no-resume-on-error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible to kill or leave stopped tasks if a dump failed after stopping the tree. Signed-off-by: Michał Mirosław --- Documentation/criu.txt | 3 +++ criu/config.c | 2 ++ criu/cr-dump.c | 6 ++++-- criu/cr-service.c | 3 +++ criu/crtools.c | 2 ++ criu/include/cr_options.h | 1 + images/rpc.proto | 1 + 7 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 606935790b..6783378d39 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -213,6 +213,9 @@ In other words, do not use it unless really needed. *-s*, *--leave-stopped*:: Leave tasks in stopped state after checkpoint, instead of killing. +*--no-resume-on-error*:: + Do not resume tasks if the checkpoint fails after they were stopped; apply the same final state as on success (killed by default, or stopped with *--leave-stopped*). + *--external* __type__**[**__id__**]:**__value__:: Dump an instance of an external resource. 
The generic syntax is 'type' of resource, followed by resource 'id' (enclosed in literal diff --git a/criu/config.c b/criu/config.c index 1322a490ab..a380a836ac 100644 --- a/criu/config.c +++ b/criu/config.c @@ -412,6 +412,7 @@ void init_opts(void) /* Default options */ opts.final_state = TASK_DEAD; + opts.resume_on_dump_error = true; INIT_LIST_HEAD(&opts.ext_mounts); INIT_LIST_HEAD(&opts.inherit_fds); INIT_LIST_HEAD(&opts.external); @@ -622,6 +623,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, { "tree", required_argument, 0, 't' }, { "leave-stopped", no_argument, 0, 's' }, { "leave-running", no_argument, 0, 'R' }, + BOOL_OPT("resume-on-error", &opts.resume_on_dump_error), BOOL_OPT("restore-detached", &opts.restore_detach), BOOL_OPT("restore-sibling", &opts.restore_sibling), BOOL_OPT("daemon", &opts.restore_detach), diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 340fb96ecd..8cf3e28195 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -2065,7 +2065,9 @@ static int cr_dump_finish(int ret) * consistency of the FS and other resources, we simply * start rollback procedure and cleanup everything. */ - if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) { + if (opts.resume_on_dump_error && (ret || post_dump_ret)) + opts.final_state = TASK_ALIVE; + if (opts.final_state == TASK_ALIVE) { unsuspend_lsm(); network_unlock(); delete_link_remaps(); @@ -2077,7 +2079,7 @@ static int cr_dump_finish(int ret) if (arch_set_thread_regs(root_item, true) < 0) return -1; - pstree_switch_state(root_item, (ret || post_dump_ret) ? 
TASK_ALIVE : opts.final_state); + pstree_switch_state(root_item, opts.final_state); timing_stop(TIME_FROZEN); free_pstree(root_item); seccomp_free_entries(); diff --git a/criu/cr-service.c b/criu/cr-service.c index 61a04c5ffe..f3b965f2ce 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -483,6 +483,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req) if (req->has_leave_stopped && req->leave_stopped) opts.final_state = TASK_STOPPED; + if (req->has_resume_on_dump_error) + opts.resume_on_dump_error = req->resume_on_dump_error; + if (!req->has_pid) { req->has_pid = true; req->pid = ids.pid; diff --git a/criu/crtools.c b/criu/crtools.c index 94657f4186..6609ace0e2 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -394,6 +394,8 @@ int main(int argc, char *argv[], char *envp[]) " -d|--restore-detached detach after restore\n" " -S|--restore-sibling restore root task as sibling\n" " -s|--leave-stopped leave tasks in stopped state after checkpoint\n" + " --no-resume-on-error\n" + " don't resume tasks on dump failure if they were stopped\n" " -R|--leave-running leave tasks in running state after checkpoint\n" " -D|--images-dir DIR directory for image files\n" " --pidfile FILE write root task, service or page-server pid to FILE\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 60cf9437e6..a8da23f392 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -131,6 +131,7 @@ struct cr_options { bool daemon_mode; }; int restore_sibling; + int resume_on_dump_error; bool ext_unix_sk; int shell_job; int handle_file_locks; diff --git a/images/rpc.proto b/images/rpc.proto index 1a4722a9ce..b7f133534c 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -145,6 +145,7 @@ message criu_opts { optional bool leave_stopped = 69; optional bool display_stats = 70; optional bool log_to_stderr = 71; + optional bool resume_on_dump_error = 72 [default = true]; /* optional bool check_mounts = 128; */ }